diff --git a/.github/scripts/node-addon/package-optional.json b/.github/scripts/node-addon/package-optional.json
index b3c71f9dad..d2db2e1920 100644
--- a/.github/scripts/node-addon/package-optional.json
+++ b/.github/scripts/node-addon/package-optional.json
@@ -1,7 +1,7 @@
{
"name": "sherpa-onnx-PLATFORM2-ARCH",
"version": "SHERPA_ONNX_VERSION",
- "description": "Speech-to-text and text-to-speech using Next-gen Kaldi without internet connection",
+ "description": "Speech-to-text, text-to-speech, and speaker diarization using Next-gen Kaldi without internet connection",
"main": "index.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
@@ -16,8 +16,18 @@
"transcription",
"real-time speech recognition",
"without internet connection",
+ "locally",
+ "local",
"embedded systems",
"open source",
+ "diarization",
+ "speaker diarization",
+ "speaker recognition",
+ "speaker",
+ "speaker segmentation",
+ "speaker verification",
+ "spoken language identification",
+ "sherpa",
"zipformer",
"asr",
"tts",
@@ -30,13 +40,13 @@
"offline",
"privacy",
"open source",
- "vad",
- "speaker id",
- "language id",
- "node-addon-api",
"streaming speech recognition",
"speech",
- "recognition"
+ "recognition",
+ "vad",
+ "node-addon-api",
+ "speaker id",
+ "language id"
],
"author": "The next-gen Kaldi team",
"license": "Apache-2.0",
diff --git a/.github/scripts/node-addon/package.json b/.github/scripts/node-addon/package.json
index 0444552fc3..bc2d89e89c 100644
--- a/.github/scripts/node-addon/package.json
+++ b/.github/scripts/node-addon/package.json
@@ -1,7 +1,7 @@
{
"name": "sherpa-onnx-node",
"version": "SHERPA_ONNX_VERSION",
- "description": "Speech-to-text and text-to-speech using Next-gen Kaldi without internet connection",
+ "description": "Speech-to-text, text-to-speech, and speaker diarization using Next-gen Kaldi without internet connection",
"main": "sherpa-onnx.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
@@ -16,8 +16,18 @@
"transcription",
"real-time speech recognition",
"without internet connection",
+ "locally",
+ "local",
"embedded systems",
"open source",
+ "diarization",
+ "speaker diarization",
+ "speaker recognition",
+ "speaker",
+ "speaker segmentation",
+ "speaker verification",
+ "spoken language identification",
+ "sherpa",
"zipformer",
"asr",
"tts",
@@ -30,13 +40,13 @@
"offline",
"privacy",
"open source",
- "vad",
- "speaker id",
- "language id",
- "node-addon-api",
"streaming speech recognition",
"speech",
- "recognition"
+ "recognition",
+ "vad",
+ "node-addon-api",
+ "speaker id",
+ "language id"
],
"author": "The next-gen Kaldi team",
"license": "Apache-2.0",
diff --git a/.github/scripts/test-cxx-api.sh b/.github/scripts/test-cxx-api.sh
new file mode 100755
index 0000000000..aedf161337
--- /dev/null
+++ b/.github/scripts/test-cxx-api.sh
@@ -0,0 +1,42 @@
+#!/usr/bin/env bash
+
+set -ex
+
+log() {
+ # This function is from espnet
+ local fname=${BASH_SOURCE[1]##*/}
+ echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
+}
+
+echo "CXX_STREAMING_ZIPFORMER_EXE is $CXX_STREAMING_ZIPFORMER_EXE"
+echo "CXX_WHISPER_EXE is $CXX_WHISPER_EXE"
+echo "CXX_SENSE_VOICE_EXE is $CXX_SENSE_VOICE_EXE"
+echo "PATH: $PATH"
+
+log "------------------------------------------------------------"
+log "Test streaming zipformer CXX API"
+log "------------------------------------------------------------"
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
+tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
+rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
+$CXX_STREAMING_ZIPFORMER_EXE
+rm -rf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20
+
+log "------------------------------------------------------------"
+log "Test Whisper CXX API"
+log "------------------------------------------------------------"
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
+tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
+rm sherpa-onnx-whisper-tiny.en.tar.bz2
+$CXX_WHISPER_EXE
+rm -rf sherpa-onnx-whisper-tiny.en
+
+log "------------------------------------------------------------"
+log "Test SenseVoice CXX API"
+log "------------------------------------------------------------"
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
+tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
+rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
+
+$CXX_SENSE_VOICE_EXE
+rm -rf sherpa-onnx-sense-voice-*
diff --git a/.github/scripts/test-dart.sh b/.github/scripts/test-dart.sh
index 0aff2085e7..27199ae9f7 100755
--- a/.github/scripts/test-dart.sh
+++ b/.github/scripts/test-dart.sh
@@ -4,6 +4,38 @@ set -ex
cd dart-api-examples
+pushd tts
+
+echo '----------kokoro and matcha tts----------'
+./run-kokoro-zh-en.sh
+./run-kokoro-en.sh
+./run-matcha-zh.sh
+./run-matcha-en.sh
+ls -lh *.wav
+rm -rf matcha-icefall-*
+rm *.onnx
+
+echo '----------piper tts----------'
+./run-piper.sh
+rm -rf vits-piper-*
+
+echo '----------coqui tts----------'
+./run-coqui.sh
+rm -rf vits-coqui-*
+
+echo '----------zh tts----------'
+./run-vits-zh.sh
+rm -rf sherpa-onnx-*
+
+ls -lh *.wav
+
+popd # tts
+
+pushd speaker-diarization
+echo '----------speaker diarization----------'
+./run.sh
+popd
+
pushd speaker-identification
echo '----------3d speaker----------'
./run-3d-speaker.sh
@@ -31,6 +63,10 @@ echo "----zipformer transducer----"
./run-zipformer-transducer.sh
rm -rf sherpa-onnx-*
+echo "----moonshine----"
+./run-moonshine.sh
+rm -rf sherpa-onnx-*
+
echo "----whisper----"
./run-whisper.sh
rm -rf sherpa-onnx-*
@@ -72,6 +108,10 @@ echo '----------TeleSpeech CTC----------'
./run-telespeech-ctc.sh
rm -rf sherpa-onnx-*
+echo '----------moonshine----------'
+./run-moonshine.sh
+rm -rf sherpa-onnx-*
+
echo '----------whisper----------'
./run-whisper.sh
rm -rf sherpa-onnx-*
@@ -93,22 +133,6 @@ rm -rf sherpa-onnx-*
popd # non-streaming-asr
-pushd tts
-
-echo '----------piper tts----------'
-./run-piper.sh
-rm -rf vits-piper-*
-
-echo '----------coqui tts----------'
-./run-coqui.sh
-rm -rf vits-coqui-*
-
-echo '----------zh tts----------'
-./run-zh.sh
-rm -rf sherpa-onnx-*
-
-popd # tts
-
pushd streaming-asr
echo '----------streaming zipformer ctc HLG----------'
diff --git a/.github/scripts/test-dot-net.sh b/.github/scripts/test-dot-net.sh
index c397fc0cdf..aa41ad985d 100755
--- a/.github/scripts/test-dot-net.sh
+++ b/.github/scripts/test-dot-net.sh
@@ -2,7 +2,41 @@
cd dotnet-examples/
-cd ./offline-decode-files
+cd ./kokoro-tts
+./run-kokoro.sh
+ls -lh
+
+cd ../offline-tts
+./run-matcha-zh.sh
+ls -lh *.wav
+./run-matcha-en.sh
+ls -lh *.wav
+./run-aishell3.sh
+ls -lh *.wav
+./run-piper.sh
+ls -lh *.wav
+./run-hf-fanchen.sh
+ls -lh *.wav
+ls -lh
+
+pushd ../..
+
+mkdir tts
+
+cp -v dotnet-examples/kokoro-tts/*.wav ./tts
+cp -v dotnet-examples/offline-tts/*.wav ./tts
+popd
+
+cd ../offline-speaker-diarization
+./run.sh
+rm -rfv *.onnx
+rm -fv *.wav
+rm -rfv sherpa-onnx-pyannote-*
+
+cd ../offline-decode-files
+./run-moonshine.sh
+rm -rf sherpa-onnx-*
+
./run-sense-voice-ctc.sh
rm -rf sherpa-onnx-*
@@ -67,14 +101,4 @@ cd ../spoken-language-identification
./run.sh
rm -rf sherpa-onnx-*
-cd ../offline-tts
-./run-aishell3.sh
-./run-piper.sh
-./run-hf-fanchen.sh
-ls -lh
-
-cd ../..
-
-mkdir tts
-cp dotnet-examples/offline-tts/*.wav ./tts
diff --git a/.github/scripts/test-nodejs-addon-npm.sh b/.github/scripts/test-nodejs-addon-npm.sh
index a46e2de8ed..53db04d739 100755
--- a/.github/scripts/test-nodejs-addon-npm.sh
+++ b/.github/scripts/test-nodejs-addon-npm.sh
@@ -10,7 +10,34 @@ arch=$(node -p "require('os').arch()")
platform=$(node -p "require('os').platform()")
node_version=$(node -p "process.versions.node.split('.')[0]")
-echo "----------non-streaming asr + vad----------"
+echo "----------non-streaming asr moonshine + vad----------"
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
+
+node ./test_vad_with_non_streaming_asr_moonshine.js
+rm -rf sherpa-onnx-*
+rm *.wav
+rm *.onnx
+
+echo "----------non-streaming speaker diarization----------"
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav
+
+node ./test_offline_speaker_diarization.js
+
+rm -rfv *.onnx *.wav sherpa-onnx-pyannote-*
+
+echo "----------non-streaming asr whisper + vad----------"
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
rm sherpa-onnx-whisper-tiny.en.tar.bz2
@@ -58,6 +85,41 @@ fi
echo "----------tts----------"
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2
+tar xf kokoro-multi-lang-v1_0.tar.bz2
+rm kokoro-multi-lang-v1_0.tar.bz2
+
+node ./test_tts_non_streaming_kokoro_zh_en.js
+ls -lh *.wav
+rm -rf kokoro-multi-lang-v1_0
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2
+tar xf kokoro-en-v0_19.tar.bz2
+rm kokoro-en-v0_19.tar.bz2
+
+node ./test_tts_non_streaming_kokoro_en.js
+ls -lh *.wav
+rm -rf kokoro-en-v0_19
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
+tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
+rm matcha-icefall-en_US-ljspeech.tar.bz2
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
+
+node ./test_tts_non_streaming_matcha_icefall_en.js
+rm hifigan_v2.onnx
+rm -rf matcha-icefall-en_US-ljspeech
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
+tar xvf matcha-icefall-zh-baker.tar.bz2
+rm matcha-icefall-zh-baker.tar.bz2
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
+
+node ./test_tts_non_streaming_matcha_icefall_zh.js
+rm hifigan_v2.onnx
+rm -rf matcha-icefall-zh-baker
+ls -lh *.wav
+
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_GB-cori-medium.tar.bz2
tar xf vits-piper-en_GB-cori-medium.tar.bz2
rm vits-piper-en_GB-cori-medium.tar.bz2
@@ -204,6 +266,11 @@ rm sherpa-onnx-whisper-tiny.en.tar.bz2
node ./test_asr_non_streaming_whisper.js
rm -rf sherpa-onnx-whisper-tiny.en
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+node ./test_asr_non_streaming_moonshine.js
+rm -rf sherpa-onnx-*
ls -lh
diff --git a/.github/scripts/test-nodejs-npm.sh b/.github/scripts/test-nodejs-npm.sh
index c41a0de658..536310af7b 100755
--- a/.github/scripts/test-nodejs-npm.sh
+++ b/.github/scripts/test-nodejs-npm.sh
@@ -9,6 +9,94 @@ git status
ls -lh
ls -lh node_modules
+# offline tts
+#
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2
+tar xf kokoro-multi-lang-v1_0.tar.bz2
+rm kokoro-multi-lang-v1_0.tar.bz2
+
+node ./test-offline-tts-kokoro-zh-en.js
+ls -lh *.wav
+rm -rf kokoro-multi-lang-v1_0
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2
+tar xf kokoro-en-v0_19.tar.bz2
+rm kokoro-en-v0_19.tar.bz2
+
+node ./test-offline-tts-kokoro-en.js
+rm -rf kokoro-en-v0_19
+
+ls -lh
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
+tar xvf matcha-icefall-zh-baker.tar.bz2
+rm matcha-icefall-zh-baker.tar.bz2
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
+
+node ./test-offline-tts-matcha-zh.js
+
+rm -rf matcha-icefall-zh-baker
+rm hifigan_v2.onnx
+
+echo "---"
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
+tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
+rm matcha-icefall-en_US-ljspeech.tar.bz2
+
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
+
+node ./test-offline-tts-matcha-en.js
+
+rm -rf matcha-icefall-en_US-ljspeech
+rm hifigan_v2.onnx
+
+echo "---"
+
+curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2
+tar xf vits-piper-en_US-amy-low.tar.bz2
+node ./test-offline-tts-vits-en.js
+rm -rf vits-piper-en_US-amy-low*
+
+echo "---"
+
+curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
+tar xvf vits-icefall-zh-aishell3.tar.bz2
+node ./test-offline-tts-vits-zh.js
+rm -rf vits-icefall-zh-aishell3*
+
+ls -lh *.wav
+
+echo '-----speaker diarization----------'
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav
+
+node ./test-offline-speaker-diarization.js
+rm -rfv *.wav *.onnx sherpa-onnx-pyannote-*
+
+echo '-----vad+moonshine----------'
+
+curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
+tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
+rm sherpa-onnx-whisper-tiny.en.tar.bz2
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
+node ./test-vad-with-non-streaming-asr-whisper.js
+rm Obama.wav
+rm silero_vad.onnx
+rm -rf sherpa-onnx-moonshine-*
+
echo '-----vad+whisper----------'
curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
@@ -78,6 +166,13 @@ rm sherpa-onnx-whisper-tiny.en.tar.bz2
node ./test-offline-whisper.js
rm -rf sherpa-onnx-whisper-tiny.en
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+
+node ./test-offline-moonshine.js
+rm -rf sherpa-onnx-moonshine-*
+
# online asr
curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
tar xvf sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
@@ -111,15 +206,3 @@ tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
node ./test-online-zipformer2-ctc-hlg.js
rm -rf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18
-
-# offline tts
-
-curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2
-tar xf vits-piper-en_US-amy-low.tar.bz2
-node ./test-offline-tts-en.js
-rm -rf vits-piper-en_US-amy-low*
-
-curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
-tar xvf vits-icefall-zh-aishell3.tar.bz2
-node ./test-offline-tts-zh.js
-rm -rf vits-icefall-zh-aishell3*
diff --git a/.github/scripts/test-offline-ctc.sh b/.github/scripts/test-offline-ctc.sh
index 57208e9da2..f85b585398 100755
--- a/.github/scripts/test-offline-ctc.sh
+++ b/.github/scripts/test-offline-ctc.sh
@@ -15,6 +15,21 @@ echo "PATH: $PATH"
which $EXE
+log "------------------------------------------------------------"
+log "Run NeMo GigaAM Russian models"
+log "------------------------------------------------------------"
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24.tar.bz2
+tar xvf sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24.tar.bz2
+rm sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24.tar.bz2
+
+$EXE \
+ --nemo-ctc-model=./sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24/model.int8.onnx \
+ --tokens=./sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24/tokens.txt \
+ --debug=1 \
+ ./sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24/test_wavs/example.wav
+
+rm -rf sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24
+
log "------------------------------------------------------------"
log "Run SenseVoice models"
log "------------------------------------------------------------"
diff --git a/.github/scripts/test-offline-moonshine.sh b/.github/scripts/test-offline-moonshine.sh
new file mode 100755
index 0000000000..1768e82ecd
--- /dev/null
+++ b/.github/scripts/test-offline-moonshine.sh
@@ -0,0 +1,49 @@
+#!/usr/bin/env bash
+
+set -e
+
+log() {
+ # This function is from espnet
+ local fname=${BASH_SOURCE[1]##*/}
+ echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
+}
+
+export GIT_CLONE_PROTECTION_ACTIVE=false
+
+echo "EXE is $EXE"
+echo "PATH: $PATH"
+
+which $EXE
+
+names=(
+tiny
+base
+)
+
+for name in ${names[@]}; do
+ log "------------------------------------------------------------"
+ log "Run $name"
+ log "------------------------------------------------------------"
+
+ repo_url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-$name-en-int8.tar.bz2
+ curl -SL -O $repo_url
+ tar xvf sherpa-onnx-moonshine-$name-en-int8.tar.bz2
+ rm sherpa-onnx-moonshine-$name-en-int8.tar.bz2
+ repo=sherpa-onnx-moonshine-$name-en-int8
+ log "Start testing ${repo_url}"
+
+ log "test int8 onnx"
+
+ time $EXE \
+ --moonshine-preprocessor=$repo/preprocess.onnx \
+ --moonshine-encoder=$repo/encode.int8.onnx \
+ --moonshine-uncached-decoder=$repo/uncached_decode.int8.onnx \
+ --moonshine-cached-decoder=$repo/cached_decode.int8.onnx \
+ --tokens=$repo/tokens.txt \
+ --num-threads=2 \
+ $repo/test_wavs/0.wav \
+ $repo/test_wavs/1.wav \
+ $repo/test_wavs/8k.wav
+
+ rm -rf $repo
+done
diff --git a/.github/scripts/test-offline-tts.sh b/.github/scripts/test-offline-tts.sh
index d3d35df2cb..baa2b37bb9 100755
--- a/.github/scripts/test-offline-tts.sh
+++ b/.github/scripts/test-offline-tts.sh
@@ -18,6 +18,87 @@ which $EXE
# test waves are saved in ./tts
mkdir ./tts
+log "------------------------------------------------------------"
+log "kokoro-en-v0_19"
+log "------------------------------------------------------------"
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2
+tar xf kokoro-en-v0_19.tar.bz2
+rm kokoro-en-v0_19.tar.bz2
+
+# mapping of sid to voice name
+# 0->af, 1->af_bella, 2->af_nicole, 3->af_sarah, 4->af_sky, 5->am_adam
+# 6->am_michael, 7->bf_emma, 8->bf_isabella, 9->bm_george, 10->bm_lewis
+
+for sid in $(seq 0 10); do
+ $EXE \
+ --debug=1 \
+ --kokoro-model=./kokoro-en-v0_19/model.onnx \
+ --kokoro-voices=./kokoro-en-v0_19/voices.bin \
+ --kokoro-tokens=./kokoro-en-v0_19/tokens.txt \
+ --kokoro-data-dir=./kokoro-en-v0_19/espeak-ng-data \
+ --num-threads=2 \
+ --sid=$sid \
+ --output-filename="./tts/kokoro-$sid.wav" \
+ "Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be a statesman, a businessman, an official, or a scholar."
+done
+rm -rf kokoro-en-v0_19
+
+log "------------------------------------------------------------"
+log "matcha-icefall-en_US-ljspeech"
+log "------------------------------------------------------------"
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
+tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
+rm matcha-icefall-en_US-ljspeech.tar.bz2
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
+
+$EXE \
+ --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \
+ --matcha-vocoder=./hifigan_v2.onnx \
+ --matcha-tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \
+ --matcha-data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \
+ --num-threads=2 \
+ --output-filename=./tts/matcha-ljspeech-1.wav \
+ --debug=1 \
+ "Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar."
+
+rm hifigan_v2.onnx
+rm -rf matcha-icefall-en_US-ljspeech
+
+log "------------------------------------------------------------"
+log "matcha-icefall-zh-baker"
+log "------------------------------------------------------------"
+curl -O -SL https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
+tar xvf matcha-icefall-zh-baker.tar.bz2
+rm matcha-icefall-zh-baker.tar.bz2
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
+
+$EXE \
+ --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \
+ --matcha-vocoder=./hifigan_v2.onnx \
+ --matcha-lexicon=./matcha-icefall-zh-baker/lexicon.txt \
+ --matcha-tokens=./matcha-icefall-zh-baker/tokens.txt \
+ --matcha-dict-dir=./matcha-icefall-zh-baker/dict \
+ --num-threads=2 \
+ --debug=1 \
+ --output-filename=./tts/matcha-baker-zh-1.wav \
+ '小米的使命是,始终坚持做"感动人心、价格厚道"的好产品,让全球每个人都能享受科技带来的美好生活'
+
+$EXE \
+ --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \
+ --matcha-vocoder=./hifigan_v2.onnx \
+ --matcha-lexicon=./matcha-icefall-zh-baker/lexicon.txt \
+ --matcha-tokens=./matcha-icefall-zh-baker/tokens.txt \
+ --matcha-dict-dir=./matcha-icefall-zh-baker/dict \
+ --num-threads=2 \
+ --debug=1 \
+ --output-filename=./tts/matcha-baker-zh-2.wav \
+ "当夜幕降临,星光点点,伴随着微风拂面,我在静谧中感受着时光的流转,思念如涟漪荡漾,梦境如画卷展开,我与自然融为一体,沉静在这片宁静的美丽之中,感受着生命的奇迹与温柔。"
+
+rm hifigan_v2.onnx
+rm -rf matcha-icefall-zh-baker
+
log "------------------------------------------------------------"
log "vits-piper-en_US-amy-low"
log "------------------------------------------------------------"
diff --git a/.github/scripts/test-python.sh b/.github/scripts/test-python.sh
index de7297f2c3..dd4da51207 100755
--- a/.github/scripts/test-python.sh
+++ b/.github/scripts/test-python.sh
@@ -8,6 +8,52 @@ log() {
echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}
+log "test offline zipformer (byte-level bpe, Chinese+English)"
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-zh-en-2023-11-22.tar.bz2
+tar xvf sherpa-onnx-zipformer-zh-en-2023-11-22.tar.bz2
+rm sherpa-onnx-zipformer-zh-en-2023-11-22.tar.bz2
+
+repo=sherpa-onnx-zipformer-zh-en-2023-11-22
+
+./python-api-examples/offline-decode-files.py \
+ --tokens=$repo/tokens.txt \
+ --encoder=$repo/encoder-epoch-34-avg-19.int8.onnx \
+ --decoder=$repo/decoder-epoch-34-avg-19.onnx \
+ --joiner=$repo/joiner-epoch-34-avg-19.int8.onnx \
+ --num-threads=2 \
+ --decoding-method=greedy_search \
+ --debug=true \
+ $repo/test_wavs/0.wav \
+ $repo/test_wavs/1.wav \
+ $repo/test_wavs/2.wav
+
+rm -rf sherpa-onnx-zipformer-zh-en-2023-11-22
+
+log "test offline Moonshine"
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+
+python3 ./python-api-examples/offline-moonshine-decode-files.py
+
+rm -rf sherpa-onnx-moonshine-tiny-en-int8
+
+log "test offline speaker diarization"
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav
+
+python3 ./python-api-examples/offline-speaker-diarization.py
+
+rm -rf *.wav *.onnx ./sherpa-onnx-pyannote-segmentation-3-0
+
+
log "test_clustering"
pushd /tmp/
mkdir test-cluster
@@ -221,6 +267,87 @@ log "Offline TTS test"
# test waves are saved in ./tts
mkdir ./tts
+log "kokoro-multi-lang-v1_0 test"
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2
+tar xf kokoro-multi-lang-v1_0.tar.bz2
+rm kokoro-multi-lang-v1_0.tar.bz2
+
+python3 ./python-api-examples/offline-tts.py \
+ --debug=1 \
+ --kokoro-model=./kokoro-multi-lang-v1_0/model.onnx \
+ --kokoro-voices=./kokoro-multi-lang-v1_0/voices.bin \
+ --kokoro-tokens=./kokoro-multi-lang-v1_0/tokens.txt \
+ --kokoro-data-dir=./kokoro-multi-lang-v1_0/espeak-ng-data \
+ --kokoro-dict-dir=./kokoro-multi-lang-v1_0/dict \
+ --kokoro-lexicon=./kokoro-multi-lang-v1_0/lexicon-us-en.txt,./kokoro-multi-lang-v1_0/lexicon-zh.txt \
+ --num-threads=2 \
+ --sid=18 \
+ --output-filename="./tts/kokoro-18-zh-en.wav" \
+ "中英文语音合成测试。This is generated by next generation Kaldi using Kokoro without Misaki. 你觉得中英文说的如何呢?"
+
+rm -rf kokoro-multi-lang-v1_0
+
+log "kokoro-en-v0_19 test"
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2
+tar xf kokoro-en-v0_19.tar.bz2
+rm kokoro-en-v0_19.tar.bz2
+
+python3 ./python-api-examples/offline-tts.py \
+ --debug=1 \
+ --kokoro-model=./kokoro-en-v0_19/model.onnx \
+ --kokoro-voices=./kokoro-en-v0_19/voices.bin \
+ --kokoro-tokens=./kokoro-en-v0_19/tokens.txt \
+ --kokoro-data-dir=./kokoro-en-v0_19/espeak-ng-data \
+ --num-threads=2 \
+ --sid=10 \
+ --output-filename="./tts/kokoro-10.wav" \
+ "Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be a statesman, a businessman, an official, or a scholar."
+
+rm -rf kokoro-en-v0_19
+
+log "matcha-ljspeech-en test"
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
+tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
+rm matcha-icefall-en_US-ljspeech.tar.bz2
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
+
+python3 ./python-api-examples/offline-tts.py \
+ --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \
+ --matcha-vocoder=./hifigan_v2.onnx \
+ --matcha-tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \
+ --matcha-data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \
+ --output-filename=./tts/test-matcha-ljspeech-en.wav \
+ --num-threads=2 \
+ "Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar."
+
+rm hifigan_v2.onnx
+rm -rf matcha-icefall-en_US-ljspeech
+
+log "matcha-baker-zh test"
+
+curl -O -SL https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
+tar xvf matcha-icefall-zh-baker.tar.bz2
+rm matcha-icefall-zh-baker.tar.bz2
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
+
+python3 ./python-api-examples/offline-tts.py \
+ --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \
+ --matcha-vocoder=./hifigan_v2.onnx \
+ --matcha-lexicon=./matcha-icefall-zh-baker/lexicon.txt \
+ --matcha-tokens=./matcha-icefall-zh-baker/tokens.txt \
+ --tts-rule-fsts=./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst \
+ --matcha-dict-dir=./matcha-icefall-zh-baker/dict \
+ --output-filename=./tts/test-matcha-baker-zh.wav \
+ "某某银行的副行长和一些行政领导表示,他们去过长江和长白山; 经济不断增长。2024年12月31号,拨打110或者18920240511。123456块钱。"
+
+rm -rf matcha-icefall-zh-baker
+rm hifigan_v2.onnx
+
log "vits-ljs test"
curl -LS -O https://huggingface.co/csukuangfj/vits-ljs/resolve/main/vits-ljs.onnx
@@ -468,53 +595,19 @@ echo "sherpa_onnx version: $sherpa_onnx_version"
pwd
ls -lh
-repo=sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01
-log "Start testing ${repo}"
-
-pushd $dir
-curl -LS -O https://github.com/pkufool/keyword-spotting-models/releases/download/v0.1/sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01.tar.bz
-tar xf sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01.tar.bz
-rm sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01.tar.bz
-popd
-
-repo=$dir/$repo
-ls -lh $repo
-
-python3 ./python-api-examples/keyword-spotter.py \
- --tokens=$repo/tokens.txt \
- --encoder=$repo/encoder-epoch-12-avg-2-chunk-16-left-64.onnx \
- --decoder=$repo/decoder-epoch-12-avg-2-chunk-16-left-64.onnx \
- --joiner=$repo/joiner-epoch-12-avg-2-chunk-16-left-64.onnx \
- --keywords-file=$repo/test_wavs/test_keywords.txt \
- $repo/test_wavs/0.wav \
- $repo/test_wavs/1.wav
-
-rm -rf $repo
-
if [[ x$OS != x'windows-latest' ]]; then
echo "OS: $OS"
repo=sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01
log "Start testing ${repo}"
- pushd $dir
curl -LS -O https://github.com/pkufool/keyword-spotting-models/releases/download/v0.1/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz
tar xf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz
rm sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz
- popd
- repo=$dir/$repo
ls -lh $repo
- python3 ./python-api-examples/keyword-spotter.py \
- --tokens=$repo/tokens.txt \
- --encoder=$repo/encoder-epoch-12-avg-2-chunk-16-left-64.onnx \
- --decoder=$repo/decoder-epoch-12-avg-2-chunk-16-left-64.onnx \
- --joiner=$repo/joiner-epoch-12-avg-2-chunk-16-left-64.onnx \
- --keywords-file=$repo/test_wavs/test_keywords.txt \
- $repo/test_wavs/3.wav \
- $repo/test_wavs/4.wav \
- $repo/test_wavs/5.wav
+ python3 ./python-api-examples/keyword-spotter.py
python3 sherpa-onnx/python/tests/test_keyword_spotter.py --verbose
diff --git a/.github/scripts/test-speaker-diarization.sh b/.github/scripts/test-speaker-diarization.sh
new file mode 100755
index 0000000000..6d7b2effd0
--- /dev/null
+++ b/.github/scripts/test-speaker-diarization.sh
@@ -0,0 +1,41 @@
+#!/usr/bin/env bash
+
+set -ex
+
+log() {
+ # This function is from espnet
+ local fname=${BASH_SOURCE[1]##*/}
+ echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
+}
+
+echo "EXE is $EXE"
+echo "PATH: $PATH"
+
+which $EXE
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav
+
+log "specify number of clusters"
+$EXE \
+ --clustering.num-clusters=4 \
+ --segmentation.pyannote-model=./sherpa-onnx-pyannote-segmentation-3-0/model.onnx \
+ --embedding.model=./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx \
+ ./0-four-speakers-zh.wav
+
+log "specify threshold for clustering"
+
+$EXE \
+ --clustering.cluster-threshold=0.90 \
+ --segmentation.pyannote-model=./sherpa-onnx-pyannote-segmentation-3-0/model.onnx \
+ --embedding.model=./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx \
+ ./0-four-speakers-zh.wav
+
+rm -rf sherpa-onnx-pyannote-*
+rm -fv *.onnx
+rm -fv *.wav
diff --git a/.github/scripts/test-swift.sh b/.github/scripts/test-swift.sh
index 18c9bed418..65fe4588aa 100755
--- a/.github/scripts/test-swift.sh
+++ b/.github/scripts/test-swift.sh
@@ -7,6 +7,31 @@ echo "pwd: $PWD"
cd swift-api-examples
ls -lh
+./run-tts-vits.sh
+ls -lh
+rm -rf vits-piper-*
+
+./run-tts-kokoro-zh-en.sh
+ls -lh
+rm -rf kokoro-multi-*
+
+./run-tts-kokoro-en.sh
+ls -lh
+rm -rf kokoro-en-*
+
+./run-tts-matcha-zh.sh
+ls -lh
+rm -rf matcha-icefall-*
+
+./run-tts-matcha-en.sh
+ls -lh
+rm -rf matcha-icefall-*
+
+./run-speaker-diarization.sh
+rm -rf *.onnx
+rm -rf sherpa-onnx-pyannote-segmentation-3-0
+rm -fv *.wav
+
./run-add-punctuations.sh
rm ./add-punctuations
rm -rf sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12
@@ -33,8 +58,9 @@ popd
ls -lh /Users/fangjun/Desktop
cat /Users/fangjun/Desktop/Obama.srt
-./run-tts.sh
-ls -lh
+rm -rf sherpa-onnx-whisper*
+rm -f *.onnx
+rm /Users/fangjun/Desktop/Obama.wav
./run-decode-file.sh
rm decode-file
@@ -43,5 +69,4 @@ sed -i.bak '20d' ./decode-file.swift
./run-decode-file-non-streaming.sh
-
ls -lh
diff --git a/.github/workflows/aarch64-linux-gnu-shared.yaml b/.github/workflows/aarch64-linux-gnu-shared.yaml
index 5e82d9b3ad..1851645251 100644
--- a/.github/workflows/aarch64-linux-gnu-shared.yaml
+++ b/.github/workflows/aarch64-linux-gnu-shared.yaml
@@ -9,7 +9,6 @@ on:
- 'v[0-9]+.[0-9]+.[0-9]+*'
paths:
- '.github/workflows/aarch64-linux-gnu-shared.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'sherpa-onnx/c-api/*'
@@ -19,7 +18,6 @@ on:
- master
paths:
- '.github/workflows/aarch64-linux-gnu-shared.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'sherpa-onnx/c-api/*'
@@ -34,11 +32,20 @@ concurrency:
jobs:
aarch64_linux_gnu_shared:
runs-on: ${{ matrix.os }}
- name: aarch64 shared lib test
+ name: aarch64 shared GPU ${{ matrix.gpu }} ${{ matrix.onnxruntime_version }}
strategy:
fail-fast: false
matrix:
- os: [ubuntu-latest]
+ include:
+ - os: ubuntu-latest
+ gpu: ON
+ onnxruntime_version: "1.11.0"
+ - os: ubuntu-latest
+ gpu: ON
+ onnxruntime_version: "1.16.0"
+ - os: ubuntu-latest
+ gpu: OFF
+ onnxruntime_version: ""
steps:
- uses: actions/checkout@v4
@@ -61,7 +68,7 @@ jobs:
if: steps.cache-qemu.outputs.cache-hit != 'true'
run: |
sudo apt-get update
- sudo apt-get install autoconf automake autotools-dev ninja-build
+          sudo apt-get install autoconf automake autotools-dev ninja-build libglib2.0-dev
- name: checkout-qemu
if: steps.cache-qemu.outputs.cache-hit != 'true'
@@ -79,15 +86,24 @@ jobs:
make -j2
make install
- - name: cache-toolchain
- id: cache-toolchain
+ - name: cache-toolchain (CPU)
+ if: matrix.gpu == 'OFF'
+ id: cache-toolchain-cpu
uses: actions/cache@v4
with:
path: toolchain
key: gcc-linaro-7.5.0-2019.12-x86_64_aarch64-linux-gnu.tar.xz
- - name: Download toolchain
- if: steps.cache-toolchain.outputs.cache-hit != 'true'
+ - name: cache-toolchain (GPU)
+ if: matrix.gpu == 'ON'
+ id: cache-toolchain-gpu
+ uses: actions/cache@v4
+ with:
+ path: toolchain
+ key: gcc-arm-10.3-2021.07-x86_64-aarch64-none-linux-gnu.tar.xz
+
+ - name: Download toolchain (CPU, gcc 7.5)
+ if: steps.cache-toolchain-cpu.outputs.cache-hit != 'true' && matrix.gpu == 'OFF'
shell: bash
run: |
wget -qq https://huggingface.co/csukuangfj/sherpa-ncnn-toolchains/resolve/main/gcc-linaro-7.5.0-2019.12-x86_64_aarch64-linux-gnu.tar.xz
@@ -95,6 +111,15 @@ jobs:
mkdir $GITHUB_WORKSPACE/toolchain
tar xf ./gcc-linaro-7.5.0-2019.12-x86_64_aarch64-linux-gnu.tar.xz --strip-components 1 -C $GITHUB_WORKSPACE/toolchain
+ - name: Download toolchain (GPU, gcc 10.3)
+ if: steps.cache-toolchain-gpu.outputs.cache-hit != 'true' && matrix.gpu == 'ON'
+ shell: bash
+ run: |
+ wget -qq https://huggingface.co/csukuangfj/sherpa-ncnn-toolchains/resolve/main/gcc-arm-10.3-2021.07-x86_64-aarch64-none-linux-gnu.tar.xz
+
+ mkdir $GITHUB_WORKSPACE/toolchain
+ tar xf ./gcc-arm-10.3-2021.07-x86_64-aarch64-none-linux-gnu.tar.xz --strip-components 1 -C $GITHUB_WORKSPACE/toolchain
+
- name: Set environment variable
if: steps.cache-build-result.outputs.cache-hit != 'true'
shell: bash
@@ -103,19 +128,31 @@ jobs:
echo "$GITHUB_WORKSPACE/bin" >> "$GITHUB_PATH"
ls -lh "$GITHUB_WORKSPACE/toolchain/bin"
- echo "CC=aarch64-linux-gnu-gcc" >> "$GITHUB_ENV"
- echo "CXX=aarch64-linux-gnu-g++" >> "$GITHUB_ENV"
+ if [[ ${{ matrix.gpu }} == OFF ]]; then
+ echo "CC=aarch64-linux-gnu-gcc" >> "$GITHUB_ENV"
+ echo "CXX=aarch64-linux-gnu-g++" >> "$GITHUB_ENV"
+ else
+ echo "CC=aarch64-none-linux-gnu-gcc" >> "$GITHUB_ENV"
+ echo "CXX=aarch64-none-linux-gnu-g++" >> "$GITHUB_ENV"
+ fi
- name: Display toolchain info
shell: bash
run: |
- aarch64-linux-gnu-gcc --version
+ if [[ ${{ matrix.gpu }} == OFF ]]; then
+ which aarch64-linux-gnu-gcc
+ aarch64-linux-gnu-gcc --version
+ else
+ which aarch64-none-linux-gnu-gcc
+ aarch64-none-linux-gnu-gcc --version
+ fi
- name: Display qemu-aarch64 -h
shell: bash
run: |
export PATH=$GITHUB_WORKSPACE/qemu-install/bin:$PATH
export QEMU_LD_PREFIX=$GITHUB_WORKSPACE/toolchain/aarch64-linux-gnu/libc
+ export QEMU_LD_PREFIX=$GITHUB_WORKSPACE/toolchain/aarch64-none-linux-gnu/libc
qemu-aarch64 -h
- name: build aarch64-linux-gnu
@@ -127,6 +164,8 @@ jobs:
cmake --version
export BUILD_SHARED_LIBS=ON
+ export SHERPA_ONNX_ENABLE_GPU=${{ matrix.gpu }}
+ export SHERPA_ONNX_LINUX_ARM64_GPU_ONNXRUNTIME_VERSION=${{ matrix.onnxruntime_version }}
./build-aarch64-linux-gnu.sh
@@ -140,7 +179,11 @@ jobs:
run: |
export PATH=$GITHUB_WORKSPACE/toolchain/bin:$PATH
export PATH=$GITHUB_WORKSPACE/qemu-install/bin:$PATH
- export QEMU_LD_PREFIX=$GITHUB_WORKSPACE/toolchain/aarch64-linux-gnu/libc
+ if [[ ${{ matrix.gpu }} == OFF ]]; then
+ export QEMU_LD_PREFIX=$GITHUB_WORKSPACE/toolchain/aarch64-linux-gnu/libc
+ else
+ export QEMU_LD_PREFIX=$GITHUB_WORKSPACE/toolchain/aarch64-none-linux-gnu/libc
+ fi
ls -lh ./build-aarch64-linux-gnu/bin
@@ -151,11 +194,20 @@ jobs:
- name: Copy files
shell: bash
run: |
- aarch64-linux-gnu-strip --version
+ if [[ ${{ matrix.gpu }} == OFF ]]; then
+ aarch64-linux-gnu-strip --version
+ else
+ aarch64-none-linux-gnu-strip --version
+ fi
SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
dst=sherpa-onnx-${SHERPA_ONNX_VERSION}-linux-aarch64-shared
+ if [[ ${{ matrix.gpu }} == OFF ]]; then
+ dst=${dst}-cpu
+ else
+ dst=${dst}-gpu-onnxruntime-${{ matrix.onnxruntime_version }}
+ fi
mkdir $dst
cp -a build-aarch64-linux-gnu/install/bin $dst/
@@ -166,7 +218,11 @@ jobs:
ls -lh $dst/bin/
echo "strip"
- aarch64-linux-gnu-strip $dst/bin/*
+ if [[ ${{ matrix.gpu }} == OFF ]]; then
+ aarch64-linux-gnu-strip $dst/bin/*
+ else
+ aarch64-none-linux-gnu-strip $dst/bin/*
+ fi
tree $dst
@@ -174,8 +230,8 @@ jobs:
- uses: actions/upload-artifact@v4
with:
- name: sherpa-onnx-linux-aarch64-shared
- path: sherpa-onnx-*linux-aarch64-shared.tar.bz2
+ name: sherpa-onnx-linux-aarch64-shared-gpu-${{ matrix.gpu }}-onnxruntime-${{ matrix.onnxruntime_version }}
+ path: sherpa-onnx-*linux-aarch64-shared*.tar.bz2
# https://huggingface.co/docs/hub/spaces-github-actions
- name: Publish to huggingface
@@ -193,12 +249,12 @@ jobs:
rm -rf huggingface
export GIT_CLONE_PROTECTION_ACTIVE=false
- GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-libs huggingface
+ GIT_LFS_SKIP_SMUDGE=1 git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-libs huggingface
cd huggingface
mkdir -p aarch64
- cp -v ../sherpa-onnx-*-shared.tar.bz2 ./aarch64
+ cp -v ../sherpa-onnx-*-shared*.tar.bz2 ./aarch64
git status
git lfs track "*.bz2"
diff --git a/.github/workflows/aarch64-linux-gnu-static.yaml b/.github/workflows/aarch64-linux-gnu-static.yaml
index 765e2422f3..66ce6ec244 100644
--- a/.github/workflows/aarch64-linux-gnu-static.yaml
+++ b/.github/workflows/aarch64-linux-gnu-static.yaml
@@ -9,7 +9,6 @@ on:
- 'v[0-9]+.[0-9]+.[0-9]+*'
paths:
- '.github/workflows/aarch64-linux-gnu-static.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'sherpa-onnx/c-api/*'
@@ -19,7 +18,6 @@ on:
- master
paths:
- '.github/workflows/aarch64-linux-gnu-static.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'sherpa-onnx/c-api/*'
@@ -61,7 +59,7 @@ jobs:
if: steps.cache-qemu.outputs.cache-hit != 'true'
run: |
sudo apt-get update
- sudo apt-get install autoconf automake autotools-dev ninja-build
+ sudo apt-get install build-essential zlib1g-dev pkg-config libglib2.0-dev binutils-dev libboost-all-dev autoconf libtool libssl-dev libpixman-1-dev ninja-build
- name: checkout-qemu
if: steps.cache-qemu.outputs.cache-hit != 'true'
@@ -184,7 +182,7 @@ jobs:
rm -rf huggingface
export GIT_CLONE_PROTECTION_ACTIVE=false
- GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-libs huggingface
+ GIT_LFS_SKIP_SMUDGE=1 git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-libs huggingface
cd huggingface
mkdir -p aarch64
diff --git a/.github/workflows/add-new-asr-models.yaml b/.github/workflows/add-new-asr-models.yaml
new file mode 100644
index 0000000000..6bd2230f15
--- /dev/null
+++ b/.github/workflows/add-new-asr-models.yaml
@@ -0,0 +1,61 @@
+name: add-new-asr-models
+
+on:
+ # push:
+ # branches:
+ # - new-asr-models
+ workflow_dispatch:
+
+concurrency:
+ group: add-new-asr-models-${{ github.ref }}
+ cancel-in-progress: true
+
+jobs:
+ add-new-asr-models:
+ runs-on: ${{ matrix.os }}
+ name: New asr models
+ strategy:
+ fail-fast: false
+ matrix:
+ os: [ubuntu-latest]
+
+ steps:
+ - uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+
+ - name: Download icefall-asr-zipformer-multi-zh-en-2023-11-22
+ shell: bash
+ run: |
+ d=sherpa-onnx-zipformer-zh-en-2023-11-22
+ mkdir $d
+ pushd $d
+
+ wget -q https://huggingface.co/zrjin/icefall-asr-zipformer-multi-zh-en-2023-11-22/resolve/main/data/lang_bbpe_2000/tokens.txt
+ wget -q https://huggingface.co/zrjin/icefall-asr-zipformer-multi-zh-en-2023-11-22/resolve/main/data/lang_bbpe_2000/bbpe.model
+ wget -q https://huggingface.co/zrjin/icefall-asr-zipformer-multi-zh-en-2023-11-22/resolve/main/exp/decoder-epoch-34-avg-19.onnx
+ wget -q https://huggingface.co/zrjin/icefall-asr-zipformer-multi-zh-en-2023-11-22/resolve/main/exp/encoder-epoch-34-avg-19.int8.onnx
+ wget -q https://huggingface.co/zrjin/icefall-asr-zipformer-multi-zh-en-2023-11-22/resolve/main/exp/encoder-epoch-34-avg-19.onnx
+ wget -q https://huggingface.co/zrjin/icefall-asr-zipformer-multi-zh-en-2023-11-22/resolve/main/exp/joiner-epoch-34-avg-19.int8.onnx
+ wget -q https://huggingface.co/zrjin/icefall-asr-zipformer-multi-zh-en-2023-11-22/resolve/main/exp/joiner-epoch-34-avg-19.onnx
+
+ mkdir test_wavs
+ cd test_wavs
+ wget -O 0.wav -q https://huggingface.co/zrjin/icefall-asr-zipformer-multi-zh-en-2023-11-22/resolve/main/test_wavs/_1634_210_2577_1_1525157964032_3712259_29.wav
+ wget -O 1.wav -q https://huggingface.co/zrjin/icefall-asr-zipformer-multi-zh-en-2023-11-22/resolve/main/test_wavs/_1634_210_2577_1_1525157964032_3712259_55.wav
+
+ wget -O 2.wav -q https://huggingface.co/zrjin/icefall-asr-zipformer-multi-zh-en-2023-11-22/resolve/main/test_wavs/_1634_210_2577_1_1525157964032_3712259_75.wav
+ popd
+ tar cvjf $d.tar.bz2 $d
+ ls -lh $d
+ rm -rf $d
+
+ - name: Release
+ uses: svenstaro/upload-release-action@v2
+ with:
+ file_glob: true
+ file: ./*.tar.bz2
+ overwrite: true
+ repo_name: k2-fsa/sherpa-onnx
+ repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
+ tag: asr-models
diff --git a/.github/workflows/android-static.yaml b/.github/workflows/android-static.yaml
new file mode 100644
index 0000000000..7dad8128be
--- /dev/null
+++ b/.github/workflows/android-static.yaml
@@ -0,0 +1,296 @@
+# "static" here means that onnxruntime is linked statically,
+# but libsherpa-onnx-jni.so is still built as a shared library
+name: android-static
+
+on:
+ push:
+ branches:
+ - master
+ - android-link-onnxruntime-statically
+ paths:
+ - '.github/workflows/android-static.yaml'
+ - 'cmake/**'
+ - 'sherpa-onnx/csrc/*'
+ - 'sherpa-onnx/jni/*'
+ - 'build-android*.sh'
+ tags:
+ - 'v[0-9]+.[0-9]+.[0-9]+*'
+ pull_request:
+ branches:
+ - master
+ paths:
+ - '.github/workflows/android-static.yaml'
+ - 'cmake/**'
+ - 'sherpa-onnx/csrc/*'
+ - 'sherpa-onnx/jni/*'
+ - 'build-android*.sh'
+
+ workflow_dispatch:
+
+concurrency:
+ group: android-static-${{ github.ref }}
+ cancel-in-progress: true
+
+jobs:
+ build-android-static-libs:
+ name: Android static libs
+ runs-on: ${{ matrix.os }}
+ strategy:
+ fail-fast: false
+ matrix:
+ os: [ubuntu-latest]
+
+ steps:
+ - uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+
+ - name: ccache
+ uses: hendrikmuhs/ccache-action@v1.2
+ with:
+ key: ${{ matrix.os }}-android-jni-static
+
+ - name: Display NDK HOME
+ shell: bash
+ run: |
+ echo "ANDROID_NDK_LATEST_HOME: ${ANDROID_NDK_LATEST_HOME}"
+ ls -lh ${ANDROID_NDK_LATEST_HOME}
+
+ - name: build android arm64-v8a
+ shell: bash
+ run: |
+ export BUILD_SHARED_LIBS=OFF
+
+ export CMAKE_CXX_COMPILER_LAUNCHER=ccache
+ export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
+
+ export ANDROID_NDK=$ANDROID_NDK_LATEST_HOME
+ ./build-android-arm64-v8a.sh
+ mkdir -p jniLibs/arm64-v8a/
+ cp -v ./build-android-arm64-v8a-static/install/lib/*.so ./jniLibs/arm64-v8a/
+ rm -rf ./build-android-arm64-v8a-static/
+
+ - name: build android armv7-eabi
+ shell: bash
+ run: |
+ export BUILD_SHARED_LIBS=OFF
+
+ export CMAKE_CXX_COMPILER_LAUNCHER=ccache
+ export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
+
+ export ANDROID_NDK=$ANDROID_NDK_LATEST_HOME
+ ./build-android-armv7-eabi.sh
+ mkdir -p ./jniLibs/armeabi-v7a/
+ cp -v ./build-android-armv7-eabi-static/install/lib/*.so ./jniLibs/armeabi-v7a/
+ rm -rf ./build-android-armv7-eabi-static
+
+ - name: build android x86_64
+ shell: bash
+ run: |
+ export BUILD_SHARED_LIBS=OFF
+
+ export CMAKE_CXX_COMPILER_LAUNCHER=ccache
+ export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
+
+ export ANDROID_NDK=$ANDROID_NDK_LATEST_HOME
+ ./build-android-x86-64.sh
+ mkdir -p ./jniLibs/x86_64
+ cp -v ./build-android-x86-64-static/install/lib/*.so ./jniLibs/x86_64
+ rm -rf ./build-android-x86-64-static
+
+ - name: build android x86
+ shell: bash
+ run: |
+ export BUILD_SHARED_LIBS=OFF
+
+ export CMAKE_CXX_COMPILER_LAUNCHER=ccache
+ export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
+
+ export ANDROID_NDK=$ANDROID_NDK_LATEST_HOME
+ ./build-android-x86.sh
+ mkdir -p ./jniLibs/x86
+ cp -v ./build-android-x86/install/lib/*.so ./jniLibs/x86
+ rm -rf ./build-android-x86
+
+ - name: Copy files
+ shell: bash
+ run: |
+ SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
+ echo "SHERPA_ONNX_VERSION=$SHERPA_ONNX_VERSION" >> "$GITHUB_ENV"
+
+ filename=sherpa-onnx-${SHERPA_ONNX_VERSION}-android-static-link-onnxruntime.tar.bz2
+
+ tar cjvf $filename ./jniLibs
+
+ ls -lh
+
+ - uses: actions/upload-artifact@v4
+ with:
+ name: sherpa-onnx-android-libs-static
+ path: ./jniLibs
+
+ # https://huggingface.co/docs/hub/spaces-github-actions
+ - name: Publish to huggingface
+ if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && (github.event_name == 'push' || github.event_name == 'workflow_dispatch')
+ env:
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
+ uses: nick-fields/retry@v3
+ with:
+ max_attempts: 20
+ timeout_seconds: 200
+ shell: bash
+ command: |
+ git config --global user.email "csukuangfj@gmail.com"
+ git config --global user.name "Fangjun Kuang"
+ du -h -d1 .
+ ls -lh
+
+ rm -rf huggingface
+ export GIT_CLONE_PROTECTION_ACTIVE=false
+ GIT_LFS_SKIP_SMUDGE=1 git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-libs huggingface
+
+ cd huggingface
+
+ cp -v ../sherpa-onnx-*-android*.tar.bz2 ./
+
+ git status
+ git lfs track "*.bz2"
+
+ git add .
+
+ git commit -m "upload sherpa-onnx-${SHERPA_ONNX_VERSION}-android.tar.bz2"
+
+ git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-libs main
+
+ - name: Release android libs
+ if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
+ uses: svenstaro/upload-release-action@v2
+ with:
+ file_glob: true
+ overwrite: true
+ file: sherpa-onnx-*-android*.tar.bz2
+
+ build-android-aar-static:
+ needs: [build-android-static-libs]
+ name: Android AAR
+ runs-on: ${{ matrix.os }}
+ strategy:
+ fail-fast: false
+ matrix:
+ os: [ubuntu-latest]
+
+ steps:
+ - uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+
+ # https://github.com/actions/setup-java
+ - uses: actions/setup-java@v4
+ with:
+ distribution: 'temurin' # See 'Supported distributions' for available options
+ java-version: '21'
+
+ - name: Display NDK HOME
+ shell: bash
+ run: |
+ echo "ANDROID_NDK_LATEST_HOME: ${ANDROID_NDK_LATEST_HOME}"
+ ls -lh ${ANDROID_NDK_LATEST_HOME}
+
+ - name: Retrieve artifact
+ uses: actions/download-artifact@v4
+ with:
+ name: sherpa-onnx-android-libs-static
+ path: /tmp/jniLibs
+
+ - name: Show jni libs
+ shell: bash
+ run: |
+ ls -lh /tmp/jniLibs
+
+ # drwxr-xr-x 2 runner docker 4.0K Dec 12 06:56 arm64-v8a
+ # drwxr-xr-x 2 runner docker 4.0K Dec 12 06:56 armeabi-v7a
+ # drwxr-xr-x 2 runner docker 4.0K Dec 12 06:56 x86
+ # drwxr-xr-x 2 runner docker 4.0K Dec 12 06:56 x86_64
+ #
+ - name: Copy libs
+ shell: bash
+ run: |
+ for arch in arm64-v8a armeabi-v7a x86 x86_64; do
+ cp -v /tmp/jniLibs/$arch/* android/SherpaOnnxAar/sherpa_onnx/src/main/jniLibs/$arch/
+ done
+
+ - name: Check libs
+ shell: bash
+ run: |
+ ls -lh android/SherpaOnnxAar/sherpa_onnx/src/main/jniLibs/*
+
+ - name: Build aar
+ shell: bash
+ run: |
+ cd android/SherpaOnnxAar
+
+ ./gradlew :sherpa_onnx:assembleRelease
+
+ - name: Display aar
+ shell: bash
+ run: |
+ cd android/SherpaOnnxAar
+
+ ls -lh ./sherpa_onnx/build/outputs/aar/sherpa_onnx-release.aar
+ cp ./sherpa_onnx/build/outputs/aar/sherpa_onnx-release.aar ../../
+
+ - name: Rename aar
+ shell: bash
+ run: |
+ SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
+ echo "SHERPA_ONNX_VERSION=$SHERPA_ONNX_VERSION" >> "$GITHUB_ENV"
+
+ mv sherpa_onnx-release.aar sherpa-onnx-static-link-onnxruntime-${SHERPA_ONNX_VERSION}.aar
+
+ - uses: actions/upload-artifact@v4
+ with:
+ name: sherpa-onnx-android-aar-static
+ path: ./*.aar
+
+ # https://huggingface.co/docs/hub/spaces-github-actions
+ - name: Publish to huggingface
+ if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && (github.event_name == 'push' || github.event_name == 'workflow_dispatch')
+ env:
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
+ uses: nick-fields/retry@v3
+ with:
+ max_attempts: 20
+ timeout_seconds: 200
+ shell: bash
+ command: |
+ git config --global user.email "csukuangfj@gmail.com"
+ git config --global user.name "Fangjun Kuang"
+ du -h -d1 .
+ ls -lh
+
+ rm -rf huggingface
+ export GIT_CLONE_PROTECTION_ACTIVE=false
+ GIT_LFS_SKIP_SMUDGE=1 git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-libs huggingface
+
+ cd huggingface
+ dst=android/aar
+ mkdir -p $dst
+
+ cp -v ../*.aar $dst
+
+ git status
+ git lfs track "*.aar"
+
+ git add .
+
+ git commit -m "upload sherpa-onnx-${SHERPA_ONNX_VERSION}.aar"
+
+ git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-libs main
+
+ - name: Release android aar
+ if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
+ uses: svenstaro/upload-release-action@v2
+ with:
+ file_glob: true
+ overwrite: true
+ file: ./*.aar
diff --git a/.github/workflows/android.yaml b/.github/workflows/android.yaml
index 35dfd6b26f..b7da9b8a60 100644
--- a/.github/workflows/android.yaml
+++ b/.github/workflows/android.yaml
@@ -6,7 +6,6 @@ on:
- master
paths:
- '.github/workflows/android.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'sherpa-onnx/jni/*'
@@ -18,7 +17,6 @@ on:
- master
paths:
- '.github/workflows/android.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'sherpa-onnx/jni/*'
@@ -32,7 +30,7 @@ concurrency:
jobs:
build-android-libs:
- name: Android for ${{ matrix.os }}
+ name: Android libs
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
@@ -44,6 +42,11 @@ jobs:
with:
fetch-depth: 0
+ - name: ccache
+ uses: hendrikmuhs/ccache-action@v1.2
+ with:
+ key: ${{ matrix.os }}-android-jni
+
- name: Display NDK HOME
shell: bash
run: |
@@ -53,37 +56,57 @@ jobs:
- name: build android arm64-v8a
shell: bash
run: |
+ export CMAKE_CXX_COMPILER_LAUNCHER=ccache
+ export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
+
export ANDROID_NDK=$ANDROID_NDK_LATEST_HOME
+ export SHERPA_ONNX_ENABLE_C_API=ON
./build-android-arm64-v8a.sh
mkdir -p jniLibs/arm64-v8a/
cp -v ./build-android-arm64-v8a/install/lib/*.so ./jniLibs/arm64-v8a/
+ cp -v ./build-android-arm64-v8a/install/lib/README.md ./jniLibs/arm64-v8a/
rm -rf ./build-android-arm64-v8a/
- name: build android armv7-eabi
shell: bash
run: |
+ export CMAKE_CXX_COMPILER_LAUNCHER=ccache
+ export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
+
export ANDROID_NDK=$ANDROID_NDK_LATEST_HOME
+ export SHERPA_ONNX_ENABLE_C_API=ON
./build-android-armv7-eabi.sh
mkdir -p ./jniLibs/armeabi-v7a/
cp -v ./build-android-armv7-eabi/install/lib/*.so ./jniLibs/armeabi-v7a/
+ cp -v ./build-android-armv7-eabi/install/lib/README.md ./jniLibs/armeabi-v7a/
rm -rf ./build-android-armv7-eabi
- name: build android x86_64
shell: bash
run: |
+ export CMAKE_CXX_COMPILER_LAUNCHER=ccache
+ export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
+
export ANDROID_NDK=$ANDROID_NDK_LATEST_HOME
+ export SHERPA_ONNX_ENABLE_C_API=ON
./build-android-x86-64.sh
mkdir -p ./jniLibs/x86_64
cp -v ./build-android-x86-64/install/lib/*.so ./jniLibs/x86_64
+ cp -v ./build-android-x86-64/install/lib/README.md ./jniLibs/x86_64
rm -rf ./build-android-x86-64
- name: build android x86
shell: bash
run: |
+ export CMAKE_CXX_COMPILER_LAUNCHER=ccache
+ export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
+
export ANDROID_NDK=$ANDROID_NDK_LATEST_HOME
+ export SHERPA_ONNX_ENABLE_C_API=ON
./build-android-x86.sh
mkdir -p ./jniLibs/x86
cp -v ./build-android-x86/install/lib/*.so ./jniLibs/x86
+ cp -v ./build-android-x86/install/lib/README.md ./jniLibs/x86
rm -rf ./build-android-x86
- name: Copy files
@@ -121,7 +144,7 @@ jobs:
rm -rf huggingface
export GIT_CLONE_PROTECTION_ACTIVE=false
- GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-libs huggingface
+ GIT_LFS_SKIP_SMUDGE=1 git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-libs huggingface
cd huggingface
@@ -143,3 +166,129 @@ jobs:
file_glob: true
overwrite: true
file: sherpa-onnx-*-android.tar.bz2
+
+ build-android-aar:
+ needs: [build-android-libs]
+ name: Android AAR
+ runs-on: ${{ matrix.os }}
+ strategy:
+ fail-fast: false
+ matrix:
+ os: [ubuntu-latest]
+
+ steps:
+ - uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+
+ # https://github.com/actions/setup-java
+ - uses: actions/setup-java@v4
+ with:
+ distribution: 'temurin' # See 'Supported distributions' for available options
+ java-version: '21'
+
+ - name: Display NDK HOME
+ shell: bash
+ run: |
+ echo "ANDROID_NDK_LATEST_HOME: ${ANDROID_NDK_LATEST_HOME}"
+ ls -lh ${ANDROID_NDK_LATEST_HOME}
+
+ - name: Retrieve artifact
+ uses: actions/download-artifact@v4
+ with:
+ name: sherpa-onnx-android-libs
+ path: /tmp/jniLibs
+
+ - name: Show jni libs
+ shell: bash
+ run: |
+ ls -lh /tmp/jniLibs
+
+ # drwxr-xr-x 2 runner docker 4.0K Dec 12 06:56 arm64-v8a
+ # drwxr-xr-x 2 runner docker 4.0K Dec 12 06:56 armeabi-v7a
+ # drwxr-xr-x 2 runner docker 4.0K Dec 12 06:56 x86
+ # drwxr-xr-x 2 runner docker 4.0K Dec 12 06:56 x86_64
+ #
+ - name: Copy libs
+ shell: bash
+ run: |
+ for arch in arm64-v8a armeabi-v7a x86 x86_64; do
+ cp -v /tmp/jniLibs/$arch/* android/SherpaOnnxAar/sherpa_onnx/src/main/jniLibs/$arch/
+ done
+
+ - name: Check libs
+ shell: bash
+ run: |
+ ls -lh android/SherpaOnnxAar/sherpa_onnx/src/main/jniLibs/*
+
+ - name: Build aar
+ shell: bash
+ run: |
+ cd android/SherpaOnnxAar
+
+ ./gradlew :sherpa_onnx:assembleRelease
+
+ - name: Display aar
+ shell: bash
+ run: |
+ cd android/SherpaOnnxAar
+
+ ls -lh ./sherpa_onnx/build/outputs/aar/sherpa_onnx-release.aar
+ cp ./sherpa_onnx/build/outputs/aar/sherpa_onnx-release.aar ../../
+
+
+ - name: Rename aar
+ shell: bash
+ run: |
+ SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
+ echo "SHERPA_ONNX_VERSION=$SHERPA_ONNX_VERSION" >> "$GITHUB_ENV"
+
+ mv sherpa_onnx-release.aar sherpa-onnx-${SHERPA_ONNX_VERSION}.aar
+
+ - uses: actions/upload-artifact@v4
+ with:
+ name: sherpa-onnx-android-aar
+ path: ./*.aar
+
+ # https://huggingface.co/docs/hub/spaces-github-actions
+ - name: Publish to huggingface
+ if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && (github.event_name == 'push' || github.event_name == 'workflow_dispatch')
+ env:
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
+ uses: nick-fields/retry@v3
+ with:
+ max_attempts: 20
+ timeout_seconds: 200
+ shell: bash
+ command: |
+ git config --global user.email "csukuangfj@gmail.com"
+ git config --global user.name "Fangjun Kuang"
+ du -h -d1 .
+ ls -lh
+
+ rm -rf huggingface
+ export GIT_CLONE_PROTECTION_ACTIVE=false
+ GIT_LFS_SKIP_SMUDGE=1 git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-libs huggingface
+
+ cd huggingface
+ dst=android/aar
+ mkdir -p $dst
+
+ cp -v ../*.aar $dst
+
+ git status
+ git lfs track "*.aar"
+
+ git add .
+
+ git commit -m "upload sherpa-onnx-${SHERPA_ONNX_VERSION}.aar"
+
+ git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-libs main
+
+ - name: Release android aar
+ if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
+ uses: svenstaro/upload-release-action@v2
+ with:
+ file_glob: true
+ overwrite: true
+ file: ./*.aar
diff --git a/.github/workflows/apk-asr-2pass.yaml b/.github/workflows/apk-asr-2pass.yaml
index bbe61060a8..72885db45e 100644
--- a/.github/workflows/apk-asr-2pass.yaml
+++ b/.github/workflows/apk-asr-2pass.yaml
@@ -23,8 +23,8 @@ jobs:
fail-fast: false
matrix:
os: [ubuntu-latest]
- total: ["2"]
- index: ["0", "1"]
+ total: ["4"]
+ index: ["0", "1", "2", "3"]
steps:
- uses: actions/checkout@v4
@@ -163,7 +163,7 @@ jobs:
SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"
- git clone https://huggingface.co/csukuangfj/sherpa-onnx-apk huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-apk huggingface
cd huggingface
git fetch
git pull
diff --git a/.github/workflows/apk-asr.yaml b/.github/workflows/apk-asr.yaml
index fc1cd1f5d8..e49b179c8b 100644
--- a/.github/workflows/apk-asr.yaml
+++ b/.github/workflows/apk-asr.yaml
@@ -23,8 +23,8 @@ jobs:
fail-fast: false
matrix:
os: [ubuntu-latest]
- total: ["3"]
- index: ["0", "1", "2"]
+ total: ["6"]
+ index: ["0", "1", "2", "3", "4", "5"]
steps:
- uses: actions/checkout@v4
@@ -163,7 +163,7 @@ jobs:
SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"
- git clone https://huggingface.co/csukuangfj/sherpa-onnx-apk huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-apk huggingface
cd huggingface
git fetch
git pull
diff --git a/.github/workflows/apk-audio-tagging-wearos.yaml b/.github/workflows/apk-audio-tagging-wearos.yaml
index 0ed8230769..bfe9f9ac7c 100644
--- a/.github/workflows/apk-audio-tagging-wearos.yaml
+++ b/.github/workflows/apk-audio-tagging-wearos.yaml
@@ -163,7 +163,7 @@ jobs:
SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"
- git clone https://huggingface.co/csukuangfj/sherpa-onnx-apk huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-apk huggingface
cd huggingface
git fetch
git pull
diff --git a/.github/workflows/apk-audio-tagging.yaml b/.github/workflows/apk-audio-tagging.yaml
index f6b85c3b2f..c11180c4ae 100644
--- a/.github/workflows/apk-audio-tagging.yaml
+++ b/.github/workflows/apk-audio-tagging.yaml
@@ -160,7 +160,7 @@ jobs:
export GIT_LFS_SKIP_SMUDGE=1
export GIT_CLONE_PROTECTION_ACTIVE=false
- git clone https://huggingface.co/csukuangfj/sherpa-onnx-apk huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-apk huggingface
cd huggingface
git fetch
git pull
diff --git a/.github/workflows/apk-kws.yaml b/.github/workflows/apk-kws.yaml
index 524622de85..43cdef49e0 100644
--- a/.github/workflows/apk-kws.yaml
+++ b/.github/workflows/apk-kws.yaml
@@ -160,7 +160,7 @@ jobs:
SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"
- git clone https://huggingface.co/csukuangfj/sherpa-onnx-apk huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-apk huggingface
cd huggingface
git fetch
git pull
diff --git a/.github/workflows/apk-speaker-diarization.yaml b/.github/workflows/apk-speaker-diarization.yaml
new file mode 100644
index 0000000000..90bcc7323a
--- /dev/null
+++ b/.github/workflows/apk-speaker-diarization.yaml
@@ -0,0 +1,179 @@
+name: apk-speaker-diarization
+
+on:
+ push:
+ branches:
+ - apk
+
+ workflow_dispatch:
+
+concurrency:
+ group: apk-speaker-diarization-${{ github.ref }}
+ cancel-in-progress: true
+
+permissions:
+ contents: write
+
+jobs:
+ apk_speaker_diarization:
+ if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
+ runs-on: ${{ matrix.os }}
+ name: apk for speaker diarization ${{ matrix.index }}/${{ matrix.total }}
+ strategy:
+ fail-fast: false
+ matrix:
+ os: [ubuntu-latest]
+ total: ["1"]
+ index: ["0"]
+
+ steps:
+ - uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+
+ # https://github.com/actions/setup-java
+ - uses: actions/setup-java@v4
+ with:
+ distribution: 'temurin' # See 'Supported distributions' for available options
+ java-version: '21'
+
+ - name: ccache
+ uses: hendrikmuhs/ccache-action@v1.2
+ with:
+ key: ${{ matrix.os }}-android
+
+ - name: Display NDK HOME
+ shell: bash
+ run: |
+ echo "ANDROID_NDK_LATEST_HOME: ${ANDROID_NDK_LATEST_HOME}"
+ ls -lh ${ANDROID_NDK_LATEST_HOME}
+
+ - name: Install Python dependencies
+ shell: bash
+ run: |
+ python3 -m pip install --upgrade pip jinja2
+
+ - name: Setup build tool version variable
+ shell: bash
+ run: |
+ echo "---"
+ ls -lh /usr/local/lib/android/
+ echo "---"
+
+ ls -lh /usr/local/lib/android/sdk
+ echo "---"
+
+ ls -lh /usr/local/lib/android/sdk/build-tools
+ echo "---"
+
+ BUILD_TOOL_VERSION=$(ls /usr/local/lib/android/sdk/build-tools/ | tail -n 1)
+ echo "BUILD_TOOL_VERSION=$BUILD_TOOL_VERSION" >> $GITHUB_ENV
+ echo "Last build tool version is: $BUILD_TOOL_VERSION"
+
+ - name: Generate build script
+ shell: bash
+ run: |
+ cd scripts/apk
+
+ total=${{ matrix.total }}
+ index=${{ matrix.index }}
+
+ python3 ./generate-speaker-diarization-apk-script.py --total $total --index $index
+
+ chmod +x build-apk-speaker-diarization.sh
+ mv -v ./build-apk-speaker-diarization.sh ../..
+
+ - name: build APK
+ shell: bash
+ run: |
+ export CMAKE_CXX_COMPILER_LAUNCHER=ccache
+ export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
+ cmake --version
+
+ export ANDROID_NDK=$ANDROID_NDK_LATEST_HOME
+ ./build-apk-speaker-diarization.sh
+
+ - name: Display APK
+ shell: bash
+ run: |
+ ls -lh ./apks/
+ du -h -d1 .
+
+ # https://github.com/marketplace/actions/sign-android-release
+ - uses: r0adkll/sign-android-release@v1
+ name: Sign app APK
+ with:
+ releaseDirectory: ./apks
+ signingKeyBase64: ${{ secrets.ANDROID_SIGNING_KEY }}
+ alias: ${{ secrets.ANDROID_SIGNING_KEY_ALIAS }}
+ keyStorePassword: ${{ secrets.ANDROID_SIGNING_KEY_STORE_PASSWORD }}
+ env:
+ BUILD_TOOLS_VERSION: ${{ env.BUILD_TOOL_VERSION }}
+
+ - name: Display APK after signing
+ shell: bash
+ run: |
+ ls -lh ./apks/
+ du -h -d1 .
+
+ - name: Rename APK after signing
+ shell: bash
+ run: |
+ cd apks
+ rm -fv signingKey.jks
+ rm -fv *.apk.idsig
+ rm -fv *-aligned.apk
+
+ all_apks=$(ls -1 *-signed.apk)
+ echo "----"
+ echo $all_apks
+ echo "----"
+ for apk in ${all_apks[@]}; do
+ n=$(echo $apk | sed -e s/-signed//)
+ mv -v $apk $n
+ done
+
+ cd ..
+
+ ls -lh ./apks/
+ du -h -d1 .
+
+ - name: Display APK after rename
+ shell: bash
+ run: |
+ ls -lh ./apks/
+ du -h -d1 .
+
+ - name: Publish to huggingface
+ env:
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
+ uses: nick-fields/retry@v3
+ with:
+ max_attempts: 20
+ timeout_seconds: 200
+ shell: bash
+ command: |
+ git config --global user.email "csukuangfj@gmail.com"
+ git config --global user.name "Fangjun Kuang"
+
+ rm -rf huggingface
+ export GIT_LFS_SKIP_SMUDGE=1
+ export GIT_CLONE_PROTECTION_ACTIVE=false
+
+ SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
+ echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"
+
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-apk huggingface
+ cd huggingface
+ git fetch
+ git pull
+ git merge -m "merge remote" --ff origin main
+
+ d=speaker-diarization/$SHERPA_ONNX_VERSION
+ mkdir -p $d/
+ cp -v ../apks/*.apk $d/
+ git status
+ git lfs track "*.apk"
+ git add .
+ git commit -m "add more apks"
+ git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-apk main
diff --git a/.github/workflows/apk-speaker-identification.yaml b/.github/workflows/apk-speaker-identification.yaml
index ca89ec49f6..c88718d6e0 100644
--- a/.github/workflows/apk-speaker-identification.yaml
+++ b/.github/workflows/apk-speaker-identification.yaml
@@ -53,6 +53,23 @@ jobs:
run: |
python3 -m pip install --upgrade pip jinja2
+ - name: Setup build tool version variable
+ shell: bash
+ run: |
+ echo "---"
+ ls -lh /usr/local/lib/android/
+ echo "---"
+
+ ls -lh /usr/local/lib/android/sdk
+ echo "---"
+
+ ls -lh /usr/local/lib/android/sdk/build-tools
+ echo "---"
+
+ BUILD_TOOL_VERSION=$(ls /usr/local/lib/android/sdk/build-tools/ | tail -n 1)
+ echo "BUILD_TOOL_VERSION=$BUILD_TOOL_VERSION" >> $GITHUB_ENV
+ echo "Last build tool version is: $BUILD_TOOL_VERSION"
+
- name: Generate build script
shell: bash
run: |
@@ -82,6 +99,51 @@ jobs:
ls -lh ./apks/
du -h -d1 .
+ # https://github.com/marketplace/actions/sign-android-release
+ - uses: r0adkll/sign-android-release@v1
+ name: Sign app APK
+ with:
+ releaseDirectory: ./apks
+ signingKeyBase64: ${{ secrets.ANDROID_SIGNING_KEY }}
+ alias: ${{ secrets.ANDROID_SIGNING_KEY_ALIAS }}
+ keyStorePassword: ${{ secrets.ANDROID_SIGNING_KEY_STORE_PASSWORD }}
+ env:
+ BUILD_TOOLS_VERSION: ${{ env.BUILD_TOOL_VERSION }}
+
+ - name: Display APK after signing
+ shell: bash
+ run: |
+ ls -lh ./apks/
+ du -h -d1 .
+
+ - name: Rename APK after signing
+ shell: bash
+ run: |
+ cd apks
+ rm -fv signingKey.jks
+ rm -fv *.apk.idsig
+ rm -fv *-aligned.apk
+
+ all_apks=$(ls -1 *-signed.apk)
+ echo "----"
+ echo $all_apks
+ echo "----"
+ for apk in ${all_apks[@]}; do
+ n=$(echo $apk | sed -e s/-signed//)
+ mv -v $apk $n
+ done
+
+ cd ..
+
+ ls -lh ./apks/
+ du -h -d1 .
+
+ - name: Display APK after rename
+ shell: bash
+ run: |
+ ls -lh ./apks/
+ du -h -d1 .
+
- name: Publish to huggingface
env:
HF_TOKEN: ${{ secrets.HF_TOKEN }}
@@ -101,7 +163,7 @@ jobs:
SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"
- git clone https://huggingface.co/csukuangfj/sherpa-onnx-apk huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-apk huggingface
cd huggingface
git fetch
git pull
diff --git a/.github/workflows/apk-spoken-language-identification.yaml b/.github/workflows/apk-spoken-language-identification.yaml
index 3cb9c83b28..cc7525cd42 100644
--- a/.github/workflows/apk-spoken-language-identification.yaml
+++ b/.github/workflows/apk-spoken-language-identification.yaml
@@ -163,7 +163,7 @@ jobs:
SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"
- git clone https://huggingface.co/csukuangfj/sherpa-onnx-apk huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-apk huggingface
cd huggingface
git fetch
git pull
diff --git a/.github/workflows/apk-tts-engine.yaml b/.github/workflows/apk-tts-engine.yaml
index d251483e4a..b8614cb76c 100644
--- a/.github/workflows/apk-tts-engine.yaml
+++ b/.github/workflows/apk-tts-engine.yaml
@@ -164,7 +164,7 @@ jobs:
SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"
- git clone https://huggingface.co/csukuangfj/sherpa-onnx-apk huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-apk huggingface
cd huggingface
git fetch
git pull
diff --git a/.github/workflows/apk-tts.yaml b/.github/workflows/apk-tts.yaml
index dd0aa3f775..1609739c69 100644
--- a/.github/workflows/apk-tts.yaml
+++ b/.github/workflows/apk-tts.yaml
@@ -164,7 +164,7 @@ jobs:
SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"
- git clone https://huggingface.co/csukuangfj/sherpa-onnx-apk huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-apk huggingface
cd huggingface
git fetch
git pull
diff --git a/.github/workflows/apk-vad-asr.yaml b/.github/workflows/apk-vad-asr.yaml
index 8310043a9c..fe706aa14d 100644
--- a/.github/workflows/apk-vad-asr.yaml
+++ b/.github/workflows/apk-vad-asr.yaml
@@ -23,8 +23,8 @@ jobs:
fail-fast: false
matrix:
os: [ubuntu-latest]
- total: ["5"]
- index: ["0", "1", "2", "3", "4"]
+ total: ["10"]
+ index: ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]
steps:
- uses: actions/checkout@v4
@@ -163,8 +163,9 @@ jobs:
SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"
- git clone https://huggingface.co/csukuangfj/sherpa-onnx-apk huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-apk huggingface
cd huggingface
+ du -h -d1 .
git fetch
git pull
git merge -m "merge remote" --ff origin main
diff --git a/.github/workflows/apk-vad.yaml b/.github/workflows/apk-vad.yaml
index 8253145b68..f1a4364fc0 100644
--- a/.github/workflows/apk-vad.yaml
+++ b/.github/workflows/apk-vad.yaml
@@ -160,13 +160,13 @@ jobs:
SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"
- git clone https://huggingface.co/csukuangfj/sherpa-onnx-apk huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-apk huggingface
cd huggingface
git fetch
git pull
git merge -m "merge remote" --ff origin main
- d=vad/SHERPA_ONNX_VERSION
+ d=vad/$SHERPA_ONNX_VERSION
mkdir -p $d
cp -v ../apks/*.apk $d/
git status
diff --git a/.github/workflows/arm-linux-gnueabihf.yaml b/.github/workflows/arm-linux-gnueabihf.yaml
index a56b2cdad4..63a5cf414a 100644
--- a/.github/workflows/arm-linux-gnueabihf.yaml
+++ b/.github/workflows/arm-linux-gnueabihf.yaml
@@ -7,7 +7,6 @@ on:
- master
paths:
- '.github/workflows/arm-linux-gnueabihf.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'sherpa-onnx/c-api/*'
@@ -19,7 +18,6 @@ on:
- master
paths:
- '.github/workflows/arm-linux-gnueabihf.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'sherpa-onnx/c-api/*'
@@ -62,7 +60,7 @@ jobs:
if: steps.cache-qemu.outputs.cache-hit != 'true'
run: |
sudo apt-get update
- sudo apt-get install autoconf automake autotools-dev ninja-build
+ sudo apt-get install autoconf automake autotools-dev ninja-build libglib2.0-dev
- name: checkout-qemu
if: steps.cache-qemu.outputs.cache-hit != 'true'
@@ -205,7 +203,7 @@ jobs:
git config --global user.name "Fangjun Kuang"
rm -rf huggingface
- GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-libs huggingface
+ GIT_LFS_SKIP_SMUDGE=1 git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-libs huggingface
cd huggingface
mkdir -p arm32
diff --git a/.github/workflows/build-wheels-aarch64-cuda.yaml b/.github/workflows/build-wheels-aarch64-cuda.yaml
new file mode 100644
index 0000000000..a221553a4a
--- /dev/null
+++ b/.github/workflows/build-wheels-aarch64-cuda.yaml
@@ -0,0 +1,118 @@
+name: build-wheels-aarch64-cuda
+
+on:
+ push:
+ branches:
+ - wheel
+ workflow_dispatch:
+
+env:
+ SHERPA_ONNX_IS_IN_GITHUB_ACTIONS: 1
+
+concurrency:
+ group: build-wheels-aarch64-cuda-${{ github.ref }}
+ cancel-in-progress: true
+
+jobs:
+ build_wheels_aarch64_cuda:
+ name: ${{ matrix.manylinux }} ${{ matrix.python-version }}
+ runs-on: ${{ matrix.os }}
+ strategy:
+ fail-fast: false
+ matrix:
+ os: [ubuntu-20.04]
+ python-version: ["cp37", "cp38", "cp39", "cp310", "cp311", "cp312", "cp313"]
+ manylinux: [manylinux2014] #, manylinux_2_28]
+
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: Set up QEMU
+ uses: docker/setup-qemu-action@v2
+ with:
+ platforms: all
+
+ # see https://cibuildwheel.readthedocs.io/en/stable/changelog/
+ # for a list of versions
+ - name: Build wheels
+ uses: pypa/cibuildwheel@v2.21.3
+ env:
+ CIBW_BEFORE_ALL: |
+ git clone --depth 1 --branch v1.2.12 https://github.com/alsa-project/alsa-lib
+ cd alsa-lib
+ ./gitcompile
+ cd ..
+ echo "PWD: $PWD"
+ ls -lh /project/alsa-lib/src/.libs
+
+ CIBW_ENVIRONMENT: CPLUS_INCLUDE_PATH=/project/alsa-lib/include:$CPLUS_INCLUDE_PATH SHERPA_ONNX_ALSA_LIB_DIR=/project/alsa-lib/src/.libs LD_LIBRARY_PATH=/project/build/bdist.linux-x86_64/wheel/sherpa_onnx/lib:$SHERPA_ONNX_ALSA_LIB_DIR SHERPA_ONNX_MAKE_ARGS="VERBOSE=1" SHERPA_ONNX_ENABLE_ALSA=1 SHERPA_ONNX_ENABLE_GPU=ON
+ CIBW_BUILD: "${{ matrix.python-version}}-* "
+ CIBW_SKIP: "cp27-* cp35-* cp36-* *-win32 pp* *-musllinux* *-manylinux_i686"
+ CIBW_BUILD_VERBOSITY: 3
+ CIBW_ARCHS_LINUX: aarch64
+ CIBW_MANYLINUX_AARCH64_IMAGE: quay.io/pypa/${{ matrix.manylinux }}_aarch64
+ # Note: onnxruntime >= 1.17.0 drops support for CentOS 7.0 and supports only manylinux_2_28;
+ # manylinux_2_24 is no longer supported.
+
+ - name: Display wheels
+ shell: bash
+ run: |
+ ls -lh ./wheelhouse/
+
+ - name: Install patchelf
+ shell: bash
+ run: |
+ sudo apt-get update -q
+ sudo apt-get install -q -y patchelf
+ patchelf --help
+
+ - name: Patch wheels
+ shell: bash
+ run: |
+ mkdir ./wheels
+ sudo ./scripts/wheel/patch_wheel.py --in-dir ./wheelhouse --out-dir ./wheels
+
+ ls -lh ./wheels/
+ rm -rf ./wheelhouse
+ mv ./wheels ./wheelhouse
+
+ - name: Publish to huggingface
+ env:
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
+ uses: nick-fields/retry@v3
+ with:
+ max_attempts: 20
+ timeout_seconds: 200
+ shell: bash
+ command: |
+ git config --global user.email "csukuangfj@gmail.com"
+ git config --global user.name "Fangjun Kuang"
+
+ rm -rf huggingface
+ export GIT_LFS_SKIP_SMUDGE=1
+ export GIT_CLONE_PROTECTION_ACTIVE=false
+
+ SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
+ echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"
+
+ d=cuda/$SHERPA_ONNX_VERSION
+
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-wheels huggingface
+ cd huggingface
+ git fetch
+ git pull
+ git merge -m "merge remote" --ff origin main
+
+ mkdir -p $d
+
+ cp -v ../wheelhouse/*.whl $d/
+
+ git status
+ git add .
+ git commit -m "add more wheels"
+ git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-wheels main
+
+ - uses: actions/upload-artifact@v4
+ with:
+ name: wheel-${{ matrix.python-version }}-${{ matrix.manylinux }}
+ path: ./wheelhouse/*.whl
diff --git a/.github/workflows/build-wheels-aarch64.yaml b/.github/workflows/build-wheels-aarch64.yaml
index 9d4ac571e5..1ba8ebd682 100644
--- a/.github/workflows/build-wheels-aarch64.yaml
+++ b/.github/workflows/build-wheels-aarch64.yaml
@@ -20,8 +20,8 @@ jobs:
strategy:
fail-fast: false
matrix:
- os: [ubuntu-latest]
- python-version: ["cp37", "cp38", "cp39", "cp310", "cp311", "cp312"]
+ os: [ubuntu-20.04]
+ python-version: ["cp37", "cp38", "cp39", "cp310", "cp311", "cp312", "cp313"]
manylinux: [manylinux2014] #, manylinux_2_28]
steps:
@@ -35,7 +35,7 @@ jobs:
# see https://cibuildwheel.readthedocs.io/en/stable/changelog/
# for a list of versions
- name: Build wheels
- uses: pypa/cibuildwheel@v2.16.5
+ uses: pypa/cibuildwheel@v2.21.3
env:
CIBW_BEFORE_ALL: |
git clone --depth 1 --branch v1.2.12 https://github.com/alsa-project/alsa-lib
@@ -60,7 +60,6 @@ jobs:
ls -lh ./wheelhouse/
- name: Install patchelf
- if: matrix.os == 'ubuntu-latest'
shell: bash
run: |
sudo apt-get update -q
@@ -69,7 +68,6 @@ jobs:
- name: Patch wheels
shell: bash
- if: matrix.os == 'ubuntu-latest'
run: |
mkdir ./wheels
sudo ./scripts/wheel/patch_wheel.py --in-dir ./wheelhouse --out-dir ./wheels
@@ -99,7 +97,7 @@ jobs:
d=cpu/$SHERPA_ONNX_VERSION
- git clone https://huggingface.co/csukuangfj/sherpa-onnx-wheels huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-wheels huggingface
cd huggingface
git fetch
git pull
@@ -125,6 +123,6 @@ jobs:
TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
run: |
python3 -m pip install --upgrade pip
- python3 -m pip install wheel twine setuptools
+ python3 -m pip install wheel twine==5.0.0 setuptools
twine upload ./wheelhouse/*.whl
diff --git a/.github/workflows/build-wheels-armv7l.yaml b/.github/workflows/build-wheels-armv7l.yaml
index 05c3b196dc..58a7cc8973 100644
--- a/.github/workflows/build-wheels-armv7l.yaml
+++ b/.github/workflows/build-wheels-armv7l.yaml
@@ -102,7 +102,7 @@ jobs:
d=cpu/$SHERPA_ONNX_VERSION
- git clone https://huggingface.co/csukuangfj/sherpa-onnx-wheels huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-wheels huggingface
cd huggingface
git fetch
git pull
@@ -129,6 +129,6 @@ jobs:
TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
run: |
python3 -m pip install --upgrade pip
- python3 -m pip install wheel twine setuptools
+ python3 -m pip install wheel twine==5.0.0 setuptools
twine upload ./wheelhouse/*.whl
diff --git a/.github/workflows/build-wheels-linux-cuda.yaml b/.github/workflows/build-wheels-linux-cuda.yaml
index b1ee898250..1801840abc 100644
--- a/.github/workflows/build-wheels-linux-cuda.yaml
+++ b/.github/workflows/build-wheels-linux-cuda.yaml
@@ -21,7 +21,7 @@ jobs:
fail-fast: false
matrix:
os: [ubuntu-20.04]
- python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"]
+ python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13"]
steps:
- uses: actions/checkout@v4
@@ -34,7 +34,7 @@ jobs:
- name: Install Python dependencies
shell: bash
run: |
- pip install -U pip wheel setuptools twine
+ pip install -U pip wheel setuptools twine==5.0.0
- name: Build alsa-lib
shell: bash
@@ -113,7 +113,7 @@ jobs:
d=cuda/$SHERPA_ONNX_VERSION
- git clone https://huggingface.co/csukuangfj/sherpa-onnx-wheels huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-wheels huggingface
cd huggingface
git fetch
git pull
diff --git a/.github/workflows/build-wheels-linux.yaml b/.github/workflows/build-wheels-linux.yaml
index e16f5bb9a7..0380e2a993 100644
--- a/.github/workflows/build-wheels-linux.yaml
+++ b/.github/workflows/build-wheels-linux.yaml
@@ -20,8 +20,8 @@ jobs:
strategy:
fail-fast: false
matrix:
- os: [ubuntu-latest]
- python-version: ["cp37", "cp38", "cp39", "cp310", "cp311", "cp312"]
+ os: [ubuntu-20.04]
+ python-version: ["cp37", "cp38", "cp39", "cp310", "cp311", "cp312", "cp313"]
manylinux: [manylinux2014] #, manylinux_2_28]
@@ -31,7 +31,7 @@ jobs:
# see https://cibuildwheel.readthedocs.io/en/stable/changelog/
# for a list of versions
- name: Build wheels
- uses: pypa/cibuildwheel@v2.16.5
+ uses: pypa/cibuildwheel@v2.21.3
env:
CIBW_BEFORE_ALL: |
git clone --depth 1 --branch v1.2.12 https://github.com/alsa-project/alsa-lib
@@ -96,7 +96,7 @@ jobs:
d=cpu/$SHERPA_ONNX_VERSION
- git clone https://huggingface.co/csukuangfj/sherpa-onnx-wheels huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-wheels huggingface
cd huggingface
git fetch
git pull
@@ -118,7 +118,7 @@ jobs:
shell: bash
run: |
python3 -m pip install --upgrade pip
- python3 -m pip install wheel twine setuptools
+ python3 -m pip install wheel twine==5.0.0 setuptools
twine upload ./wheelhouse/*.whl
diff --git a/.github/workflows/build-wheels-macos-arm64.yaml b/.github/workflows/build-wheels-macos-arm64.yaml
index ce899c5d19..fe1d316281 100644
--- a/.github/workflows/build-wheels-macos-arm64.yaml
+++ b/.github/workflows/build-wheels-macos-arm64.yaml
@@ -21,13 +21,13 @@ jobs:
fail-fast: false
matrix:
os: [macos-13]
- python-version: ["cp38", "cp39", "cp310", "cp311", "cp312"]
+ python-version: ["cp38", "cp39", "cp310", "cp311", "cp312", "cp313"]
steps:
- uses: actions/checkout@v4
- name: Build wheels
- uses: pypa/cibuildwheel@v2.15.0
+ uses: pypa/cibuildwheel@v2.21.3
env:
CIBW_BUILD: "${{ matrix.python-version}}-* "
CIBW_ENVIRONMENT: SHERPA_ONNX_CMAKE_ARGS="-DCMAKE_OSX_ARCHITECTURES='arm64'"
@@ -68,7 +68,7 @@ jobs:
d=cpu/$SHERPA_ONNX_VERSION
- git clone https://huggingface.co/csukuangfj/sherpa-onnx-wheels huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-wheels huggingface
cd huggingface
git fetch
git pull
@@ -95,6 +95,6 @@ jobs:
fi
python3 -m pip install $opts --upgrade pip
- python3 -m pip install $opts wheel twine setuptools
+ python3 -m pip install $opts wheel twine==5.0.0 setuptools
twine upload ./wheelhouse/*.whl
diff --git a/.github/workflows/build-wheels-macos-universal2.yaml b/.github/workflows/build-wheels-macos-universal2.yaml
index 4578d370e9..0f9dcedc78 100644
--- a/.github/workflows/build-wheels-macos-universal2.yaml
+++ b/.github/workflows/build-wheels-macos-universal2.yaml
@@ -21,13 +21,13 @@ jobs:
fail-fast: false
matrix:
os: [macos-latest]
- python-version: ["cp38", "cp39", "cp310", "cp311", "cp312"]
+ python-version: ["cp38", "cp39", "cp310", "cp311", "cp312", "cp313"]
steps:
- uses: actions/checkout@v4
- name: Build wheels
- uses: pypa/cibuildwheel@v2.15.0
+ uses: pypa/cibuildwheel@v2.21.3
env:
CIBW_BUILD: "${{ matrix.python-version}}-* "
CIBW_ENVIRONMENT: SHERPA_ONNX_CMAKE_ARGS="-DCMAKE_OSX_ARCHITECTURES='arm64;x86_64'"
@@ -68,7 +68,7 @@ jobs:
d=cpu/$SHERPA_ONNX_VERSION
- git clone https://huggingface.co/csukuangfj/sherpa-onnx-wheels huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-wheels huggingface
cd huggingface
git fetch
git pull
@@ -89,6 +89,6 @@ jobs:
TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
run: |
python3 -m pip install --break-system-packages --upgrade pip
- python3 -m pip install --break-system-packages wheel twine setuptools
+ python3 -m pip install --break-system-packages wheel twine==5.0.0 setuptools
twine upload ./wheelhouse/*.whl
diff --git a/.github/workflows/build-wheels-macos-x64.yaml b/.github/workflows/build-wheels-macos-x64.yaml
index b7bf6ff54b..cbb4792e93 100644
--- a/.github/workflows/build-wheels-macos-x64.yaml
+++ b/.github/workflows/build-wheels-macos-x64.yaml
@@ -21,7 +21,7 @@ jobs:
fail-fast: false
matrix:
os: [macos-13]
- python-version: ["cp37", "cp38", "cp39", "cp310", "cp311", "cp312"]
+ python-version: ["cp37", "cp38", "cp39", "cp310", "cp311", "cp312", "cp313"]
steps:
- uses: actions/checkout@v4
@@ -42,7 +42,7 @@ jobs:
- name: Build wheels
if: matrix.python-version != 'cp37'
- uses: pypa/cibuildwheel@v2.15.0
+ uses: pypa/cibuildwheel@v2.21.3
env:
CIBW_BUILD: "${{ matrix.python-version}}-* "
CIBW_ENVIRONMENT: SHERPA_ONNX_CMAKE_ARGS="-DCMAKE_OSX_ARCHITECTURES='x86_64'"
@@ -83,7 +83,7 @@ jobs:
d=cpu/$SHERPA_ONNX_VERSION
- git clone https://huggingface.co/csukuangfj/sherpa-onnx-wheels huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-wheels huggingface
cd huggingface
git fetch
git pull
@@ -110,6 +110,6 @@ jobs:
fi
python3 -m pip install $opts --upgrade pip
- python3 -m pip install $opts wheel twine setuptools
+ python3 -m pip install $opts wheel twine==5.0.0 setuptools
twine upload ./wheelhouse/*.whl
diff --git a/.github/workflows/build-wheels-win32.yaml b/.github/workflows/build-wheels-win32.yaml
index 2560847830..732a17d7b5 100644
--- a/.github/workflows/build-wheels-win32.yaml
+++ b/.github/workflows/build-wheels-win32.yaml
@@ -21,7 +21,7 @@ jobs:
fail-fast: false
matrix:
os: [windows-latest]
- python-version: ["cp37", "cp38", "cp39", "cp310", "cp311", "cp312"]
+ python-version: ["cp37", "cp38", "cp39", "cp310", "cp311", "cp312", "cp313"]
steps:
- uses: actions/checkout@v4
@@ -29,7 +29,7 @@ jobs:
# see https://cibuildwheel.readthedocs.io/en/stable/changelog/
# for a list of versions
- name: Build wheels
- uses: pypa/cibuildwheel@v2.16.5
+ uses: pypa/cibuildwheel@v2.21.3
env:
CIBW_ENVIRONMENT: SHERPA_ONNX_CMAKE_ARGS="-A Win32"
CIBW_BUILD: "${{ matrix.python-version}}-* "
@@ -67,7 +67,7 @@ jobs:
d=cpu/$SHERPA_ONNX_VERSION
- git clone https://huggingface.co/csukuangfj/sherpa-onnx-wheels huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-wheels huggingface
cd huggingface
git fetch
git pull
@@ -88,6 +88,6 @@ jobs:
TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
run: |
python3 -m pip install --upgrade pip
- python3 -m pip install wheel twine setuptools
+ python3 -m pip install wheel twine==5.0.0 setuptools
twine upload ./wheelhouse/*.whl
diff --git a/.github/workflows/build-wheels-win64-cuda.yaml b/.github/workflows/build-wheels-win64-cuda.yaml
index f0a17da8cc..27b4fb87eb 100644
--- a/.github/workflows/build-wheels-win64-cuda.yaml
+++ b/.github/workflows/build-wheels-win64-cuda.yaml
@@ -21,7 +21,7 @@ jobs:
fail-fast: false
matrix:
os: [windows-2019]
- python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"]
+ python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13"]
steps:
- uses: actions/checkout@v4
@@ -75,7 +75,7 @@ jobs:
d=cuda/$SHERPA_ONNX_VERSION
- git clone https://huggingface.co/csukuangfj/sherpa-onnx-wheels huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-wheels huggingface
cd huggingface
git fetch
git pull
diff --git a/.github/workflows/build-wheels-win64.yaml b/.github/workflows/build-wheels-win64.yaml
index 14e3e2ac4d..f2cc7c157a 100644
--- a/.github/workflows/build-wheels-win64.yaml
+++ b/.github/workflows/build-wheels-win64.yaml
@@ -21,7 +21,7 @@ jobs:
fail-fast: false
matrix:
os: [windows-2019]
- python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"]
+ python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13"]
steps:
- uses: actions/checkout@v4
@@ -73,7 +73,7 @@ jobs:
d=cpu/$SHERPA_ONNX_VERSION
- git clone https://huggingface.co/csukuangfj/sherpa-onnx-wheels huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-wheels huggingface
cd huggingface
git fetch
git pull
@@ -94,6 +94,6 @@ jobs:
TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
run: |
python3 -m pip install --upgrade pip
- python3 -m pip install wheel twine setuptools
+ python3 -m pip install wheel twine==5.0.0 setuptools
twine upload ./wheelhouse/*.whl
diff --git a/.github/workflows/build-xcframework.yaml b/.github/workflows/build-xcframework.yaml
index 2afd95cab9..8fcfafd43d 100644
--- a/.github/workflows/build-xcframework.yaml
+++ b/.github/workflows/build-xcframework.yaml
@@ -43,6 +43,13 @@ jobs:
steps:
- uses: actions/checkout@v4
+ - name: Build iOS shared
+ if: matrix.with_tts == 'ON'
+ shell: bash
+ run: |
+ export CMAKE_VERBOSE_MAKEFILE=ON
+ ./build-ios-shared.sh
+
- name: Build iOS
if: matrix.with_tts == 'ON'
shell: bash
@@ -135,7 +142,7 @@ jobs:
rm -rf huggingface
export GIT_CLONE_PROTECTION_ACTIVE=false
- GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-libs huggingface
+ GIT_LFS_SKIP_SMUDGE=1 git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-libs huggingface
cd huggingface
diff --git a/.github/workflows/c-api-from-buffer.yaml b/.github/workflows/c-api-from-buffer.yaml
index 4352cd7ce9..5d9bc11db0 100644
--- a/.github/workflows/c-api-from-buffer.yaml
+++ b/.github/workflows/c-api-from-buffer.yaml
@@ -8,7 +8,6 @@ on:
- 'v[0-9]+.[0-9]+.[0-9]+*'
paths:
- '.github/workflows/c-api-from-buffer.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'sherpa-onnx/c-api/*'
@@ -19,7 +18,6 @@ on:
- master
paths:
- '.github/workflows/c-api-from-buffer.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'sherpa-onnx/c-api/*'
@@ -215,4 +213,4 @@ jobs:
./keywords-spotter-buffered-tokens-keywords-c-api
- rm -rf sherpa-onnx-kws-zipformer-*
\ No newline at end of file
+ rm -rf sherpa-onnx-kws-zipformer-*
diff --git a/.github/workflows/c-api.yaml b/.github/workflows/c-api.yaml
index 589bda71f1..4a4108c989 100644
--- a/.github/workflows/c-api.yaml
+++ b/.github/workflows/c-api.yaml
@@ -4,11 +4,8 @@ on:
push:
branches:
- master
- tags:
- - 'v[0-9]+.[0-9]+.[0-9]+*'
paths:
- '.github/workflows/c-api.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'sherpa-onnx/c-api/*'
@@ -19,7 +16,6 @@ on:
- master
paths:
- '.github/workflows/c-api.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'sherpa-onnx/c-api/*'
@@ -83,6 +79,201 @@ jobs:
otool -L ./install/lib/libsherpa-onnx-c-api.dylib
fi
+ - name: Test kws (zh)
+ shell: bash
+ run: |
+ gcc -o kws-c-api ./c-api-examples/kws-c-api.c \
+ -I ./build/install/include \
+ -L ./build/install/lib/ \
+ -l sherpa-onnx-c-api \
+ -l onnxruntime
+
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile.tar.bz2
+ tar xvf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile.tar.bz2
+ rm sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile.tar.bz2
+
+ export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
+ export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
+
+ ./kws-c-api
+
+ rm ./kws-c-api
+ rm -rf sherpa-onnx-kws-*
+
+ - name: Test Kokoro TTS (zh+en)
+ shell: bash
+ run: |
+ gcc -o kokoro-tts-zh-en-c-api ./c-api-examples/kokoro-tts-zh-en-c-api.c \
+ -I ./build/install/include \
+ -L ./build/install/lib/ \
+ -l sherpa-onnx-c-api \
+ -l onnxruntime
+
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2
+ tar xf kokoro-multi-lang-v1_0.tar.bz2
+ rm kokoro-multi-lang-v1_0.tar.bz2
+
+ export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
+ export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
+
+ ./kokoro-tts-zh-en-c-api
+
+ rm ./kokoro-tts-zh-en-c-api
+ rm -rf kokoro-multi-lang-*
+
+ - name: Test Kokoro TTS (en)
+ shell: bash
+ run: |
+ gcc -o kokoro-tts-en-c-api ./c-api-examples/kokoro-tts-en-c-api.c \
+ -I ./build/install/include \
+ -L ./build/install/lib/ \
+ -l sherpa-onnx-c-api \
+ -l onnxruntime
+
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2
+ tar xf kokoro-en-v0_19.tar.bz2
+ rm kokoro-en-v0_19.tar.bz2
+
+ export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
+ export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
+
+ ./kokoro-tts-en-c-api
+
+ rm ./kokoro-tts-en-c-api
+ rm -rf kokoro-en-*
+
+ - uses: actions/upload-artifact@v4
+ with:
+ name: kokoro-tts-${{ matrix.os }}
+ path: ./generated-kokoro-*.wav
+
+ - name: Test Matcha TTS (zh)
+ shell: bash
+ run: |
+ gcc -o matcha-tts-zh-c-api ./c-api-examples/matcha-tts-zh-c-api.c \
+ -I ./build/install/include \
+ -L ./build/install/lib/ \
+ -l sherpa-onnx-c-api \
+ -l onnxruntime
+
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
+ tar xvf matcha-icefall-zh-baker.tar.bz2
+ rm matcha-icefall-zh-baker.tar.bz2
+
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
+
+ export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
+ export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
+
+ ./matcha-tts-zh-c-api
+
+ rm ./matcha-tts-zh-c-api
+ rm -rf matcha-icefall-*
+ rm hifigan_v2.onnx
+
+ - name: Test Matcha TTS (en)
+ shell: bash
+ run: |
+ gcc -o matcha-tts-en-c-api ./c-api-examples/matcha-tts-en-c-api.c \
+ -I ./build/install/include \
+ -L ./build/install/lib/ \
+ -l sherpa-onnx-c-api \
+ -l onnxruntime
+
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
+ tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
+ rm matcha-icefall-en_US-ljspeech.tar.bz2
+
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
+
+ export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
+ export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
+
+ ./matcha-tts-en-c-api
+
+ rm ./matcha-tts-en-c-api
+ rm -rf matcha-icefall-*
+ rm hifigan_v2.onnx
+
+ - uses: actions/upload-artifact@v4
+ with:
+ name: matcha-tts-${{ matrix.os }}
+ path: ./generated-matcha-*.wav
+
+ - name: Test vad + Whisper tiny.en
+ shell: bash
+ run: |
+ gcc -o vad-whisper-c-api ./c-api-examples/vad-whisper-c-api.c \
+ -I ./build/install/include \
+ -L ./build/install/lib/ \
+ -l sherpa-onnx-c-api \
+ -l onnxruntime
+
+ # Now download models
+ #
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav
+
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
+ tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
+ rm sherpa-onnx-whisper-tiny.en.tar.bz2
+
+ export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
+ export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
+
+ ./vad-whisper-c-api
+
+ rm -rf sherpa-onnx-*
+ rm -rf *.onnx
+ rm *.wav
+
+ - name: Test vad + Moonshine
+ shell: bash
+ run: |
+ gcc -o vad-moonshine-c-api ./c-api-examples/vad-moonshine-c-api.c \
+ -I ./build/install/include \
+ -L ./build/install/lib/ \
+ -l sherpa-onnx-c-api \
+ -l onnxruntime
+
+ # Now download models
+ #
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav
+
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+ tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+ rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+
+ export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
+ export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
+
+ ./vad-moonshine-c-api
+
+ rm -rf sherpa-onnx-*
+ rm -rf *.onnx
+ rm *.wav
+
+ - name: Test Moonshine
+ shell: bash
+ run: |
+ gcc -o moonshine-c-api ./c-api-examples/moonshine-c-api.c \
+ -I ./build/install/include \
+ -L ./build/install/lib/ \
+ -l sherpa-onnx-c-api \
+ -l onnxruntime
+
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+ tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+ rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+
+ export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
+ export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
+
+ ./moonshine-c-api
+
+ rm -rf sherpa-onnx-*
+
- name: Test ffmpeg
if: matrix.os == 'macos-latest'
shell: bash
diff --git a/.github/workflows/checksum.yaml b/.github/workflows/checksum.yaml
new file mode 100644
index 0000000000..e500209d60
--- /dev/null
+++ b/.github/workflows/checksum.yaml
@@ -0,0 +1,21 @@
+name: Create checksum
+
+on:
+ schedule:
+ - cron: "0 1 * * *" # Runs at 1:00 AM UTC daily
+ workflow_dispatch:
+
+jobs:
+ checksum:
+ if: github.repository_owner == 'k2-fsa'
+ runs-on: macos-latest
+ strategy:
+ matrix:
+ tag: [null, asr-models, tts-models, kws-models, speaker-recongition-models, audio-tagging-models, punctuation-models]
+ steps:
+ - name: Run checksum action
+ uses: thewh1teagle/checksum@v1
+ with:
+ tag: ${{ matrix.tag }}
+ env:
+ GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.github/workflows/cxx-api.yaml b/.github/workflows/cxx-api.yaml
new file mode 100644
index 0000000000..e5a99fb09b
--- /dev/null
+++ b/.github/workflows/cxx-api.yaml
@@ -0,0 +1,332 @@
+name: cxx-api
+
+on:
+ push:
+ branches:
+ - master
+ - cxx-api-asr-non-streaming
+ paths:
+ - '.github/workflows/cxx-api.yaml'
+ - 'cmake/**'
+ - 'sherpa-onnx/csrc/*'
+ - 'sherpa-onnx/c-api/*'
+ - 'cxx-api-examples/**'
+ pull_request:
+ branches:
+ - master
+ paths:
+ - '.github/workflows/cxx-api.yaml'
+ - 'cmake/**'
+ - 'sherpa-onnx/csrc/*'
+ - 'sherpa-onnx/c-api/*'
+ - 'cxx-api-examples/**'
+
+ workflow_dispatch:
+
+concurrency:
+ group: cxx-api-${{ github.ref }}
+ cancel-in-progress: true
+
+jobs:
+ cxx_api:
+ name: ${{ matrix.os }}
+ runs-on: ${{ matrix.os }}
+ strategy:
+ fail-fast: false
+ matrix:
+ os: [ubuntu-latest, macos-latest]
+
+ steps:
+ - uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+
+ - name: ccache
+ uses: hendrikmuhs/ccache-action@v1.2
+ with:
+ key: ${{ matrix.os }}-cxx-api-shared
+
+ - name: Build sherpa-onnx
+ shell: bash
+ run: |
+ export CMAKE_CXX_COMPILER_LAUNCHER=ccache
+ export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
+ cmake --version
+
+ mkdir build
+ cd build
+
+ cmake \
+ -D CMAKE_BUILD_TYPE=Release \
+ -D BUILD_SHARED_LIBS=ON \
+ -D CMAKE_INSTALL_PREFIX=./install \
+ -D SHERPA_ONNX_ENABLE_BINARY=OFF \
+ ..
+
+ make -j2 install
+
+ ls -lh install/lib
+ ls -lh install/include
+
+ if [[ ${{ matrix.os }} == ubuntu-latest ]]; then
+ ldd ./install/lib/libsherpa-onnx-c-api.so
+ ldd ./install/lib/libsherpa-onnx-cxx-api.so
+ echo "---"
+ readelf -d ./install/lib/libsherpa-onnx-c-api.so
+ readelf -d ./install/lib/libsherpa-onnx-cxx-api.so
+ fi
+
+ if [[ ${{ matrix.os }} == macos-latest ]]; then
+ otool -L ./install/lib/libsherpa-onnx-c-api.dylib
+ otool -L ./install/lib/libsherpa-onnx-cxx-api.dylib
+ fi
+
+ - name: Test KWS (zh)
+ shell: bash
+ run: |
+ g++ -std=c++17 -o kws-cxx-api ./cxx-api-examples/kws-cxx-api.cc \
+ -I ./build/install/include \
+ -L ./build/install/lib/ \
+ -l sherpa-onnx-cxx-api \
+ -l sherpa-onnx-c-api \
+ -l onnxruntime
+
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile.tar.bz2
+ tar xvf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile.tar.bz2
+ rm sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile.tar.bz2
+
+ export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
+ export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
+
+ ./kws-cxx-api
+
+ rm kws-cxx-api
+ rm -rf sherpa-onnx-kws-*
+
+ - name: Test Kokoro TTS (zh+en)
+ shell: bash
+ run: |
+ g++ -std=c++17 -o kokoro-tts-zh-en-cxx-api ./cxx-api-examples/kokoro-tts-zh-en-cxx-api.cc \
+ -I ./build/install/include \
+ -L ./build/install/lib/ \
+ -l sherpa-onnx-cxx-api \
+ -l sherpa-onnx-c-api \
+ -l onnxruntime
+
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2
+ tar xf kokoro-multi-lang-v1_0.tar.bz2
+ rm kokoro-multi-lang-v1_0.tar.bz2
+
+ export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
+ export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
+
+ ./kokoro-tts-zh-en-cxx-api
+
+ rm kokoro-tts-zh-en-cxx-api
+ rm -rf kokoro-*
+
+ - name: Test Kokoro TTS (en)
+ shell: bash
+ run: |
+ g++ -std=c++17 -o kokoro-tts-en-cxx-api ./cxx-api-examples/kokoro-tts-en-cxx-api.cc \
+ -I ./build/install/include \
+ -L ./build/install/lib/ \
+ -l sherpa-onnx-cxx-api \
+ -l sherpa-onnx-c-api \
+ -l onnxruntime
+
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2
+ tar xf kokoro-en-v0_19.tar.bz2
+ rm kokoro-en-v0_19.tar.bz2
+
+ export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
+ export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
+
+ ./kokoro-tts-en-cxx-api
+
+ rm kokoro-tts-en-cxx-api
+ rm -rf kokoro-en-*
+
+ - uses: actions/upload-artifact@v4
+ with:
+ name: kokoro-tts-${{ matrix.os }}
+ path: ./generated-kokoro-*.wav
+
+ - name: Test Matcha TTS (zh)
+ shell: bash
+ run: |
+ g++ -std=c++17 -o matcha-tts-zh-cxx-api ./cxx-api-examples/matcha-tts-zh-cxx-api.cc \
+ -I ./build/install/include \
+ -L ./build/install/lib/ \
+ -l sherpa-onnx-cxx-api \
+ -l sherpa-onnx-c-api \
+ -l onnxruntime
+
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
+ tar xvf matcha-icefall-zh-baker.tar.bz2
+ rm matcha-icefall-zh-baker.tar.bz2
+
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
+
+ export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
+ export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
+
+ ./matcha-tts-zh-cxx-api
+
+ rm -rf matcha-icefall-*
+ rm hifigan_v2.onnx
+ rm matcha-tts-zh-cxx-api
+
+ - name: Test Matcha TTS (en)
+ shell: bash
+ run: |
+ g++ -std=c++17 -o matcha-tts-en-cxx-api ./cxx-api-examples/matcha-tts-en-cxx-api.cc \
+ -I ./build/install/include \
+ -L ./build/install/lib/ \
+ -l sherpa-onnx-cxx-api \
+ -l sherpa-onnx-c-api \
+ -l onnxruntime
+
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
+ tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
+ rm matcha-icefall-en_US-ljspeech.tar.bz2
+
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
+
+ export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
+ export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
+
+ ./matcha-tts-en-cxx-api
+
+ rm matcha-tts-en-cxx-api
+ rm -rf matcha-icefall-*
+ rm hifigan_v2.onnx
+
+ - uses: actions/upload-artifact@v4
+ with:
+ name: matcha-tts-${{ matrix.os }}
+ path: ./generated-matcha-*.wav
+
+ - name: Test Moonshine tiny
+ shell: bash
+ run: |
+ g++ -std=c++17 -o moonshine-cxx-api ./cxx-api-examples/moonshine-cxx-api.cc \
+ -I ./build/install/include \
+ -L ./build/install/lib/ \
+ -l sherpa-onnx-cxx-api \
+ -l sherpa-onnx-c-api \
+ -l onnxruntime
+
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+ tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+ rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+
+ export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
+ export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
+
+ ./moonshine-cxx-api
+
+ rm -rf sherpa-onnx-*
+ rm ./moonshine-cxx-api
+
+ - name: Test whisper
+ shell: bash
+ run: |
+ g++ -std=c++17 -o whisper-cxx-api ./cxx-api-examples/whisper-cxx-api.cc \
+ -I ./build/install/include \
+ -L ./build/install/lib/ \
+ -l sherpa-onnx-cxx-api \
+ -l sherpa-onnx-c-api \
+ -l onnxruntime
+
+ ls -lh whisper-cxx-api
+
+ if [[ ${{ matrix.os }} == ubuntu-latest ]]; then
+ ldd ./whisper-cxx-api
+ echo "----"
+ readelf -d ./whisper-cxx-api
+ fi
+
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
+ tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
+ rm sherpa-onnx-whisper-tiny.en.tar.bz2
+
+ ls -lh sherpa-onnx-whisper-tiny.en
+ echo "---"
+ ls -lh sherpa-onnx-whisper-tiny.en/test_wavs
+
+ export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
+ export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
+
+ ./whisper-cxx-api
+
+ rm -rf sherpa-onnx-whisper-*
+ rm ./whisper-cxx-api
+
+ - name: Test SenseVoice
+ shell: bash
+ run: |
+ g++ -std=c++17 -o sense-voice-cxx-api ./cxx-api-examples/sense-voice-cxx-api.cc \
+ -I ./build/install/include \
+ -L ./build/install/lib/ \
+ -l sherpa-onnx-cxx-api \
+ -l sherpa-onnx-c-api \
+ -l onnxruntime
+
+ ls -lh sense-voice-cxx-api
+
+ if [[ ${{ matrix.os }} == ubuntu-latest ]]; then
+ ldd ./sense-voice-cxx-api
+ echo "----"
+ readelf -d ./sense-voice-cxx-api
+ fi
+
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
+ tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
+ rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
+
+ ls -lh sherpa-onnx-sense-voice-*
+ echo "---"
+ ls -lh sherpa-onnx-sense-voice-*/test_wavs
+
+ export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
+ export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
+
+ ./sense-voice-cxx-api
+
+ rm -rf sherpa-onnx-sense-voice-*
+ rm ./sense-voice-cxx-api
+
+ - name: Test streaming zipformer
+ shell: bash
+ run: |
+ g++ -std=c++17 -o streaming-zipformer-cxx-api ./cxx-api-examples/streaming-zipformer-cxx-api.cc \
+ -I ./build/install/include \
+ -L ./build/install/lib/ \
+ -l sherpa-onnx-cxx-api \
+ -l sherpa-onnx-c-api \
+ -l onnxruntime
+
+ ls -lh streaming-zipformer-cxx-api
+
+ if [[ ${{ matrix.os }} == ubuntu-latest ]]; then
+ ldd ./streaming-zipformer-cxx-api
+ echo "----"
+ readelf -d ./streaming-zipformer-cxx-api
+ fi
+
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
+ tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
+ rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
+
+ ls -lh sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20
+ echo "---"
+ ls -lh sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs
+
+ export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
+ export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
+
+ ./streaming-zipformer-cxx-api
+
+ rm -rf sherpa-onnx-streaming-zipformer-*
+ rm ./streaming-zipformer-cxx-api
diff --git a/.github/workflows/dot-net.yaml b/.github/workflows/dot-net.yaml
index 36637a9e2c..899cb99956 100644
--- a/.github/workflows/dot-net.yaml
+++ b/.github/workflows/dot-net.yaml
@@ -90,7 +90,7 @@ jobs:
export GIT_CLONE_PROTECTION_ACTIVE=false
export GIT_LFS_SKIP_SMUDGE=1
- git clone https://huggingface.co/csukuangfj/sherpa-onnx-libs huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-libs huggingface
cd huggingface
git fetch
@@ -125,9 +125,7 @@ jobs:
- name: Setup .NET
uses: actions/setup-dotnet@v4
with:
- dotnet-version: |
- 6.0.x
- 7.0.x
+ dotnet-version: 8.0.x
- name: Install Python dependencies
shell: bash
diff --git a/.github/workflows/export-3dspeaker-to-onnx.yaml b/.github/workflows/export-3dspeaker-to-onnx.yaml
index 42c965c909..e62d42784b 100644
--- a/.github/workflows/export-3dspeaker-to-onnx.yaml
+++ b/.github/workflows/export-3dspeaker-to-onnx.yaml
@@ -15,7 +15,7 @@ jobs:
strategy:
fail-fast: false
matrix:
- os: [macos-latest]
+ os: [ubuntu-latest]
python-version: ["3.8"]
steps:
@@ -43,3 +43,28 @@ jobs:
repo_name: k2-fsa/sherpa-onnx
repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
tag: speaker-recongition-models
+
+ - name: Publish to huggingface
+ env:
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
+ uses: nick-fields/retry@v3
+ with:
+ max_attempts: 20
+ timeout_seconds: 200
+ shell: bash
+ command: |
+ git config --global user.email "csukuangfj@gmail.com"
+ git config --global user.name "Fangjun Kuang"
+
+ d=speaker-embedding-models
+ export GIT_LFS_SKIP_SMUDGE=1
+ export GIT_CLONE_PROTECTION_ACTIVE=false
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d huggingface
+ mv -v ./*.onnx ./huggingface
+ cd huggingface
+ git lfs track "*.onnx"
+ git status
+ git add .
+ git status
+ git commit -m "add models"
+ git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d main
diff --git a/.github/workflows/export-ced-to-onnx.yaml b/.github/workflows/export-ced-to-onnx.yaml
index 70c4cc5fb5..2f714bb80b 100644
--- a/.github/workflows/export-ced-to-onnx.yaml
+++ b/.github/workflows/export-ced-to-onnx.yaml
@@ -66,7 +66,7 @@ jobs:
export GIT_LFS_SKIP_SMUDGE=1
d=sherpa-onnx-ced-$m-audio-tagging-2024-04-19
export GIT_CLONE_PROTECTION_ACTIVE=false
- git clone https://huggingface.co/k2-fsa/$d huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/k2-fsa/$d huggingface
mv -v $d/* huggingface
cd huggingface
git lfs track "*.onnx"
diff --git a/.github/workflows/export-kokoro.yaml b/.github/workflows/export-kokoro.yaml
new file mode 100644
index 0000000000..e6aae1da62
--- /dev/null
+++ b/.github/workflows/export-kokoro.yaml
@@ -0,0 +1,226 @@
+name: export-kokoro-to-onnx
+
+on:
+ push:
+ branches:
+ - export-kokoro
+
+ workflow_dispatch:
+
+concurrency:
+ group: export-kokoro-to-onnx-${{ github.ref }}
+ cancel-in-progress: true
+
+jobs:
+ export-kokoro-to-onnx:
+ if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
+ name: export kokoro ${{ matrix.version }}
+ runs-on: ${{ matrix.os }}
+ strategy:
+ fail-fast: false
+ matrix:
+ os: [ubuntu-latest]
+ version: ["0.19", "1.0"]
+ python-version: ["3.10"]
+
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: Setup Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v5
+ with:
+ python-version: ${{ matrix.python-version }}
+
+ - name: Install Python dependencies
+ shell: bash
+ run: |
+ pip install "numpy<=1.26.4" onnx==1.16.0 onnxruntime==1.17.1 librosa soundfile piper_phonemize -f https://k2-fsa.github.io/icefall/piper_phonemize.html misaki[en] misaki[zh] torch==2.6.0+cpu -f https://download.pytorch.org/whl/torch
+
+ - name: Run
+ shell: bash
+ run: |
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/espeak-ng-data.tar.bz2
+ tar xf espeak-ng-data.tar.bz2
+ rm espeak-ng-data.tar.bz2
+ cd scripts/kokoro
+ v=${{ matrix.version }}
+ if [[ $v = "0.19" ]]; then
+ ./run.sh
+ elif [[ $v == "1.0" ]]; then
+ cd v1.0
+ ./run.sh
+ fi
+
+ - name: Collect results ${{ matrix.version }}
+ if: matrix.version == '0.19'
+ shell: bash
+ run: |
+ src=scripts/kokoro
+
+ d=kokoro-en-v0_19
+ mkdir $d
+ cp -a LICENSE $d/LICENSE
+ cp -a espeak-ng-data $d/
+ cp -v $src/kokoro-v0_19.onnx $d/model.onnx
+ cp -v $src/voices.bin $d/
+ cp -v $src/tokens.txt $d/
+ cp -v $src/README-new.md $d/README.md
+ ls -lh $d/
+ tar cjfv $d.tar.bz2 $d
+ rm -rf $d
+
+ ls -lh $d.tar.bz2
+
+ - name: Collect results ${{ matrix.version }}
+ if: matrix.version == '1.0'
+ shell: bash
+ run: |
+ curl -SL -O https://github.com/csukuangfj/cppjieba/releases/download/sherpa-onnx-2024-04-19/dict.tar.bz2
+ tar xvf dict.tar.bz2
+ rm dict.tar.bz2
+
+ curl -SL -o date-zh.fst https://huggingface.co/csukuangfj/icefall-tts-aishell3-vits-low-2024-04-06/resolve/main/data/date.fst
+ curl -SL -o number-zh.fst https://huggingface.co/csukuangfj/icefall-tts-aishell3-vits-low-2024-04-06/resolve/main/data/number.fst
+ curl -SL -o phone-zh.fst https://huggingface.co/csukuangfj/icefall-tts-aishell3-vits-low-2024-04-06/resolve/main/data/phone.fst
+
+ src=scripts/kokoro/v1.0
+
+ d=kokoro-multi-lang-v1_0
+ mkdir $d
+ cp -a LICENSE $d/LICENSE
+ cp -a espeak-ng-data $d/
+ cp -v $src/kokoro.onnx $d/model.onnx
+ cp -v $src/voices.bin $d/
+ cp -v $src/tokens.txt $d/
+ cp -v $src/lexicon*.txt $d/
+ cp -v $src/README.md $d/README.md
+ cp -av dict $d/
+ cp -v ./*.fst $d/
+ ls -lh $d/
+ echo "---"
+ ls -lh $d/dict
+
+ tar cjfv $d.tar.bz2 $d
+ rm -rf $d
+
+ ls -lh $d.tar.bz2
+
+ - name: Publish to huggingface ${{ matrix.version }}
+ if: matrix.version == '0.19'
+ env:
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
+ uses: nick-fields/retry@v3
+ with:
+ max_attempts: 20
+ timeout_seconds: 200
+ shell: bash
+ command: |
+ git config --global user.email "csukuangfj@gmail.com"
+ git config --global user.name "Fangjun Kuang"
+
+ rm -rf huggingface
+ export GIT_LFS_SKIP_SMUDGE=1
+ export GIT_CLONE_PROTECTION_ACTIVE=false
+
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kokoro-en-v0_19 huggingface
+ cd huggingface
+ rm -rf ./*
+ git fetch
+ git pull
+
+ git lfs track "cmn_dict"
+ git lfs track "ru_dict"
+ git lfs track "*.wav"
+
+ cp -a ../espeak-ng-data ./
+ mkdir -p test_wavs
+
+ cp -v ../scripts/kokoro/kokoro-v0_19.onnx ./model.onnx
+
+ cp -v ../scripts/kokoro/kokoro-v0_19-*.wav ./test_wavs/
+
+ cp -v ../scripts/kokoro/tokens.txt .
+ cp -v ../scripts/kokoro/voices.bin .
+ cp -v ../scripts/kokoro/README-new.md ./README.md
+ cp -v ../LICENSE ./
+
+ git lfs track "*.onnx"
+ git add .
+
+ ls -lh
+
+ git status
+
+ git commit -m "add models"
+ git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kokoro-en-v0_19 main || true
+
+ - name: Publish to huggingface ${{ matrix.version }}
+ if: matrix.version == '1.0'
+ env:
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
+ uses: nick-fields/retry@v3
+ with:
+ max_attempts: 20
+ timeout_seconds: 200
+ shell: bash
+ command: |
+ git config --global user.email "csukuangfj@gmail.com"
+ git config --global user.name "Fangjun Kuang"
+
+ rm -rf huggingface
+ export GIT_LFS_SKIP_SMUDGE=1
+ export GIT_CLONE_PROTECTION_ACTIVE=false
+
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kokoro-multi-lang-v1_0 huggingface
+ cd huggingface
+ rm -rf ./*
+ git fetch
+ git pull
+
+ git lfs track "cmn_dict"
+ git lfs track "ru_dict"
+ git lfs track "*.wav"
+ git lfs track "lexicon*.txt"
+
+ cp -a ../espeak-ng-data ./
+
+ cp -v ../scripts/kokoro/v1.0/kokoro.onnx ./model.onnx
+
+
+ cp -v ../scripts/kokoro/v1.0/tokens.txt .
+ cp -v ../scripts/kokoro/v1.0/voices.bin .
+ cp -v ../scripts/kokoro/v1.0/lexicon*.txt .
+ cp -v ../scripts/kokoro/v1.0/README.md ./README.md
+ cp -v ../LICENSE ./
+ cp -av ../dict ./
+ cp -v ../*.fst ./
+
+ git lfs track "*.onnx"
+ git add .
+
+ ls -lh
+
+ git status
+
+ git commit -m "add models"
+ git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kokoro-multi-lang-v1_0 main || true
+
+ - name: Release
+ if: github.repository_owner == 'csukuangfj'
+ uses: svenstaro/upload-release-action@v2
+ with:
+ file_glob: true
+ file: ./*.tar.bz2
+ overwrite: true
+ repo_name: k2-fsa/sherpa-onnx
+ repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
+ tag: tts-models
+
+ - name: Release
+ if: github.repository_owner == 'k2-fsa'
+ uses: svenstaro/upload-release-action@v2
+ with:
+ file_glob: true
+ file: ./*.tar.bz2
+ overwrite: true
+ tag: tts-models
diff --git a/.github/workflows/export-libriheavy.yaml b/.github/workflows/export-libriheavy.yaml
index cfe0a28d20..69c22ef243 100644
--- a/.github/workflows/export-libriheavy.yaml
+++ b/.github/workflows/export-libriheavy.yaml
@@ -56,7 +56,7 @@ jobs:
src=sherpa-onnx-zipformer-en-libriheavy-20230926-$m
echo "Process $src"
- git clone https://huggingface.co/csukuangfj/$src huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$src huggingface
cd huggingface
git fetch
git pull
@@ -100,7 +100,7 @@ jobs:
src=sherpa-onnx-zipformer-en-libriheavy-20230830-$m-punct-case
echo "Process $src"
- git clone https://huggingface.co/csukuangfj/$src huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$src huggingface
cd huggingface
git fetch
git pull
diff --git a/.github/workflows/export-melo-tts-to-onnx.yaml b/.github/workflows/export-melo-tts-to-onnx.yaml
index 0dc9bfe9d7..d0715b95a3 100644
--- a/.github/workflows/export-melo-tts-to-onnx.yaml
+++ b/.github/workflows/export-melo-tts-to-onnx.yaml
@@ -40,7 +40,7 @@ jobs:
name: test.wav
path: scripts/melo-tts/test.wav
- - name: Publish to huggingface
+ - name: Publish to huggingface (Chinese + English)
env:
HF_TOKEN: ${{ secrets.HF_TOKEN }}
uses: nick-fields/retry@v3
@@ -56,19 +56,19 @@ jobs:
export GIT_LFS_SKIP_SMUDGE=1
export GIT_CLONE_PROTECTION_ACTIVE=false
- git clone https://huggingface.co/csukuangfj/vits-melo-tts-zh_en huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/vits-melo-tts-zh_en huggingface
cd huggingface
git fetch
git pull
echo "pwd: $PWD"
- ls -lh ../scripts/melo-tts
+ ls -lh ../scripts/melo-tts/zh_en
rm -rf ./
- cp -v ../scripts/melo-tts/*.onnx .
- cp -v ../scripts/melo-tts/lexicon.txt .
- cp -v ../scripts/melo-tts/tokens.txt .
- cp -v ../scripts/melo-tts/README.md .
+ cp -v ../scripts/melo-tts/zh_en/*.onnx .
+ cp -v ../scripts/melo-tts/zh_en/lexicon.txt .
+ cp -v ../scripts/melo-tts/zh_en/tokens.txt .
+ cp -v ../scripts/melo-tts/zh_en/README.md .
curl -SL -O https://raw.githubusercontent.com/myshell-ai/MeloTTS/main/LICENSE
@@ -102,6 +102,60 @@ jobs:
tar cjvf $dst.tar.bz2 $dst
rm -rf $dst
+ - name: Publish to huggingface (English)
+ env:
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
+ uses: nick-fields/retry@v3
+ with:
+ max_attempts: 20
+ timeout_seconds: 200
+ shell: bash
+ command: |
+ git config --global user.email "csukuangfj@gmail.com"
+ git config --global user.name "Fangjun Kuang"
+
+ rm -rf huggingface
+ export GIT_LFS_SKIP_SMUDGE=1
+ export GIT_CLONE_PROTECTION_ACTIVE=false
+
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/vits-melo-tts-en huggingface
+ cd huggingface
+ git fetch
+ git pull
+ echo "pwd: $PWD"
+ ls -lh ../scripts/melo-tts/en
+
+ rm -rf ./
+
+ cp -v ../scripts/melo-tts/en/*.onnx .
+ cp -v ../scripts/melo-tts/en/lexicon.txt .
+ cp -v ../scripts/melo-tts/en/tokens.txt .
+ cp -v ../scripts/melo-tts/en/README.md .
+
+ curl -SL -O https://raw.githubusercontent.com/myshell-ai/MeloTTS/main/LICENSE
+
+ git lfs track "*.onnx"
+ git add .
+
+ ls -lh
+
+ git status
+
+ git diff
+
+ git commit -m "add models"
+ git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/vits-melo-tts-en main || true
+
+ cd ..
+
+ rm -rf huggingface/.git*
+ dst=vits-melo-tts-en
+
+ mv huggingface $dst
+
+ tar cjvf $dst.tar.bz2 $dst
+ rm -rf $dst
+
- name: Release
uses: svenstaro/upload-release-action@v2
with:
diff --git a/.github/workflows/export-moonshine-to-onnx.yaml b/.github/workflows/export-moonshine-to-onnx.yaml
new file mode 100644
index 0000000000..2e73c2e049
--- /dev/null
+++ b/.github/workflows/export-moonshine-to-onnx.yaml
@@ -0,0 +1,106 @@
+name: export-moonshine-to-onnx
+
+on:
+ workflow_dispatch:
+
+concurrency:
+ group: export-moonshine-to-onnx-${{ github.ref }}
+ cancel-in-progress: true
+
+jobs:
+ export-moonshine-to-onnx:
+ if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
+ name: export moonshine models to ONNX
+ runs-on: ${{ matrix.os }}
+ strategy:
+ fail-fast: false
+ matrix:
+ os: [macos-latest]
+ python-version: ["3.10"]
+
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: Setup Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v5
+ with:
+ python-version: ${{ matrix.python-version }}
+
+ - name: Install Python dependencies
+ shell: bash
+ run: |
+ pip install -q onnx onnxruntime librosa tokenizers soundfile
+
+ - name: Run
+ shell: bash
+ run: |
+ pushd scripts/moonshine
+ ./run.sh
+ popd
+
+ mv -v scripts/moonshine/*.tar.bz2 .
+ mv -v scripts/moonshine/sherpa-onnx-* ./
+
+ - name: Release
+ uses: svenstaro/upload-release-action@v2
+ with:
+ file_glob: true
+ file: ./*.tar.bz2
+ overwrite: true
+ repo_name: k2-fsa/sherpa-onnx
+ repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
+ tag: asr-models
+
+ - name: Publish to huggingface (tiny)
+ env:
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
+ uses: nick-fields/retry@v3
+ with:
+ max_attempts: 20
+ timeout_seconds: 200
+ shell: bash
+ command: |
+ git config --global user.email "csukuangfj@gmail.com"
+ git config --global user.name "Fangjun Kuang"
+
+ d=sherpa-onnx-moonshine-tiny-en-int8
+ export GIT_LFS_SKIP_SMUDGE=1
+ export GIT_CLONE_PROTECTION_ACTIVE=false
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d huggingface
+ mv -v $d/* ./huggingface
+ cd huggingface
+ git lfs track "*.onnx"
+ git lfs track "*.wav"
+ git status
+ git add .
+ git status
+ git commit -m "add models"
+ git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d main
+ rm -rf huggingface
+
+ - name: Publish to huggingface (base)
+ env:
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
+ uses: nick-fields/retry@v3
+ with:
+ max_attempts: 20
+ timeout_seconds: 200
+ shell: bash
+ command: |
+ git config --global user.email "csukuangfj@gmail.com"
+ git config --global user.name "Fangjun Kuang"
+
+ d=sherpa-onnx-moonshine-base-en-int8
+ export GIT_LFS_SKIP_SMUDGE=1
+ export GIT_CLONE_PROTECTION_ACTIVE=false
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d huggingface
+ mv -v $d/* ./huggingface
+ cd huggingface
+ git lfs track "*.onnx"
+ git lfs track "*.wav"
+ git status
+ git add .
+ git status
+ git commit -m "add models"
+ git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d main
+ rm -rf huggingface
diff --git a/.github/workflows/export-nemo-fast-conformer-hybrid-transducer-ctc-non-streaming.yaml b/.github/workflows/export-nemo-fast-conformer-hybrid-transducer-ctc-non-streaming.yaml
index 138c708ad7..bbabfb60cb 100644
--- a/.github/workflows/export-nemo-fast-conformer-hybrid-transducer-ctc-non-streaming.yaml
+++ b/.github/workflows/export-nemo-fast-conformer-hybrid-transducer-ctc-non-streaming.yaml
@@ -67,7 +67,7 @@ jobs:
rm -rf huggingface
export GIT_LFS_SKIP_SMUDGE=1
export GIT_CLONE_PROTECTION_ACTIVE=false
- git clone https://huggingface.co/csukuangfj/$m huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$m huggingface
cp -av $m/* huggingface
cd huggingface
git lfs track "*.onnx"
diff --git a/.github/workflows/export-nemo-fast-conformer-hybrid-transducer-transducer-non-streaming.yaml b/.github/workflows/export-nemo-fast-conformer-hybrid-transducer-transducer-non-streaming.yaml
index 7a7b7fc4eb..4a7e2339ed 100644
--- a/.github/workflows/export-nemo-fast-conformer-hybrid-transducer-transducer-non-streaming.yaml
+++ b/.github/workflows/export-nemo-fast-conformer-hybrid-transducer-transducer-non-streaming.yaml
@@ -67,7 +67,7 @@ jobs:
rm -rf huggingface
export GIT_LFS_SKIP_SMUDGE=1
export GIT_CLONE_PROTECTION_ACTIVE=false
- git clone https://huggingface.co/csukuangfj/$m huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$m huggingface
cp -av $m/* huggingface
cd huggingface
git lfs track "*.onnx"
diff --git a/.github/workflows/export-nemo-giga-am-to-onnx.yaml b/.github/workflows/export-nemo-giga-am-to-onnx.yaml
new file mode 100644
index 0000000000..1af754d0b4
--- /dev/null
+++ b/.github/workflows/export-nemo-giga-am-to-onnx.yaml
@@ -0,0 +1,116 @@
+name: export-nemo-giga-am-to-onnx
+
+on:
+ workflow_dispatch:
+
+concurrency:
+ group: export-nemo-giga-am-to-onnx-${{ github.ref }}
+ cancel-in-progress: true
+
+jobs:
+ export-nemo-giga-am-to-onnx:
+ if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
+ name: export nemo GigaAM models to ONNX
+ runs-on: ${{ matrix.os }}
+ strategy:
+ fail-fast: false
+ matrix:
+ os: [macos-latest]
+ python-version: ["3.10"]
+
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: Setup Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v5
+ with:
+ python-version: ${{ matrix.python-version }}
+
+ - name: Run CTC
+ shell: bash
+ run: |
+ pushd scripts/nemo/GigaAM
+ ./run-ctc.sh
+ popd
+
+ d=sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24
+ mkdir $d
+ mkdir $d/test_wavs
+ rm scripts/nemo/GigaAM/model.onnx
+ mv -v scripts/nemo/GigaAM/*.int8.onnx $d/
+ cp -v scripts/nemo/GigaAM/*.md $d/
+ mv -v scripts/nemo/GigaAM/*.pdf $d/
+ mv -v scripts/nemo/GigaAM/tokens.txt $d/
+ mv -v scripts/nemo/GigaAM/*.wav $d/test_wavs/
+ mv -v scripts/nemo/GigaAM/run-ctc.sh $d/
+ mv -v scripts/nemo/GigaAM/*-ctc.py $d/
+
+ ls -lh scripts/nemo/GigaAM/
+
+ ls -lh $d
+
+ tar cjvf ${d}.tar.bz2 $d
+
+ - name: Run Transducer
+ shell: bash
+ run: |
+ pushd scripts/nemo/GigaAM
+ ./run-rnnt.sh
+ popd
+
+ d=sherpa-onnx-nemo-transducer-giga-am-russian-2024-10-24
+ mkdir $d
+ mkdir $d/test_wavs
+
+ mv -v scripts/nemo/GigaAM/encoder.int8.onnx $d/
+ mv -v scripts/nemo/GigaAM/decoder.onnx $d/
+ mv -v scripts/nemo/GigaAM/joiner.onnx $d/
+
+ cp -v scripts/nemo/GigaAM/*.md $d/
+ mv -v scripts/nemo/GigaAM/*.pdf $d/
+ mv -v scripts/nemo/GigaAM/tokens.txt $d/
+ mv -v scripts/nemo/GigaAM/*.wav $d/test_wavs/
+ mv -v scripts/nemo/GigaAM/run-rnnt.sh $d/
+ mv -v scripts/nemo/GigaAM/*-rnnt.py $d/
+
+ ls -lh scripts/nemo/GigaAM/
+
+ ls -lh $d
+
+ tar cjvf ${d}.tar.bz2 $d
+
+ - name: Release
+ uses: svenstaro/upload-release-action@v2
+ with:
+ file_glob: true
+ file: ./*.tar.bz2
+ overwrite: true
+ repo_name: k2-fsa/sherpa-onnx
+ repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
+ tag: asr-models
+
+ - name: Publish to huggingface (Transducer)
+ env:
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
+ uses: nick-fields/retry@v3
+ with:
+ max_attempts: 20
+ timeout_seconds: 200
+ shell: bash
+ command: |
+ git config --global user.email "csukuangfj@gmail.com"
+ git config --global user.name "Fangjun Kuang"
+
+ d=sherpa-onnx-nemo-transducer-giga-am-russian-2024-10-24
+ export GIT_LFS_SKIP_SMUDGE=1
+ export GIT_CLONE_PROTECTION_ACTIVE=false
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d huggingface
+ mv -v $d/* ./huggingface
+ cd huggingface
+ git lfs track "*.onnx"
+ git lfs track "*.wav"
+ git status
+ git add .
+ git status
+ git commit -m "add models"
+ git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d main
diff --git a/.github/workflows/export-nemo-speaker-verification-to-onnx.yaml b/.github/workflows/export-nemo-speaker-verification-to-onnx.yaml
index 180c3dc12a..5059664130 100644
--- a/.github/workflows/export-nemo-speaker-verification-to-onnx.yaml
+++ b/.github/workflows/export-nemo-speaker-verification-to-onnx.yaml
@@ -15,7 +15,7 @@ jobs:
strategy:
fail-fast: false
matrix:
- os: [ubuntu-latest]
+ os: [macos-latest]
python-version: ["3.10"]
steps:
@@ -43,3 +43,28 @@ jobs:
repo_name: k2-fsa/sherpa-onnx
repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
tag: speaker-recongition-models
+
+ - name: Publish to huggingface
+ env:
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
+ uses: nick-fields/retry@v3
+ with:
+ max_attempts: 20
+ timeout_seconds: 200
+ shell: bash
+ command: |
+ git config --global user.email "csukuangfj@gmail.com"
+ git config --global user.name "Fangjun Kuang"
+
+ d=speaker-embedding-models
+ export GIT_LFS_SKIP_SMUDGE=1
+ export GIT_CLONE_PROTECTION_ACTIVE=false
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d huggingface
+ mv -v ./*.onnx ./huggingface
+ cd huggingface
+ git lfs track "*.onnx"
+ git status
+ git add .
+ git status
+ git commit -m "add models"
+ git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d main
diff --git a/.github/workflows/export-pyannote-segmentation-to-onnx.yaml b/.github/workflows/export-pyannote-segmentation-to-onnx.yaml
index 300aca500c..53f8dac7d4 100644
--- a/.github/workflows/export-pyannote-segmentation-to-onnx.yaml
+++ b/.github/workflows/export-pyannote-segmentation-to-onnx.yaml
@@ -29,7 +29,7 @@ jobs:
- name: Install pyannote
shell: bash
run: |
- pip install pyannote.audio onnx onnxruntime
+ pip install pyannote.audio onnx==1.15.0 onnxruntime==1.16.3
- name: Run
shell: bash
@@ -75,7 +75,7 @@ jobs:
d=sherpa-onnx-pyannote-segmentation-3-0
export GIT_LFS_SKIP_SMUDGE=1
export GIT_CLONE_PROTECTION_ACTIVE=false
- git clone https://huggingface.co/csukuangfj/$d huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d huggingface
cp -v $d/* ./huggingface
cd huggingface
git lfs track "*.onnx"
diff --git a/.github/workflows/export-revai-segmentation-to-onnx.yaml b/.github/workflows/export-revai-segmentation-to-onnx.yaml
new file mode 100644
index 0000000000..d82f7c4e09
--- /dev/null
+++ b/.github/workflows/export-revai-segmentation-to-onnx.yaml
@@ -0,0 +1,86 @@
+name: export-revai-segmentation-to-onnx
+
+on:
+ workflow_dispatch:
+
+concurrency:
+ group: export-revai-segmentation-to-onnx-${{ github.ref }}
+ cancel-in-progress: true
+
+jobs:
+ export-revai-segmentation-to-onnx:
+ if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
+ name: export revai segmentation models to ONNX
+ runs-on: ${{ matrix.os }}
+ strategy:
+ fail-fast: false
+ matrix:
+ os: [macos-latest]
+ python-version: ["3.10"]
+
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: Setup Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v5
+ with:
+ python-version: ${{ matrix.python-version }}
+
+ - name: Install pyannote
+ shell: bash
+ run: |
+ pip install pyannote.audio onnx==1.15.0 onnxruntime==1.16.3
+
+ - name: Run
+ shell: bash
+ run: |
+ d=sherpa-onnx-reverb-diarization-v1
+ src=$PWD/$d
+ mkdir -p $src
+
+ pushd scripts/pyannote/segmentation
+ ./run-revai.sh
+ cp ./*.onnx $src/
+ cp ./README.md $src/
+ cp ./LICENSE $src/
+ cp ./run-revai.sh $src/run.sh
+ cp ./*.py $src/
+
+ popd
+ ls -lh $d
+ tar cjfv $d.tar.bz2 $d
+
+ - name: Release
+ uses: svenstaro/upload-release-action@v2
+ with:
+ file_glob: true
+ file: ./*.tar.bz2
+ overwrite: true
+ repo_name: k2-fsa/sherpa-onnx
+ repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
+ tag: speaker-segmentation-models
+
+ - name: Publish to huggingface
+ env:
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
+ uses: nick-fields/retry@v3
+ with:
+ max_attempts: 20
+ timeout_seconds: 200
+ shell: bash
+ command: |
+ git config --global user.email "csukuangfj@gmail.com"
+ git config --global user.name "Fangjun Kuang"
+
+ d=sherpa-onnx-reverb-diarization-v1
+ export GIT_LFS_SKIP_SMUDGE=1
+ export GIT_CLONE_PROTECTION_ACTIVE=false
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d huggingface
+ cp -v $d/* ./huggingface
+ cd huggingface
+ git lfs track "*.onnx"
+ git status
+ git add .
+ git status
+ git commit -m "add models"
+ git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d main
diff --git a/.github/workflows/export-sense-voice-to-onnx.yaml b/.github/workflows/export-sense-voice-to-onnx.yaml
index 41a9a31a64..1c3e917296 100644
--- a/.github/workflows/export-sense-voice-to-onnx.yaml
+++ b/.github/workflows/export-sense-voice-to-onnx.yaml
@@ -66,7 +66,7 @@ jobs:
export GIT_LFS_SKIP_SMUDGE=1
export GIT_CLONE_PROTECTION_ACTIVE=false
- git clone https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17 huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17 huggingface
cd huggingface
git fetch
git pull
diff --git a/.github/workflows/export-telespeech-ctc.yaml b/.github/workflows/export-telespeech-ctc.yaml
index 102c3884eb..4f66d7ca4b 100644
--- a/.github/workflows/export-telespeech-ctc.yaml
+++ b/.github/workflows/export-telespeech-ctc.yaml
@@ -60,7 +60,7 @@ jobs:
export GIT_CLONE_PROTECTION_ACTIVE=false
- GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-telespeech-ctc-zh-2024-06-04 hf
+ GIT_LFS_SKIP_SMUDGE=1 git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-telespeech-ctc-zh-2024-06-04 hf
cp -a $src/* hf/
cd hf
git lfs track "*.pdf"
@@ -84,7 +84,7 @@ jobs:
export GIT_CLONE_PROTECTION_ACTIVE=false
rm -rf hf
- GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04 hf
+ GIT_LFS_SKIP_SMUDGE=1 git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04 hf
cp -a $src/* hf/
cd hf
git lfs track "*.pdf"
diff --git a/.github/workflows/export-wenet-to-onnx.yaml b/.github/workflows/export-wenet-to-onnx.yaml
index 626f477e61..7ef3a54b64 100644
--- a/.github/workflows/export-wenet-to-onnx.yaml
+++ b/.github/workflows/export-wenet-to-onnx.yaml
@@ -49,7 +49,7 @@ jobs:
export GIT_LFS_SKIP_SMUDGE=1
export GIT_CLONE_PROTECTION_ACTIVE=false
- git clone https://huggingface.co/csukuangfj/sherpa-onnx-zh-wenet-aishell huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-zh-wenet-aishell huggingface
cd huggingface
git fetch
git pull
@@ -98,7 +98,7 @@ jobs:
export GIT_LFS_SKIP_SMUDGE=1
export GIT_CLONE_PROTECTION_ACTIVE=false
- git clone https://huggingface.co/csukuangfj/sherpa-onnx-zh-wenet-aishell2 huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-zh-wenet-aishell2 huggingface
cd huggingface
git fetch
git pull
@@ -147,7 +147,7 @@ jobs:
export GIT_LFS_SKIP_SMUDGE=1
export GIT_CLONE_PROTECTION_ACTIVE=false
- git clone https://huggingface.co/csukuangfj/sherpa-onnx-zh-wenet-multi-cn huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-zh-wenet-multi-cn huggingface
cd huggingface
git fetch
git pull
@@ -196,7 +196,7 @@ jobs:
export GIT_LFS_SKIP_SMUDGE=1
export GIT_CLONE_PROTECTION_ACTIVE=false
- git clone https://huggingface.co/csukuangfj/sherpa-onnx-zh-wenet-wenetspeech huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-zh-wenet-wenetspeech huggingface
cd huggingface
git fetch
git pull
@@ -245,7 +245,7 @@ jobs:
export GIT_LFS_SKIP_SMUDGE=1
export GIT_CLONE_PROTECTION_ACTIVE=false
- git clone https://huggingface.co/csukuangfj/sherpa-onnx-en-wenet-librispeech huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-en-wenet-librispeech huggingface
cd huggingface
git fetch
git pull
@@ -295,7 +295,7 @@ jobs:
export GIT_LFS_SKIP_SMUDGE=1
export GIT_CLONE_PROTECTION_ACTIVE=false
- git clone https://huggingface.co/csukuangfj/sherpa-onnx-en-wenet-gigaspeech huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-en-wenet-gigaspeech huggingface
cd huggingface
git fetch
git pull
diff --git a/.github/workflows/export-wespeaker-to-onnx.yaml b/.github/workflows/export-wespeaker-to-onnx.yaml
index fd167ab211..05694f693a 100644
--- a/.github/workflows/export-wespeaker-to-onnx.yaml
+++ b/.github/workflows/export-wespeaker-to-onnx.yaml
@@ -48,3 +48,28 @@ jobs:
repo_name: k2-fsa/sherpa-onnx
repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
tag: speaker-recongition-models
+
+ - name: Publish to huggingface
+ env:
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
+ uses: nick-fields/retry@v3
+ with:
+ max_attempts: 20
+ timeout_seconds: 200
+ shell: bash
+ command: |
+ git config --global user.email "csukuangfj@gmail.com"
+ git config --global user.name "Fangjun Kuang"
+
+ d=speaker-embedding-models
+ export GIT_LFS_SKIP_SMUDGE=1
+ export GIT_CLONE_PROTECTION_ACTIVE=false
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d huggingface
+ mv -v ./*.onnx ./huggingface
+ cd huggingface
+ git lfs track "*.onnx"
+ git status
+ git add .
+ git status
+ git commit -m "add models"
+ git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d main
diff --git a/.github/workflows/export-whisper-to-onnx.yaml b/.github/workflows/export-whisper-to-onnx.yaml
index a50aa99d74..53aebdd3b6 100644
--- a/.github/workflows/export-whisper-to-onnx.yaml
+++ b/.github/workflows/export-whisper-to-onnx.yaml
@@ -145,7 +145,7 @@ jobs:
export GIT_LFS_SKIP_SMUDGE=1
- git clone https://huggingface.co/csukuangfj/sherpa-onnx-whisper-${{ matrix.model }} huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-whisper-${{ matrix.model }} huggingface
rm -rf huggingface/*
diff --git a/.github/workflows/flutter-android.yaml b/.github/workflows/flutter-android.yaml
index 9752a82c6c..c2b1d01db1 100644
--- a/.github/workflows/flutter-android.yaml
+++ b/.github/workflows/flutter-android.yaml
@@ -214,7 +214,7 @@ jobs:
SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"
- git clone https://huggingface.co/csukuangfj/sherpa-onnx-flutter huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-flutter huggingface
cd huggingface
git fetch
git pull
diff --git a/.github/workflows/flutter-linux.yaml b/.github/workflows/flutter-linux.yaml
index b6b1fb9c84..f1fdd5ec71 100644
--- a/.github/workflows/flutter-linux.yaml
+++ b/.github/workflows/flutter-linux.yaml
@@ -261,7 +261,7 @@ jobs:
SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"
- git clone https://huggingface.co/csukuangfj/sherpa-onnx-flutter huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-flutter huggingface
cd huggingface
git fetch
git pull
diff --git a/.github/workflows/flutter-macos.yaml b/.github/workflows/flutter-macos.yaml
index 7c8a38e4c9..e85ff1644f 100644
--- a/.github/workflows/flutter-macos.yaml
+++ b/.github/workflows/flutter-macos.yaml
@@ -101,7 +101,7 @@ jobs:
SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"
- git clone https://huggingface.co/csukuangfj/sherpa-onnx-flutter huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-flutter huggingface
cd huggingface
git fetch
git pull
@@ -207,7 +207,7 @@ jobs:
SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"
- git clone https://huggingface.co/csukuangfj/sherpa-onnx-flutter huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-flutter huggingface
cd huggingface
git fetch
git pull
diff --git a/.github/workflows/flutter-windows-x64.yaml b/.github/workflows/flutter-windows-x64.yaml
index f4d296b709..59f6a6af92 100644
--- a/.github/workflows/flutter-windows-x64.yaml
+++ b/.github/workflows/flutter-windows-x64.yaml
@@ -94,7 +94,7 @@ jobs:
SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"
- git clone https://huggingface.co/csukuangfj/sherpa-onnx-flutter huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-flutter huggingface
cd huggingface
git fetch
git pull
@@ -192,7 +192,7 @@ jobs:
SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"
- git clone https://huggingface.co/csukuangfj/sherpa-onnx-flutter huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-flutter huggingface
cd huggingface
git fetch
git pull
diff --git a/.github/workflows/hap-vad-asr.yaml b/.github/workflows/hap-vad-asr.yaml
new file mode 100644
index 0000000000..9e64a9ab16
--- /dev/null
+++ b/.github/workflows/hap-vad-asr.yaml
@@ -0,0 +1,173 @@
+name: hap-vad-asr
+
+on:
+ push:
+ branches:
+ - hap
+ - hap-ci
+
+ workflow_dispatch:
+
+concurrency:
+ group: hap-vad-asr-${{ github.ref }}
+ cancel-in-progress: true
+
+permissions:
+ contents: write
+
+jobs:
+ hap_vad_asr:
+ if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
+ runs-on: ${{ matrix.os }}
+ name: HAPs for VAD ASR ${{ matrix.index }}/${{ matrix.total }}
+ strategy:
+ fail-fast: false
+ matrix:
+ os: [ubuntu-latest]
+ total: ["10"]
+ index: ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]
+
+ steps:
+ - uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+
+ # https://github.com/actions/setup-java
+ - uses: actions/setup-java@v4
+ with:
+ distribution: 'temurin' # See 'Supported distributions' for available options
+ java-version: '17' # it requires JDK 17 to sign the hap
+
+ - name: Show java version
+ shell: bash
+ run: |
+ which java
+ java --version
+
+ - name: cache-toolchain
+ id: cache-toolchain-ohos
+ uses: actions/cache@v4
+ with:
+ path: command-line-tools
+ key: commandline-tools-linux-x64-5.0.5.200.zip
+
+ - name: Download toolchain
+ if: steps.cache-toolchain-ohos.outputs.cache-hit != 'true'
+ shell: bash
+ run: |
+ curl -SL -O https://huggingface.co/csukuangfj/harmonyos-commandline-tools/resolve/main/commandline-tools-linux-x64-5.0.5.200.zip
+ unzip commandline-tools-linux-x64-5.0.5.200.zip
+ rm commandline-tools-linux-x64-5.0.5.200.zip
+
+ - name: Set environment variable
+ shell: bash
+ run: |
+ echo "$GITHUB_WORKSPACE/command-line-tools/sdk/default/openharmony/native/build-tools/cmake/bin" >> "$GITHUB_PATH"
+ which cmake
+
+ cmake --version
+
+ - name: Install Python dependencies
+ shell: bash
+ run: |
+ python3 -m pip install --upgrade pip jinja2
+
+ - name: Generate build script
+ shell: bash
+ run: |
+ cd scripts/hap
+
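+ # generate a build script covering only this job's share (index/total) of the VAD+ASR HAPs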
+ total=${{ matrix.total }}
+ index=${{ matrix.index }}
+
+ ./generate-vad-asr-hap-script.py --total $total --index $index
+ ls -lh
+
+ chmod +x build-hap-vad-asr.sh
+ mv -v ./build-hap-vad-asr.sh ../..
+
+ - name: Generate secrets
+ shell: bash
+ run: |
+ echo "${{ secrets.HAP_SHERPA_ONNX_CER }}" > /tmp/sherpa_onnx.cer
+ shasum -a 256 /tmp/sherpa_onnx.cer
+ ls -lh /tmp/sherpa_onnx.cer
+
+ # macos
+ # base64 -i sherpa_onnx_profileRelease.p7b -o sherpa_onnx_profileRelease.p7b.base64
+ #
+ # linux
+ # base64 -w 0 sherpa_onnx_profileRelease.p7b > sherpa_onnx_profileRelease.p7b.base64
+ #
+ # cat sherpa_onnx_profileRelease.p7b.base64 | base64 --decode > sherpa_onnx_profileRelease.p7b
+ #
+ echo "${{ secrets.HAP_SHERPA_ONNX_PROFILE }}" | base64 --decode > /tmp/sherpa_onnx_profileRelease.p7b
+ echo "${{ secrets.HAP_SHERPA_ONNX_KEY_STORE }}" > ./sherpa_onnx_ohos_key.p12.base64
+ echo "${{ secrets.HAP_SHERPA_ONNX_KEY_STORE }}" | base64 --decode > /tmp/sherpa_onnx_ohos_key.p12
+
+ ls -l /tmp/sherpa_onnx_profileRelease.p7b
+ ls -l /tmp/sherpa_onnx_ohos_key.p12
+
+ ls -lh ./sherpa_onnx_ohos_key.p12.base64
+ shasum -a 256 ./sherpa_onnx_ohos_key.p12.base64
+ wc ./sherpa_onnx_ohos_key.p12.base64
+ rm ./sherpa_onnx_ohos_key.p12.base64
+
+ shasum -a 256 /tmp/sherpa_onnx_profileRelease.p7b
+ shasum -a 256 /tmp/sherpa_onnx_ohos_key.p12
+
+ - name: build HAP
+ env:
+ HAP_KEY_ALIAS: ${{ secrets.HAP_KEY_ALIAS }}
+ HAP_KEY_PWD: ${{ secrets.HAP_KEY_PWD }}
+ HAP_KEY_STORE_PWD: ${{ secrets.HAP_KEY_STORE_PWD }}
+ shell: bash
+ run: |
+ export COMMANDLINE_TOOLS_DIR=$GITHUB_WORKSPACE/command-line-tools
+ ./build-hap-vad-asr.sh
+
+ # remove secrets
+ rm /tmp/sherpa_onnx.cer
+ rm /tmp/sherpa_onnx_profileRelease.p7b
+ rm /tmp/sherpa_onnx_ohos_key.p12
+
+ - name: Display HAPs
+ shell: bash
+ run: |
+ ls -lh ./haps/
+ du -h -d1 .
+
+ - name: Publish to huggingface
+ env:
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
+ uses: nick-fields/retry@v3
+ with:
+ max_attempts: 20
+ timeout_seconds: 200
+ shell: bash
+ command: |
+ git config --global user.email "csukuangfj@gmail.com"
+ git config --global user.name "Fangjun Kuang"
+
+ rm -rf huggingface
+ export GIT_LFS_SKIP_SMUDGE=1
+ export GIT_CLONE_PROTECTION_ACTIVE=false
+
+ SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
+ echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"
+
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-harmony-os huggingface
+ cd huggingface
+ du -h -d1 .
+ git fetch
+ git pull
+ git merge -m "merge remote" --ff origin main
+
+ d=hap/vad-asr/$SHERPA_ONNX_VERSION
+ mkdir -p $d
+ cp -v ../haps/*.hap $d/
+ git status
+ git lfs track "*.hap"
+ git add .
+ git commit -m "add more HAPs"
+ git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-harmony-os main
diff --git a/.github/workflows/har.yaml b/.github/workflows/har.yaml
new file mode 100644
index 0000000000..7b5b2e5141
--- /dev/null
+++ b/.github/workflows/har.yaml
@@ -0,0 +1,214 @@
+name: har
+
+on:
+ push:
+ branches:
+ - master
+ # - ohos-har
+ tags:
+ - 'v[0-9]+.[0-9]+.[0-9]+*'
+
+ workflow_dispatch:
+
+concurrency:
+ group: har-${{ github.ref }}
+ cancel-in-progress: true
+
+jobs:
+ har:
+ name: Har
+ runs-on: ${{ matrix.os }}
+ strategy:
+ fail-fast: false
+ matrix:
+ os: [ubuntu-latest]
+
+ steps:
+ - uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+
+ - name: ccache
+ uses: hendrikmuhs/ccache-action@v1.2
+ with:
+ key: har-linux
+
+ - name: cache-toolchain
+ id: cache-toolchain-ohos
+ uses: actions/cache@v4
+ with:
+ path: command-line-tools
+ key: commandline-tools-linux-x64-5.0.5.200.zip
+
+ - name: Download toolchain
+ if: steps.cache-toolchain-ohos.outputs.cache-hit != 'true'
+ shell: bash
+ run: |
+ curl -SL -O https://huggingface.co/csukuangfj/harmonyos-commandline-tools/resolve/main/commandline-tools-linux-x64-5.0.5.200.zip
+ unzip commandline-tools-linux-x64-5.0.5.200.zip
+ rm commandline-tools-linux-x64-5.0.5.200.zip
+
+ - name: Set environment variable
+ shell: bash
+ run: |
+ echo "$GITHUB_WORKSPACE/command-line-tools/sdk/default/openharmony/native/build-tools/cmake/bin" >> "$GITHUB_PATH"
+ which cmake
+
+ cmake --version
+
+ ls -lh $GITHUB_WORKSPACE/command-line-tools/sdk/default/openharmony/native/build/cmake/ohos.toolchain.cmake
+
+ echo "===="
+ cat $GITHUB_WORKSPACE/command-line-tools/sdk/default/openharmony/native/build/cmake/ohos.toolchain.cmake
+ echo "===="
+
+ # echo "$GITHUB_WORKSPACE/command-line-tools/sdk/default/openharmony/native/llvm/bin" >> "$GITHUB_PATH"
+
+ ls -lh $GITHUB_WORKSPACE/command-line-tools/sdk/default/openharmony/native/llvm/bin/
+ echo "--"
+ ls -lh $GITHUB_WORKSPACE/command-line-tools/sdk/default/openharmony/native/llvm/bin/*unknown*
+
+ cat $GITHUB_PATH
+
+ # /home/runner/work/onnxruntime-libs/onnxruntime-libs/command-line-tools/sdk/default/openharmony/native/llvm/bin/aarch64-unknown-linux-ohos-clang -v || true
+ export PATH=$PWD/command-line-tools/sdk/default/openharmony/native/llvm/bin:$PATH
+ echo "path: $PATH"
+
+ which aarch64-unknown-linux-ohos-clang++ || true
+ which aarch64-unknown-linux-ohos-clang || true
+
+ aarch64-unknown-linux-ohos-clang++ --version || true
+ aarch64-unknown-linux-ohos-clang --version || true
+
+ which armv7-unknown-linux-ohos-clang++
+ which armv7-unknown-linux-ohos-clang
+
+ armv7-unknown-linux-ohos-clang++ --version
+ armv7-unknown-linux-ohos-clang --version
+
+ which x86_64-unknown-linux-ohos-clang++
+ which x86_64-unknown-linux-ohos-clang
+
+ x86_64-unknown-linux-ohos-clang++ --version
+ x86_64-unknown-linux-ohos-clang --version
+
+ - name: Install tree
+ shell: bash
+ run: |
+ sudo apt-get update -q
+ sudo apt-get install -y -q tree
+
+ - name: Build libraries
+ shell: bash
+ run: |
+ export CMAKE_CXX_COMPILER_LAUNCHER=ccache
+ export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
+ cmake --version
+
+ export OHOS_SDK_NATIVE_DIR="$GITHUB_WORKSPACE/command-line-tools/sdk/default/openharmony/native"
+
+ ./build-ohos-arm64-v8a.sh
+ ./build-ohos-x86-64.sh
+
+ - name: Build Har
+ shell: bash
+ run: |
+ export PATH="$GITHUB_WORKSPACE/command-line-tools/bin:$PATH"
+
+ which hvigorw
+
+ pushd harmony-os/SherpaOnnxHar
+
+ cp -fv ../../LICENSE ./sherpa_onnx
+ cp -fv ../../CHANGELOG.md ./sherpa_onnx
+
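+ # assemble the HAR package with hvigorw and copy it to the repo root for the following steps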
+ hvigorw --mode module -p product=default -p module=sherpa_onnx@default assembleHar --analyze=normal --parallel --incremental --no-daemon
+ ls -lh ./sherpa_onnx/build/default/outputs/default/sherpa_onnx.har
+ cp -v ./sherpa_onnx/build/default/outputs/default/sherpa_onnx.har ../../
+
+ popd
+
+ ls -lh *.har
+
+ - name: View Har
+ shell: bash
+ run: |
+ file sherpa_onnx.har
+ tar xvf sherpa_onnx.har
+
+ cd package
+ ls -lh
+
+ ls -lh libs
+ echo "---libs/x86_64---"
+ ls -lh libs/x86_64
+
+ echo "---libs/arm64-v8a---"
+ ls -lh libs/arm64-v8a
+
+ echo "---src/main/ets/components---"
+ ls -lh src/main/ets/components/
+
+ echo "---src/main/cpp/types/libsherpa_onnx/---"
+ ls -lh src/main/cpp/types/libsherpa_onnx/
+
+ tree .
+
+ - name: Collect result
+ shell: bash
+ run: |
+ SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
+ echo "SHERPA_ONNX_VERSION=$SHERPA_ONNX_VERSION" >> "$GITHUB_ENV"
+
+ mv sherpa_onnx.har sherpa_onnx-$SHERPA_ONNX_VERSION.har
+
+ - uses: actions/upload-artifact@v4
+ with:
+ name: sherpa-onnx-har
+ path: ./sherpa_onnx*.har
+
+ - name: Release har
+ if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
+ uses: svenstaro/upload-release-action@v2
+ with:
+ file_glob: true
+ overwrite: true
+ file: ./*.har
+ # repo_name: k2-fsa/sherpa-onnx
+ # repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
+ # tag: v1.10.32
+
+ - name: Publish to huggingface
+ if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && (github.event_name == 'push' || github.event_name == 'workflow_dispatch')
+ env:
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
+ uses: nick-fields/retry@v3
+ with:
+ max_attempts: 20
+ timeout_seconds: 200
+ shell: bash
+ command: |
+ git config --global user.email "csukuangfj@gmail.com"
+ git config --global user.name "Fangjun Kuang"
+
+ rm -rf huggingface
+ export GIT_LFS_SKIP_SMUDGE=1
+ export GIT_CLONE_PROTECTION_ACTIVE=false
+
+ SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
+ echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"
+
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-harmony-os huggingface
+ cd huggingface
+ git fetch
+ git pull
+ git merge -m "merge remote" --ff origin main
+
+ d=har
+ mkdir -p $d
+ cp -v ../*.har $d/
+ git status
+ git lfs track "*.har"
+ git add .
+ git commit -m "add more hars"
+ git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-harmony-os main
diff --git a/.github/workflows/harmony-os.yaml b/.github/workflows/harmony-os.yaml
new file mode 100644
index 0000000000..e1a2ae1a2e
--- /dev/null
+++ b/.github/workflows/harmony-os.yaml
@@ -0,0 +1,159 @@
+name: harmony-os
+
+on:
+ push:
+ branches:
+ - master
+ - ohos
+ tags:
+ - 'v[0-9]+.[0-9]+.[0-9]+*'
+
+ workflow_dispatch:
+
+concurrency:
+ group: harmony-os-${{ github.ref }}
+ cancel-in-progress: true
+
+jobs:
+ harmony_os:
+ name: Harmony OS ${{ matrix.arch }}
+ runs-on: ${{ matrix.os }}
+ strategy:
+ fail-fast: false
+ matrix:
+ os: [ubuntu-latest]
+ arch: [arm64-v8a, armeabi-v7a, x86_64]
+
+ steps:
+ - uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+
+ - name: ccache
+ uses: hendrikmuhs/ccache-action@v1.2
+ with:
+ key: ohos-${{ matrix.arch }}
+
+ - name: cache-toolchain
+ id: cache-toolchain-ohos
+ uses: actions/cache@v4
+ with:
+ path: command-line-tools
+ key: commandline-tools-linux-x64-5.0.5.200.zip
+
+ - name: Download toolchain
+ if: steps.cache-toolchain-ohos.outputs.cache-hit != 'true'
+ shell: bash
+ run: |
+ curl -SL -O https://huggingface.co/csukuangfj/harmonyos-commandline-tools/resolve/main/commandline-tools-linux-x64-5.0.5.200.zip
+ unzip commandline-tools-linux-x64-5.0.5.200.zip
+ rm commandline-tools-linux-x64-5.0.5.200.zip
+
+ - name: Set environment variable
+ shell: bash
+ run: |
+ echo "$GITHUB_WORKSPACE/command-line-tools/sdk/default/openharmony/native/build-tools/cmake/bin" >> "$GITHUB_PATH"
+ which cmake
+
+ cmake --version
+
+ ls -lh $GITHUB_WORKSPACE/command-line-tools/sdk/default/openharmony/native/build/cmake/ohos.toolchain.cmake
+
+ echo "===="
+ cat $GITHUB_WORKSPACE/command-line-tools/sdk/default/openharmony/native/build/cmake/ohos.toolchain.cmake
+ echo "===="
+
+ # echo "$GITHUB_WORKSPACE/command-line-tools/sdk/default/openharmony/native/llvm/bin" >> "$GITHUB_PATH"
+
+ ls -lh $GITHUB_WORKSPACE/command-line-tools/sdk/default/openharmony/native/llvm/bin/
+ echo "--"
+ ls -lh $GITHUB_WORKSPACE/command-line-tools/sdk/default/openharmony/native/llvm/bin/*unknown*
+
+ cat $GITHUB_PATH
+
+ # /home/runner/work/onnxruntime-libs/onnxruntime-libs/command-line-tools/sdk/default/openharmony/native/llvm/bin/aarch64-unknown-linux-ohos-clang -v || true
+ export PATH=$PWD/command-line-tools/sdk/default/openharmony/native/llvm/bin:$PATH
+ echo "path: $PATH"
+
+ which aarch64-unknown-linux-ohos-clang++ || true
+ which aarch64-unknown-linux-ohos-clang || true
+
+ aarch64-unknown-linux-ohos-clang++ --version || true
+ aarch64-unknown-linux-ohos-clang --version || true
+
+ which armv7-unknown-linux-ohos-clang++
+ which armv7-unknown-linux-ohos-clang
+
+ armv7-unknown-linux-ohos-clang++ --version
+ armv7-unknown-linux-ohos-clang --version
+
+ which x86_64-unknown-linux-ohos-clang++
+ which x86_64-unknown-linux-ohos-clang
+
+ x86_64-unknown-linux-ohos-clang++ --version
+ x86_64-unknown-linux-ohos-clang --version
+
+ - name: Build ${{ matrix.arch }}
+ shell: bash
+ run: |
+ export CMAKE_CXX_COMPILER_LAUNCHER=ccache
+ export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
+ cmake --version
+
+ arch=${{ matrix.arch }}
+
+ echo "arch: $arch"
+
+ export OHOS_SDK_NATIVE_DIR="$GITHUB_WORKSPACE/command-line-tools/sdk/default/openharmony/native"
+
+ if [[ $arch == arm64-v8a ]]; then
+ ./build-ohos-arm64-v8a.sh
+ elif [[ $arch == armeabi-v7a ]]; then
+ ./build-ohos-armeabi-v7a.sh
+ elif [[ $arch == x86_64 ]]; then
+ ./build-ohos-x86-64.sh
+ else
+ echo "Unknown arch $arch"
+ fi
+
+ - name: Collect result for ${{ matrix.arch }}
+ shell: bash
+ run: |
+ SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
+ echo "SHERPA_ONNX_VERSION=$SHERPA_ONNX_VERSION" >> "$GITHUB_ENV"
+
+ arch=${{ matrix.arch }}
+ d=sherpa-onnx-$SHERPA_ONNX_VERSION-ohos-$arch
+ if [[ $arch == x86_64 ]]; then
+ cd ./build-ohos-x86-64
+ else
+ cd ./build-ohos-$arch
+ fi
+
+ mv install $d
+ tar cjfv $d.tar.bz2 $d
+
+ ls -lh $d/lib
+
+
+ file $d/lib/*
+
+ readelf -d $d/lib/libsherpa-onnx-c-api.so
+
+ mv $d.tar.bz2 ../
+
+ - uses: actions/upload-artifact@v4
+ with:
+ name: sherpa-onnx-ohos-${{ matrix.arch }}
+ path: ./*.tar.bz2
+
+ - name: Release pre-compiled libs
+ if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
+ uses: svenstaro/upload-release-action@v2
+ with:
+ file_glob: true
+ overwrite: true
+ file: ./*.tar.bz2
+ # repo_name: k2-fsa/sherpa-onnx
+ # repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
+ # tag: v1.10.23
diff --git a/.github/workflows/jni.yaml b/.github/workflows/jni.yaml
index a0f7693937..3bce5cdcd6 100644
--- a/.github/workflows/jni.yaml
+++ b/.github/workflows/jni.yaml
@@ -6,7 +6,6 @@ on:
- master
paths:
- '.github/workflows/jni.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'kotlin-api-examples/**'
- 'sherpa-onnx/csrc/*'
@@ -16,7 +15,6 @@ on:
- master
paths:
- '.github/workflows/jni.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'kotlin-api-examples/**'
- 'sherpa-onnx/csrc/*'
@@ -75,3 +73,8 @@ jobs:
cd ./kotlin-api-examples
./run.sh
+
+ - uses: actions/upload-artifact@v4
+ with:
+ name: tts-files-${{ matrix.os }}
+ path: kotlin-api-examples/test-*.wav
diff --git a/.github/workflows/lazarus.yaml b/.github/workflows/lazarus.yaml
index 11df536449..d28b7cba45 100644
--- a/.github/workflows/lazarus.yaml
+++ b/.github/workflows/lazarus.yaml
@@ -7,7 +7,6 @@ on:
- lazarus
paths:
- '.github/workflows/lazarus.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'lazarus-examples/**'
- 'sherpa-onnx/csrc/*'
@@ -19,7 +18,6 @@ on:
- master
paths:
- '.github/workflows/lazarus.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'lazarus-examples/**'
- 'sherpa-onnx/csrc/*'
@@ -43,7 +41,7 @@ jobs:
strategy:
fail-fast: false
matrix:
- os: [ubuntu-20.04, macos-latest, macos-13, windows-latest]
+ os: [ubuntu-22.04, macos-latest, macos-13, windows-latest]
steps:
- uses: actions/checkout@v4
@@ -56,10 +54,10 @@ jobs:
key: ${{ matrix.os }}
# See https://github.com/gcarreno/setup-lazarus
- - uses: gcarreno/setup-lazarus@v3
+ - uses: gcarreno/setup-lazarus@v3.3.1
with:
lazarus-version: "stable"
- with-cache: true
+ with-cache: false
- name: Lazarus info
shell: bash
@@ -79,14 +77,14 @@ jobs:
uname -a
- name: Install patchelf for ubuntu
- if: matrix.os == 'ubuntu-20.04'
+ if: matrix.os == 'ubuntu-22.04'
shell: bash
run: |
sudo apt-get update -q
sudo apt-get install -q -y patchelf
- name: Show Patchelf version (ubuntu)
- if: matrix.os == 'ubuntu-20.04'
+ if: matrix.os == 'ubuntu-22.04'
shell: bash
run: |
patchelf --version
@@ -104,7 +102,7 @@ jobs:
cd build
os=${{ matrix.os }}
- if [[ $os == 'windows-latest' || $os == 'ubuntu-20.04' ]]; then
+ if [[ $os == 'windows-latest' || $os == 'ubuntu-22.04' ]]; then
BUILD_SHARED_LIBS=ON
else
BUILD_SHARED_LIBS=OFF
@@ -139,7 +137,7 @@ jobs:
lazbuild --verbose --build-mode=Release --widgetset=cocoa ./generate_subtitles.lpi
elif [[ $os == macos-latest ]]; then
lazbuild --verbose --build-mode=Release --widgetset=cocoa --cpu=aarch64 ./generate_subtitles.lpi
- elif [[ $os == 'ubuntu-20.04' ]]; then
+ elif [[ $os == 'ubuntu-22.04' ]]; then
lazbuild --verbose --build-mode=Release-Linux ./generate_subtitles.lpi
else
lazbuild --verbose --build-mode=Release ./generate_subtitles.lpi
@@ -152,7 +150,7 @@ jobs:
ls -lh
- name: Collect generating subtitles (Ubuntu)
- if: matrix.os == 'ubuntu-20.04'
+ if: matrix.os == 'ubuntu-22.04'
shell: bash
run: |
SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
@@ -223,7 +221,7 @@ jobs:
ls -lh /tmp/macos-*
- uses: actions/upload-artifact@v4
- if: matrix.os == 'ubuntu-20.04'
+ if: matrix.os == 'ubuntu-22.04'
with:
name: linux-x64
path: /tmp/linux-x64
@@ -355,8 +353,9 @@ jobs:
SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"
- git clone https://huggingface.co/csukuangfj/sherpa-onnx-bin huggingface
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-bin huggingface
cd huggingface
+ git remote set-url origin https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-bin
git fetch
git pull
git merge -m "merge remote" --ff origin main
diff --git a/.github/workflows/linux-gpu.yaml b/.github/workflows/linux-gpu.yaml
index 2a9d0529d7..c1a97aa730 100644
--- a/.github/workflows/linux-gpu.yaml
+++ b/.github/workflows/linux-gpu.yaml
@@ -14,7 +14,6 @@ on:
- '.github/scripts/test-offline-ctc.sh'
- '.github/scripts/test-online-ctc.sh'
- '.github/scripts/test-offline-tts.sh'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'sherpa-onnx/c-api/*'
@@ -31,7 +30,6 @@ on:
- '.github/scripts/test-online-ctc.sh'
- '.github/scripts/test-online-ctc.sh'
- '.github/scripts/test-offline-tts.sh'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'sherpa-onnx/c-api/*'
diff --git a/.github/workflows/linux-jni-aarch64.yaml b/.github/workflows/linux-jni-aarch64.yaml
new file mode 100644
index 0000000000..19d1e09cf5
--- /dev/null
+++ b/.github/workflows/linux-jni-aarch64.yaml
@@ -0,0 +1,176 @@
+name: linux-jni-aarch64
+
+on:
+ push:
+ branches:
+ - jni
+ tags:
+ - 'v[0-9]+.[0-9]+.[0-9]+*'
+ workflow_dispatch:
+
+concurrency:
+ group: linux-jni-aarch64-${{ github.ref }}
+ cancel-in-progress: true
+
+jobs:
+ linux-jni-aarch64:
+ name: linux jni aarch64
+ runs-on: ${{ matrix.os }}
+ strategy:
+ fail-fast: false
+ matrix:
+ os: [ubuntu-latest]
+ # java-version: ['8', '11', '16', '17', '21']
+ java-version: ['21']
+
+ steps:
+ - uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+
+ - uses: actions/setup-java@v4
+ with:
+ distribution: 'temurin' # See 'Supported distributions' for available options
+ java-version: ${{ matrix.java-version }}
+
+ - name: Set up QEMU
+ uses: docker/setup-qemu-action@v2
+ with:
+ platforms: all
+
+ - name: Display PWD
+ shell: bash
+ run: |
+ echo "pwd: $PWD"
+ ls -lh
+ du -h -d1 .
+
+ - name: Build sherpa-onnx
+ if: matrix.java-version == '21'
+ uses: addnab/docker-run-action@v3
+ with:
+ image: quay.io/pypa/manylinux2014_aarch64
+ options: |
+ --volume ${{ github.workspace }}/:/home/runner/work/sherpa-onnx/sherpa-onnx
+ shell: bash
+ run: |
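+ # everything below runs inside the manylinux2014 aarch64 container (emulated via the QEMU step above)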
+ uname -a
+ gcc --version
+ cmake --version
+ cat /etc/*release
+ id
+ pwd
+
+ yum install -y java-11-openjdk-devel
+ java -version
+ which java
+ ls -lh $(which java)
+ ls -lrt /etc/alternatives/java
+
+ export JAVA_HOME=/usr/lib/jvm/java-11-openjdk-11.0.23.0.9-2.el7_9.aarch64
+ echo "JAVA_HOME: $JAVA_HOME"
+ find $JAVA_HOME -name jni.h
+
+ cd /home/runner/work/sherpa-onnx/sherpa-onnx
+
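+ # build alsa-lib from source and point the sherpa-onnx build at its headers and libraries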
+ git clone --depth 1 --branch v1.2.12 https://github.com/alsa-project/alsa-lib
+ pushd alsa-lib
+ ./gitcompile
+ popd
+
+ export CPLUS_INCLUDE_PATH=$PWD/alsa-lib/include:$CPLUS_INCLUDE_PATH
+ export SHERPA_ONNX_ALSA_LIB_DIR=$PWD/alsa-lib/src/.libs
+
+ mkdir build
+ cd build
+
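+ # shared-library JNI build; standalone binaries are disabled and the C API library is removed after install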
+ cmake \
+ -D SHERPA_ONNX_ENABLE_TTS=ON \
+ -D CMAKE_BUILD_TYPE=Release \
+ -D BUILD_SHARED_LIBS=ON \
+ -D CMAKE_INSTALL_PREFIX=./install \
+ -D SHERPA_ONNX_ENABLE_BINARY=OFF \
+ -D SHERPA_ONNX_ENABLE_JNI=ON \
+ ..
+
+ make -j2
+ make install
+
+ ls -lh lib
+ rm -rf ./install/lib/pkgconfig
+ rm -rf ./install/lib/share
+ rm -rf ./install/lib/cargs.h
+ rm -rf ./install/include/cargs.h
+ rm -rf ./install/lib/libcargs.so
+ rm -rf ./install/lib/libsherpa-onnx-c-api.so
+
+ echo "----"
+ ls -lh install/lib
+
+ echo "----"
+
+ - uses: actions/upload-artifact@v4
+ if: matrix.java-version == '21'
+ with:
+ name: release-jni-linux-${{ matrix.java-version }}
+ path: build/install/*
+
+ - name: Copy files
+ if: matrix.java-version == '21'
+ shell: bash
+ run: |
+ du -h -d1 .
+ SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
+
+ dst=sherpa-onnx-${SHERPA_ONNX_VERSION}-linux-aarch64-jni
+ mkdir $dst
+
+ cp -a build/install/lib $dst/
+ cp -a build/install/include $dst/
+
+ tree $dst
+
+ tar cjvf ${dst}.tar.bz2 $dst
+ du -h -d1 .
+
+ - name: Publish to huggingface
+ if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && (github.event_name == 'push' || github.event_name == 'workflow_dispatch') && matrix.java-version == '21'
+ env:
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
+ uses: nick-fields/retry@v3
+ with:
+ max_attempts: 20
+ timeout_seconds: 200
+ shell: bash
+ command: |
+ git config --global user.email "csukuangfj@gmail.com"
+ git config --global user.name "Fangjun Kuang"
+
+ rm -rf huggingface
+ export GIT_CLONE_PROTECTION_ACTIVE=false
+ GIT_LFS_SKIP_SMUDGE=1 git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-libs huggingface
+
+ cd huggingface
+ mkdir -p jni
+
+ cp -v ../sherpa-onnx-*.tar.bz2 ./jni
+ cp -v ../*.jar ./jni
+
+ git status
+ git lfs track "*.bz2"
+
+ git add .
+
+ git commit -m "add more files"
+
+ git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-libs main
+
+ - name: Release pre-compiled binaries and libs for linux aarch64
+ if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/') && matrix.java-version == '21'
+ uses: svenstaro/upload-release-action@v2
+ with:
+ file_glob: true
+ overwrite: true
+ file: sherpa-onnx-*.tar.bz2
+
diff --git a/.github/workflows/linux.yaml b/.github/workflows/linux.yaml
index 0e1eca0990..ea3bd2b4a5 100644
--- a/.github/workflows/linux.yaml
+++ b/.github/workflows/linux.yaml
@@ -18,7 +18,9 @@ on:
- '.github/scripts/test-audio-tagging.sh'
- '.github/scripts/test-offline-punctuation.sh'
- '.github/scripts/test-online-punctuation.sh'
- - 'CMakeLists.txt'
+ - '.github/scripts/test-speaker-diarization.sh'
+ - '.github/scripts/test-c-api.sh'
+ - '.github/scripts/test-cxx-api.sh'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'sherpa-onnx/c-api/*'
@@ -38,7 +40,9 @@ on:
- '.github/scripts/test-audio-tagging.sh'
- '.github/scripts/test-offline-punctuation.sh'
- '.github/scripts/test-online-punctuation.sh'
- - 'CMakeLists.txt'
+ - '.github/scripts/test-speaker-diarization.sh'
+ - '.github/scripts/test-c-api.sh'
+ - '.github/scripts/test-cxx-api.sh'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'sherpa-onnx/c-api/*'
@@ -143,14 +147,34 @@ jobs:
name: release-${{ matrix.build_type }}-with-shared-lib-${{ matrix.shared_lib }}-with-tts-${{ matrix.with_tts }}
path: install/*
- - name: Test offline transducer
+ - name: Test offline TTS
+ if: matrix.with_tts == 'ON'
+ shell: bash
+ run: |
+ du -h -d1 .
+ export PATH=$PWD/build/bin:$PATH
+ export EXE=sherpa-onnx-offline-tts
+
+ .github/scripts/test-offline-tts.sh
+ du -h -d1 .
+
+ - uses: actions/upload-artifact@v4
+ if: matrix.with_tts == 'ON'
+ with:
+ name: tts-generated-test-files-${{ matrix.build_type }}-${{ matrix.shared_lib }}-with-tts-${{ matrix.with_tts }}
+ path: tts
+
+ - name: Test offline Moonshine
+ if: matrix.build_type != 'Debug'
shell: bash
run: |
du -h -d1 .
export PATH=$PWD/build/bin:$PATH
export EXE=sherpa-onnx-offline
- .github/scripts/test-offline-transducer.sh
+ readelf -d build/bin/sherpa-onnx-offline
+
+ .github/scripts/test-offline-moonshine.sh
du -h -d1 .
- name: Test offline CTC
@@ -163,6 +187,37 @@ jobs:
.github/scripts/test-offline-ctc.sh
du -h -d1 .
+ - name: Test C++ API
+ shell: bash
+ run: |
+ du -h -d1 .
+ export PATH=$PWD/build/bin:$PATH
+ export CXX_STREAMING_ZIPFORMER_EXE=streaming-zipformer-cxx-api
+ export CXX_WHISPER_EXE=whisper-cxx-api
+ export CXX_SENSE_VOICE_EXE=sense-voice-cxx-api
+
+ .github/scripts/test-cxx-api.sh
+ du -h -d1 .
+
+ - name: Test offline speaker diarization
+ shell: bash
+ run: |
+ du -h -d1 .
+ export PATH=$PWD/build/bin:$PATH
+ export EXE=sherpa-onnx-offline-speaker-diarization
+
+ .github/scripts/test-speaker-diarization.sh
+
+ - name: Test offline transducer
+ shell: bash
+ run: |
+ du -h -d1 .
+ export PATH=$PWD/build/bin:$PATH
+ export EXE=sherpa-onnx-offline
+
+ .github/scripts/test-offline-transducer.sh
+ du -h -d1 .
+
- name: Test online punctuation
shell: bash
run: |
@@ -269,16 +324,7 @@ jobs:
.github/scripts/test-offline-whisper.sh
du -h -d1 .
- - name: Test offline TTS
- if: matrix.with_tts == 'ON'
- shell: bash
- run: |
- du -h -d1 .
- export PATH=$PWD/build/bin:$PATH
- export EXE=sherpa-onnx-offline-tts
- .github/scripts/test-offline-tts.sh
- du -h -d1 .
- name: Test online paraformer
shell: bash
@@ -327,8 +373,4 @@ jobs:
overwrite: true
file: sherpa-onnx-*.tar.bz2
- - uses: actions/upload-artifact@v4
- with:
- name: tts-generated-test-files-${{ matrix.build_type }}-${{ matrix.shared_lib }}-with-tts-${{ matrix.with_tts }}
- path: tts
diff --git a/.github/workflows/macos.yaml b/.github/workflows/macos.yaml
index 084531e4a6..813b8fd0eb 100644
--- a/.github/workflows/macos.yaml
+++ b/.github/workflows/macos.yaml
@@ -18,7 +18,9 @@ on:
- '.github/scripts/test-audio-tagging.sh'
- '.github/scripts/test-offline-punctuation.sh'
- '.github/scripts/test-online-punctuation.sh'
- - 'CMakeLists.txt'
+ - '.github/scripts/test-speaker-diarization.sh'
+ - '.github/scripts/test-c-api.sh'
+ - '.github/scripts/test-cxx-api.sh'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'sherpa-onnx/c-api/*'
@@ -37,7 +39,9 @@ on:
- '.github/scripts/test-audio-tagging.sh'
- '.github/scripts/test-offline-punctuation.sh'
- '.github/scripts/test-online-punctuation.sh'
- - 'CMakeLists.txt'
+ - '.github/scripts/test-speaker-diarization.sh'
+ - '.github/scripts/test-c-api.sh'
+ - '.github/scripts/test-cxx-api.sh'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'sherpa-onnx/c-api/*'
@@ -115,6 +119,45 @@ jobs:
otool -L build/bin/sherpa-onnx
otool -l build/bin/sherpa-onnx
+ - name: Test offline TTS
+ if: matrix.with_tts == 'ON'
+ shell: bash
+ run: |
+ export PATH=$PWD/build/bin:$PATH
+ export EXE=sherpa-onnx-offline-tts
+
+ .github/scripts/test-offline-tts.sh
+
+ - name: Test offline Moonshine
+ if: matrix.build_type != 'Debug'
+ shell: bash
+ run: |
+ export PATH=$PWD/build/bin:$PATH
+ export EXE=sherpa-onnx-offline
+
+ .github/scripts/test-offline-moonshine.sh
+
+ - name: Test C++ API
+ shell: bash
+ run: |
+ du -h -d1 .
+ export PATH=$PWD/build/bin:$PATH
+ export CXX_STREAMING_ZIPFORMER_EXE=streaming-zipformer-cxx-api
+ export CXX_WHISPER_EXE=whisper-cxx-api
+ export CXX_SENSE_VOICE_EXE=sense-voice-cxx-api
+
+ .github/scripts/test-cxx-api.sh
+ du -h -d1 .
+
+ - name: Test offline speaker diarization
+ shell: bash
+ run: |
+ du -h -d1 .
+ export PATH=$PWD/build/bin:$PATH
+ export EXE=sherpa-onnx-offline-speaker-diarization
+
+ .github/scripts/test-speaker-diarization.sh
+
- name: Test offline transducer
shell: bash
run: |
@@ -190,15 +233,6 @@ jobs:
.github/scripts/test-kws.sh
- - name: Test offline TTS
- if: matrix.with_tts == 'ON'
- shell: bash
- run: |
- export PATH=$PWD/build/bin:$PATH
- export EXE=sherpa-onnx-offline-tts
-
- .github/scripts/test-offline-tts.sh
-
- name: Test online paraformer
shell: bash
run: |
@@ -216,8 +250,6 @@ jobs:
.github/scripts/test-offline-whisper.sh
-
-
- name: Test online transducer
shell: bash
run: |
@@ -235,11 +267,12 @@ jobs:
.github/scripts/test-online-transducer.sh
- name: Copy files
+ if: matrix.build_type == 'Release'
shell: bash
run: |
SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
- if [[ ${{ matrix.with_tts }} ]]; then
+ if [[ ${{ matrix.with_tts }} == ON ]]; then
dst=sherpa-onnx-${SHERPA_ONNX_VERSION}-osx-universal2-${{ matrix.lib_type }}
else
dst=sherpa-onnx-${SHERPA_ONNX_VERSION}-osx-universal2-${{ matrix.lib_type }}-no-tts
@@ -256,7 +289,7 @@ jobs:
tar cjvf ${dst}.tar.bz2 $dst
- name: Release pre-compiled binaries and libs for macOS
- if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
+ if: matrix.build_type == 'Release' && (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
uses: svenstaro/upload-release-action@v2
with:
file_glob: true
diff --git a/.github/workflows/mfc.yaml b/.github/workflows/mfc.yaml
index e501478a29..1315092c2d 100644
--- a/.github/workflows/mfc.yaml
+++ b/.github/workflows/mfc.yaml
@@ -8,7 +8,6 @@ on:
- 'v[0-9]+.[0-9]+.[0-9]+*'
paths:
- '.github/workflows/mfc.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'mfc-examples/**'
- 'sherpa-onnx/csrc/*'
@@ -18,7 +17,6 @@ on:
- master
paths:
- '.github/workflows/mfc.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'mfc-examples/**'
- 'sherpa-onnx/csrc/*'
diff --git a/.github/workflows/pascal.yaml b/.github/workflows/pascal.yaml
index 2ed2131847..aed04e284f 100644
--- a/.github/workflows/pascal.yaml
+++ b/.github/workflows/pascal.yaml
@@ -6,7 +6,6 @@ on:
- master
paths:
- '.github/workflows/pascal.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'pascal-api-examples/**'
- 'sherpa-onnx/csrc/*'
@@ -17,7 +16,6 @@ on:
- master
paths:
- '.github/workflows/pascal.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'pascal-api-examples/**'
- 'sherpa-onnx/csrc/*'
@@ -127,6 +125,21 @@ jobs:
cp -v ../sherpa-onnx/pascal-api/*.pas ../pascal-api-examples/tts
fi
+ - name: Run Pascal test (Speaker diarization)
+ shell: bash
+ run: |
+ export PATH=/c/lazarus/fpc/3.2.2/bin/x86_64-win64:$PATH
+
+ cd ./pascal-api-examples
+ pushd speaker-diarization
+
+ ./run.sh
+ rm -rfv *.onnx *.wav sherpa-onnx-*
+ ls -lh
+ echo "---"
+
+ popd
+
- name: Run Pascal test (TTS)
shell: bash
run: |
@@ -137,6 +150,31 @@ jobs:
./run-piper.sh
rm -rf vits-piper-*
+ rm piper
+ ls -lh
+ echo "---"
+
+ ./run-kokoro-zh-en.sh
+ rm -rf kokoro-multi-*
+ rm kokoro-zh-en
+ ls -lh
+ echo "---"
+
+ ./run-kokoro-en.sh
+ rm -rf kokoro-en-*
+ rm kokoro-en
+ ls -lh
+ echo "---"
+
+ ./run-matcha-zh.sh
+ rm -rf matcha-icefall-*
+ rm matcha-zh
+ ls -lh
+ echo "---"
+
+ ./run-matcha-en.sh
+ rm -rf matcha-icefall-*
+ rm matcha-en
ls -lh
echo "---"
@@ -150,6 +188,10 @@ jobs:
cd ./pascal-api-examples
pushd vad-with-non-streaming-asr
+ time ./run-vad-with-moonshine.sh
+ rm -rf sherpa-onnx-*
+ echo "---"
+
time ./run-vad-with-whisper.sh
rm -rf sherpa-onnx-*
echo "---"
@@ -205,6 +247,10 @@ jobs:
rm -rf sherpa-onnx-*
echo "---"
+ ./run-moonshine.sh
+ rm -rf sherpa-onnx-*
+ echo "---"
+
./run-whisper.sh
rm -rf sherpa-onnx-*
echo "---"
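Note: each new Pascal step follows the same pattern as the existing ones: enter the example directory, run its run.sh (which downloads the models it needs), then delete the downloaded artifacts so the runner does not fill up. A condensed sketch for the speaker-diarization example on Linux/macOS; the extra PATH export for the Lazarus/FPC toolchain in the workflow is only needed on the Windows runner:

    cd pascal-api-examples/speaker-diarization

    ./run.sh                              # downloads the models and runs the example
    rm -rfv *.onnx *.wav sherpa-onnx-*    # free disk space afterwards
    ls -lh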
diff --git a/.github/workflows/pkg-config.yaml b/.github/workflows/pkg-config.yaml
index 57ed8a21a7..48ef160ba1 100644
--- a/.github/workflows/pkg-config.yaml
+++ b/.github/workflows/pkg-config.yaml
@@ -10,7 +10,6 @@ on:
paths:
- '.github/workflows/pkg-config.yaml'
- '.github/scripts/test-offline-tts.sh'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'sherpa-onnx/c-api/*'
@@ -21,7 +20,6 @@ on:
paths:
- '.github/workflows/pkg-config.yaml'
- '.github/scripts/test-offline-tts.sh'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'sherpa-onnx/c-api/*'
diff --git a/.github/workflows/release-dart-package.yaml b/.github/workflows/release-dart-package.yaml
index f590403fea..cc830e2c2c 100644
--- a/.github/workflows/release-dart-package.yaml
+++ b/.github/workflows/release-dart-package.yaml
@@ -481,11 +481,8 @@ jobs:
- name: Copy pre-built libs
shell: bash
run: |
- echo "----ios-arm64----"
- cp -v build-ios-shared/ios-arm64/libsherpa-onnx-c-api.dylib flutter/sherpa_onnx_ios/ios/
- cp -v build-ios-shared/ios-onnxruntime/onnxruntime.xcframework/ios-arm64/onnxruntime.a flutter/sherpa_onnx_ios/ios/libonnxruntime.a
-
- ls -lh flutter/sherpa_onnx_ios/ios/libonnxruntime.a
+ echo "----ios arm64 and arm64_x64_simulator----"
+ cp -av build-ios-shared/sherpa_onnx.xcframework flutter/sherpa_onnx_ios/ios/
mv -v flutter/sherpa_onnx_ios /tmp/to_be_published
diff --git a/.github/workflows/riscv64-linux.yaml b/.github/workflows/riscv64-linux.yaml
index e3e5e8b198..f81d5cb2ec 100644
--- a/.github/workflows/riscv64-linux.yaml
+++ b/.github/workflows/riscv64-linux.yaml
@@ -6,7 +6,6 @@ on:
- master
paths:
- '.github/workflows/riscv64-linux.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'sherpa-onnx/c-api/*'
@@ -19,7 +18,6 @@ on:
- master
paths:
- '.github/workflows/riscv64-linux.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'toolchains/riscv64-linux-gnu.toolchain.cmake'
diff --git a/.github/workflows/run-java-test.yaml b/.github/workflows/run-java-test.yaml
index 3e932707cc..ed5901e798 100644
--- a/.github/workflows/run-java-test.yaml
+++ b/.github/workflows/run-java-test.yaml
@@ -6,7 +6,6 @@ on:
- master
paths:
- '.github/workflows/run-java-test.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'java-api-examples/**'
- 'sherpa-onnx/csrc/*'
@@ -17,7 +16,6 @@ on:
- master
paths:
- '.github/workflows/run-java-test.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'java-api-examples/**'
- 'sherpa-onnx/csrc/*'
@@ -107,6 +105,38 @@ jobs:
make -j4
ls -lh lib
+ - name: Run java test (Non-Streaming ASR)
+ shell: bash
+ run: |
+ cd ./java-api-examples
+
+ ./run-non-streaming-decode-file-moonshine.sh
+ rm -rf sherpa-onnx-moonshine-*
+
+ ./run-non-streaming-decode-file-sense-voice.sh
+ rm -rf sherpa-onnx-sense-voice-*
+
+ ./run-inverse-text-normalization-paraformer.sh
+
+ ./run-non-streaming-decode-file-paraformer.sh
+ rm -rf sherpa-onnx-paraformer-zh-*
+
+ ./run-non-streaming-decode-file-transducer.sh
+ rm -rf sherpa-onnx-zipformer-*
+
+ ./run-non-streaming-decode-file-whisper.sh
+ rm -rf sherpa-onnx-whisper-*
+
+ ./run-non-streaming-decode-file-nemo.sh
+ rm -rf sherpa-onnx-nemo-*
+
+ - name: Run java test (speaker diarization)
+ shell: bash
+ run: |
+ cd ./java-api-examples
+ ./run-offline-speaker-diarization.sh
+ rm -rfv *.onnx *.wav sherpa-onnx-pyannote-*
+
- name: Run java test (kws)
shell: bash
run: |
@@ -199,32 +229,23 @@ jobs:
./run-streaming-decode-file-transducer.sh
rm -rf sherpa-onnx-streaming-*
- - name: Run java test (Non-Streaming ASR)
+ - name: Run java test (Non-Streaming TTS)
shell: bash
run: |
cd ./java-api-examples
- ./run-non-streaming-decode-file-sense-voice.sh
- rm -rf sherpa-onnx-sense-voice-*
-
- ./run-inverse-text-normalization-paraformer.sh
-
- ./run-non-streaming-decode-file-paraformer.sh
- rm -rf sherpa-onnx-paraformer-zh-*
-
- ./run-non-streaming-decode-file-transducer.sh
- rm -rf sherpa-onnx-zipformer-*
+ ./run-non-streaming-tts-kokoro-zh-en.sh
+ ./run-non-streaming-tts-kokoro-en.sh
+ ./run-non-streaming-tts-matcha-zh.sh
+ ./run-non-streaming-tts-matcha-en.sh
+ ls -lh
- ./run-non-streaming-decode-file-whisper.sh
- rm -rf sherpa-onnx-whisper-*
+ rm -rf kokoro-multi-*
+ rm -rf kokoro-en-*
- ./run-non-streaming-decode-file-nemo.sh
- rm -rf sherpa-onnx-nemo-*
+ rm -rf matcha-icefall-*
+ rm hifigan_v2.onnx
- - name: Run java test (Non-Streaming TTS)
- shell: bash
- run: |
- cd ./java-api-examples
./run-non-streaming-tts-piper-en.sh
rm -rf vits-piper-*
diff --git a/.github/workflows/run-python-test-macos.yaml b/.github/workflows/run-python-test-macos.yaml
index ed51379d2e..c9fafe68a3 100644
--- a/.github/workflows/run-python-test-macos.yaml
+++ b/.github/workflows/run-python-test-macos.yaml
@@ -7,7 +7,6 @@ on:
paths:
- '.github/workflows/run-python-test-macos.yaml'
- '.github/scripts/test-python.sh'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'python-api-examples/**'
@@ -17,7 +16,6 @@ on:
paths:
- '.github/workflows/run-python-test-macos.yaml'
- '.github/scripts/test-python.sh'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'python-api-examples/**'
@@ -54,6 +52,9 @@ jobs:
- os: macos-latest
python-version: "3.12"
+ - os: macos-latest
+ python-version: "3.13"
+
steps:
- uses: actions/checkout@v4
with:
diff --git a/.github/workflows/run-python-test.yaml b/.github/workflows/run-python-test.yaml
index 80fa86a746..7080420f94 100644
--- a/.github/workflows/run-python-test.yaml
+++ b/.github/workflows/run-python-test.yaml
@@ -7,7 +7,6 @@ on:
paths:
- '.github/workflows/run-python-test.yaml'
- '.github/scripts/test-python.sh'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'python-api-examples/**'
@@ -17,7 +16,6 @@ on:
paths:
- '.github/workflows/run-python-test.yaml'
- '.github/scripts/test-python.sh'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'python-api-examples/**'
@@ -53,6 +51,8 @@ jobs:
python-version: "3.11"
- os: ubuntu-22.04
python-version: "3.12"
+ - os: ubuntu-22.04
+ python-version: "3.13"
steps:
- uses: actions/checkout@v4
diff --git a/.github/workflows/sanitizer.yaml b/.github/workflows/sanitizer.yaml
index 7fce3834a1..7cda968990 100644
--- a/.github/workflows/sanitizer.yaml
+++ b/.github/workflows/sanitizer.yaml
@@ -76,6 +76,15 @@ jobs:
otool -L build/bin/sherpa-onnx
otool -l build/bin/sherpa-onnx
+ - name: Test C++ API
+ shell: bash
+ run: |
+ export PATH=$PWD/build/bin:$PATH
+ export CXX_STREAMING_ZIPFORMER_EXE=streaming-zipformer-cxx-api
+ export CXX_WHISPER_EXE=whisper-cxx-api
+
+ .github/scripts/test-cxx-api.sh
+
- name: Test online punctuation
shell: bash
run: |
@@ -109,7 +118,6 @@ jobs:
.github/scripts/test-online-ctc.sh
-
- name: Test C API
shell: bash
run: |
diff --git a/.github/workflows/speaker-diarization.yaml b/.github/workflows/speaker-diarization.yaml
index 0bd6a575ca..ab2a4f0904 100644
--- a/.github/workflows/speaker-diarization.yaml
+++ b/.github/workflows/speaker-diarization.yaml
@@ -67,7 +67,7 @@ jobs:
curl -SL -O https://huggingface.co/csukuangfj/pyannote-models/resolve/main/segmentation-3.0/pytorch_model.bin
test_wavs=(
- 0-two-speakers-zh.wav
+ 0-four-speakers-zh.wav
1-two-speakers-en.wav
2-two-speakers-en.wav
3-two-speakers-en.wav
diff --git a/.github/workflows/swift.yaml b/.github/workflows/swift.yaml
index 3176c9b313..35bb7ab36d 100644
--- a/.github/workflows/swift.yaml
+++ b/.github/workflows/swift.yaml
@@ -4,10 +4,11 @@ on:
push:
branches:
- master
+ tags:
+ - 'v[0-9]+.[0-9]+.[0-9]+*'
paths:
- './build-swift-macos.sh'
- '.github/workflows/swift.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'swift-api-examples/**'
- 'sherpa-onnx/csrc/*'
@@ -20,7 +21,6 @@ on:
paths:
- './build-swift-macos.sh'
- '.github/workflows/swift.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'swift-api-examples/**'
- 'sherpa-onnx/csrc/*'
@@ -65,6 +65,30 @@ jobs:
./build-swift-macos.sh
+ - name: Copy files
+ if: matrix.os == 'macos-13' && (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
+ shell: bash
+ run: |
+ SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
+
+ dst=sherpa-onnx-${SHERPA_ONNX_VERSION}-macos-xcframework-static
+ mkdir $dst
+
+ mv -v build-swift-macos/sherpa-onnx.xcframework $dst
+
+ brew install tree
+ tree $dst
+
+ tar cjvf ${dst}.tar.bz2 $dst
+
+ - name: Release pre-compiled binaries and libs for macOS
+ if: matrix.os == 'macos-13' && (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
+ uses: svenstaro/upload-release-action@v2
+ with:
+ file_glob: true
+ overwrite: true
+ file: sherpa-onnx-*macos-xcframework-static.tar.bz2
+
- name: test
shell: bash
run: |
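Note: the new "Copy files" step in swift.yaml (like the macOS and WASM release steps elsewhere in this patch) derives the release version from CMakeLists.txt with a grep/cut pipeline. A small sketch of what that pipeline yields, assuming the set(SHERPA_ONNX_VERSION "...") line updated later in this patch is the matching line:

    # CMakeLists.txt contains: set(SHERPA_ONNX_VERSION "1.10.42")
    # Splitting on spaces, field 2 is "1.10.42"); splitting that on '"', field 2 is 1.10.42.
    SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
    echo $SHERPA_ONNX_VERSION    # prints: v1.10.42

    dst=sherpa-onnx-${SHERPA_ONNX_VERSION}-macos-xcframework-static
    mkdir $dst
    mv -v build-swift-macos/sherpa-onnx.xcframework $dst
    tar cjvf ${dst}.tar.bz2 $dst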
diff --git a/.github/workflows/test-build-wheel.yaml b/.github/workflows/test-build-wheel.yaml
index a9b2db5892..d9c863160a 100644
--- a/.github/workflows/test-build-wheel.yaml
+++ b/.github/workflows/test-build-wheel.yaml
@@ -7,7 +7,6 @@ on:
paths:
- 'setup.py'
- '.github/workflows/test-build-wheel.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'sherpa-onnx/python/**'
@@ -17,7 +16,6 @@ on:
paths:
- 'setup.py'
- '.github/workflows/test-build-wheel.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'sherpa-onnx/python/**'
@@ -139,7 +137,8 @@ jobs:
export PATH=/c/hostedtoolcache/windows/Python/3.9.13/x64/bin:$PATH
export PATH=/c/hostedtoolcache/windows/Python/3.10.11/x64/bin:$PATH
export PATH=/c/hostedtoolcache/windows/Python/3.11.9/x64/bin:$PATH
- export PATH=/c/hostedtoolcache/windows/Python/3.12.6/x64/bin:$PATH
+ export PATH=/c/hostedtoolcache/windows/Python/3.12.8/x64/bin:$PATH
+ export PATH=/c/hostedtoolcache/windows/Python/3.13.1/x64/bin:$PATH
which sherpa-onnx
sherpa-onnx --help
diff --git a/.github/workflows/test-dart.yaml b/.github/workflows/test-dart.yaml
index 58d5054902..d9e27e86fe 100644
--- a/.github/workflows/test-dart.yaml
+++ b/.github/workflows/test-dart.yaml
@@ -114,6 +114,7 @@ jobs:
cp scripts/dart/audio-tagging-pubspec.yaml dart-api-examples/audio-tagging/pubspec.yaml
cp scripts/dart/add-punctuations-pubspec.yaml dart-api-examples/add-punctuations/pubspec.yaml
cp scripts/dart/speaker-id-pubspec.yaml dart-api-examples/speaker-identification/pubspec.yaml
+ cp scripts/dart/speaker-diarization-pubspec.yaml dart-api-examples/speaker-diarization/pubspec.yaml
cp scripts/dart/sherpa-onnx-pubspec.yaml flutter/sherpa_onnx/pubspec.yaml
diff --git a/.github/workflows/test-dot-net-nuget.yaml b/.github/workflows/test-dot-net-nuget.yaml
index d325824414..b89781be56 100644
--- a/.github/workflows/test-dot-net-nuget.yaml
+++ b/.github/workflows/test-dot-net-nuget.yaml
@@ -75,10 +75,10 @@ jobs:
run: |
df -h
- - name: Setup .NET 6.0
+ - name: Setup .NET 8.0
uses: actions/setup-dotnet@v4
with:
- dotnet-version: 6.0.x
+ dotnet-version: 8.0.x
- name: Check dotnet
run: dotnet --info
diff --git a/.github/workflows/test-dot-net.yaml b/.github/workflows/test-dot-net.yaml
index 6e32b155ec..9b46b64d97 100644
--- a/.github/workflows/test-dot-net.yaml
+++ b/.github/workflows/test-dot-net.yaml
@@ -6,7 +6,6 @@ on:
- master
paths:
- '.github/workflows/test-dot-net.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'dotnet-examples/**'
@@ -17,7 +16,6 @@ on:
- master
paths:
- '.github/workflows/test-dot-net.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'dotnet-examples/**'
@@ -47,8 +45,57 @@ jobs:
with:
fetch-depth: 0
+ - name: ccache
+ uses: hendrikmuhs/ccache-action@v1.2
+ with:
+ key: ${{ matrix.os }}-dotnet-release-shared
+
+ - name: Build sherpa-onnx
+ shell: bash
+ run: |
+ export CMAKE_CXX_COMPILER_LAUNCHER=ccache
+ export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
+ cmake --version
+
+ mkdir build
+ cd build
+ cmake \
+ -DBUILD_SHARED_LIBS=ON \
+ -DCMAKE_INSTALL_PREFIX=./install \
+ -DCMAKE_BUILD_TYPE=Release \
+ -DSHERPA_ONNX_ENABLE_WEBSOCKET=OFF \
+ -DBUILD_ESPEAK_NG_EXE=OFF \
+ -DSHERPA_ONNX_ENABLE_BINARY=OFF \
+ ..
+
+ cmake --build . --target install --config Release
+
+ rm -rf install/share
+ rm -rf install/lib/pkg*
+
+ ls -lh ./install/lib
+
+ - uses: actions/upload-artifact@v4
+ with:
+ name: ${{ matrix.os }}
+ path: ./build/install/lib/
+
+ test-dot-net:
+ runs-on: ${{ matrix.os }}
+ needs: [build-libs]
+ strategy:
+ fail-fast: false
+ matrix:
+ os: [ubuntu-latest]
+ python-version: ["3.8"]
+
+ steps:
+ - name: Check space
+ shell: bash
+ run: |
+ df -h
+
- name: Free space
- if: matrix.os == 'ubuntu-latest'
shell: bash
run: |
df -h
@@ -56,7 +103,6 @@ jobs:
df -h
- name: Free more space
- if: matrix.os == 'ubuntu-latest'
shell: bash
run: |
# https://github.com/orgs/community/discussions/25678
@@ -68,7 +114,6 @@ jobs:
sudo rm -rf "$AGENT_TOOLSDIRECTORY"
- name: Free Disk Space (Ubuntu)
- if: matrix.os == 'ubuntu-latest'
uses: jlumbroso/free-disk-space@main
with:
# this might remove tools that are actually needed,
@@ -85,51 +130,10 @@ jobs:
swap-storage: true
- name: Check space
- if: matrix.os == 'ubuntu-latest'
shell: bash
run: |
df -h
- - name: ccache
- uses: hendrikmuhs/ccache-action@v1.2
- with:
- key: ${{ matrix.os }}-release-shared
-
- - name: Build sherpa-onnx
- shell: bash
- run: |
- export CMAKE_CXX_COMPILER_LAUNCHER=ccache
- export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
- cmake --version
-
- mkdir build
- cd build
- cmake \
- -DBUILD_SHARED_LIBS=ON \
- -DCMAKE_INSTALL_PREFIX=./install \
- -DCMAKE_BUILD_TYPE=Release \
- -DSHERPA_ONNX_ENABLE_WEBSOCKET=OFF \
- -DBUILD_ESPEAK_NG_EXE=OFF \
- -DSHERPA_ONNX_ENABLE_BINARY=ON \
- ..
-
- cmake --build . --target install --config Release
-
- - uses: actions/upload-artifact@v4
- with:
- name: ${{ matrix.os }}
- path: ./build/install/lib/
-
- test-dot-net:
- runs-on: ${{ matrix.os }}
- needs: [build-libs]
- strategy:
- fail-fast: false
- matrix:
- os: [ubuntu-latest]
- python-version: ["3.8"]
-
- steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
@@ -148,13 +152,12 @@ jobs:
uses: actions/download-artifact@v4
with:
name: ubuntu-latest
- path: /tmp/linux
+ path: /tmp/linux-x64
- name: Setup .NET
uses: actions/setup-dotnet@v4
with:
- dotnet-version: |
- 6.0.x
+ dotnet-version: 8.0.x
- name: Check dotnet
run: dotnet --info
@@ -162,17 +165,21 @@ jobs:
- name: Display files
shell: bash
run: |
- echo "----------/tmp/----------"
- ls -lh /tmp/
+ echo "----------/tmp----------"
+ ls -lh /tmp
- echo "----------/tmp/linux----------"
- ls -lh /tmp/linux
+ echo "----------/tmp/linux-x64----------"
+ ls -lh /tmp/linux-x64
+ df -h
- name: Build
shell: bash
run: |
cd scripts/dotnet
./run.sh
+ df -h
+
+ ls -lh /tmp/packages
- name: Copy files
shell: bash
@@ -181,9 +188,14 @@ jobs:
ls -lh /tmp
+ df -h
+
- name: Run tests
shell: bash
run: |
+ dotnet nuget locals all --clear
+ df -h
+
.github/scripts/test-dot-net.sh
- uses: actions/upload-artifact@v4
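Note: build-libs now produces only the shared libraries; websocket support, the espeak-ng executable, and the command-line binaries are switched off, and install/share plus the pkg-config files are removed, so the uploaded artifact contains little more than the libraries the .NET tests need. A condensed sketch of that configure/install sequence:

    mkdir -p build && cd build
    cmake \
      -DBUILD_SHARED_LIBS=ON \
      -DCMAKE_INSTALL_PREFIX=./install \
      -DCMAKE_BUILD_TYPE=Release \
      -DSHERPA_ONNX_ENABLE_WEBSOCKET=OFF \
      -DBUILD_ESPEAK_NG_EXE=OFF \
      -DSHERPA_ONNX_ENABLE_BINARY=OFF \
      ..

    cmake --build . --target install --config Release

    rm -rf install/share install/lib/pkg*    # trim the artifact before upload
    ls -lh ./install/lib                     # this directory is what gets uploaded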
diff --git a/.github/workflows/test-go-package.yaml b/.github/workflows/test-go-package.yaml
index 2634e5ca75..f2e4cb1bc3 100644
--- a/.github/workflows/test-go-package.yaml
+++ b/.github/workflows/test-go-package.yaml
@@ -68,6 +68,64 @@ jobs:
run: |
gcc --version
+ - name: Test Keyword spotting
+ if: matrix.os != 'windows-latest'
+ shell: bash
+ run: |
+ cd go-api-examples/keyword-spotting-from-file/
+ ./run.sh
+
+ - name: Test adding punctuation
+ if: matrix.os != 'windows-latest'
+ shell: bash
+ run: |
+ cd go-api-examples/add-punctuation/
+ ./run.sh
+
+ - name: Test non-streaming speaker diarization
+ if: matrix.os != 'windows-latest'
+ shell: bash
+ run: |
+ cd go-api-examples/non-streaming-speaker-diarization/
+ ./run.sh
+
+ - name: Test non-streaming speaker diarization
+ if: matrix.os == 'windows-latest' && matrix.arch == 'x64'
+ shell: bash
+ run: |
+ cd go-api-examples/non-streaming-speaker-diarization/
+ go mod tidy
+ cat go.mod
+ go build
+
+ echo $PWD
+ ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/
+ ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/*
+ cp -v /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/sherpa-onnx-go-windows*/lib/x86_64-pc-windows-gnu/*.dll .
+
+ ./run.sh
+
+ - name: Test non-streaming speaker diarization
+ if: matrix.os == 'windows-latest' && matrix.arch == 'x86'
+ shell: bash
+ run: |
+ cd go-api-examples/non-streaming-speaker-diarization/
+
+ go env GOARCH
+ go env -w GOARCH=386
+ go env -w CGO_ENABLED=1
+
+ go mod tidy
+ cat go.mod
+ go build
+
+ echo $PWD
+ ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/
+ ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/*
+ cp -v /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/sherpa-onnx-go-windows*/lib/i686-pc-windows-gnu/*.dll .
+
+ ./run.sh
+
- name: Test streaming HLG decoding (Linux/macOS)
if: matrix.os != 'windows-latest'
shell: bash
@@ -151,6 +209,25 @@ jobs:
go build
ls -lh
+ echo "Test kokoro zh+en"
+ ./run-kokoro-zh-en.sh
+ rm -rf kokoro-multi-*
+ ls -lh
+
+ echo "Test kokoro en"
+ ./run-kokoro-en.sh
+ rm -rf kokoro-en-*
+ ls -lh
+
+ echo "Test matcha zh"
+ ./run-matcha-zh.sh
+ rm -rf matcha-icefall-*
+
+ echo "Test matcha en"
+ ./run-matcha-en.sh
+ rm -rf matcha-icefall-*
+ ls -lh *.wav
+
echo "Test vits-ljs"
./run-vits-ljs.sh
rm -rf vits-ljs
@@ -188,6 +265,15 @@ jobs:
cp -v /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/sherpa-onnx-go-windows*/lib/x86_64-pc-windows-gnu/*.dll .
ls -lh
+ echo "Test matcha zh"
+ ./run-matcha-zh.sh
+ rm -rf matcha-icefall-*
+
+ echo "Test matcha en"
+ ./run-matcha-en.sh
+ rm -rf matcha-icefall-*
+ ls -lh *.wav
+
echo "Test vits-ljs"
./run-vits-ljs.sh
rm -rf vits-ljs
@@ -233,6 +319,15 @@ jobs:
cp -v /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/sherpa-onnx-go-windows*/lib/i686-pc-windows-gnu/*.dll .
ls -lh
+ echo "Test matcha zh"
+ ./run-matcha-zh.sh
+ rm -rf matcha-icefall-*
+
+ echo "Test matcha en"
+ ./run-matcha-en.sh
+ rm -rf matcha-icefall-*
+ ls -lh *.wav
+
echo "Test vits-ljs"
./run-vits-ljs.sh
rm -rf vits-ljs
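Note: on the Windows runners the Go examples need the pre-built sherpa-onnx DLLs next to the executable, so the new steps copy them out of the Go module cache after `go build`; the x86 variant additionally forces a 32-bit, cgo-enabled build. A sketch of the 32-bit case; the module-cache path below is the one used on the GitHub runner and will differ on a local machine:

    cd go-api-examples/non-streaming-speaker-diarization/

    # Force a 32-bit build with cgo enabled (required by the sherpa-onnx bindings).
    go env -w GOARCH=386
    go env -w CGO_ENABLED=1

    go mod tidy
    go build

    # Place the matching 32-bit DLLs next to the executable, then run the example.
    cp -v /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/sherpa-onnx-go-windows*/lib/i686-pc-windows-gnu/*.dll .
    ./run.sh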
diff --git a/.github/workflows/test-go.yaml b/.github/workflows/test-go.yaml
index 65c72e1741..8d68076d71 100644
--- a/.github/workflows/test-go.yaml
+++ b/.github/workflows/test-go.yaml
@@ -6,7 +6,6 @@ on:
- master
paths:
- '.github/workflows/test-go.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'go-api-examples/**'
@@ -16,7 +15,6 @@ on:
- master
paths:
- '.github/workflows/test-go.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'go-api-examples/**'
@@ -134,53 +132,15 @@ jobs:
name: ${{ matrix.os }}-libs
path: to-upload/
- - name: Test speaker identification
+ - name: Test Keyword spotting
shell: bash
run: |
- cd scripts/go/_internal/speaker-identification/
- ./run.sh
+ cd scripts/go/_internal/keyword-spotting-from-file/
- - name: Test streaming HLG decoding
- shell: bash
- run: |
- cd scripts/go/_internal/streaming-hlg-decoding/
./run.sh
- - name: Test non-streaming TTS
- shell: bash
- run: |
- mkdir tts-waves
-
- cd scripts/go/_internal/non-streaming-tts/
- ls -lh
- go mod tidy
- cat go.mod
- go build
ls -lh
- echo "Test vits-ljs"
- ./run-vits-ljs.sh
- rm -rf vits-ljs
-
- echo "Test vits-vctk"
- ./run-vits-vctk.sh
- rm -rf vits-vctk
-
- echo "Test vits-zh-aishell3"
- ./run-vits-zh-aishell3.sh
- rm -rf vits-icefall-zh-aishell3
-
- echo "Test vits-piper-en_US-lessac-medium"
- ./run-vits-piper-en_US-lessac-medium.sh
- rm -rf vits-piper-en_US-lessac-medium
-
- cp *.wav ../../../../tts-waves/
-
- - uses: actions/upload-artifact@v4
- with:
- name: tts-waves-${{ matrix.os }}
- path: tts-waves
-
- name: Test non-streaming decoding files
shell: bash
run: |
@@ -191,6 +151,10 @@ jobs:
go build
ls -lh
+ echo "Test Moonshine"
+ ./run-moonshine.sh
+ rm -rf sherpa-onnx-*
+
echo "Test SenseVoice ctc"
./run-sense-voice-small.sh
rm -rf sherpa-onnx-sense-*
@@ -224,6 +188,84 @@ jobs:
./run-tdnn-yesno.sh
rm -rf sherpa-onnx-tdnn-yesno
+ - name: Test adding punctuation
+ shell: bash
+ run: |
+ cd scripts/go/_internal/add-punctuation/
+ ./run.sh
+
+ - name: Test non-streaming speaker diarization
+ shell: bash
+ run: |
+ cd scripts/go/_internal/non-streaming-speaker-diarization/
+ ./run.sh
+
+ - name: Test speaker identification
+ shell: bash
+ run: |
+ cd scripts/go/_internal/speaker-identification/
+ ./run.sh
+
+ - name: Test streaming HLG decoding
+ shell: bash
+ run: |
+ cd scripts/go/_internal/streaming-hlg-decoding/
+ ./run.sh
+
+ - name: Test non-streaming TTS
+ shell: bash
+ run: |
+ mkdir tts-waves
+
+ cd scripts/go/_internal/non-streaming-tts/
+ ls -lh
+ go mod tidy
+ cat go.mod
+ go build
+ ls -lh
+
+ echo "Test kokoro zh+en"
+ ./run-kokoro-zh-en.sh
+ rm -rf kokoro-multi-*
+ ls -lh
+
+ echo "Test kokoro en"
+ ./run-kokoro-en.sh
+ rm -rf kokoro-en-*
+ ls -lh
+
+ echo "Test matcha zh"
+ ./run-matcha-zh.sh
+ rm -rf matcha-icefall-*
+
+ echo "Test matcha en"
+ ./run-matcha-en.sh
+ rm -rf matcha-icefall-*
+ ls -lh *.wav
+
+ echo "Test vits-ljs"
+ ./run-vits-ljs.sh
+ rm -rf vits-ljs
+
+ echo "Test vits-vctk"
+ ./run-vits-vctk.sh
+ rm -rf vits-vctk
+
+ echo "Test vits-zh-aishell3"
+ ./run-vits-zh-aishell3.sh
+ rm -rf vits-icefall-zh-aishell3
+
+ echo "Test vits-piper-en_US-lessac-medium"
+ ./run-vits-piper-en_US-lessac-medium.sh
+ rm -rf vits-piper-en_US-lessac-medium
+
+ cp *.wav ../../../../tts-waves/
+
+ - uses: actions/upload-artifact@v4
+ with:
+ name: tts-waves-${{ matrix.os }}
+ path: tts-waves
+
- name: Test streaming decoding files
shell: bash
run: |
diff --git a/.github/workflows/test-nodejs-addon-api.yaml b/.github/workflows/test-nodejs-addon-api.yaml
index 224fc0f0b6..539025c8c5 100644
--- a/.github/workflows/test-nodejs-addon-api.yaml
+++ b/.github/workflows/test-nodejs-addon-api.yaml
@@ -6,7 +6,6 @@ on:
- master
paths:
- '.github/workflows/test-nodejs-addon-api.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'sherpa-onnx/c-api/*'
@@ -17,7 +16,6 @@ on:
- master
paths:
- '.github/workflows/test-nodejs-addon-api.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'sherpa-onnx/c-api/*'
diff --git a/.github/workflows/test-nodejs-addon-npm-aarch64.yaml b/.github/workflows/test-nodejs-addon-npm-aarch64.yaml
index 07ab8d8781..232f8fe27c 100644
--- a/.github/workflows/test-nodejs-addon-npm-aarch64.yaml
+++ b/.github/workflows/test-nodejs-addon-npm-aarch64.yaml
@@ -6,7 +6,6 @@ on:
- master
paths:
- '.github/workflows/test-nodejs-addon-npm-aarch64.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'sherpa-onnx/c-api/*'
@@ -19,7 +18,6 @@ on:
- master
paths:
- '.github/workflows/test-nodejs-addon-npm-aarch64.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'sherpa-onnx/c-api/*'
diff --git a/.github/workflows/test-nodejs-addon-npm-win-x86.yaml b/.github/workflows/test-nodejs-addon-npm-win-x86.yaml
index 98cba9dec1..0a21630dea 100644
--- a/.github/workflows/test-nodejs-addon-npm-win-x86.yaml
+++ b/.github/workflows/test-nodejs-addon-npm-win-x86.yaml
@@ -6,7 +6,6 @@ on:
- master
paths:
- '.github/workflows/test-nodejs-addon-npm-win-x86.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'sherpa-onnx/c-api/*'
@@ -20,7 +19,6 @@ on:
- master
paths:
- '.github/workflows/test-nodejs-addon-npm-win-x86.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'sherpa-onnx/c-api/*'
diff --git a/.github/workflows/test-nodejs-addon-npm.yaml b/.github/workflows/test-nodejs-addon-npm.yaml
index 27a962357d..0e2b9f55fd 100644
--- a/.github/workflows/test-nodejs-addon-npm.yaml
+++ b/.github/workflows/test-nodejs-addon-npm.yaml
@@ -6,7 +6,6 @@ on:
- master
paths:
- '.github/workflows/test-nodejs-addon-npm.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'sherpa-onnx/c-api/*'
@@ -19,7 +18,6 @@ on:
- master
paths:
- '.github/workflows/test-nodejs-addon-npm.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'sherpa-onnx/c-api/*'
diff --git a/.github/workflows/test-nodejs-npm.yaml b/.github/workflows/test-nodejs-npm.yaml
index cc49ac0c40..e1358fd8d2 100644
--- a/.github/workflows/test-nodejs-npm.yaml
+++ b/.github/workflows/test-nodejs-npm.yaml
@@ -26,7 +26,7 @@ jobs:
fail-fast: false
matrix:
os: [ubuntu-latest, macos-latest, windows-2019]
- python-version: ["3.8"]
+ python-version: ["3.10"]
steps:
- uses: actions/checkout@v4
diff --git a/.github/workflows/test-nodejs.yaml b/.github/workflows/test-nodejs.yaml
index 25f3c38fdc..78788ad047 100644
--- a/.github/workflows/test-nodejs.yaml
+++ b/.github/workflows/test-nodejs.yaml
@@ -6,7 +6,6 @@ on:
- master
paths:
- '.github/workflows/test-nodejs.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'sherpa-onnx/c-api/*'
@@ -18,7 +17,6 @@ on:
- master
paths:
- '.github/workflows/test-nodejs.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'sherpa-onnx/c-api/*'
diff --git a/.github/workflows/test-pip-install.yaml b/.github/workflows/test-pip-install.yaml
index 0f73e3643d..139e09a0e7 100644
--- a/.github/workflows/test-pip-install.yaml
+++ b/.github/workflows/test-pip-install.yaml
@@ -42,6 +42,8 @@ jobs:
python-version: "3.11"
- os: ubuntu-22.04
python-version: "3.12"
+ - os: ubuntu-22.04
+ python-version: "3.13"
- os: macos-12
python-version: "3.8"
@@ -55,6 +57,8 @@ jobs:
- os: macos-14
python-version: "3.12"
+ - os: macos-14
+ python-version: "3.13"
- os: windows-2019
python-version: "3.7"
@@ -69,6 +73,8 @@ jobs:
python-version: "3.11"
- os: windows-2022
python-version: "3.12"
+ - os: windows-2022
+ python-version: "3.13"
steps:
- uses: actions/checkout@v4
@@ -104,7 +110,8 @@ jobs:
export PATH=/c/hostedtoolcache/windows/Python/3.9.13/x64/bin:$PATH
export PATH=/c/hostedtoolcache/windows/Python/3.10.11/x64/bin:$PATH
export PATH=/c/hostedtoolcache/windows/Python/3.11.9/x64/bin:$PATH
- export PATH=/c/hostedtoolcache/windows/Python/3.12.6/x64/bin:$PATH
+ export PATH=/c/hostedtoolcache/windows/Python/3.12.8/x64/bin:$PATH
+ export PATH=/c/hostedtoolcache/windows/Python/3.13.1/x64/bin:$PATH
sherpa-onnx --help
sherpa-onnx-keyword-spotter --help
diff --git a/.github/workflows/test-piper-phonemize.yaml b/.github/workflows/test-piper-phonemize.yaml
index 1edbae6d2c..744095411d 100644
--- a/.github/workflows/test-piper-phonemize.yaml
+++ b/.github/workflows/test-piper-phonemize.yaml
@@ -5,7 +5,6 @@ on:
- master
paths:
- '.github/workflows/test-piper-phonemize.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
pull_request:
@@ -13,7 +12,6 @@ on:
- master
paths:
- '.github/workflows/test-piper-phonemize.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
diff --git a/.github/workflows/test-python-offline-websocket-server.yaml b/.github/workflows/test-python-offline-websocket-server.yaml
index 52a22ee5ae..4fa98464c5 100644
--- a/.github/workflows/test-python-offline-websocket-server.yaml
+++ b/.github/workflows/test-python-offline-websocket-server.yaml
@@ -6,7 +6,6 @@ on:
- master
paths:
- '.github/workflows/test-python-offline-websocket-server.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'sherpa-onnx/python/**'
@@ -15,7 +14,6 @@ on:
- master
paths:
- '.github/workflows/test-python-offline-websocket-server.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'sherpa-onnx/python/**'
diff --git a/.github/workflows/test-python-online-websocket-server.yaml b/.github/workflows/test-python-online-websocket-server.yaml
index badf343a0a..d22e93002a 100644
--- a/.github/workflows/test-python-online-websocket-server.yaml
+++ b/.github/workflows/test-python-online-websocket-server.yaml
@@ -6,7 +6,6 @@ on:
- master
paths:
- '.github/workflows/test-python-online-websocket-server.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'sherpa-onnx/python/**'
@@ -15,7 +14,6 @@ on:
- master
paths:
- '.github/workflows/test-python-online-websocket-server.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
- 'sherpa-onnx/python/**'
diff --git a/.github/workflows/wasm-simd-hf-space-de-tts.yaml b/.github/workflows/wasm-simd-hf-space-de-tts.yaml
index cbd3b1fce6..76013291b7 100644
--- a/.github/workflows/wasm-simd-hf-space-de-tts.yaml
+++ b/.github/workflows/wasm-simd-hf-space-de-tts.yaml
@@ -29,7 +29,7 @@ jobs:
- name: Install emsdk
uses: mymindstorm/setup-emsdk@v14
with:
- version: 3.1.51
+ version: 3.1.53
actions-cache-folder: 'emsdk-cache'
- name: View emsdk version
diff --git a/.github/workflows/wasm-simd-hf-space-en-asr-zipformer.yaml b/.github/workflows/wasm-simd-hf-space-en-asr-zipformer.yaml
index 510a003c7a..d34a182d41 100644
--- a/.github/workflows/wasm-simd-hf-space-en-asr-zipformer.yaml
+++ b/.github/workflows/wasm-simd-hf-space-en-asr-zipformer.yaml
@@ -28,7 +28,7 @@ jobs:
- name: Install emsdk
uses: mymindstorm/setup-emsdk@v14
with:
- version: 3.1.51
+ version: 3.1.53
actions-cache-folder: 'emsdk-cache'
- name: View emsdk version
diff --git a/.github/workflows/wasm-simd-hf-space-en-tts.yaml b/.github/workflows/wasm-simd-hf-space-en-tts.yaml
index 9c5c1d4469..d67ae88181 100644
--- a/.github/workflows/wasm-simd-hf-space-en-tts.yaml
+++ b/.github/workflows/wasm-simd-hf-space-en-tts.yaml
@@ -29,7 +29,7 @@ jobs:
- name: Install emsdk
uses: mymindstorm/setup-emsdk@v14
with:
- version: 3.1.51
+ version: 3.1.53
actions-cache-folder: 'emsdk-cache'
- name: View emsdk version
diff --git a/.github/workflows/wasm-simd-hf-space-silero-vad.yaml b/.github/workflows/wasm-simd-hf-space-silero-vad.yaml
index dc8bada704..81052cac84 100644
--- a/.github/workflows/wasm-simd-hf-space-silero-vad.yaml
+++ b/.github/workflows/wasm-simd-hf-space-silero-vad.yaml
@@ -29,7 +29,7 @@ jobs:
- name: Install emsdk
uses: mymindstorm/setup-emsdk@v14
with:
- version: 3.1.51
+ version: 3.1.53
actions-cache-folder: 'emsdk-cache'
- name: View emsdk version
diff --git a/.github/workflows/wasm-simd-hf-space-speaker-diarization.yaml b/.github/workflows/wasm-simd-hf-space-speaker-diarization.yaml
new file mode 100644
index 0000000000..14301f9f06
--- /dev/null
+++ b/.github/workflows/wasm-simd-hf-space-speaker-diarization.yaml
@@ -0,0 +1,167 @@
+name: wasm-simd-hf-space-speaker-diarization
+
+on:
+ push:
+ branches:
+ - wasm
+ - wasm-speaker-diarization
+ tags:
+ - 'v[0-9]+.[0-9]+.[0-9]+*'
+
+ workflow_dispatch:
+
+concurrency:
+ group: wasm-simd-hf-space-speaker-diarization-${{ github.ref }}
+ cancel-in-progress: true
+
+jobs:
+ wasm-simd-hf-space-speaker-diarization:
+ runs-on: ${{ matrix.os }}
+ strategy:
+ fail-fast: false
+ matrix:
+ os: [ubuntu-latest]
+
+ steps:
+ - uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+
+ - name: Install emsdk
+ uses: mymindstorm/setup-emsdk@v14
+ with:
+ version: 3.1.53
+ actions-cache-folder: 'emsdk-cache'
+
+ - name: View emsdk version
+ shell: bash
+ run: |
+ emcc -v
+ echo "--------------------"
+ emcc --check
+
+ - name: Download model files
+ shell: bash
+ run: |
+ cd wasm/speaker-diarization/assets/
+ ls -lh
+ echo "----------"
+
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+ tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+ rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+ mv sherpa-onnx-pyannote-segmentation-3-0/model.onnx ./segmentation.onnx
+ rm -rf sherpa-onnx-pyannote-segmentation-3-0
+
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
+ mv 3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx ./embedding.onnx
+
+ echo "----------"
+
+ ls -lh
+
+ - name: Build sherpa-onnx for WebAssembly
+ shell: bash
+ run: |
+ ./build-wasm-simd-speaker-diarization.sh
+
+ - name: collect files
+ shell: bash
+ run: |
+ SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
+
+ dst=sherpa-onnx-wasm-simd-${SHERPA_ONNX_VERSION}-speaker-diarization
+ mv build-wasm-simd-speaker-diarization/install/bin/wasm/speaker-diarization $dst
+ ls -lh $dst
+ tar cjfv $dst.tar.bz2 ./$dst
+
+ - name: Upload wasm files
+ uses: actions/upload-artifact@v4
+ with:
+ name: sherpa-onnx-wasm-simd-speaker-diarization
+ path: ./sherpa-onnx-wasm-simd-*.tar.bz2
+
+ - name: Release
+ if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
+ uses: svenstaro/upload-release-action@v2
+ with:
+ file_glob: true
+ overwrite: true
+ file: ./*.tar.bz2
+
+ - name: Publish to ModelScope
+ # if: false
+ env:
+ MS_TOKEN: ${{ secrets.MODEL_SCOPE_GIT_TOKEN }}
+ uses: nick-fields/retry@v2
+ with:
+ max_attempts: 20
+ timeout_seconds: 200
+ shell: bash
+ command: |
+ SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
+
+ git config --global user.email "csukuangfj@gmail.com"
+ git config --global user.name "Fangjun Kuang"
+
+ rm -rf ms
+ export GIT_LFS_SKIP_SMUDGE=1
+ export GIT_CLONE_PROTECTION_ACTIVE=false
+
+ git clone https://www.modelscope.cn/studios/csukuangfj/web-assembly-speaker-diarization-sherpa-onnx.git ms
+ cd ms
+ rm -fv *.js
+ rm -fv *.data
+ git fetch
+ git pull
+ git merge -m "merge remote" --ff origin main
+
+ cp -v ../sherpa-onnx-wasm-simd-${SHERPA_ONNX_VERSION}-*/* .
+
+ git status
+ git lfs track "*.data"
+ git lfs track "*.wasm"
+ ls -lh
+
+ git add .
+ git commit -m "update model"
+ git push https://oauth2:${MS_TOKEN}@www.modelscope.cn/studios/csukuangfj/web-assembly-speaker-diarization-sherpa-onnx.git
+
+ - name: Publish to huggingface
+ env:
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
+ uses: nick-fields/retry@v2
+ with:
+ max_attempts: 20
+ timeout_seconds: 200
+ shell: bash
+ command: |
+ SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
+
+ git config --global user.email "csukuangfj@gmail.com"
+ git config --global user.name "Fangjun Kuang"
+
+ rm -rf huggingface
+ export GIT_LFS_SKIP_SMUDGE=1
+ export GIT_CLONE_PROTECTION_ACTIVE=false
+
+ git clone https://csukuangfj:$HF_TOKEN@huggingface.co/spaces/k2-fsa/web-assembly-speaker-diarization-sherpa-onnx huggingface
+ ls -lh
+
+ cd huggingface
+ rm -fv *.js
+ rm -fv *.data
+ git fetch
+ git pull
+ git merge -m "merge remote" --ff origin main
+
+ cp -v ../sherpa-onnx-wasm-simd-${SHERPA_ONNX_VERSION}-*/* .
+
+ git status
+ git lfs track "*.data"
+ git lfs track "*.wasm"
+ ls -lh
+
+ git add .
+ git commit -m "update model"
+ git push https://csukuangfj:$HF_TOKEN@huggingface.co/spaces/k2-fsa/web-assembly-speaker-diarization-sherpa-onnx main
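Note: the WASM speaker-diarization build expects two renamed models in wasm/speaker-diarization/assets/ before ./build-wasm-simd-speaker-diarization.sh runs: the pyannote segmentation model as segmentation.onnx and a speaker-embedding model as embedding.onnx. A condensed sketch of that preparation step (URLs copied verbatim from the workflow above, including the spelling of the release tag):

    cd wasm/speaker-diarization/assets/

    # Segmentation model -> segmentation.onnx
    curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
    tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
    mv sherpa-onnx-pyannote-segmentation-3-0/model.onnx ./segmentation.onnx
    rm -rf sherpa-onnx-pyannote-segmentation-3-0 sherpa-onnx-pyannote-segmentation-3-0.tar.bz2

    # Speaker embedding model -> embedding.onnx
    curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
    mv 3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx ./embedding.onnx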
diff --git a/.github/workflows/wasm-simd-hf-space-vad-asr.yaml b/.github/workflows/wasm-simd-hf-space-vad-asr.yaml
index c093f0fe99..18c1c1d607 100644
--- a/.github/workflows/wasm-simd-hf-space-vad-asr.yaml
+++ b/.github/workflows/wasm-simd-hf-space-vad-asr.yaml
@@ -37,7 +37,7 @@ jobs:
- name: Install emsdk
uses: mymindstorm/setup-emsdk@v14
with:
- version: 3.1.51
+ version: 3.1.53
actions-cache-folder: 'emsdk-cache'
- name: View emsdk version
diff --git a/.github/workflows/wasm-simd-hf-space-zh-cantonese-en-asr-paraformer.yaml b/.github/workflows/wasm-simd-hf-space-zh-cantonese-en-asr-paraformer.yaml
index c72e0cef29..02a328a9bd 100644
--- a/.github/workflows/wasm-simd-hf-space-zh-cantonese-en-asr-paraformer.yaml
+++ b/.github/workflows/wasm-simd-hf-space-zh-cantonese-en-asr-paraformer.yaml
@@ -29,7 +29,7 @@ jobs:
- name: Install emsdk
uses: mymindstorm/setup-emsdk@v14
with:
- version: 3.1.51
+ version: 3.1.53
actions-cache-folder: 'emsdk-cache'
- name: View emsdk version
diff --git a/.github/workflows/wasm-simd-hf-space-zh-en-asr-paraformer.yaml b/.github/workflows/wasm-simd-hf-space-zh-en-asr-paraformer.yaml
index b76f912b47..1a72be6ab4 100644
--- a/.github/workflows/wasm-simd-hf-space-zh-en-asr-paraformer.yaml
+++ b/.github/workflows/wasm-simd-hf-space-zh-en-asr-paraformer.yaml
@@ -29,7 +29,7 @@ jobs:
- name: Install emsdk
uses: mymindstorm/setup-emsdk@v14
with:
- version: 3.1.51
+ version: 3.1.53
actions-cache-folder: 'emsdk-cache'
- name: View emsdk version
diff --git a/.github/workflows/wasm-simd-hf-space-zh-en-asr-zipformer.yaml b/.github/workflows/wasm-simd-hf-space-zh-en-asr-zipformer.yaml
index 9bdd90ee24..8b7c2029f7 100644
--- a/.github/workflows/wasm-simd-hf-space-zh-en-asr-zipformer.yaml
+++ b/.github/workflows/wasm-simd-hf-space-zh-en-asr-zipformer.yaml
@@ -29,7 +29,7 @@ jobs:
- name: Install emsdk
uses: mymindstorm/setup-emsdk@v14
with:
- version: 3.1.51
+ version: 3.1.53
actions-cache-folder: 'emsdk-cache'
- name: View emsdk version
diff --git a/.github/workflows/windows-arm64.yaml b/.github/workflows/windows-arm64.yaml
index a6d2a96da2..b6ab5bf7e9 100644
--- a/.github/workflows/windows-arm64.yaml
+++ b/.github/workflows/windows-arm64.yaml
@@ -8,7 +8,6 @@ on:
- 'v[0-9]+.[0-9]+.[0-9]+*'
paths:
- '.github/workflows/windows-arm64.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
pull_request:
@@ -16,7 +15,6 @@ on:
- master
paths:
- '.github/workflows/windows-arm64.yaml'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
@@ -34,7 +32,7 @@ jobs:
fail-fast: false
matrix:
os: [windows-latest]
- shared_lib: [ON]
+ shared_lib: [ON, OFF]
with_tts: [ON, OFF]
steps:
diff --git a/.github/workflows/windows-x64-cuda.yaml b/.github/workflows/windows-x64-cuda.yaml
index fd45704558..0d15af946b 100644
--- a/.github/workflows/windows-x64-cuda.yaml
+++ b/.github/workflows/windows-x64-cuda.yaml
@@ -14,7 +14,6 @@ on:
- '.github/scripts/test-offline-ctc.sh'
- '.github/scripts/test-online-ctc.sh'
- '.github/scripts/test-offline-tts.sh'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
pull_request:
@@ -28,7 +27,6 @@ on:
- '.github/scripts/test-offline-ctc.sh'
- '.github/scripts/test-online-ctc.sh'
- '.github/scripts/test-offline-tts.sh'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
diff --git a/.github/workflows/windows-x64-debug.yaml b/.github/workflows/windows-x64-debug.yaml
index 09f93fd0d0..7abf022853 100644
--- a/.github/workflows/windows-x64-debug.yaml
+++ b/.github/workflows/windows-x64-debug.yaml
@@ -14,7 +14,6 @@ on:
- '.github/scripts/test-offline-ctc.sh'
- '.github/scripts/test-online-ctc.sh'
- '.github/scripts/test-offline-tts.sh'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
pull_request:
@@ -28,7 +27,6 @@ on:
- '.github/scripts/test-offline-ctc.sh'
- '.github/scripts/test-online-ctc.sh'
- '.github/scripts/test-offline-tts.sh'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
diff --git a/.github/workflows/windows-x64.yaml b/.github/workflows/windows-x64.yaml
index 2d2811c31e..76dd426238 100644
--- a/.github/workflows/windows-x64.yaml
+++ b/.github/workflows/windows-x64.yaml
@@ -17,7 +17,9 @@ on:
- '.github/scripts/test-audio-tagging.sh'
- '.github/scripts/test-offline-punctuation.sh'
- '.github/scripts/test-online-punctuation.sh'
- - 'CMakeLists.txt'
+ - '.github/scripts/test-speaker-diarization.sh'
+ - '.github/scripts/test-c-api.sh'
+ - '.github/scripts/test-cxx-api.sh'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
pull_request:
@@ -34,7 +36,9 @@ on:
- '.github/scripts/test-audio-tagging.sh'
- '.github/scripts/test-offline-punctuation.sh'
- '.github/scripts/test-online-punctuation.sh'
- - 'CMakeLists.txt'
+ - '.github/scripts/test-speaker-diarization.sh'
+ - '.github/scripts/test-c-api.sh'
+ - '.github/scripts/test-cxx-api.sh'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
@@ -87,6 +91,32 @@ jobs:
name: release-windows-x64-${{ matrix.shared_lib }}-${{ matrix.with_tts }}
path: build/install/*
+ - name: Test offline Moonshine for windows x64
+ shell: bash
+ run: |
+ export PATH=$PWD/build/bin/Release:$PATH
+ export EXE=sherpa-onnx-offline.exe
+
+ .github/scripts/test-offline-moonshine.sh
+
+ - name: Test C++ API
+ shell: bash
+ run: |
+ export PATH=$PWD/build/bin/Release:$PATH
+ export CXX_STREAMING_ZIPFORMER_EXE=streaming-zipformer-cxx-api.exe
+ export CXX_WHISPER_EXE=whisper-cxx-api.exe
+ export CXX_SENSE_VOICE_EXE=sense-voice-cxx-api.exe
+
+ .github/scripts/test-cxx-api.sh
+
+ - name: Test offline speaker diarization
+ shell: bash
+ run: |
+ export PATH=$PWD/build/bin/Release:$PATH
+ export EXE=sherpa-onnx-offline-speaker-diarization.exe
+
+ .github/scripts/test-speaker-diarization.sh
+
- name: Test online punctuation
shell: bash
run: |
diff --git a/.github/workflows/windows-x86-debug.yaml b/.github/workflows/windows-x86-debug.yaml
index f72bf25664..59d9ef3707 100644
--- a/.github/workflows/windows-x86-debug.yaml
+++ b/.github/workflows/windows-x86-debug.yaml
@@ -14,7 +14,6 @@ on:
- '.github/scripts/test-offline-ctc.sh'
- '.github/scripts/test-offline-tts.sh'
- '.github/scripts/test-online-ctc.sh'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
pull_request:
@@ -28,7 +27,6 @@ on:
- '.github/scripts/test-offline-ctc.sh'
- '.github/scripts/test-offline-tts.sh'
- '.github/scripts/test-online-ctc.sh'
- - 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
diff --git a/.github/workflows/windows-x86.yaml b/.github/workflows/windows-x86.yaml
index 316cef6265..f1498c0c0c 100644
--- a/.github/workflows/windows-x86.yaml
+++ b/.github/workflows/windows-x86.yaml
@@ -17,7 +17,9 @@ on:
- '.github/scripts/test-audio-tagging.sh'
- '.github/scripts/test-offline-punctuation.sh'
- '.github/scripts/test-online-punctuation.sh'
- - 'CMakeLists.txt'
+ - '.github/scripts/test-speaker-diarization.sh'
+ - '.github/scripts/test-c-api.sh'
+ - '.github/scripts/test-cxx-api.sh'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
pull_request:
@@ -34,7 +36,9 @@ on:
- '.github/scripts/test-audio-tagging.sh'
- '.github/scripts/test-offline-punctuation.sh'
- '.github/scripts/test-online-punctuation.sh'
- - 'CMakeLists.txt'
+ - '.github/scripts/test-speaker-diarization.sh'
+ - '.github/scripts/test-c-api.sh'
+ - '.github/scripts/test-cxx-api.sh'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
@@ -87,6 +91,32 @@ jobs:
name: release-windows-x86-${{ matrix.shared_lib }}-${{ matrix.with_tts }}
path: build/install/*
+ - name: Test offline Moonshine for windows x86
+ shell: bash
+ run: |
+ export PATH=$PWD/build/bin/Release:$PATH
+ export EXE=sherpa-onnx-offline.exe
+
+ .github/scripts/test-offline-moonshine.sh
+
+ - name: Test C++ API
+ shell: bash
+ run: |
+ export PATH=$PWD/build/bin/Release:$PATH
+ export CXX_STREAMING_ZIPFORMER_EXE=streaming-zipformer-cxx-api.exe
+ export CXX_WHISPER_EXE=whisper-cxx-api.exe
+ export CXX_SENSE_VOICE_EXE=sense-voice-cxx-api.exe
+
+ .github/scripts/test-cxx-api.sh
+
+ - name: Test offline speaker diarization
+ shell: bash
+ run: |
+ export PATH=$PWD/build/bin/Release:$PATH
+ export EXE=sherpa-onnx-offline-speaker-diarization.exe
+
+ .github/scripts/test-speaker-diarization.sh
+
- name: Test online punctuation
shell: bash
run: |
diff --git a/.gitignore b/.gitignore
index b0fbfae781..ea356b0652 100644
--- a/.gitignore
+++ b/.gitignore
@@ -120,3 +120,16 @@ vits-melo-tts-zh_en
sherpa-onnx-online-punct-en-2024-08-06
*.mp4
*.mp3
+sherpa-onnx-pyannote-segmentation-3-0
+sherpa-onnx-moonshine-tiny-en-int8
+sherpa-onnx-moonshine-base-en-int8
+harmony-os/SherpaOnnxHar/sherpa_onnx/LICENSE
+harmony-os/SherpaOnnxHar/sherpa_onnx/CHANGELOG.md
+matcha-icefall-zh-baker
+matcha-icefall-en_US-ljspeech
+kokoro-en-v0_19
+*.pt
+lexicon.txt
+us_gold.json
+us_silver.json
+kokoro-multi-lang-v1_0
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7af4a3f67b..4317d83976 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,233 @@
+## 1.10.42
+
+* Fix publishing wheels (#1746)
+* Update README to include https://github.com/xinhecuican/QSmartAssistant (#1755)
+* Add Kokoro TTS to MFC examples (#1760)
+* Refactor node-addon C++ code. (#1768)
+* Add keyword spotter C API for HarmonyOS (#1769)
+* Add ArkTS API for Keyword spotting. (#1775)
+* Add Flutter example for Kokoro TTS (#1776)
+* Initialize the audio session for iOS ASR example (#1786)
+* Fix: Prepend 0 to tokenization to prevent word skipping for Kokoro. (#1787)
+* Export Kokoro 1.0 to sherpa-onnx (#1788)
+* Add C++ and Python API for Kokoro 1.0 multilingual TTS model (#1795)
+* Add Java and Kotlin API for Kokoro TTS 1.0 (#1798)
+* Add Android demo for Kokoro TTS 1.0 (#1799)
+* Add C API for Kokoro TTS 1.0 (#1801)
+* Add CXX API for Kokoro TTS 1.0 (#1802)
+* Add Swift API for Kokoro TTS 1.0 (#1803)
+* Add Go API for Kokoro TTS 1.0 (#1804)
+* Add C# API for Kokoro TTS 1.0 (#1805)
+* Add Dart API for Kokoro TTS 1.0 (#1806)
+* Add Pascal API for Kokoro TTS 1.0 (#1807)
+* Add JavaScript API (node-addon) for Kokoro TTS 1.0 (#1808)
+* Add JavaScript API (WebAssembly) for Kokoro TTS 1.0 (#1809)
+* Add Flutter example for Kokoro TTS 1.0 (#1810)
+* Add iOS demo for Kokoro TTS 1.0 (#1812)
+* Add HarmonyOS demo for Kokoro TTS 1.0 (#1813)
+
+## 1.10.41
+
+* Fix UI for Android TTS Engine. (#1735)
+* Add iOS TTS example for MatchaTTS (#1736)
+* Add iOS example for Kokoro TTS (#1737)
+* Fix dither binding in Pybind11 to ensure independence from high_freq in FeatureExtractorConfig (#1739)
+* Fix keyword spotting. (#1689)
+* Update readme to include https://github.com/hfyydd/sherpa-onnx-server (#1741)
+* Reduce vad-moonshine-c-api example code. (#1742)
+* Support Kokoro TTS for HarmonyOS. (#1743)
+
+## 1.10.40
+
+* Fix building wheels (#1703)
+* Export kokoro to sherpa-onnx (#1713)
+* Add C++ and Python API for Kokoro TTS models. (#1715)
+* Add C API for Kokoro TTS models (#1717)
+* Fix style issues (#1718)
+* Add C# API for Kokoro TTS models (#1720)
+* Add Swift API for Kokoro TTS models (#1721)
+* Add Go API for Kokoro TTS models (#1722)
+* Add Dart API for Kokoro TTS models (#1723)
+* Add Pascal API for Kokoro TTS models (#1724)
+* Add JavaScript API (node-addon) for Kokoro TTS models (#1725)
+* Add JavaScript (WebAssembly) API for Kokoro TTS models. (#1726)
+* Add Kotlin and Java API for Kokoro TTS models (#1728)
+* Update README.md for KWS to not use git lfs. (#1729)
+
+
+
+
+## 1.10.39
+
+* Fix building without TTS (#1691)
+* Add README for android libs. (#1693)
+* Fix: export-onnx.py (expected all tensors to be on the same device) (#1699)
+* Fix passing strings from C# to C. (#1701)
+
+## 1.10.38
+
+* Fix initializing TTS in Python. (#1664)
+* Remove spaces after punctuations for TTS (#1666)
+* Add constructor fromPtr() for all flutter class with factory ctor. (#1667)
+* Add Kotlin API for Matcha-TTS models. (#1668)
+* Support Matcha-TTS models using espeak-ng (#1672)
+* Add Java API for Matcha-TTS models. (#1673)
+* Avoid adding tail padding for VAD in generate-subtitles.py (#1674)
+* Add C API for MatchaTTS models (#1675)
+* Add CXX API for MatchaTTS models (#1676)
+* Add JavaScript API (node-addon-api) for MatchaTTS models. (#1677)
+* Add HarmonyOS examples for MatchaTTS. (#1678)
+* Upgraded to .NET 8 and made code style a little more internally consistent. (#1680)
+* Update workflows to use .NET 8.0 also. (#1681)
+* Add C# and JavaScript (wasm) API for MatchaTTS models (#1682)
+* Add Android demo for MatchaTTS models. (#1683)
+* Add Swift API for MatchaTTS models. (#1684)
+* Add Go API for MatchaTTS models (#1685)
+* Add Pascal API for MatchaTTS models. (#1686)
+* Add Dart API for MatchaTTS models (#1687)
+
+## 1.10.37
+
+* Add new tts models for Latvia and Persian+English (#1644)
+* Add a byte-level BPE Chinese+English non-streaming zipformer model (#1645)
+* Support removing invalid utf-8 sequences. (#1648)
+* Add TeleSpeech CTC to non_streaming_server.py (#1649)
+* Fix building macOS libs (#1656)
+* Add Go API for Keyword spotting (#1662)
+* Add Swift online punctuation (#1661)
+* Add C++ runtime for Matcha-TTS (#1627)
+
+## 1.10.36
+
+* Update AAR version in Android Java demo (#1618)
+* Support linking onnxruntime statically for Android (#1619)
+* Update readme to include Open-LLM-VTuber (#1622)
+* Rename maxNumStences to maxNumSentences (#1625)
+* Support using onnxruntime 1.16.0 with CUDA 11.4 on Jetson Orin NX (Linux arm64 GPU). (#1630)
+* Update readme to include jetson orin nx and nano b01 (#1631)
+* feat: add checksum action (#1632)
+* Support decoding with byte-level BPE (bbpe) models. (#1633)
+* feat: enable c api for android ci (#1635)
+* Update README.md (#1640)
+* SherpaOnnxVadAsr: Offload runSecondPass to background thread for improved real-time audio processing (#1638)
+* Fix GitHub actions. (#1642)
+
+
+## 1.10.35
+
+* Add missing changes about speaker identification demo for HarmonyOS (#1612)
+* Provide sherpa-onnx.aar for Android (#1615)
+* Use aar in Android Java demo. (#1616)
+
+## 1.10.34
+
+* Fix building node-addon package (#1598)
+* Update doc links for HarmonyOS (#1601)
+* Add on-device real-time ASR demo for HarmonyOS (#1606)
+* Add speaker identification APIs for HarmonyOS (#1607)
+* Add speaker identification demo for HarmonyOS (#1608)
+* Add speaker diarization API for HarmonyOS. (#1609)
+* Add speaker diarization demo for HarmonyOS (#1610)
+
+## 1.10.33
+
+* Add non-streaming ASR support for HarmonyOS. (#1564)
+* Add streaming ASR support for HarmonyOS. (#1565)
+* Fix building for Android (#1568)
+* Publish `sherpa_onnx.har` for HarmonyOS (#1572)
+* Add VAD+ASR demo for HarmonyOS (#1573)
+* Fix publishing har packages for HarmonyOS (#1576)
+* Add CI to build HAPs for HarmonyOS (#1578)
+* Add microphone demo about VAD+ASR for HarmonyOS (#1581)
+* Fix getting microphone permission for HarmonyOS VAD+ASR example (#1582)
+* Add HarmonyOS support for text-to-speech. (#1584)
+* Fix: support both old and new websockets request headers format (#1588)
+* Add on-device text-to-speech (TTS) demo for HarmonyOS (#1590)
+
+## 1.10.32
+
+* Support cross-compiling for HarmonyOS (#1553)
+* HarmonyOS support for VAD. (#1561)
+* Fix publishing flutter iOS app to appstore (#1563).
+
+## 1.10.31
+
+* Publish pre-built wheels for Python 3.13 (#1485)
+* Publish pre-built macos xcframework (#1490)
+* Fix reading tokens.txt on Windows. (#1497)
+* Add two-pass ASR Android APKs for Moonshine models. (#1499)
+* Support building GPU-capable sherpa-onnx on Linux aarch64. (#1500)
+* Publish pre-built wheels with CUDA support for Linux aarch64. (#1507)
+* Export the English TTS model from MeloTTS (#1509)
+* Add Lazarus example for Moonshine models. (#1532)
+* Add isolate_tts demo (#1529)
+* Add WebAssembly example for VAD + Moonshine models. (#1535)
+* Add Android APK for streaming Paraformer ASR (#1538)
+* Support static build for windows arm64. (#1539)
+* Use xcframework for Flutter iOS plugin to support iOS simulators.
+
+## 1.10.30
+
+* Fix building node-addon for Windows x86. (#1469)
+* Begin to support https://github.com/usefulsensors/moonshine (#1470)
+* Publish pre-built JNI libs for Linux aarch64 (#1472)
+* Add C++ runtime and Python APIs for Moonshine models (#1473)
+* Add Kotlin and Java API for Moonshine models (#1474)
+* Add C and C++ API for Moonshine models (#1476)
+* Add Swift API for Moonshine models. (#1477)
+* Add Go API examples for adding punctuations to text. (#1478)
+* Add Go API for Moonshine models (#1479)
+* Add JavaScript API for Moonshine models (#1480)
+* Add Dart API for Moonshine models. (#1481)
+* Add Pascal API for Moonshine models (#1482)
+* Add C# API for Moonshine models. (#1483)
+
+## 1.10.29
+
+* Add Go API for offline punctuation models (#1434)
+* Support https://huggingface.co/Revai/reverb-diarization-v1 (#1437)
+* Add more models for speaker diarization (#1440)
+* Add Java API example for hotwords. (#1442)
+* Add java android demo (#1454)
+* Add C++ API for streaming ASR. (#1455)
+* Add C++ API for non-streaming ASR (#1456)
+* Handle NaN embeddings in speaker diarization. (#1461)
+* Add speaker identification with VAD and non-streaming ASR using ALSA (#1463)
+* Support GigaAM CTC models for Russian ASR (#1464)
+* Add GigaAM NeMo transducer model for Russian ASR (#1467)
+
+## 1.10.28
+
+* Fix swift example for generating subtitles. (#1362)
+* Allow more online models to load tokens file from the memory (#1352)
+* Fix CI errors introduced by supporting loading keywords from buffers (#1366)
+* Fix running MeloTTS models on GPU. (#1379)
+* Support Parakeet models from NeMo (#1381)
+* Export Pyannote speaker segmentation models to onnx (#1382)
+* Support Agglomerative clustering. (#1384)
+* Add Python API for clustering (#1385)
+* support whisper turbo (#1390)
+* context_state is not set correctly when previous context is passed after reset (#1393)
+* Speaker diarization example with onnxruntime Python API (#1395)
+* C++ API for speaker diarization (#1396)
+* Python API for speaker diarization. (#1400)
+* C API for speaker diarization (#1402)
+* docs(nodejs-addon-examples): add guide for pnpm user (#1401)
+* Go API for speaker diarization (#1403)
+* Swift API for speaker diarization (#1404)
+* Update readme to include more external projects using sherpa-onnx (#1405)
+* C# API for speaker diarization (#1407)
+* JavaScript API (node-addon) for speaker diarization (#1408)
+* WebAssembly example for speaker diarization (#1411)
+* Handle audio files less than 10s long for speaker diarization. (#1412)
+* JavaScript API with WebAssembly for speaker diarization (#1414)
+* Kotlin API for speaker diarization (#1415)
+* Java API for speaker diarization (#1416)
+* Dart API for speaker diarization (#1418)
+* Pascal API for speaker diarization (#1420)
+* Android JNI support for speaker diarization (#1421)
+* Android demo for speaker diarization (#1423)
+
## 1.10.27
* Add non-streaming ONNX models for Russian ASR (#1358)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9084a0216a..ef6d45b18f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -8,10 +8,9 @@ set(CMAKE_POLICY_DEFAULT_CMP0069 NEW)
project(sherpa-onnx)
# Remember to update
-# ./nodejs-addon-examples
-# ./dart-api-examples/
# ./CHANGELOG.md
-set(SHERPA_ONNX_VERSION "1.10.27")
+# ./new-release.sh
+set(SHERPA_ONNX_VERSION "1.10.42")
# Disable warning about
#
@@ -32,6 +31,7 @@ option(SHERPA_ONNX_ENABLE_WEBSOCKET "Whether to build webscoket server/client" O
option(SHERPA_ONNX_ENABLE_GPU "Enable ONNX Runtime GPU support" OFF)
option(SHERPA_ONNX_ENABLE_DIRECTML "Enable ONNX Runtime DirectML support" OFF)
option(SHERPA_ONNX_ENABLE_WASM "Whether to enable WASM" OFF)
+option(SHERPA_ONNX_ENABLE_WASM_SPEAKER_DIARIZATION "Whether to enable WASM for speaker diarization" OFF)
option(SHERPA_ONNX_ENABLE_WASM_TTS "Whether to enable WASM for TTS" OFF)
option(SHERPA_ONNX_ENABLE_WASM_ASR "Whether to enable WASM for ASR" OFF)
option(SHERPA_ONNX_ENABLE_WASM_KWS "Whether to enable WASM for KWS" OFF)
@@ -46,13 +46,18 @@ option(SHERPA_ONNX_USE_PRE_INSTALLED_ONNXRUNTIME_IF_AVAILABLE "True to use pre-i
option(SHERPA_ONNX_ENABLE_SANITIZER "Whether to enable ubsan and asan" OFF)
option(SHERPA_ONNX_BUILD_C_API_EXAMPLES "Whether to enable C API examples" ON)
+set(SHERPA_ONNX_LINUX_ARM64_GPU_ONNXRUNTIME_VERSION "1.11.0" CACHE STRING "Used only for Linux ARM64 GPU. If you use Jetson nano b01, then please set it to 1.11.0. If you use Jetson Orin NX, then set it to 1.16.0")
+
+
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib")
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib")
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin")
-set(CMAKE_SKIP_BUILD_RPATH FALSE)
-set(BUILD_RPATH_USE_ORIGIN TRUE)
-set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
+if(NOT WIN32)
+ set(CMAKE_SKIP_BUILD_RPATH FALSE)
+ set(BUILD_RPATH_USE_ORIGIN TRUE)
+ set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
+endif()
if(NOT APPLE)
set(SHERPA_ONNX_RPATH_ORIGIN "$ORIGIN")
@@ -80,11 +85,6 @@ if(SHERPA_ONNX_ENABLE_PYTHON AND NOT BUILD_SHARED_LIBS)
set(BUILD_SHARED_LIBS ON CACHE BOOL "" FORCE)
endif()
-if(SHERPA_ONNX_ENABLE_JNI AND NOT BUILD_SHARED_LIBS)
- message(STATUS "Set BUILD_SHARED_LIBS to ON since SHERPA_ONNX_ENABLE_JNI is ON")
- set(BUILD_SHARED_LIBS ON CACHE BOOL "" FORCE)
-endif()
-
if(SHERPA_ONNX_ENABLE_GPU)
message(WARNING "\
Compiling for NVIDIA GPU is enabled. Please make sure cudatoolkit
@@ -123,6 +123,11 @@ if(MSVC)
)
endif()
+if(CMAKE_SYSTEM_NAME STREQUAL OHOS)
+ set(CMAKE_CXX_FLAGS "-Wno-unused-command-line-argument ${CMAKE_CXX_FLAGS}")
+ set(CMAKE_C_FLAGS "-Wno-unused-command-line-argument ${CMAKE_C_FLAGS}")
+endif()
+
message(STATUS "CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}")
message(STATUS "CMAKE_INSTALL_PREFIX: ${CMAKE_INSTALL_PREFIX}")
message(STATUS "BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}")
@@ -135,6 +140,7 @@ message(STATUS "SHERPA_ONNX_ENABLE_C_API ${SHERPA_ONNX_ENABLE_C_API}")
message(STATUS "SHERPA_ONNX_ENABLE_WEBSOCKET ${SHERPA_ONNX_ENABLE_WEBSOCKET}")
message(STATUS "SHERPA_ONNX_ENABLE_GPU ${SHERPA_ONNX_ENABLE_GPU}")
message(STATUS "SHERPA_ONNX_ENABLE_WASM ${SHERPA_ONNX_ENABLE_WASM}")
+message(STATUS "SHERPA_ONNX_ENABLE_WASM_SPEAKER_DIARIZATION ${SHERPA_ONNX_ENABLE_WASM_SPEAKER_DIARIZATION}")
message(STATUS "SHERPA_ONNX_ENABLE_WASM_TTS ${SHERPA_ONNX_ENABLE_WASM_TTS}")
message(STATUS "SHERPA_ONNX_ENABLE_WASM_ASR ${SHERPA_ONNX_ENABLE_WASM_ASR}")
message(STATUS "SHERPA_ONNX_ENABLE_WASM_KWS ${SHERPA_ONNX_ENABLE_WASM_KWS}")
@@ -149,7 +155,7 @@ message(STATUS "SHERPA_ONNX_USE_PRE_INSTALLED_ONNXRUNTIME_IF_AVAILABLE ${SHERPA_
message(STATUS "SHERPA_ONNX_ENABLE_SANITIZER: ${SHERPA_ONNX_ENABLE_SANITIZER}")
message(STATUS "SHERPA_ONNX_BUILD_C_API_EXAMPLES: ${SHERPA_ONNX_BUILD_C_API_EXAMPLES}")
-if(BUILD_SHARED_LIBS)
+if(BUILD_SHARED_LIBS OR SHERPA_ONNX_ENABLE_JNI)
set(CMAKE_CXX_VISIBILITY_PRESET hidden)
set(CMAKE_VISIBILITY_INLINES_HIDDEN 1)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
@@ -196,9 +202,19 @@ else()
add_definitions(-DSHERPA_ONNX_ENABLE_DIRECTML=0)
endif()
+if(SHERPA_ONNX_ENABLE_WASM_SPEAKER_DIARIZATION)
+ if(NOT SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION)
+ message(FATAL_ERROR "Please set SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION to ON if you want to build WASM for speaker diarization")
+ endif()
+
+ if(NOT SHERPA_ONNX_ENABLE_WASM)
+ message(FATAL_ERROR "Please set SHERPA_ONNX_ENABLE_WASM to ON if you enable WASM for speaker diarization")
+ endif()
+endif()
+
if(SHERPA_ONNX_ENABLE_WASM_TTS)
if(NOT SHERPA_ONNX_ENABLE_TTS)
- message(FATAL_ERROR "Please set SHERPA_ONNX_ENABLE_TTS to ON if you want to build wasm TTS")
+ message(FATAL_ERROR "Please set SHERPA_ONNX_ENABLE_TTS to ON if you want to build WASM for TTS")
endif()
if(NOT SHERPA_ONNX_ENABLE_WASM)
@@ -250,7 +266,7 @@ message(STATUS "C++ Standard version: ${CMAKE_CXX_STANDARD}")
include(CheckIncludeFileCXX)
-if(UNIX AND NOT APPLE AND NOT SHERPA_ONNX_ENABLE_WASM AND NOT CMAKE_SYSTEM_NAME STREQUAL Android)
+if(UNIX AND NOT APPLE AND NOT SHERPA_ONNX_ENABLE_WASM AND NOT CMAKE_SYSTEM_NAME STREQUAL Android AND NOT CMAKE_SYSTEM_NAME STREQUAL OHOS)
check_include_file_cxx(alsa/asoundlib.h SHERPA_ONNX_HAS_ALSA)
if(SHERPA_ONNX_HAS_ALSA)
message(STATUS "With Alsa")
@@ -387,6 +403,7 @@ add_subdirectory(sherpa-onnx)
if(SHERPA_ONNX_ENABLE_C_API AND SHERPA_ONNX_ENABLE_BINARY AND SHERPA_ONNX_BUILD_C_API_EXAMPLES)
set(SHERPA_ONNX_PKG_WITH_CARGS "-lcargs")
add_subdirectory(c-api-examples)
+ add_subdirectory(cxx-api-examples)
endif()
if(SHERPA_ONNX_ENABLE_WASM)
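For reference, a minimal configure sketch that exercises the new WASM speaker diarization options added above. Only the `-D` flags come from this CMakeLists.txt change; the build directory name and the use of Emscripten's `emcmake` wrapper are assumptions.

```bash
# Sketch only: the options are taken from the CMakeLists.txt change above;
# emcmake and the build directory name are assumptions, not part of this patch.
emcmake cmake -B build-wasm-speaker-diarization \
  -DSHERPA_ONNX_ENABLE_WASM=ON \
  -DSHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION=ON \
  -DSHERPA_ONNX_ENABLE_WASM_SPEAKER_DIARIZATION=ON \
  .
cmake --build build-wasm-speaker-diarization
```

The new `SHERPA_ONNX_LINUX_ARM64_GPU_ONNXRUNTIME_VERSION` cache variable is set the same way, e.g. `-DSHERPA_ONNX_LINUX_ARM64_GPU_ONNXRUNTIME_VERSION=1.11.0` together with `-DSHERPA_ONNX_ENABLE_GPU=ON` on a Jetson Nano B01, per its help string.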
diff --git a/README.md b/README.md
index 890abe8827..b5fc25115e 100644
--- a/README.md
+++ b/README.md
@@ -1,8 +1,12 @@
### Supported functions
-|Speech recognition| Speech synthesis | Speaker verification | Speaker identification |
-|------------------|------------------|----------------------|------------------------|
-| ✔️ | ✔️ | ✔️ | ✔️ |
+|Speech recognition| Speech synthesis |
+|------------------|------------------|
+| ✔️ | ✔️ |
+
+|Speaker identification| Speaker diarization | Speaker verification |
+|----------------------|---------------------|----------------------|
+| ✔️ | ✔️ | ✔️ |
| Spoken Language identification | Audio tagging | Voice activity detection |
|--------------------------------|---------------|--------------------------|
@@ -14,14 +18,13 @@
### Supported platforms
-|Architecture| Android | iOS | Windows | macOS | linux |
-|------------|---------|---------|------------|-------|-------|
-| x64 | ✔️ | | ✔️ | ✔️ | ✔️ |
-| x86 | ✔️ | | ✔️ | | |
-| arm64 | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ |
-| arm32 | ✔️ | | | | ✔️ |
-| riscv64 | | | | | ✔️ |
-
+|Architecture| Android | iOS | Windows | macOS | linux | HarmonyOS |
+|------------|---------|---------|------------|-------|-------|-----------|
+| x64 | ✔️ | | ✔️ | ✔️ | ✔️ | ✔️ |
+| x86 | ✔️ | | ✔️ | | | |
+| arm64 | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ |
+| arm32 | ✔️ | | | | ✔️ | ✔️ |
+| riscv64 | | | | | ✔️ | |
### Supported programming languages
@@ -47,6 +50,7 @@ This repository supports running the following functions **locally**
- Speech-to-text (i.e., ASR); both streaming and non-streaming are supported
- Text-to-speech (i.e., TTS)
+ - Speaker diarization
- Speaker identification
- Speaker verification
- Spoken language identification
@@ -60,8 +64,11 @@ on the following platforms and operating systems:
- Linux, macOS, Windows, openKylin
- Android, WearOS
- iOS
+ - HarmonyOS
- NodeJS
- WebAssembly
+  - [NVIDIA Jetson Orin NX][NVIDIA Jetson Orin NX] (supports both CPU and GPU)
+  - [NVIDIA Jetson Nano B01][NVIDIA Jetson Nano B01] (supports both CPU and GPU)
- [Raspberry Pi][Raspberry Pi]
- [RV1126][RV1126]
- [LicheePi4A][LicheePi4A]
@@ -79,17 +86,19 @@ with the following APIs
### Links for Huggingface Spaces
-You can visit the following Huggingface spaces to try `sherpa-onnx` without
-installing anything. All you need is a browser.
-
-| Description | URL |
-|-------------------------------------------------------|------------------------------------|
-| Speech recognition | [Click me][hf-space-asr] |
-| Speech recognition with [Whisper][Whisper] | [Click me][hf-space-asr-whisper] |
-| Speech synthesis | [Click me][hf-space-tts] |
-| Generate subtitles | [Click me][hf-space-subtitle] |
-| Audio tagging | [Click me][hf-space-audio-tagging] |
-| Spoken language identification with [Whisper][Whisper]| [Click me][hf-space-slid-whisper] |
+
+You can visit the following Huggingface spaces to try sherpa-onnx without
+installing anything. All you need is a browser.
+
+| Description | URL |
+|-------------------------------------------------------|-----------------------------------------|
+| Speaker diarization | [Click me][hf-space-speaker-diarization]|
+| Speech recognition | [Click me][hf-space-asr] |
+| Speech recognition with [Whisper][Whisper] | [Click me][hf-space-asr-whisper] |
+| Speech synthesis | [Click me][hf-space-tts] |
+| Generate subtitles | [Click me][hf-space-subtitle] |
+| Audio tagging | [Click me][hf-space-audio-tagging] |
+| Spoken language identification with [Whisper][Whisper]| [Click me][hf-space-slid-whisper] |
We also have spaces built using WebAssembly. They are listed below:
@@ -102,6 +111,7 @@ We also have spaces built using WebAssembly. They are listed below:
|Real-time speech recognition (English) |[Click me][wasm-hf-streaming-asr-en-zipformer] |[地址][wasm-ms-streaming-asr-en-zipformer]|
|VAD + speech recognition (Chinese + English + Korean + Japanese + Cantonese) with [SenseVoice][SenseVoice]|[Click me][wasm-hf-vad-asr-zh-en-ko-ja-yue-sense-voice]| [地址][wasm-ms-vad-asr-zh-en-ko-ja-yue-sense-voice]|
|VAD + speech recognition (English) with [Whisper][Whisper] tiny.en|[Click me][wasm-hf-vad-asr-en-whisper-tiny-en]| [地址][wasm-ms-vad-asr-en-whisper-tiny-en]|
+|VAD + speech recognition (English) with [Moonshine tiny][Moonshine tiny]|[Click me][wasm-hf-vad-asr-en-moonshine-tiny-en]| [地址][wasm-ms-vad-asr-en-moonshine-tiny-en]|
|VAD + speech recognition (English) with Zipformer trained with [GigaSpeech][GigaSpeech] |[Click me][wasm-hf-vad-asr-en-zipformer-gigaspeech]| [地址][wasm-ms-vad-asr-en-zipformer-gigaspeech]|
|VAD + speech recognition (Chinese) with Zipformer trained with [WenetSpeech][WenetSpeech] |[Click me][wasm-hf-vad-asr-zh-zipformer-wenetspeech]| [地址][wasm-ms-vad-asr-zh-zipformer-wenetspeech]|
|VAD + speech recognition (Japanese) with Zipformer trained with [ReazonSpeech][ReazonSpeech]|[Click me][wasm-hf-vad-asr-ja-zipformer-reazonspeech]| [地址][wasm-ms-vad-asr-ja-zipformer-reazonspeech]|
@@ -111,24 +121,36 @@ We also have spaces built using WebAssembly. They are listed below:
|VAD + speech recognition (English + Chinese, 及多种中文方言) with Paraformer-small |[Click me][wasm-hf-vad-asr-zh-en-paraformer-small]| [地址][wasm-ms-vad-asr-zh-en-paraformer-small]|
|Speech synthesis (English) |[Click me][wasm-hf-tts-piper-en]| [地址][wasm-ms-tts-piper-en]|
|Speech synthesis (German) |[Click me][wasm-hf-tts-piper-de]| [地址][wasm-ms-tts-piper-de]|
+|Speaker diarization |[Click me][wasm-hf-speaker-diarization]|[地址][wasm-ms-speaker-diarization]|
+
+
### Links for pre-built Android APKs
-| Description | URL | 中国用户 |
-|----------------------------------------|------------------------------|-----------------------------|
-| Streaming speech recognition | [Address][apk-streaming-asr] | [点此][apk-streaming-asr-cn]|
-| Text-to-speech | [Address][apk-tts] | [点此][apk-tts-cn] |
-| Voice activity detection (VAD) | [Address][apk-vad] | [点此][apk-vad-cn] |
-| VAD + non-streaming speech recognition | [Address][apk-vad-asr] | [点此][apk-vad-asr-cn] |
-| Two-pass speech recognition | [Address][apk-2pass] | [点此][apk-2pass-cn] |
-| Audio tagging | [Address][apk-at] | [点此][apk-at-cn] |
-| Audio tagging (WearOS) | [Address][apk-at-wearos] | [点此][apk-at-wearos-cn] |
-| Speaker identification | [Address][apk-sid] | [点此][apk-sid-cn] |
-| Spoken language identification | [Address][apk-slid] | [点此][apk-slid-cn] |
-| Keyword spotting | [Address][apk-kws] | [点此][apk-kws-cn] |
+
+
+You can find pre-built Android APKs for this repository in the following table.
+
+| Description | URL | 中国用户 |
+|----------------------------------------|------------------------------------|-----------------------------------|
+| Speaker diarization | [Address][apk-speaker-diarization] | [点此][apk-speaker-diarization-cn]|
+| Streaming speech recognition | [Address][apk-streaming-asr] | [点此][apk-streaming-asr-cn] |
+| Text-to-speech | [Address][apk-tts] | [点此][apk-tts-cn] |
+| Voice activity detection (VAD) | [Address][apk-vad] | [点此][apk-vad-cn] |
+| VAD + non-streaming speech recognition | [Address][apk-vad-asr] | [点此][apk-vad-asr-cn] |
+| Two-pass speech recognition | [Address][apk-2pass] | [点此][apk-2pass-cn] |
+| Audio tagging | [Address][apk-at] | [点此][apk-at-cn] |
+| Audio tagging (WearOS) | [Address][apk-at-wearos] | [点此][apk-at-wearos-cn] |
+| Speaker identification | [Address][apk-sid] | [点此][apk-sid-cn] |
+| Spoken language identification | [Address][apk-slid] | [点此][apk-slid-cn] |
+| Keyword spotting | [Address][apk-kws] | [点此][apk-kws-cn] |
+
+
### Links for pre-built Flutter APPs
+
+
#### Real-time speech recognition
| Description | URL | 中国用户 |
@@ -147,17 +169,24 @@ We also have spaces built using WebAssembly. They are listed below:
> Note: You need to build from source for iOS.
+
+
### Links for pre-built Lazarus APPs
+
+
#### Generating subtitles
| Description | URL | 中国用户 |
|--------------------------------|----------------------------|----------------------------|
| Generate subtitles (生成字幕) | [Address][lazarus-subtitle]| [点此][lazarus-subtitle-cn]|
+
### Links for pre-trained models
+
+
| Description | URL |
|---------------------------------------------|---------------------------------------------------------------------------------------|
| Speech recognition (speech to text, ASR) | [Address][asr-models] |
@@ -168,6 +197,64 @@ We also have spaces built using WebAssembly. They are listed below:
| Speaker identification (Speaker ID) | [Address][sid-models] |
| Spoken language identification (Language ID)| See multi-lingual [Whisper][Whisper] ASR models from [Speech recognition][asr-models]|
| Punctuation | [Address][punct-models] |
+| Speaker segmentation | [Address][speaker-segmentation-models] |
+
+
+
+#### Some pre-trained ASR models (Streaming)
+
+
+
+Please see
+
+ -
+ -
+ -
+
+for more models. The following table lists only **SOME** of them.
+
+
+|Name | Supported Languages| Description|
+|-----|-----|----|
+|[sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20][sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20]| Chinese, English| See [also](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20-bilingual-chinese-english)|
+|[sherpa-onnx-streaming-zipformer-small-bilingual-zh-en-2023-02-16][sherpa-onnx-streaming-zipformer-small-bilingual-zh-en-2023-02-16]| Chinese, English| See [also](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#sherpa-onnx-streaming-zipformer-small-bilingual-zh-en-2023-02-16-bilingual-chinese-english)|
+|[sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23][sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23]|Chinese| Suitable for Cortex A7 CPU. See [also](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#sherpa-onnx-streaming-zipformer-zh-14m-2023-02-23)|
+|[sherpa-onnx-streaming-zipformer-en-20M-2023-02-17][sherpa-onnx-streaming-zipformer-en-20M-2023-02-17]|English|Suitable for Cortex A7 CPU. See [also](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#sherpa-onnx-streaming-zipformer-en-20m-2023-02-17)|
+|[sherpa-onnx-streaming-zipformer-korean-2024-06-16][sherpa-onnx-streaming-zipformer-korean-2024-06-16]|Korean| See [also](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#sherpa-onnx-streaming-zipformer-korean-2024-06-16-korean)|
+|[sherpa-onnx-streaming-zipformer-fr-2023-04-14][sherpa-onnx-streaming-zipformer-fr-2023-04-14]|French| See [also](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#shaojieli-sherpa-onnx-streaming-zipformer-fr-2023-04-14-french)|
+
+
+
+
+#### Some pre-trained ASR models (Non-Streaming)
+
+
+
+Please see
+
+ -
+ -
+ -
+ -
+ -
+
+for more models. The following table lists only **SOME** of them.
+
+|Name | Supported Languages| Description|
+|-----|-----|----|
+|[Whisper tiny.en](https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2)|English| See [also](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/tiny.en.html)|
+|[Moonshine tiny][Moonshine tiny]|English|See [also](https://github.com/usefulsensors/moonshine)|
+|[sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17][sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17]|Chinese, Cantonese, English, Korean, Japanese| 支持多种中文方言. See [also](https://k2-fsa.github.io/sherpa/onnx/sense-voice/index.html)|
+|[sherpa-onnx-paraformer-zh-2024-03-09][sherpa-onnx-paraformer-zh-2024-03-09]|Chinese, English| 也支持多种中文方言. See [also](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-paraformer/paraformer-models.html#csukuangfj-sherpa-onnx-paraformer-zh-2024-03-09-chinese-english)|
+|[sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01][sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01]|Japanese|See [also](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/zipformer-transducer-models.html#sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01-japanese)|
+|[sherpa-onnx-nemo-transducer-giga-am-russian-2024-10-24][sherpa-onnx-nemo-transducer-giga-am-russian-2024-10-24]|Russian|See [also](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/nemo-transducer-models.html#sherpa-onnx-nemo-transducer-giga-am-russian-2024-10-24-russian)|
+|[sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24][sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24]|Russian| See [also](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-ctc/nemo/russian.html#sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24)|
+|[sherpa-onnx-zipformer-ru-2024-09-18][sherpa-onnx-zipformer-ru-2024-09-18]|Russian|See [also](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/zipformer-transducer-models.html#sherpa-onnx-zipformer-ru-2024-09-18-russian)|
+|[sherpa-onnx-zipformer-korean-2024-06-24][sherpa-onnx-zipformer-korean-2024-06-24]|Korean|See [also](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/zipformer-transducer-models.html#sherpa-onnx-zipformer-korean-2024-06-24-korean)|
+|[sherpa-onnx-zipformer-thai-2024-06-20][sherpa-onnx-zipformer-thai-2024-06-20]|Thai| See [also](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/zipformer-transducer-models.html#sherpa-onnx-zipformer-thai-2024-06-20-thai)|
+|[sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04][sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04]|Chinese| 支持多种方言. See [also](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/telespeech/models.html#sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04)|
+
+
### Useful links
@@ -182,6 +269,13 @@ for 新一代 Kaldi **微信交流群** and **QQ 交流群**.
## Projects using sherpa-onnx
+### [Open-LLM-VTuber](https://github.com/t41372/Open-LLM-VTuber)
+
+Talk to any LLM with hands-free voice interaction, voice interruption, and a Live2D talking
+face, running locally across platforms.
+
+See also
+
### [voiceapi](https://github.com/ruzhila/voiceapi)
@@ -191,10 +285,30 @@ for 新一代 Kaldi **微信交流群** and **QQ 交流群**.
It shows how to use the ASR and TTS Python APIs with FastAPI.
-### [TMSpeech](https://github.com/jxlpzqc/TMSpeech)
+### [腾讯会议摸鱼工具 TMSpeech](https://github.com/jxlpzqc/TMSpeech)
Uses streaming ASR in C# with graphical user interface.
+Video demo in Chinese: [【开源】Windows实时字幕软件(网课/开会必备)](https://www.bilibili.com/video/BV1rX4y1p7Nx)
+
+### [lol互动助手](https://github.com/l1veIn/lol-wom-electron)
+
+It uses the JavaScript API of sherpa-onnx along with [Electron](https://electronjs.org/).
+
+Video demo in Chinese: [爆了!炫神教你开打字挂!真正影响胜率的英雄联盟工具!英雄联盟的最后一块拼图!和游戏中的每个人无障碍沟通!](https://www.bilibili.com/video/BV142tje9E74)
+
+### [Sherpa-ONNX 语音识别服务器](https://github.com/hfyydd/sherpa-onnx-server)
+
+A Node.js-based server providing a RESTful API for speech recognition.
+
+### [QSmartAssistant](https://github.com/xinhecuican/QSmartAssistant)
+
+A modular dialogue robot / smart speaker that runs fully offline with low resource usage (一个模块化,全过程可离线,低占用率的对话机器人/智能音箱)
+
+It uses Qt. Both [ASR](https://github.com/xinhecuican/QSmartAssistant/blob/master/doc/%E5%AE%89%E8%A3%85.md#asr)
+and [TTS](https://github.com/xinhecuican/QSmartAssistant/blob/master/doc/%E5%AE%89%E8%A3%85.md#tts)
+are used.
+
[sherpa-rs]: https://github.com/thewh1teagle/sherpa-rs
[silero-vad]: https://github.com/snakers4/silero-vad
@@ -204,6 +318,7 @@ Uses streaming ASR in C# with graphical user interface.
[VisionFive 2]: https://www.starfivetech.com/en/site/boards
[旭日X3派]: https://developer.horizon.ai/api/v1/fileData/documents_pi/index.html
[爱芯派]: https://wiki.sipeed.com/hardware/zh/maixIII/ax-pi/axpi.html
+[hf-space-speaker-diarization]: https://huggingface.co/spaces/k2-fsa/speaker-diarization
[hf-space-asr]: https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition
[Whisper]: https://github.com/openai/whisper
[hf-space-asr-whisper]: https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition-with-whisper
@@ -227,6 +342,8 @@ Uses streaming ASR in C# with graphical user interface.
[wasm-ms-vad-asr-zh-en-ko-ja-yue-sense-voice]: https://www.modelscope.cn/studios/csukuangfj/web-assembly-vad-asr-sherpa-onnx-zh-en-jp-ko-cantonese-sense-voice
[wasm-hf-vad-asr-en-whisper-tiny-en]: https://huggingface.co/spaces/k2-fsa/web-assembly-vad-asr-sherpa-onnx-en-whisper-tiny
[wasm-ms-vad-asr-en-whisper-tiny-en]: https://www.modelscope.cn/studios/csukuangfj/web-assembly-vad-asr-sherpa-onnx-en-whisper-tiny
+[wasm-hf-vad-asr-en-moonshine-tiny-en]: https://huggingface.co/spaces/k2-fsa/web-assembly-vad-asr-sherpa-onnx-en-moonshine-tiny
+[wasm-ms-vad-asr-en-moonshine-tiny-en]: https://www.modelscope.cn/studios/csukuangfj/web-assembly-vad-asr-sherpa-onnx-en-moonshine-tiny
[wasm-hf-vad-asr-en-zipformer-gigaspeech]: https://huggingface.co/spaces/k2-fsa/web-assembly-vad-asr-sherpa-onnx-en-zipformer-gigaspeech
[wasm-ms-vad-asr-en-zipformer-gigaspeech]: https://www.modelscope.cn/studios/k2-fsa/web-assembly-vad-asr-sherpa-onnx-en-zipformer-gigaspeech
[wasm-hf-vad-asr-zh-zipformer-wenetspeech]: https://huggingface.co/spaces/k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-zipformer-wenetspeech
@@ -248,6 +365,10 @@ Uses streaming ASR in C# with graphical user interface.
[wasm-ms-tts-piper-en]: https://modelscope.cn/studios/k2-fsa/web-assembly-tts-sherpa-onnx-en
[wasm-hf-tts-piper-de]: https://huggingface.co/spaces/k2-fsa/web-assembly-tts-sherpa-onnx-de
[wasm-ms-tts-piper-de]: https://modelscope.cn/studios/k2-fsa/web-assembly-tts-sherpa-onnx-de
+[wasm-hf-speaker-diarization]: https://huggingface.co/spaces/k2-fsa/web-assembly-speaker-diarization-sherpa-onnx
+[wasm-ms-speaker-diarization]: https://www.modelscope.cn/studios/csukuangfj/web-assembly-speaker-diarization-sherpa-onnx
+[apk-speaker-diarization]: https://k2-fsa.github.io/sherpa/onnx/speaker-diarization/apk.html
+[apk-speaker-diarization-cn]: https://k2-fsa.github.io/sherpa/onnx/speaker-diarization/apk-cn.html
[apk-streaming-asr]: https://k2-fsa.github.io/sherpa/onnx/android/apk.html
[apk-streaming-asr-cn]: https://k2-fsa.github.io/sherpa/onnx/android/apk-cn.html
[apk-tts]: https://k2-fsa.github.io/sherpa/onnx/tts/apk-engine.html
@@ -290,5 +411,24 @@ Uses streaming ASR in C# with graphical user interface.
[sid-models]: https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
[slid-models]: https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
[punct-models]: https://github.com/k2-fsa/sherpa-onnx/releases/tag/punctuation-models
+[speaker-segmentation-models]: https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-segmentation-models
[GigaSpeech]: https://github.com/SpeechColab/GigaSpeech
[WenetSpeech]: https://github.com/wenet-e2e/WenetSpeech
+[sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20]: https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
+[sherpa-onnx-streaming-zipformer-small-bilingual-zh-en-2023-02-16]: https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-small-bilingual-zh-en-2023-02-16.tar.bz2
+[sherpa-onnx-streaming-zipformer-korean-2024-06-16]: https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-korean-2024-06-16.tar.bz2
+[sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23]: https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23.tar.bz2
+[sherpa-onnx-streaming-zipformer-en-20M-2023-02-17]: https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-en-20M-2023-02-17.tar.bz2
+[sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01]: https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01.tar.bz2
+[sherpa-onnx-zipformer-ru-2024-09-18]: https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-ru-2024-09-18.tar.bz2
+[sherpa-onnx-zipformer-korean-2024-06-24]: https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-korean-2024-06-24.tar.bz2
+[sherpa-onnx-zipformer-thai-2024-06-20]: https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-thai-2024-06-20.tar.bz2
+[sherpa-onnx-nemo-transducer-giga-am-russian-2024-10-24]: https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-transducer-giga-am-russian-2024-10-24.tar.bz2
+[sherpa-onnx-paraformer-zh-2024-03-09]: https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2024-03-09.tar.bz2
+[sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24]: https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24.tar.bz2
+[sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04]: https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04.tar.bz2
+[sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17]: https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
+[sherpa-onnx-streaming-zipformer-fr-2023-04-14]: https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-fr-2023-04-14.tar.bz2
+[Moonshine tiny]: https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+[NVIDIA Jetson Orin NX]: https://developer.download.nvidia.com/assets/embedded/secure/jetson/orin_nx/docs/Jetson_Orin_NX_DS-10712-001_v0.5.pdf?RCPGu9Q6OVAOv7a7vgtwc9-BLScXRIWq6cSLuditMALECJ_dOj27DgnqAPGVnT2VpiNpQan9SyFy-9zRykR58CokzbXwjSA7Gj819e91AXPrWkGZR3oS1VLxiDEpJa_Y0lr7UT-N4GnXtb8NlUkP4GkCkkF_FQivGPrAucCUywL481GH_WpP_p7ziHU1Wg==&t=eyJscyI6ImdzZW8iLCJsc2QiOiJodHRwczovL3d3dy5nb29nbGUuY29tLmhrLyJ9
+[NVIDIA Jetson Nano B01]: https://www.seeedstudio.com/blog/2020/01/16/new-revision-of-jetson-nano-dev-kit-now-supports-new-jetson-nano-module/
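As a usage sketch for the first entry in the streaming ASR model table above, assuming the project has already been built into `./build`: the download URL and the file names inside the archive also appear later in this patch (see the SherpaOnnxJavaDemo README), while the `test_wavs` path is an assumption.

```bash
# Sketch only: flag names follow the sherpa-onnx streaming CLI; the test wave
# path is an assumption. Run ./build/bin/sherpa-onnx --help to confirm.
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2

./build/bin/sherpa-onnx \
  --tokens=./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt \
  --encoder=./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx \
  --decoder=./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx \
  --joiner=./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx \
  ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav
```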
diff --git a/android/README.md b/android/README.md
index 42b29e08f3..bae3355987 100644
--- a/android/README.md
+++ b/android/README.md
@@ -4,6 +4,8 @@ Please refer to
https://k2-fsa.github.io/sherpa/onnx/android/index.html
for usage.
+- [SherpaOnnxSpeakerDiarization](./SherpaOnnxSpeakerDiarization) It is for speaker diarization.
+
- [SherpaOnnx](./SherpaOnnx) It uses a streaming ASR model.
- [SherpaOnnx2Pass](./SherpaOnnx2Pass) It uses a streaming ASR model
diff --git a/android/SherpaOnnxAar/.gitignore b/android/SherpaOnnxAar/.gitignore
new file mode 100644
index 0000000000..aa724b7707
--- /dev/null
+++ b/android/SherpaOnnxAar/.gitignore
@@ -0,0 +1,15 @@
+*.iml
+.gradle
+/local.properties
+/.idea/caches
+/.idea/libraries
+/.idea/modules.xml
+/.idea/workspace.xml
+/.idea/navEditor.xml
+/.idea/assetWizardSettings.xml
+.DS_Store
+/build
+/captures
+.externalNativeBuild
+.cxx
+local.properties
diff --git a/android/SherpaOnnxAar/README.md b/android/SherpaOnnxAar/README.md
new file mode 100644
index 0000000000..3238153c2a
--- /dev/null
+++ b/android/SherpaOnnxAar/README.md
@@ -0,0 +1,20 @@
+# Usage of this project
+
+```
+git clone https://github.com/k2-fsa/sherpa-onnx
+cd sherpa-onnx
+
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/v1.10.42/sherpa-onnx-v1.10.42-android.tar.bz2
+tar xvf sherpa-onnx-v1.10.42-android.tar.bz2
+
+cp -v jniLibs/arm64-v8a/* android/SherpaOnnxAar/sherpa_onnx/src/main/jniLibs/arm64-v8a/
+cp -v jniLibs/armeabi-v7a/* android/SherpaOnnxAar/sherpa_onnx/src/main/jniLibs/armeabi-v7a/
+cp -v jniLibs/x86/* android/SherpaOnnxAar/sherpa_onnx/src/main/jniLibs/x86/
+cp -v jniLibs/x86_64/* android/SherpaOnnxAar/sherpa_onnx/src/main/jniLibs/x86_64/
+
+cd android/SherpaOnnxAar
+
+./gradlew :sherpa_onnx:assembleRelease
+ls -lh ./sherpa_onnx/build/outputs/aar/sherpa_onnx-release.aar
+cp ./sherpa_onnx/build/outputs/aar/sherpa_onnx-release.aar ../../sherpa-onnx-1.10.42.aar
+```
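One hypothetical way to consume the AAR built above from an app module is as a local file dependency; the app path below is made up, and the SherpaOnnxJavaDemo `build.gradle` later in this diff shows the JitPack alternative (`com.github.k2-fsa:sherpa-onnx:v1.10.42`).

```bash
# Hypothetical consumer: copy the AAR into an app module's libs/ directory and
# reference it from Gradle with implementation files('libs/sherpa-onnx-1.10.42.aar').
mkdir -p /path/to/YourApp/app/libs
cp ../../sherpa-onnx-1.10.42.aar /path/to/YourApp/app/libs/
```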
diff --git a/android/SherpaOnnxAar/build.gradle.kts b/android/SherpaOnnxAar/build.gradle.kts
new file mode 100644
index 0000000000..e3f8a07411
--- /dev/null
+++ b/android/SherpaOnnxAar/build.gradle.kts
@@ -0,0 +1,6 @@
+// Top-level build file where you can add configuration options common to all sub-projects/modules.
+plugins {
+ alias(libs.plugins.android.application) apply false
+ alias(libs.plugins.jetbrains.kotlin.android) apply false
+ alias(libs.plugins.android.library) apply false
+}
\ No newline at end of file
diff --git a/android/SherpaOnnxAar/gradle.properties b/android/SherpaOnnxAar/gradle.properties
new file mode 100644
index 0000000000..20e2a01520
--- /dev/null
+++ b/android/SherpaOnnxAar/gradle.properties
@@ -0,0 +1,23 @@
+# Project-wide Gradle settings.
+# IDE (e.g. Android Studio) users:
+# Gradle settings configured through the IDE *will override*
+# any settings specified in this file.
+# For more details on how to configure your build environment visit
+# http://www.gradle.org/docs/current/userguide/build_environment.html
+# Specifies the JVM arguments used for the daemon process.
+# The setting is particularly useful for tweaking memory settings.
+org.gradle.jvmargs=-Xmx2048m -Dfile.encoding=UTF-8
+# When configured, Gradle will run in incubating parallel mode.
+# This option should only be used with decoupled projects. For more details, visit
+# https://developer.android.com/r/tools/gradle-multi-project-decoupled-projects
+# org.gradle.parallel=true
+# AndroidX package structure to make it clearer which packages are bundled with the
+# Android operating system, and which are packaged with your app's APK
+# https://developer.android.com/topic/libraries/support-library/androidx-rn
+android.useAndroidX=true
+# Kotlin code style for this project: "official" or "obsolete":
+kotlin.code.style=official
+# Enables namespacing of each library's R class so that its R class includes only the
+# resources declared in the library itself and none from the library's dependencies,
+# thereby reducing the size of the R class for that library
+android.nonTransitiveRClass=true
\ No newline at end of file
diff --git a/android/SherpaOnnxAar/gradle/libs.versions.toml b/android/SherpaOnnxAar/gradle/libs.versions.toml
new file mode 100644
index 0000000000..56172d2933
--- /dev/null
+++ b/android/SherpaOnnxAar/gradle/libs.versions.toml
@@ -0,0 +1,23 @@
+[versions]
+agp = "8.4.0"
+kotlin = "1.7.20"
+coreKtx = "1.15.0"
+junit = "4.13.2"
+junitVersion = "1.2.1"
+espressoCore = "3.6.1"
+appcompat = "1.7.0"
+material = "1.12.0"
+
+[libraries]
+androidx-core-ktx = { group = "androidx.core", name = "core-ktx", version.ref = "coreKtx" }
+junit = { group = "junit", name = "junit", version.ref = "junit" }
+androidx-junit = { group = "androidx.test.ext", name = "junit", version.ref = "junitVersion" }
+androidx-espresso-core = { group = "androidx.test.espresso", name = "espresso-core", version.ref = "espressoCore" }
+androidx-appcompat = { group = "androidx.appcompat", name = "appcompat", version.ref = "appcompat" }
+material = { group = "com.google.android.material", name = "material", version.ref = "material" }
+
+[plugins]
+android-application = { id = "com.android.application", version.ref = "agp" }
+jetbrains-kotlin-android = { id = "org.jetbrains.kotlin.android", version.ref = "kotlin" }
+android-library = { id = "com.android.library", version.ref = "agp" }
+
diff --git a/android/SherpaOnnxAar/gradle/wrapper/gradle-wrapper.jar b/android/SherpaOnnxAar/gradle/wrapper/gradle-wrapper.jar
new file mode 100644
index 0000000000..e708b1c023
Binary files /dev/null and b/android/SherpaOnnxAar/gradle/wrapper/gradle-wrapper.jar differ
diff --git a/android/SherpaOnnxAar/gradle/wrapper/gradle-wrapper.properties b/android/SherpaOnnxAar/gradle/wrapper/gradle-wrapper.properties
new file mode 100644
index 0000000000..a8b3563581
--- /dev/null
+++ b/android/SherpaOnnxAar/gradle/wrapper/gradle-wrapper.properties
@@ -0,0 +1,6 @@
+#Thu Dec 12 14:02:30 CST 2024
+distributionBase=GRADLE_USER_HOME
+distributionPath=wrapper/dists
+distributionUrl=https\://services.gradle.org/distributions/gradle-8.6-bin.zip
+zipStoreBase=GRADLE_USER_HOME
+zipStorePath=wrapper/dists
diff --git a/android/SherpaOnnxAar/gradlew b/android/SherpaOnnxAar/gradlew
new file mode 100755
index 0000000000..4f906e0c81
--- /dev/null
+++ b/android/SherpaOnnxAar/gradlew
@@ -0,0 +1,185 @@
+#!/usr/bin/env sh
+
+#
+# Copyright 2015 the original author or authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+##############################################################################
+##
+## Gradle start up script for UN*X
+##
+##############################################################################
+
+# Attempt to set APP_HOME
+# Resolve links: $0 may be a link
+PRG="$0"
+# Need this for relative symlinks.
+while [ -h "$PRG" ] ; do
+ ls=`ls -ld "$PRG"`
+ link=`expr "$ls" : '.*-> \(.*\)$'`
+ if expr "$link" : '/.*' > /dev/null; then
+ PRG="$link"
+ else
+ PRG=`dirname "$PRG"`"/$link"
+ fi
+done
+SAVED="`pwd`"
+cd "`dirname \"$PRG\"`/" >/dev/null
+APP_HOME="`pwd -P`"
+cd "$SAVED" >/dev/null
+
+APP_NAME="Gradle"
+APP_BASE_NAME=`basename "$0"`
+
+# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
+DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'
+
+# Use the maximum available, or set MAX_FD != -1 to use that value.
+MAX_FD="maximum"
+
+warn () {
+ echo "$*"
+}
+
+die () {
+ echo
+ echo "$*"
+ echo
+ exit 1
+}
+
+# OS specific support (must be 'true' or 'false').
+cygwin=false
+msys=false
+darwin=false
+nonstop=false
+case "`uname`" in
+ CYGWIN* )
+ cygwin=true
+ ;;
+ Darwin* )
+ darwin=true
+ ;;
+ MINGW* )
+ msys=true
+ ;;
+ NONSTOP* )
+ nonstop=true
+ ;;
+esac
+
+CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
+
+
+# Determine the Java command to use to start the JVM.
+if [ -n "$JAVA_HOME" ] ; then
+ if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
+ # IBM's JDK on AIX uses strange locations for the executables
+ JAVACMD="$JAVA_HOME/jre/sh/java"
+ else
+ JAVACMD="$JAVA_HOME/bin/java"
+ fi
+ if [ ! -x "$JAVACMD" ] ; then
+ die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
+
+Please set the JAVA_HOME variable in your environment to match the
+location of your Java installation."
+ fi
+else
+ JAVACMD="java"
+ which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
+
+Please set the JAVA_HOME variable in your environment to match the
+location of your Java installation."
+fi
+
+# Increase the maximum file descriptors if we can.
+if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
+ MAX_FD_LIMIT=`ulimit -H -n`
+ if [ $? -eq 0 ] ; then
+ if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
+ MAX_FD="$MAX_FD_LIMIT"
+ fi
+ ulimit -n $MAX_FD
+ if [ $? -ne 0 ] ; then
+ warn "Could not set maximum file descriptor limit: $MAX_FD"
+ fi
+ else
+ warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
+ fi
+fi
+
+# For Darwin, add options to specify how the application appears in the dock
+if $darwin; then
+ GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
+fi
+
+# For Cygwin or MSYS, switch paths to Windows format before running java
+if [ "$cygwin" = "true" -o "$msys" = "true" ] ; then
+ APP_HOME=`cygpath --path --mixed "$APP_HOME"`
+ CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
+
+ JAVACMD=`cygpath --unix "$JAVACMD"`
+
+ # We build the pattern for arguments to be converted via cygpath
+ ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
+ SEP=""
+ for dir in $ROOTDIRSRAW ; do
+ ROOTDIRS="$ROOTDIRS$SEP$dir"
+ SEP="|"
+ done
+ OURCYGPATTERN="(^($ROOTDIRS))"
+ # Add a user-defined pattern to the cygpath arguments
+ if [ "$GRADLE_CYGPATTERN" != "" ] ; then
+ OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
+ fi
+ # Now convert the arguments - kludge to limit ourselves to /bin/sh
+ i=0
+ for arg in "$@" ; do
+ CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
+ CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option
+
+ if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition
+ eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
+ else
+ eval `echo args$i`="\"$arg\""
+ fi
+ i=`expr $i + 1`
+ done
+ case $i in
+ 0) set -- ;;
+ 1) set -- "$args0" ;;
+ 2) set -- "$args0" "$args1" ;;
+ 3) set -- "$args0" "$args1" "$args2" ;;
+ 4) set -- "$args0" "$args1" "$args2" "$args3" ;;
+ 5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
+ 6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
+ 7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
+ 8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
+ 9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
+ esac
+fi
+
+# Escape application args
+save () {
+ for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
+ echo " "
+}
+APP_ARGS=`save "$@"`
+
+# Collect all arguments for the java command, following the shell quoting and substitution rules
+eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"
+
+exec "$JAVACMD" "$@"
diff --git a/android/SherpaOnnxAar/gradlew.bat b/android/SherpaOnnxAar/gradlew.bat
new file mode 100644
index 0000000000..ac1b06f938
--- /dev/null
+++ b/android/SherpaOnnxAar/gradlew.bat
@@ -0,0 +1,89 @@
+@rem
+@rem Copyright 2015 the original author or authors.
+@rem
+@rem Licensed under the Apache License, Version 2.0 (the "License");
+@rem you may not use this file except in compliance with the License.
+@rem You may obtain a copy of the License at
+@rem
+@rem https://www.apache.org/licenses/LICENSE-2.0
+@rem
+@rem Unless required by applicable law or agreed to in writing, software
+@rem distributed under the License is distributed on an "AS IS" BASIS,
+@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@rem See the License for the specific language governing permissions and
+@rem limitations under the License.
+@rem
+
+@if "%DEBUG%" == "" @echo off
+@rem ##########################################################################
+@rem
+@rem Gradle startup script for Windows
+@rem
+@rem ##########################################################################
+
+@rem Set local scope for the variables with windows NT shell
+if "%OS%"=="Windows_NT" setlocal
+
+set DIRNAME=%~dp0
+if "%DIRNAME%" == "" set DIRNAME=.
+set APP_BASE_NAME=%~n0
+set APP_HOME=%DIRNAME%
+
+@rem Resolve any "." and ".." in APP_HOME to make it shorter.
+for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi
+
+@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
+set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m"
+
+@rem Find java.exe
+if defined JAVA_HOME goto findJavaFromJavaHome
+
+set JAVA_EXE=java.exe
+%JAVA_EXE% -version >NUL 2>&1
+if "%ERRORLEVEL%" == "0" goto execute
+
+echo.
+echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
+echo.
+echo Please set the JAVA_HOME variable in your environment to match the
+echo location of your Java installation.
+
+goto fail
+
+:findJavaFromJavaHome
+set JAVA_HOME=%JAVA_HOME:"=%
+set JAVA_EXE=%JAVA_HOME%/bin/java.exe
+
+if exist "%JAVA_EXE%" goto execute
+
+echo.
+echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
+echo.
+echo Please set the JAVA_HOME variable in your environment to match the
+echo location of your Java installation.
+
+goto fail
+
+:execute
+@rem Setup the command line
+
+set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
+
+
+@rem Execute Gradle
+"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %*
+
+:end
+@rem End local scope for the variables with windows NT shell
+if "%ERRORLEVEL%"=="0" goto mainEnd
+
+:fail
+rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
+rem the _cmd.exe /c_ return code!
+if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
+exit /b 1
+
+:mainEnd
+if "%OS%"=="Windows_NT" endlocal
+
+:omega
diff --git a/android/SherpaOnnxAar/settings.gradle.kts b/android/SherpaOnnxAar/settings.gradle.kts
new file mode 100644
index 0000000000..53ee52b547
--- /dev/null
+++ b/android/SherpaOnnxAar/settings.gradle.kts
@@ -0,0 +1,23 @@
+pluginManagement {
+ repositories {
+ google {
+ content {
+ includeGroupByRegex("com\\.android.*")
+ includeGroupByRegex("com\\.google.*")
+ includeGroupByRegex("androidx.*")
+ }
+ }
+ mavenCentral()
+ gradlePluginPortal()
+ }
+}
+dependencyResolutionManagement {
+ repositoriesMode.set(RepositoriesMode.FAIL_ON_PROJECT_REPOS)
+ repositories {
+ google()
+ mavenCentral()
+ }
+}
+
+rootProject.name = "SherpaOnnxAar"
+include(":sherpa_onnx")
diff --git a/android/SherpaOnnxAar/sherpa_onnx/.gitignore b/android/SherpaOnnxAar/sherpa_onnx/.gitignore
new file mode 100644
index 0000000000..42afabfd2a
--- /dev/null
+++ b/android/SherpaOnnxAar/sherpa_onnx/.gitignore
@@ -0,0 +1 @@
+/build
\ No newline at end of file
diff --git a/android/SherpaOnnxAar/sherpa_onnx/build.gradle.kts b/android/SherpaOnnxAar/sherpa_onnx/build.gradle.kts
new file mode 100644
index 0000000000..4803cb8378
--- /dev/null
+++ b/android/SherpaOnnxAar/sherpa_onnx/build.gradle.kts
@@ -0,0 +1,43 @@
+plugins {
+ alias(libs.plugins.android.library)
+ alias(libs.plugins.jetbrains.kotlin.android)
+}
+
+android {
+ namespace = "com.k2fsa.sherpa.onnx"
+ compileSdk = 34
+
+ defaultConfig {
+ minSdk = 21
+
+ testInstrumentationRunner = "androidx.test.runner.AndroidJUnitRunner"
+ consumerProguardFiles("consumer-rules.pro")
+ }
+
+ buildTypes {
+ release {
+ isMinifyEnabled = false
+ proguardFiles(
+ getDefaultProguardFile("proguard-android-optimize.txt"),
+ "proguard-rules.pro"
+ )
+ }
+ }
+ compileOptions {
+ sourceCompatibility = JavaVersion.VERSION_1_8
+ targetCompatibility = JavaVersion.VERSION_1_8
+ }
+ kotlinOptions {
+ jvmTarget = "1.8"
+ }
+}
+
+dependencies {
+
+ implementation(libs.androidx.core.ktx)
+ implementation(libs.androidx.appcompat)
+ implementation(libs.material)
+ testImplementation(libs.junit)
+ androidTestImplementation(libs.androidx.junit)
+ androidTestImplementation(libs.androidx.espresso.core)
+}
\ No newline at end of file
diff --git a/android/SherpaOnnxAar/sherpa_onnx/consumer-rules.pro b/android/SherpaOnnxAar/sherpa_onnx/consumer-rules.pro
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/android/SherpaOnnxAar/sherpa_onnx/proguard-rules.pro b/android/SherpaOnnxAar/sherpa_onnx/proguard-rules.pro
new file mode 100644
index 0000000000..481bb43481
--- /dev/null
+++ b/android/SherpaOnnxAar/sherpa_onnx/proguard-rules.pro
@@ -0,0 +1,21 @@
+# Add project specific ProGuard rules here.
+# You can control the set of applied configuration files using the
+# proguardFiles setting in build.gradle.
+#
+# For more details, see
+# http://developer.android.com/guide/developing/tools/proguard.html
+
+# If your project uses WebView with JS, uncomment the following
+# and specify the fully qualified class name to the JavaScript interface
+# class:
+#-keepclassmembers class fqcn.of.javascript.interface.for.webview {
+# public *;
+#}
+
+# Uncomment this to preserve the line number information for
+# debugging stack traces.
+#-keepattributes SourceFile,LineNumberTable
+
+# If you keep the line number information, uncomment this to
+# hide the original source file name.
+#-renamesourcefileattribute SourceFile
\ No newline at end of file
diff --git a/android/SherpaOnnxAar/sherpa_onnx/src/androidTest/java/com/k2fsa/sherpa/onnx/ExampleInstrumentedTest.kt b/android/SherpaOnnxAar/sherpa_onnx/src/androidTest/java/com/k2fsa/sherpa/onnx/ExampleInstrumentedTest.kt
new file mode 100644
index 0000000000..db1fbefc31
--- /dev/null
+++ b/android/SherpaOnnxAar/sherpa_onnx/src/androidTest/java/com/k2fsa/sherpa/onnx/ExampleInstrumentedTest.kt
@@ -0,0 +1,24 @@
+package com.k2fsa.sherpa.onnx
+
+import androidx.test.platform.app.InstrumentationRegistry
+import androidx.test.ext.junit.runners.AndroidJUnit4
+
+import org.junit.Test
+import org.junit.runner.RunWith
+
+import org.junit.Assert.*
+
+/**
+ * Instrumented test, which will execute on an Android device.
+ *
+ * See [testing documentation](http://d.android.com/tools/testing).
+ */
+@RunWith(AndroidJUnit4::class)
+class ExampleInstrumentedTest {
+ @Test
+ fun useAppContext() {
+ // Context of the app under test.
+ val appContext = InstrumentationRegistry.getInstrumentation().targetContext
+ assertEquals("com.k2fsa.sherpa.onnx.test", appContext.packageName)
+ }
+}
\ No newline at end of file
diff --git a/android/SherpaOnnxAar/sherpa_onnx/src/main/AndroidManifest.xml b/android/SherpaOnnxAar/sherpa_onnx/src/main/AndroidManifest.xml
new file mode 100644
index 0000000000..a5918e68ab
--- /dev/null
+++ b/android/SherpaOnnxAar/sherpa_onnx/src/main/AndroidManifest.xml
@@ -0,0 +1,4 @@
+<?xml version="1.0" encoding="utf-8"?>
+<manifest xmlns:android="http://schemas.android.com/apk/res/android">
+
+</manifest>
\ No newline at end of file
diff --git a/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/AudioTagging.kt b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/AudioTagging.kt
new file mode 120000
index 0000000000..25c36e3965
--- /dev/null
+++ b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/AudioTagging.kt
@@ -0,0 +1 @@
+../../../../../../../../../../sherpa-onnx/kotlin-api/AudioTagging.kt
\ No newline at end of file
diff --git a/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/FeatureConfig.kt b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/FeatureConfig.kt
new file mode 120000
index 0000000000..952fae878a
--- /dev/null
+++ b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/FeatureConfig.kt
@@ -0,0 +1 @@
+../../../../../../../../../../sherpa-onnx/kotlin-api/FeatureConfig.kt
\ No newline at end of file
diff --git a/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/KeywordSpotter.kt b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/KeywordSpotter.kt
new file mode 120000
index 0000000000..4392376a1f
--- /dev/null
+++ b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/KeywordSpotter.kt
@@ -0,0 +1 @@
+../../../../../../../../../../sherpa-onnx/kotlin-api/KeywordSpotter.kt
\ No newline at end of file
diff --git a/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/OfflinePunctuation.kt b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/OfflinePunctuation.kt
new file mode 120000
index 0000000000..1eed71678d
--- /dev/null
+++ b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/OfflinePunctuation.kt
@@ -0,0 +1 @@
+../../../../../../../../../../sherpa-onnx/kotlin-api/OfflinePunctuation.kt
\ No newline at end of file
diff --git a/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/OfflineRecognizer.kt b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/OfflineRecognizer.kt
new file mode 120000
index 0000000000..faa3ab4acf
--- /dev/null
+++ b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/OfflineRecognizer.kt
@@ -0,0 +1 @@
+../../../../../../../../../../sherpa-onnx/kotlin-api/OfflineRecognizer.kt
\ No newline at end of file
diff --git a/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/OfflineSpeakerDiarization.kt b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/OfflineSpeakerDiarization.kt
new file mode 120000
index 0000000000..d850dd7fdc
--- /dev/null
+++ b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/OfflineSpeakerDiarization.kt
@@ -0,0 +1 @@
+../../../../../../../../../../sherpa-onnx/kotlin-api/OfflineSpeakerDiarization.kt
\ No newline at end of file
diff --git a/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/OfflineStream.kt b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/OfflineStream.kt
new file mode 120000
index 0000000000..2a3aff864e
--- /dev/null
+++ b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/OfflineStream.kt
@@ -0,0 +1 @@
+../../../../../../../../../../sherpa-onnx/kotlin-api/OfflineStream.kt
\ No newline at end of file
diff --git a/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/OnlineRecognizer.kt b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/OnlineRecognizer.kt
new file mode 120000
index 0000000000..5bb19ee10e
--- /dev/null
+++ b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/OnlineRecognizer.kt
@@ -0,0 +1 @@
+../../../../../../../../../../sherpa-onnx/kotlin-api/OnlineRecognizer.kt
\ No newline at end of file
diff --git a/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/OnlineStream.kt b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/OnlineStream.kt
new file mode 120000
index 0000000000..d4518b89bf
--- /dev/null
+++ b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/OnlineStream.kt
@@ -0,0 +1 @@
+../../../../../../../../../../sherpa-onnx/kotlin-api/OnlineStream.kt
\ No newline at end of file
diff --git a/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/Speaker.kt b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/Speaker.kt
new file mode 120000
index 0000000000..66441dea73
--- /dev/null
+++ b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/Speaker.kt
@@ -0,0 +1 @@
+../../../../../../../../../../sherpa-onnx/kotlin-api/Speaker.kt
\ No newline at end of file
diff --git a/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/SpeakerEmbeddingExtractorConfig.kt b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/SpeakerEmbeddingExtractorConfig.kt
new file mode 120000
index 0000000000..754102447c
--- /dev/null
+++ b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/SpeakerEmbeddingExtractorConfig.kt
@@ -0,0 +1 @@
+../../../../../../../../../../sherpa-onnx/kotlin-api/SpeakerEmbeddingExtractorConfig.kt
\ No newline at end of file
diff --git a/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/SpokenLanguageIdentification.kt b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/SpokenLanguageIdentification.kt
new file mode 120000
index 0000000000..de79a7d20a
--- /dev/null
+++ b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/SpokenLanguageIdentification.kt
@@ -0,0 +1 @@
+../../../../../../../../../../sherpa-onnx/kotlin-api/SpokenLanguageIdentification.kt
\ No newline at end of file
diff --git a/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/Tts.kt b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/Tts.kt
new file mode 120000
index 0000000000..f1392e7712
--- /dev/null
+++ b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/Tts.kt
@@ -0,0 +1 @@
+../../../../../../../../../../sherpa-onnx/kotlin-api/Tts.kt
\ No newline at end of file
diff --git a/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/Vad.kt b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/Vad.kt
new file mode 120000
index 0000000000..761b158ce9
--- /dev/null
+++ b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/Vad.kt
@@ -0,0 +1 @@
+../../../../../../../../../../sherpa-onnx/kotlin-api/Vad.kt
\ No newline at end of file
diff --git a/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/WaveReader.kt b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/WaveReader.kt
new file mode 120000
index 0000000000..05c8fb2463
--- /dev/null
+++ b/android/SherpaOnnxAar/sherpa_onnx/src/main/java/com/k2fsa/sherpa/onnx/WaveReader.kt
@@ -0,0 +1 @@
+../../../../../../../../../../sherpa-onnx/kotlin-api/WaveReader.kt
\ No newline at end of file
diff --git a/android/SherpaOnnxAar/sherpa_onnx/src/main/jniLibs/arm64-v8a/.gitkeep b/android/SherpaOnnxAar/sherpa_onnx/src/main/jniLibs/arm64-v8a/.gitkeep
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/android/SherpaOnnxAar/sherpa_onnx/src/main/jniLibs/armeabi-v7a/.gitkeep b/android/SherpaOnnxAar/sherpa_onnx/src/main/jniLibs/armeabi-v7a/.gitkeep
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/android/SherpaOnnxAar/sherpa_onnx/src/main/jniLibs/x86/.gitkeep b/android/SherpaOnnxAar/sherpa_onnx/src/main/jniLibs/x86/.gitkeep
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/android/SherpaOnnxAar/sherpa_onnx/src/main/jniLibs/x86_64/.gitkeep b/android/SherpaOnnxAar/sherpa_onnx/src/main/jniLibs/x86_64/.gitkeep
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/android/SherpaOnnxAar/sherpa_onnx/src/test/java/com/k2fsa/sherpa/onnx/ExampleUnitTest.kt b/android/SherpaOnnxAar/sherpa_onnx/src/test/java/com/k2fsa/sherpa/onnx/ExampleUnitTest.kt
new file mode 100644
index 0000000000..05dfcd635f
--- /dev/null
+++ b/android/SherpaOnnxAar/sherpa_onnx/src/test/java/com/k2fsa/sherpa/onnx/ExampleUnitTest.kt
@@ -0,0 +1,17 @@
+package com.k2fsa.sherpa.onnx
+
+import org.junit.Test
+
+import org.junit.Assert.*
+
+/**
+ * Example local unit test, which will execute on the development machine (host).
+ *
+ * See [testing documentation](http://d.android.com/tools/testing).
+ */
+class ExampleUnitTest {
+ @Test
+ fun addition_isCorrect() {
+ assertEquals(4, 2 + 2)
+ }
+}
\ No newline at end of file
diff --git a/android/SherpaOnnxJavaDemo/.gitignore b/android/SherpaOnnxJavaDemo/.gitignore
new file mode 100644
index 0000000000..aa724b7707
--- /dev/null
+++ b/android/SherpaOnnxJavaDemo/.gitignore
@@ -0,0 +1,15 @@
+*.iml
+.gradle
+/local.properties
+/.idea/caches
+/.idea/libraries
+/.idea/modules.xml
+/.idea/workspace.xml
+/.idea/navEditor.xml
+/.idea/assetWizardSettings.xml
+.DS_Store
+/build
+/captures
+.externalNativeBuild
+.cxx
+local.properties
diff --git a/android/SherpaOnnxJavaDemo/README.md b/android/SherpaOnnxJavaDemo/README.md
new file mode 100644
index 0000000000..8d7b84dbd5
--- /dev/null
+++ b/android/SherpaOnnxJavaDemo/README.md
@@ -0,0 +1,44 @@
+# Introduction
+
+Please run the following commands to download model files before you run this Android demo:
+
+```bash
+# Assume we are inside
+# /Users/fangjun/open-source/sherpa-onnx/android/SherpaOnnxJavaDemo
+
+cd app/src/main/assets/
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
+
+tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
+rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
+
+mv sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx ./
+mv sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx ./
+mv sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx ./
+mv sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt ./
+
+rm -rf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/*
+
+mv encoder-epoch-99-avg-1.int8.onnx sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/
+mv decoder-epoch-99-avg-1.onnx sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/
+mv joiner-epoch-99-avg-1.int8.onnx sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/
+mv tokens.txt sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/
+```
+
+You should have the following directory structure:
+```
+(py38) fangjuns-MacBook-Pro:assets fangjun$ pwd
+/Users/fangjun/open-source/sherpa-onnx/android/SherpaOnnxJavaDemo/app/src/main/assets
+
+(py38) fangjuns-MacBook-Pro:assets fangjun$ tree .
+.
+└── sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20
+ ├── decoder-epoch-99-avg-1.onnx
+ ├── encoder-epoch-99-avg-1.int8.onnx
+ ├── joiner-epoch-99-avg-1.int8.onnx
+ └── tokens.txt
+
+1 directory, 4 files
+```
+
+Remember to remove unused files to reduce the file size of the final APK.
diff --git a/android/SherpaOnnxJavaDemo/app/.gitignore b/android/SherpaOnnxJavaDemo/app/.gitignore
new file mode 100644
index 0000000000..42afabfd2a
--- /dev/null
+++ b/android/SherpaOnnxJavaDemo/app/.gitignore
@@ -0,0 +1 @@
+/build
\ No newline at end of file
diff --git a/android/SherpaOnnxJavaDemo/app/build.gradle b/android/SherpaOnnxJavaDemo/app/build.gradle
new file mode 100644
index 0000000000..d8ccc7a608
--- /dev/null
+++ b/android/SherpaOnnxJavaDemo/app/build.gradle
@@ -0,0 +1,38 @@
+plugins {
+ id 'com.android.application'
+}
+
+android {
+ compileSdk 34
+
+ defaultConfig {
+ applicationId "com.k2fsa.sherpa.onnx"
+ minSdk 28
+ targetSdk 34
+ versionCode 1
+ versionName "1.0"
+
+ testInstrumentationRunner "androidx.test.runner.AndroidJUnitRunner"
+ }
+
+ buildTypes {
+ release {
+ minifyEnabled false
+ proguardFiles getDefaultProguardFile('proguard-android-optimize.txt'), 'proguard-rules.pro'
+ }
+ }
+ compileOptions {
+ sourceCompatibility JavaVersion.VERSION_1_8
+ targetCompatibility JavaVersion.VERSION_1_8
+ }
+}
+
+dependencies {
+ implementation 'androidx.appcompat:appcompat:1.3.1'
+ implementation 'com.google.android.material:material:1.3.0'
+ implementation 'androidx.constraintlayout:constraintlayout:1.1.3'
+ implementation 'pub.devrel:easypermissions:3.0.0'
+ implementation 'androidx.core:core-ktx:1.7.0'
+ // implementation files('/Users/fangjun/open-source/sherpa-onnx/android/SherpaOnnxAar/sherpa_onnx/build/outputs/aar/sherpa_onnx-release.aar')
+ implementation 'com.github.k2-fsa:sherpa-onnx:v1.10.42'
+}
diff --git a/android/SherpaOnnxJavaDemo/app/proguard-rules.pro b/android/SherpaOnnxJavaDemo/app/proguard-rules.pro
new file mode 100644
index 0000000000..481bb43481
--- /dev/null
+++ b/android/SherpaOnnxJavaDemo/app/proguard-rules.pro
@@ -0,0 +1,21 @@
+# Add project specific ProGuard rules here.
+# You can control the set of applied configuration files using the
+# proguardFiles setting in build.gradle.
+#
+# For more details, see
+# http://developer.android.com/guide/developing/tools/proguard.html
+
+# If your project uses WebView with JS, uncomment the following
+# and specify the fully qualified class name to the JavaScript interface
+# class:
+#-keepclassmembers class fqcn.of.javascript.interface.for.webview {
+# public *;
+#}
+
+# Uncomment this to preserve the line number information for
+# debugging stack traces.
+#-keepattributes SourceFile,LineNumberTable
+
+# If you keep the line number information, uncomment this to
+# hide the original source file name.
+#-renamesourcefileattribute SourceFile
\ No newline at end of file
diff --git a/android/SherpaOnnxJavaDemo/app/src/main/AndroidManifest.xml b/android/SherpaOnnxJavaDemo/app/src/main/AndroidManifest.xml
new file mode 100644
index 0000000000..947820249a
--- /dev/null
+++ b/android/SherpaOnnxJavaDemo/app/src/main/AndroidManifest.xml
@@ -0,0 +1,32 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/android/SherpaOnnxJavaDemo/app/src/main/assets/.gitkeep b/android/SherpaOnnxJavaDemo/app/src/main/assets/.gitkeep
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/android/SherpaOnnxJavaDemo/app/src/main/java/com/k2fsa/sherpa/onnx/AppViewModel.java b/android/SherpaOnnxJavaDemo/app/src/main/java/com/k2fsa/sherpa/onnx/AppViewModel.java
new file mode 100644
index 0000000000..bd5f8a86f1
--- /dev/null
+++ b/android/SherpaOnnxJavaDemo/app/src/main/java/com/k2fsa/sherpa/onnx/AppViewModel.java
@@ -0,0 +1,18 @@
+package com.k2fsa.sherpa.onnx;
+
+import androidx.lifecycle.LiveData;
+import androidx.lifecycle.MutableLiveData;
+import androidx.lifecycle.ViewModel;
+
+public class AppViewModel extends ViewModel {
+ private final MutableLiveData<String> speechRecognitionResult = new MutableLiveData<>();
+
+ public LiveData<String> getSpeechRecognitionResult() {
+ return speechRecognitionResult;
+ }
+
+ public void setSpeechRecognitionResult(String result) {
+ speechRecognitionResult.postValue(result);
+ }
+
+}
diff --git a/android/SherpaOnnxJavaDemo/app/src/main/java/com/k2fsa/sherpa/onnx/Application.java b/android/SherpaOnnxJavaDemo/app/src/main/java/com/k2fsa/sherpa/onnx/Application.java
new file mode 100644
index 0000000000..f8acf35cb9
--- /dev/null
+++ b/android/SherpaOnnxJavaDemo/app/src/main/java/com/k2fsa/sherpa/onnx/Application.java
@@ -0,0 +1,39 @@
+package com.k2fsa.sherpa.onnx;
+
+import androidx.annotation.NonNull;
+import androidx.lifecycle.ViewModelProvider;
+import androidx.lifecycle.ViewModelStore;
+import androidx.lifecycle.ViewModelStoreOwner;
+
+
+public class Application extends android.app.Application implements ViewModelStoreOwner {
+ public static Application sApplication;
+
+
+ private AppViewModel viewModel;
+ private ViewModelStore viewModelStore;
+
+ public static Application getInstance() {
+ return sApplication;
+ }
+
+ @Override
+ public void onCreate() {
+ super.onCreate();
+ sApplication = this;
+ viewModelStore = new ViewModelStore();
+ viewModel = new ViewModelProvider(this).get(AppViewModel.class);
+ }
+
+ @NonNull
+ @Override
+ public ViewModelStore getViewModelStore() {
+ return viewModelStore;
+ }
+
+ public AppViewModel getViewModel() {
+ return viewModel;
+ }
+
+
+}
diff --git a/android/SherpaOnnxJavaDemo/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.java b/android/SherpaOnnxJavaDemo/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.java
new file mode 100644
index 0000000000..c465f6c30b
--- /dev/null
+++ b/android/SherpaOnnxJavaDemo/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.java
@@ -0,0 +1,52 @@
+package com.k2fsa.sherpa.onnx;
+
+import androidx.appcompat.app.AppCompatActivity;
+import androidx.core.content.ContextCompat;
+import androidx.lifecycle.ViewModelProvider;
+
+import android.Manifest;
+import android.content.Intent;
+import android.os.Bundle;
+import android.util.Log;
+import android.widget.TextView;
+
+import com.k2fsa.sherpa.onnx.service.SpeechSherpaRecognitionService;
+
+import pub.devrel.easypermissions.EasyPermissions;
+
+public class MainActivity extends AppCompatActivity {
+ private AppViewModel appViewModel;
+ private TextView tvText;
+ private static final int RC_AUDIO_PERM = 123;
+
+ @Override
+ protected void onCreate(Bundle savedInstanceState) {
+ super.onCreate(savedInstanceState);
+ setContentView(R.layout.activity_main);
+ tvText = findViewById(R.id.text);
+ requestMicrophonePermission();
+ }
+
+
+ private void startSpeechService() {
+ Intent serviceIntent = new Intent(this, SpeechSherpaRecognitionService.class);
+ ContextCompat.startForegroundService(this, serviceIntent);
+ appViewModel = new ViewModelProvider(Application.getInstance()).get(AppViewModel.class);
+ appViewModel.getSpeechRecognitionResult().observe(this, this::handleSpeechRecognitionResult);
+ }
+
+ private void handleSpeechRecognitionResult(String result) {
+ tvText.setText(result);
+ }
+
+ private void requestMicrophonePermission() {
+ String[] perms = {Manifest.permission.RECORD_AUDIO};
+ if (EasyPermissions.hasPermissions(this, perms)) {
+ startSpeechService();
+ } else {
+ EasyPermissions.requestPermissions(MainActivity.this,
+ "We need access to your microphone for voice recognition",
+ RC_AUDIO_PERM, perms);
+ }
+ }
+}
\ No newline at end of file
diff --git a/android/SherpaOnnxJavaDemo/app/src/main/java/com/k2fsa/sherpa/onnx/service/SpeechSherpaRecognitionService.java b/android/SherpaOnnxJavaDemo/app/src/main/java/com/k2fsa/sherpa/onnx/service/SpeechSherpaRecognitionService.java
new file mode 100644
index 0000000000..02ad4a15d2
--- /dev/null
+++ b/android/SherpaOnnxJavaDemo/app/src/main/java/com/k2fsa/sherpa/onnx/service/SpeechSherpaRecognitionService.java
@@ -0,0 +1,261 @@
+package com.k2fsa.sherpa.onnx.service;
+
+import android.Manifest;
+import android.annotation.SuppressLint;
+import android.app.Notification;
+import android.app.NotificationChannel;
+import android.app.NotificationManager;
+import android.app.Service;
+import android.content.Intent;
+import android.content.pm.PackageManager;
+import android.content.res.AssetManager;
+import android.media.AudioFormat;
+import android.media.AudioRecord;
+import android.media.MediaRecorder;
+import android.os.Build;
+import android.os.IBinder;
+import android.text.TextUtils;
+import android.util.Log;
+
+import androidx.core.app.ActivityCompat;
+import androidx.core.app.NotificationCompat;
+
+
+import com.k2fsa.sherpa.onnx.AppViewModel;
+import com.k2fsa.sherpa.onnx.Application;
+
+import com.k2fsa.sherpa.onnx.OnlineModelConfig;
+import com.k2fsa.sherpa.onnx.OnlineRecognizer;
+
+import com.k2fsa.sherpa.onnx.OnlineRecognizerConfig;
+import com.k2fsa.sherpa.onnx.OnlineStream;
+import com.k2fsa.sherpa.onnx.OnlineTransducerModelConfig;
+import com.k2fsa.sherpa.onnx.R;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+
+import java.util.Objects;
+
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+
+
+public class SpeechSherpaRecognitionService extends Service {
+
+ private AppViewModel appViewModel;
+ private OnlineRecognizer recognizer;
+ private final int sampleRateInHz = 16000;
+
+ private Thread recordingThread;
+ private boolean isRecording = false;
+ private int audioSource = MediaRecorder.AudioSource.MIC;
+ private int channelConfig = AudioFormat.CHANNEL_IN_MONO;
+ private int audioFormat = AudioFormat.ENCODING_PCM_16BIT;
+ private AudioRecord audioRecord;
+ private int idx = 0;
+ private String lastText = "";
+ private ExecutorService executor;
+
+ @Override
+ public void onCreate() {
+ super.onCreate();
+ startForegroundService();
+ // Get the shared ViewModel from the Application
+ appViewModel = Application.getInstance().getViewModel();
+ int numBytes = AudioRecord.getMinBufferSize(sampleRateInHz, channelConfig, audioFormat);
+
+ if (ActivityCompat.checkSelfPermission(this, Manifest.permission.RECORD_AUDIO) != PackageManager.PERMISSION_GRANTED) {
+ // TODO: Consider calling
+ // ActivityCompat#requestPermissions
+ // here to request the missing permissions, and then overriding
+ // public void onRequestPermissionsResult(int requestCode, String[] permissions,
+ // int[] grantResults)
+ // to handle the case where the user grants the permission. See the documentation
+ // for ActivityCompat#requestPermissions for more details.
+ return;
+ }
+ audioRecord = new AudioRecord(
+ audioSource,
+ sampleRateInHz,
+ channelConfig,
+ audioFormat,
+ numBytes * 2 // a sample has two bytes as we are using 16-bit PCM
+ );
+ executor = Executors.newSingleThreadExecutor();
+ executor.execute(this::initializeSherpa);
+ }
+
+
+ private void initializeSherpa() {
+ Log.d("Current Directory", System.getProperty("user.dir"));
+ String modelDir = "sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20";
+ initializeSherpaDir(modelDir, modelDir);
+ OnlineTransducerModelConfig onlineTransducerModelConfig = new OnlineTransducerModelConfig();
+ onlineTransducerModelConfig.setEncoder(modelDir + "/encoder-epoch-99-avg-1.int8.onnx");
+ onlineTransducerModelConfig.setDecoder(modelDir + "/decoder-epoch-99-avg-1.onnx");
+ onlineTransducerModelConfig.setJoiner(modelDir + "/joiner-epoch-99-avg-1.int8.onnx");
+
+ OnlineModelConfig onlineModelConfig = new OnlineModelConfig();
+ onlineModelConfig.setTransducer(onlineTransducerModelConfig);
+ onlineModelConfig.setTokens(modelDir + "/tokens.txt");
+ onlineModelConfig.setModelType("zipformer");
+ onlineModelConfig.setDebug(true);
+
+ OnlineRecognizerConfig config = new OnlineRecognizerConfig();
+ config.setModelConfig(onlineModelConfig);
+ recognizer = new OnlineRecognizer(getAssets(), config);
+
+ audioRecord.startRecording();
+ startRecognition();
+ }
+
+ private void startRecognition() {
+ isRecording = true;
+ recordingThread = new Thread(this::processSamples);
+ recordingThread.start();
+ }
+
+ private void processSamples() {
+ OnlineStream stream = recognizer.createStream("");
+ double interval = 0.1;
+ int bufferSize = (int) (interval * sampleRateInHz);
+ short[] buffer = new short[bufferSize];
+
+ while (isRecording) {
+ int ret = audioRecord != null ? audioRecord.read(buffer, 0, buffer.length) : -1;
+ if (ret > 0) {
+ float[] samples = new float[ret];
+ for (int i = 0; i < ret; i++) {
+ samples[i] = buffer[i] / 32768.0f;
+ }
+ stream.acceptWaveform(samples, sampleRateInHz);
+ while (recognizer.isReady(stream)) {
+ recognizer.decode(stream);
+ }
+
+ boolean isEndpoint = recognizer.isEndpoint(stream);
+ String text = recognizer.getResult(stream).getText();
+ if (isEndpoint) {
+ float[] tailPaddings = new float[(int) (0.8 * sampleRateInHz)];
+ stream.acceptWaveform(tailPaddings, sampleRateInHz);
+ while (recognizer.isReady(stream)) {
+ recognizer.decode(stream);
+ }
+ text = recognizer.getResult(stream).getText();
+ }
+
+ String textToDisplay = lastText;
+
+ if (!TextUtils.isEmpty(text)) {
+ textToDisplay = TextUtils.isEmpty(lastText) ? idx + ": " + text : lastText + "\n" + idx + ": " + text;
+ }
+
+ if (isEndpoint) {
+ recognizer.reset(stream);
+ if (!TextUtils.isEmpty(text)) {
+ lastText = lastText + "\n" + idx + ": " + text;
+ textToDisplay = lastText;
+ idx += 1;
+ }
+ appViewModel.setSpeechRecognitionResult(textToDisplay);
+ }
+ }
+
+ }
+ stream.release();
+
+ }
+
+
+ @Override
+ public int onStartCommand(Intent intent, int flags, int startId) {
+
+ return START_STICKY;
+ }
+
+ @Override
+ public void onDestroy() {
+ super.onDestroy();
+ audioRecord.stop();
+ audioRecord.release();
+ executor.shutdown();
+ stopForeground(true);
+ }
+
+ @Override
+ public IBinder onBind(Intent intent) {
+ return null;
+ }
+
+
+ @SuppressLint("ForegroundServiceType")
+ private void startForegroundService() {
+ String channelId = createNotificationChannel();
+
+ Notification notification = new NotificationCompat.Builder(this, channelId)
+ .setContentTitle("Foreground Service")
+ .setContentText("Running in the foreground")
+ .setSmallIcon(R.drawable.ic_bg_mic_24)
+ .build();
+
+ startForeground(1, notification);
+ }
+
+ // Create the notification channel (required on Android 8.0 and above)
+ private String createNotificationChannel() {
+ if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.O) {
+ String channelId = "speech_channel";
+ String channelName = "Speech Channel";
+ NotificationChannel channel = new NotificationChannel(channelId, channelName, NotificationManager.IMPORTANCE_LOW);
+ NotificationManager manager = getSystemService(NotificationManager.class);
+ if (manager != null) {
+ manager.createNotificationChannel(channel);
+ }
+ return channelId;
+ } else {
+ return "";
+ }
+ }
+
+ private void initializeSherpaDir(String assetDir, String internalDir) {
+ AssetManager assetManager = getAssets();
+ File outDir = new File(getFilesDir(), internalDir);
+
+ if (!outDir.exists()) {
+ outDir.mkdirs();
+ }
+
+ try {
+ String[] assets = assetManager.list(assetDir);
+ if (assets != null) {
+ for (String asset : assets) {
+ String assetPath = assetDir.isEmpty() ? asset : assetDir + "/" + asset;
+ File outFile = new File(outDir, asset);
+ if (Objects.requireNonNull(assetManager.list(assetPath)).length > 0) {
+ outFile.mkdirs();
+ initializeSherpaDir(assetPath, internalDir + "/" + asset); // recursively copy the subdirectory
+ } else {
+ InputStream in = assetManager.open(assetPath);
+ OutputStream out = new FileOutputStream(outFile);
+
+ byte[] buffer = new byte[1024];
+ int read;
+ while ((read = in.read(buffer)) != -1) {
+ out.write(buffer, 0, read);
+ }
+
+ in.close();
+ out.flush();
+ out.close();
+ }
+ }
+ }
+ } catch (IOException e) {
+ Log.e("ModelCopy", "Failed to copy assets", e);
+ }
+ }
+}
diff --git a/android/SherpaOnnxJavaDemo/app/src/main/res/drawable-v24/ic_launcher_foreground.xml b/android/SherpaOnnxJavaDemo/app/src/main/res/drawable-v24/ic_launcher_foreground.xml
new file mode 100644
index 0000000000..2b068d1146
--- /dev/null
+++ b/android/SherpaOnnxJavaDemo/app/src/main/res/drawable-v24/ic_launcher_foreground.xml
@@ -0,0 +1,30 @@
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/android/SherpaOnnxJavaDemo/app/src/main/res/drawable/ic_bg_mic_24.xml b/android/SherpaOnnxJavaDemo/app/src/main/res/drawable/ic_bg_mic_24.xml
new file mode 100644
index 0000000000..5eb92eb316
--- /dev/null
+++ b/android/SherpaOnnxJavaDemo/app/src/main/res/drawable/ic_bg_mic_24.xml
@@ -0,0 +1,5 @@
+
+
+
diff --git a/android/SherpaOnnxJavaDemo/app/src/main/res/drawable/ic_launcher_background.xml b/android/SherpaOnnxJavaDemo/app/src/main/res/drawable/ic_launcher_background.xml
new file mode 100644
index 0000000000..07d5da9cbf
--- /dev/null
+++ b/android/SherpaOnnxJavaDemo/app/src/main/res/drawable/ic_launcher_background.xml
@@ -0,0 +1,170 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/android/SherpaOnnxJavaDemo/app/src/main/res/layout/activity_main.xml b/android/SherpaOnnxJavaDemo/app/src/main/res/layout/activity_main.xml
new file mode 100644
index 0000000000..ae3ea627e9
--- /dev/null
+++ b/android/SherpaOnnxJavaDemo/app/src/main/res/layout/activity_main.xml
@@ -0,0 +1,18 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml b/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml
new file mode 100644
index 0000000000..eca70cfe52
--- /dev/null
+++ b/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml
@@ -0,0 +1,5 @@
+
+
+
+
+
\ No newline at end of file
diff --git a/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml b/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml
new file mode 100644
index 0000000000..eca70cfe52
--- /dev/null
+++ b/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml
@@ -0,0 +1,5 @@
+
+
+
+
+
\ No newline at end of file
diff --git a/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-hdpi/ic_launcher.webp b/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-hdpi/ic_launcher.webp
new file mode 100644
index 0000000000..c209e78ecd
Binary files /dev/null and b/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-hdpi/ic_launcher.webp differ
diff --git a/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-hdpi/ic_launcher_round.webp b/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-hdpi/ic_launcher_round.webp
new file mode 100644
index 0000000000..b2dfe3d1ba
Binary files /dev/null and b/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-hdpi/ic_launcher_round.webp differ
diff --git a/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-mdpi/ic_launcher.webp b/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-mdpi/ic_launcher.webp
new file mode 100644
index 0000000000..4f0f1d64e5
Binary files /dev/null and b/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-mdpi/ic_launcher.webp differ
diff --git a/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-mdpi/ic_launcher_round.webp b/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-mdpi/ic_launcher_round.webp
new file mode 100644
index 0000000000..62b611da08
Binary files /dev/null and b/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-mdpi/ic_launcher_round.webp differ
diff --git a/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-xhdpi/ic_launcher.webp b/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-xhdpi/ic_launcher.webp
new file mode 100644
index 0000000000..948a3070fe
Binary files /dev/null and b/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-xhdpi/ic_launcher.webp differ
diff --git a/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-xhdpi/ic_launcher_round.webp b/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-xhdpi/ic_launcher_round.webp
new file mode 100644
index 0000000000..1b9a6956b3
Binary files /dev/null and b/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-xhdpi/ic_launcher_round.webp differ
diff --git a/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-xxhdpi/ic_launcher.webp b/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-xxhdpi/ic_launcher.webp
new file mode 100644
index 0000000000..28d4b77f9f
Binary files /dev/null and b/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-xxhdpi/ic_launcher.webp differ
diff --git a/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-xxhdpi/ic_launcher_round.webp b/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-xxhdpi/ic_launcher_round.webp
new file mode 100644
index 0000000000..9287f50836
Binary files /dev/null and b/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-xxhdpi/ic_launcher_round.webp differ
diff --git a/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-xxxhdpi/ic_launcher.webp b/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-xxxhdpi/ic_launcher.webp
new file mode 100644
index 0000000000..aa7d6427e6
Binary files /dev/null and b/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-xxxhdpi/ic_launcher.webp differ
diff --git a/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-xxxhdpi/ic_launcher_round.webp b/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-xxxhdpi/ic_launcher_round.webp
new file mode 100644
index 0000000000..9126ae37cb
Binary files /dev/null and b/android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-xxxhdpi/ic_launcher_round.webp differ
diff --git a/android/SherpaOnnxJavaDemo/app/src/main/res/values-night/themes.xml b/android/SherpaOnnxJavaDemo/app/src/main/res/values-night/themes.xml
new file mode 100644
index 0000000000..20276125c9
--- /dev/null
+++ b/android/SherpaOnnxJavaDemo/app/src/main/res/values-night/themes.xml
@@ -0,0 +1,16 @@
+
+
+
+
\ No newline at end of file
diff --git a/android/SherpaOnnxJavaDemo/app/src/main/res/values/colors.xml b/android/SherpaOnnxJavaDemo/app/src/main/res/values/colors.xml
new file mode 100644
index 0000000000..f8c6127d32
--- /dev/null
+++ b/android/SherpaOnnxJavaDemo/app/src/main/res/values/colors.xml
@@ -0,0 +1,10 @@
+
+
+ #FFBB86FC
+ #FF6200EE
+ #FF3700B3
+ #FF03DAC5
+ #FF018786
+ #FF000000
+ #FFFFFFFF
+
\ No newline at end of file
diff --git a/android/SherpaOnnxJavaDemo/app/src/main/res/values/strings.xml b/android/SherpaOnnxJavaDemo/app/src/main/res/values/strings.xml
new file mode 100644
index 0000000000..31aa7267dd
--- /dev/null
+++ b/android/SherpaOnnxJavaDemo/app/src/main/res/values/strings.xml
@@ -0,0 +1,3 @@
+<resources>
+    <string name="app_name">SherpaOnnxJavaDemo</string>
+</resources>
\ No newline at end of file
diff --git a/android/SherpaOnnxJavaDemo/app/src/main/res/values/themes.xml b/android/SherpaOnnxJavaDemo/app/src/main/res/values/themes.xml
new file mode 100644
index 0000000000..d9f132e856
--- /dev/null
+++ b/android/SherpaOnnxJavaDemo/app/src/main/res/values/themes.xml
@@ -0,0 +1,16 @@
+
+
+
+
\ No newline at end of file
diff --git a/android/SherpaOnnxJavaDemo/app/src/main/res/xml/backup_rules.xml b/android/SherpaOnnxJavaDemo/app/src/main/res/xml/backup_rules.xml
new file mode 100644
index 0000000000..fa0f996d2c
--- /dev/null
+++ b/android/SherpaOnnxJavaDemo/app/src/main/res/xml/backup_rules.xml
@@ -0,0 +1,13 @@
+
+
+
+
\ No newline at end of file
diff --git a/android/SherpaOnnxJavaDemo/app/src/main/res/xml/data_extraction_rules.xml b/android/SherpaOnnxJavaDemo/app/src/main/res/xml/data_extraction_rules.xml
new file mode 100644
index 0000000000..9ee9997b0b
--- /dev/null
+++ b/android/SherpaOnnxJavaDemo/app/src/main/res/xml/data_extraction_rules.xml
@@ -0,0 +1,19 @@
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/android/SherpaOnnxJavaDemo/build.gradle b/android/SherpaOnnxJavaDemo/build.gradle
new file mode 100644
index 0000000000..5ae9a7b016
--- /dev/null
+++ b/android/SherpaOnnxJavaDemo/build.gradle
@@ -0,0 +1,9 @@
+// Top-level build file where you can add configuration options common to all sub-projects/modules.
+plugins {
+ id 'com.android.application' version '7.2.2' apply false
+ id 'com.android.library' version '7.2.2' apply false
+}
+
+task clean(type: Delete) {
+ delete rootProject.buildDir
+}
\ No newline at end of file
diff --git a/android/SherpaOnnxJavaDemo/gradle.properties b/android/SherpaOnnxJavaDemo/gradle.properties
new file mode 100644
index 0000000000..dab7c28bff
--- /dev/null
+++ b/android/SherpaOnnxJavaDemo/gradle.properties
@@ -0,0 +1,21 @@
+# Project-wide Gradle settings.
+# IDE (e.g. Android Studio) users:
+# Gradle settings configured through the IDE *will override*
+# any settings specified in this file.
+# For more details on how to configure your build environment visit
+# http://www.gradle.org/docs/current/userguide/build_environment.html
+# Specifies the JVM arguments used for the daemon process.
+# The setting is particularly useful for tweaking memory settings.
+org.gradle.jvmargs=-Xmx2048m -Dfile.encoding=UTF-8
+# When configured, Gradle will run in incubating parallel mode.
+# This option should only be used with decoupled projects. More details, visit
+# http://www.gradle.org/docs/current/userguide/multi_project_builds.html#sec:decoupled_projects
+# org.gradle.parallel=true
+# AndroidX package structure to make it clearer which packages are bundled with the
+# Android operating system, and which are packaged with your app's APK
+# https://developer.android.com/topic/libraries/support-library/androidx-rn
+android.useAndroidX=true
+# Enables namespacing of each library's R class so that its R class includes only the
+# resources declared in the library itself and none from the library's dependencies,
+# thereby reducing the size of the R class for that library
+android.nonTransitiveRClass=true
\ No newline at end of file
diff --git a/android/SherpaOnnxJavaDemo/gradle/wrapper/gradle-wrapper.jar b/android/SherpaOnnxJavaDemo/gradle/wrapper/gradle-wrapper.jar
new file mode 100644
index 0000000000..e708b1c023
Binary files /dev/null and b/android/SherpaOnnxJavaDemo/gradle/wrapper/gradle-wrapper.jar differ
diff --git a/android/SherpaOnnxJavaDemo/gradle/wrapper/gradle-wrapper.properties b/android/SherpaOnnxJavaDemo/gradle/wrapper/gradle-wrapper.properties
new file mode 100644
index 0000000000..489dbeed18
--- /dev/null
+++ b/android/SherpaOnnxJavaDemo/gradle/wrapper/gradle-wrapper.properties
@@ -0,0 +1,6 @@
+#Tue Oct 22 10:59:18 CST 2024
+distributionBase=GRADLE_USER_HOME
+distributionUrl=https\://services.gradle.org/distributions/gradle-7.3.3-bin.zip
+distributionPath=wrapper/dists
+zipStorePath=wrapper/dists
+zipStoreBase=GRADLE_USER_HOME
diff --git a/android/SherpaOnnxJavaDemo/gradlew b/android/SherpaOnnxJavaDemo/gradlew
new file mode 100644
index 0000000000..4f906e0c81
--- /dev/null
+++ b/android/SherpaOnnxJavaDemo/gradlew
@@ -0,0 +1,185 @@
+#!/usr/bin/env sh
+
+#
+# Copyright 2015 the original author or authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+##############################################################################
+##
+## Gradle start up script for UN*X
+##
+##############################################################################
+
+# Attempt to set APP_HOME
+# Resolve links: $0 may be a link
+PRG="$0"
+# Need this for relative symlinks.
+while [ -h "$PRG" ] ; do
+ ls=`ls -ld "$PRG"`
+ link=`expr "$ls" : '.*-> \(.*\)$'`
+ if expr "$link" : '/.*' > /dev/null; then
+ PRG="$link"
+ else
+ PRG=`dirname "$PRG"`"/$link"
+ fi
+done
+SAVED="`pwd`"
+cd "`dirname \"$PRG\"`/" >/dev/null
+APP_HOME="`pwd -P`"
+cd "$SAVED" >/dev/null
+
+APP_NAME="Gradle"
+APP_BASE_NAME=`basename "$0"`
+
+# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
+DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'
+
+# Use the maximum available, or set MAX_FD != -1 to use that value.
+MAX_FD="maximum"
+
+warn () {
+ echo "$*"
+}
+
+die () {
+ echo
+ echo "$*"
+ echo
+ exit 1
+}
+
+# OS specific support (must be 'true' or 'false').
+cygwin=false
+msys=false
+darwin=false
+nonstop=false
+case "`uname`" in
+ CYGWIN* )
+ cygwin=true
+ ;;
+ Darwin* )
+ darwin=true
+ ;;
+ MINGW* )
+ msys=true
+ ;;
+ NONSTOP* )
+ nonstop=true
+ ;;
+esac
+
+CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
+
+
+# Determine the Java command to use to start the JVM.
+if [ -n "$JAVA_HOME" ] ; then
+ if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
+ # IBM's JDK on AIX uses strange locations for the executables
+ JAVACMD="$JAVA_HOME/jre/sh/java"
+ else
+ JAVACMD="$JAVA_HOME/bin/java"
+ fi
+ if [ ! -x "$JAVACMD" ] ; then
+ die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
+
+Please set the JAVA_HOME variable in your environment to match the
+location of your Java installation."
+ fi
+else
+ JAVACMD="java"
+ which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
+
+Please set the JAVA_HOME variable in your environment to match the
+location of your Java installation."
+fi
+
+# Increase the maximum file descriptors if we can.
+if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
+ MAX_FD_LIMIT=`ulimit -H -n`
+ if [ $? -eq 0 ] ; then
+ if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
+ MAX_FD="$MAX_FD_LIMIT"
+ fi
+ ulimit -n $MAX_FD
+ if [ $? -ne 0 ] ; then
+ warn "Could not set maximum file descriptor limit: $MAX_FD"
+ fi
+ else
+ warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
+ fi
+fi
+
+# For Darwin, add options to specify how the application appears in the dock
+if $darwin; then
+ GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
+fi
+
+# For Cygwin or MSYS, switch paths to Windows format before running java
+if [ "$cygwin" = "true" -o "$msys" = "true" ] ; then
+ APP_HOME=`cygpath --path --mixed "$APP_HOME"`
+ CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
+
+ JAVACMD=`cygpath --unix "$JAVACMD"`
+
+ # We build the pattern for arguments to be converted via cygpath
+ ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
+ SEP=""
+ for dir in $ROOTDIRSRAW ; do
+ ROOTDIRS="$ROOTDIRS$SEP$dir"
+ SEP="|"
+ done
+ OURCYGPATTERN="(^($ROOTDIRS))"
+ # Add a user-defined pattern to the cygpath arguments
+ if [ "$GRADLE_CYGPATTERN" != "" ] ; then
+ OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
+ fi
+ # Now convert the arguments - kludge to limit ourselves to /bin/sh
+ i=0
+ for arg in "$@" ; do
+ CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
+ CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option
+
+ if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition
+ eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
+ else
+ eval `echo args$i`="\"$arg\""
+ fi
+ i=`expr $i + 1`
+ done
+ case $i in
+ 0) set -- ;;
+ 1) set -- "$args0" ;;
+ 2) set -- "$args0" "$args1" ;;
+ 3) set -- "$args0" "$args1" "$args2" ;;
+ 4) set -- "$args0" "$args1" "$args2" "$args3" ;;
+ 5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
+ 6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
+ 7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
+ 8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
+ 9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
+ esac
+fi
+
+# Escape application args
+save () {
+ for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
+ echo " "
+}
+APP_ARGS=`save "$@"`
+
+# Collect all arguments for the java command, following the shell quoting and substitution rules
+eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"
+
+exec "$JAVACMD" "$@"
diff --git a/android/SherpaOnnxJavaDemo/gradlew.bat b/android/SherpaOnnxJavaDemo/gradlew.bat
new file mode 100644
index 0000000000..107acd32c4
--- /dev/null
+++ b/android/SherpaOnnxJavaDemo/gradlew.bat
@@ -0,0 +1,89 @@
+@rem
+@rem Copyright 2015 the original author or authors.
+@rem
+@rem Licensed under the Apache License, Version 2.0 (the "License");
+@rem you may not use this file except in compliance with the License.
+@rem You may obtain a copy of the License at
+@rem
+@rem https://www.apache.org/licenses/LICENSE-2.0
+@rem
+@rem Unless required by applicable law or agreed to in writing, software
+@rem distributed under the License is distributed on an "AS IS" BASIS,
+@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@rem See the License for the specific language governing permissions and
+@rem limitations under the License.
+@rem
+
+@if "%DEBUG%" == "" @echo off
+@rem ##########################################################################
+@rem
+@rem Gradle startup script for Windows
+@rem
+@rem ##########################################################################
+
+@rem Set local scope for the variables with windows NT shell
+if "%OS%"=="Windows_NT" setlocal
+
+set DIRNAME=%~dp0
+if "%DIRNAME%" == "" set DIRNAME=.
+set APP_BASE_NAME=%~n0
+set APP_HOME=%DIRNAME%
+
+@rem Resolve any "." and ".." in APP_HOME to make it shorter.
+for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi
+
+@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
+set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m"
+
+@rem Find java.exe
+if defined JAVA_HOME goto findJavaFromJavaHome
+
+set JAVA_EXE=java.exe
+%JAVA_EXE% -version >NUL 2>&1
+if "%ERRORLEVEL%" == "0" goto execute
+
+echo.
+echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
+echo.
+echo Please set the JAVA_HOME variable in your environment to match the
+echo location of your Java installation.
+
+goto fail
+
+:findJavaFromJavaHome
+set JAVA_HOME=%JAVA_HOME:"=%
+set JAVA_EXE=%JAVA_HOME%/bin/java.exe
+
+if exist "%JAVA_EXE%" goto execute
+
+echo.
+echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
+echo.
+echo Please set the JAVA_HOME variable in your environment to match the
+echo location of your Java installation.
+
+goto fail
+
+:execute
+@rem Setup the command line
+
+set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
+
+
+@rem Execute Gradle
+"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %*
+
+:end
+@rem End local scope for the variables with windows NT shell
+if "%ERRORLEVEL%"=="0" goto mainEnd
+
+:fail
+rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
+rem the _cmd.exe /c_ return code!
+if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
+exit /b 1
+
+:mainEnd
+if "%OS%"=="Windows_NT" endlocal
+
+:omega
diff --git a/android/SherpaOnnxJavaDemo/settings.gradle b/android/SherpaOnnxJavaDemo/settings.gradle
new file mode 100644
index 0000000000..e552eb6899
--- /dev/null
+++ b/android/SherpaOnnxJavaDemo/settings.gradle
@@ -0,0 +1,17 @@
+pluginManagement {
+ repositories {
+ gradlePluginPortal()
+ google()
+ mavenCentral()
+ }
+}
+dependencyResolutionManagement {
+ repositoriesMode.set(RepositoriesMode.FAIL_ON_PROJECT_REPOS)
+ repositories {
+ google()
+ mavenCentral()
+ maven { url 'https://jitpack.io' }
+ }
+}
+rootProject.name = "SherpaOnnxJavaDemo"
+include ':app'
diff --git a/android/SherpaOnnxKws/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt b/android/SherpaOnnxKws/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt
index b17a6ea6c7..b42937ad37 100644
--- a/android/SherpaOnnxKws/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt
+++ b/android/SherpaOnnxKws/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt
@@ -151,24 +151,27 @@ class MainActivity : AppCompatActivity() {
stream.acceptWaveform(samples, sampleRate = sampleRateInHz)
while (kws.isReady(stream)) {
kws.decode(stream)
- }
- val text = kws.getResult(stream).keyword
+ val text = kws.getResult(stream).keyword
+
+ var textToDisplay = lastText
- var textToDisplay = lastText
+ if (text.isNotBlank()) {
+ // Remember to reset the stream right after detecting a keyword
- if (text.isNotBlank()) {
- if (lastText.isBlank()) {
- textToDisplay = "$idx: $text"
- } else {
- textToDisplay = "$idx: $text\n$lastText"
+ kws.reset(stream)
+ if (lastText.isBlank()) {
+ textToDisplay = "$idx: $text"
+ } else {
+ textToDisplay = "$idx: $text\n$lastText"
+ }
+ lastText = "$idx: $text\n$lastText"
+ idx += 1
}
- lastText = "$idx: $text\n$lastText"
- idx += 1
- }
- runOnUiThread {
- textView.text = textToDisplay
+ runOnUiThread {
+ textView.text = textToDisplay
+ }
}
}
}
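The hunk above is easier to read in its assembled form: the keyword result is now fetched inside the decode loop, and the stream is reset as soon as a keyword is detected. A sketch of the resulting loop, put together from the added lines (`kws`, `stream`, `samples`, `sampleRateInHz`, `lastText`, `idx`, and `textView` are the demo's own variables from that file):

```kotlin
// Sketch of the decoding loop after this change (SherpaOnnxKws MainActivity.kt).
stream.acceptWaveform(samples, sampleRate = sampleRateInHz)
while (kws.isReady(stream)) {
    kws.decode(stream)
    val text = kws.getResult(stream).keyword

    var textToDisplay = lastText
    if (text.isNotBlank()) {
        // Remember to reset the stream right after detecting a keyword
        kws.reset(stream)
        textToDisplay = if (lastText.isBlank()) "$idx: $text" else "$idx: $text\n$lastText"
        lastText = "$idx: $text\n$lastText"
        idx += 1
    }
    runOnUiThread {
        textView.text = textToDisplay
    }
}
```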
diff --git a/android/SherpaOnnxSpeakerDiarization/.gitignore b/android/SherpaOnnxSpeakerDiarization/.gitignore
new file mode 100644
index 0000000000..aa724b7707
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/.gitignore
@@ -0,0 +1,15 @@
+*.iml
+.gradle
+/local.properties
+/.idea/caches
+/.idea/libraries
+/.idea/modules.xml
+/.idea/workspace.xml
+/.idea/navEditor.xml
+/.idea/assetWizardSettings.xml
+.DS_Store
+/build
+/captures
+.externalNativeBuild
+.cxx
+local.properties
diff --git a/android/SherpaOnnxSpeakerDiarization/app/.gitignore b/android/SherpaOnnxSpeakerDiarization/app/.gitignore
new file mode 100644
index 0000000000..42afabfd2a
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/app/.gitignore
@@ -0,0 +1 @@
+/build
\ No newline at end of file
diff --git a/android/SherpaOnnxSpeakerDiarization/app/build.gradle.kts b/android/SherpaOnnxSpeakerDiarization/app/build.gradle.kts
new file mode 100644
index 0000000000..7a390ba425
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/app/build.gradle.kts
@@ -0,0 +1,71 @@
+plugins {
+ alias(libs.plugins.android.application)
+ alias(libs.plugins.jetbrains.kotlin.android)
+}
+
+android {
+ namespace = "com.k2fsa.sherpa.onnx.speaker.diarization"
+ compileSdk = 34
+
+ defaultConfig {
+ applicationId = "com.k2fsa.sherpa.onnx.speaker.diarization"
+ minSdk = 21
+ targetSdk = 34
+ versionCode = 1
+ versionName = "1.0"
+
+ testInstrumentationRunner = "androidx.test.runner.AndroidJUnitRunner"
+ vectorDrawables {
+ useSupportLibrary = true
+ }
+ }
+
+ buildTypes {
+ release {
+ isMinifyEnabled = false
+ proguardFiles(
+ getDefaultProguardFile("proguard-android-optimize.txt"),
+ "proguard-rules.pro"
+ )
+ }
+ }
+ compileOptions {
+ sourceCompatibility = JavaVersion.VERSION_1_8
+ targetCompatibility = JavaVersion.VERSION_1_8
+ }
+ kotlinOptions {
+ jvmTarget = "1.8"
+ }
+ buildFeatures {
+ compose = true
+ }
+ composeOptions {
+ kotlinCompilerExtensionVersion = "1.5.1"
+ }
+ packaging {
+ resources {
+ excludes += "/META-INF/{AL2.0,LGPL2.1}"
+ }
+ }
+}
+
+dependencies {
+
+ implementation(libs.androidx.core.ktx)
+ implementation(libs.androidx.lifecycle.runtime.ktx)
+ implementation(libs.androidx.activity.compose)
+ implementation(platform(libs.androidx.compose.bom))
+ implementation(libs.androidx.ui)
+ implementation(libs.androidx.ui.graphics)
+ implementation(libs.androidx.ui.tooling.preview)
+ implementation(libs.androidx.material3)
+ implementation(libs.androidx.navigation.compose)
+ implementation(libs.androidx.documentfile)
+ testImplementation(libs.junit)
+ androidTestImplementation(libs.androidx.junit)
+ androidTestImplementation(libs.androidx.espresso.core)
+ androidTestImplementation(platform(libs.androidx.compose.bom))
+ androidTestImplementation(libs.androidx.ui.test.junit4)
+ debugImplementation(libs.androidx.ui.tooling)
+ debugImplementation(libs.androidx.ui.test.manifest)
+}
\ No newline at end of file
diff --git a/android/SherpaOnnxSpeakerDiarization/app/proguard-rules.pro b/android/SherpaOnnxSpeakerDiarization/app/proguard-rules.pro
new file mode 100644
index 0000000000..481bb43481
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/app/proguard-rules.pro
@@ -0,0 +1,21 @@
+# Add project specific ProGuard rules here.
+# You can control the set of applied configuration files using the
+# proguardFiles setting in build.gradle.
+#
+# For more details, see
+# http://developer.android.com/guide/developing/tools/proguard.html
+
+# If your project uses WebView with JS, uncomment the following
+# and specify the fully qualified class name to the JavaScript interface
+# class:
+#-keepclassmembers class fqcn.of.javascript.interface.for.webview {
+# public *;
+#}
+
+# Uncomment this to preserve the line number information for
+# debugging stack traces.
+#-keepattributes SourceFile,LineNumberTable
+
+# If you keep the line number information, uncomment this to
+# hide the original source file name.
+#-renamesourcefileattribute SourceFile
\ No newline at end of file
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/androidTest/java/com/k2fsa/sherpa/onnx/speaker/diarization/ExampleInstrumentedTest.kt b/android/SherpaOnnxSpeakerDiarization/app/src/androidTest/java/com/k2fsa/sherpa/onnx/speaker/diarization/ExampleInstrumentedTest.kt
new file mode 100644
index 0000000000..53d7af15fc
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/app/src/androidTest/java/com/k2fsa/sherpa/onnx/speaker/diarization/ExampleInstrumentedTest.kt
@@ -0,0 +1,24 @@
+package com.k2fsa.sherpa.onnx.speaker.diarization
+
+import androidx.test.platform.app.InstrumentationRegistry
+import androidx.test.ext.junit.runners.AndroidJUnit4
+
+import org.junit.Test
+import org.junit.runner.RunWith
+
+import org.junit.Assert.*
+
+/**
+ * Instrumented test, which will execute on an Android device.
+ *
+ * See [testing documentation](http://d.android.com/tools/testing).
+ */
+@RunWith(AndroidJUnit4::class)
+class ExampleInstrumentedTest {
+ @Test
+ fun useAppContext() {
+ // Context of the app under test.
+ val appContext = InstrumentationRegistry.getInstrumentation().targetContext
+ assertEquals("com.k2fsa.sherpa.onnx.speaker.diarization", appContext.packageName)
+ }
+}
\ No newline at end of file
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/AndroidManifest.xml b/android/SherpaOnnxSpeakerDiarization/app/src/main/AndroidManifest.xml
new file mode 100644
index 0000000000..d58f7e8d77
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/AndroidManifest.xml
@@ -0,0 +1,32 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/assets/.gitkeep b/android/SherpaOnnxSpeakerDiarization/app/src/main/assets/.gitkeep
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/BarItem.kt b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/BarItem.kt
new file mode 100644
index 0000000000..0895cf52cf
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/BarItem.kt
@@ -0,0 +1,13 @@
+package com.k2fsa.sherpa.onnx.speaker.diarization
+
+import androidx.compose.ui.graphics.vector.ImageVector
+
+data class BarItem(
+ val title: String,
+
+ // see https://www.composables.com/icons
+ // and
+ // https://developer.android.com/reference/kotlin/androidx/compose/material/icons/filled/package-summary
+ val image: ImageVector,
+ val route: String,
+)
\ No newline at end of file
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/MainActivity.kt b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/MainActivity.kt
new file mode 100644
index 0000000000..7a25d49b9a
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/MainActivity.kt
@@ -0,0 +1,132 @@
+package com.k2fsa.sherpa.onnx.speaker.diarization
+
+import android.os.Bundle
+import androidx.activity.ComponentActivity
+import androidx.activity.compose.setContent
+import androidx.activity.enableEdgeToEdge
+import androidx.compose.foundation.layout.Column
+import androidx.compose.foundation.layout.fillMaxSize
+import androidx.compose.foundation.layout.padding
+import androidx.compose.material3.CenterAlignedTopAppBar
+import androidx.compose.material3.ExperimentalMaterial3Api
+import androidx.compose.material3.Icon
+import androidx.compose.material3.MaterialTheme
+import androidx.compose.material3.NavigationBar
+import androidx.compose.material3.NavigationBarItem
+import androidx.compose.material3.Scaffold
+import androidx.compose.material3.Surface
+import androidx.compose.material3.Text
+import androidx.compose.material3.TopAppBarDefaults
+import androidx.compose.runtime.Composable
+import androidx.compose.runtime.getValue
+import androidx.compose.ui.Modifier
+import androidx.compose.ui.text.font.FontWeight
+import androidx.compose.ui.tooling.preview.Preview
+import androidx.navigation.NavGraph.Companion.findStartDestination
+import androidx.navigation.NavHostController
+import androidx.navigation.compose.NavHost
+import androidx.navigation.compose.composable
+import androidx.navigation.compose.currentBackStackEntryAsState
+import androidx.navigation.compose.rememberNavController
+import com.k2fsa.sherpa.onnx.speaker.diarization.screens.HelpScreen
+import com.k2fsa.sherpa.onnx.speaker.diarization.screens.HomeScreen
+import com.k2fsa.sherpa.onnx.speaker.diarization.ui.theme.SherpaOnnxSpeakerDiarizationTheme
+
+const val TAG = "sherpa-onnx-sd"
+
+class MainActivity : ComponentActivity() {
+ override fun onCreate(savedInstanceState: Bundle?) {
+ super.onCreate(savedInstanceState)
+ enableEdgeToEdge()
+ setContent {
+ SherpaOnnxSpeakerDiarizationTheme {
+ // A surface container using the 'background' color from the theme
+ Surface(
+ modifier = Modifier.fillMaxSize(),
+ color = MaterialTheme.colorScheme.background
+ ) {
+ MainScreen()
+ }
+ }
+ }
+ SpeakerDiarizationObject.initSpeakerDiarization(this.assets)
+ }
+}
+
+@OptIn(ExperimentalMaterial3Api::class)
+@Composable
+fun MainScreen(modifier: Modifier = Modifier) {
+ val navController = rememberNavController()
+ Scaffold(
+ topBar = {
+ CenterAlignedTopAppBar(
+ colors = TopAppBarDefaults.topAppBarColors(
+ containerColor = MaterialTheme.colorScheme.primaryContainer,
+ titleContentColor = MaterialTheme.colorScheme.primary,
+ ),
+ title = {
+ Text(
+ "Next-gen Kaldi: Speaker Diarization",
+ fontWeight = FontWeight.Bold,
+ )
+ },
+ )
+ },
+ content = { padding ->
+ Column(Modifier.padding(padding)) {
+ NavigationHost(navController = navController)
+
+ }
+ },
+ bottomBar = {
+ BottomNavigationBar(navController = navController)
+ }
+ )
+}
+
+@Composable
+fun NavigationHost(navController: NavHostController) {
+ NavHost(navController = navController, startDestination = NavRoutes.Home.route) {
+ composable(NavRoutes.Home.route) {
+ HomeScreen()
+ }
+
+ composable(NavRoutes.Help.route) {
+ HelpScreen()
+ }
+ }
+}
+
+@Composable
+fun BottomNavigationBar(navController: NavHostController) {
+ NavigationBar {
+ val backStackEntry by navController.currentBackStackEntryAsState()
+ val currentRoute = backStackEntry?.destination?.route
+
+ NavBarItems.BarItems.forEach { navItem ->
+ NavigationBarItem(selected = currentRoute == navItem.route,
+ onClick = {
+ navController.navigate(navItem.route) {
+ popUpTo(navController.graph.findStartDestination().id) {
+ saveState = true
+ }
+ launchSingleTop = true
+ restoreState = true
+ }
+ },
+ icon = {
+ Icon(imageVector = navItem.image, contentDescription = navItem.title)
+ }, label = {
+ Text(text = navItem.title)
+ })
+ }
+ }
+}
+
+@Preview(showBackground = true)
+@Composable
+fun MainScreenPreview() {
+ SherpaOnnxSpeakerDiarizationTheme {
+ MainScreen()
+ }
+}
\ No newline at end of file
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/NavBarItems.kt b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/NavBarItems.kt
new file mode 100644
index 0000000000..65c737f971
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/NavBarItems.kt
@@ -0,0 +1,20 @@
+package com.k2fsa.sherpa.onnx.speaker.diarization
+
+import androidx.compose.material.icons.Icons
+import androidx.compose.material.icons.filled.Home
+import androidx.compose.material.icons.filled.Info
+
+object NavBarItems {
+ val BarItems = listOf(
+ BarItem(
+ title = "Home",
+ image = Icons.Filled.Home,
+ route = "home",
+ ),
+ BarItem(
+ title = "Help",
+ image = Icons.Filled.Info,
+ route = "help",
+ ),
+ )
+}
\ No newline at end of file
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/NavRoutes.kt b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/NavRoutes.kt
new file mode 100644
index 0000000000..2e1ae90b51
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/NavRoutes.kt
@@ -0,0 +1,6 @@
+package com.k2fsa.sherpa.onnx.speaker.diarization
+
+sealed class NavRoutes(val route: String) {
+ object Home : NavRoutes("home")
+ object Help : NavRoutes("help")
+}
\ No newline at end of file
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/OfflineSpeakerDiarization.kt b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/OfflineSpeakerDiarization.kt
new file mode 120000
index 0000000000..459cc22ccd
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/OfflineSpeakerDiarization.kt
@@ -0,0 +1 @@
+../../../../../../../../../../../../sherpa-onnx/kotlin-api/OfflineSpeakerDiarization.kt
\ No newline at end of file
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/ReadWaveFile.kt b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/ReadWaveFile.kt
new file mode 100644
index 0000000000..940a2b6434
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/ReadWaveFile.kt
@@ -0,0 +1,137 @@
+package com.k2fsa.sherpa.onnx.speaker.diarization.screens
+
+import android.content.Context
+import android.media.AudioFormat
+import android.media.MediaCodec
+import android.media.MediaExtractor
+import android.media.MediaFormat
+import android.net.Uri
+
+data class WaveData(
+ val sampleRate: Int? = null,
+ val samples: FloatArray? = null,
+ val msg: String? = null
+)
+
+// It supports only 16-bit encoded wave files
+//
+// References
+// - https://gist.github.com/a-m-s/1991ab18fbcb0fcc2cf9
+// - https://github.com/taehwandev/MediaCodecExample/blob/master/app/src/main/java/tech/thdev/mediacodecexample/audio/AACAudioDecoderThread.kt
+fun readUri(context: Context, uri: Uri): WaveData {
+ val extractor = MediaExtractor()
+ extractor.setDataSource(context, uri, null)
+
+ val samplesList: MutableList<FloatArray> = ArrayList()
+
+ for (i in 0 until extractor.trackCount) {
+ val format = extractor.getTrackFormat(i)
+ val mime = format.getString(MediaFormat.KEY_MIME)
+ if (mime?.startsWith("audio/") == true) {
+ extractor.selectTrack(i)
+
+ var encoding: Int = -1
+ try {
+ encoding = format.getInteger(MediaFormat.KEY_PCM_ENCODING)
+ } catch (_: Exception) {
+ }
+
+ if (encoding != AudioFormat.ENCODING_PCM_16BIT) {
+ return WaveData(msg = "We support only 16-bit encoded wave files")
+ }
+
+ val sampleRate = format.getInteger(MediaFormat.KEY_SAMPLE_RATE)
+ val decoder = MediaCodec.createDecoderByType(mime)
+ decoder.configure(format, null, null, 0)
+ decoder.start()
+
+ val inputBuffers = decoder.inputBuffers
+ var outputBuffers = decoder.outputBuffers
+
+ val info = MediaCodec.BufferInfo()
+ var eof = false
+
+ var outputBufferIndex = -1
+
+ while (true) {
+ if (!eof) {
+ val inputBufferIndex = decoder.dequeueInputBuffer(10000)
+ if (inputBufferIndex > 0) {
+ val size = extractor.readSampleData(inputBuffers[inputBufferIndex], 0)
+ if (size < 0) {
+ decoder.queueInputBuffer(
+ inputBufferIndex,
+ 0,
+ 0,
+ 0,
+ MediaCodec.BUFFER_FLAG_END_OF_STREAM
+ )
+ eof = true
+ } else {
+ decoder.queueInputBuffer(
+ inputBufferIndex,
+ 0,
+ size,
+ extractor.sampleTime,
+ 0
+ )
+ extractor.advance()
+ }
+ }
+ } // if (!eof)
+
+ if (outputBufferIndex >= 0) {
+ outputBuffers[outputBufferIndex].position(0)
+ }
+
+ outputBufferIndex = decoder.dequeueOutputBuffer(info, 10000)
+ if (outputBufferIndex >= 0) {
+ if (info.flags != 0) {
+ decoder.stop()
+ decoder.release()
+
+ var k = 0
+ for (s in samplesList) {
+ k += s.size
+ }
+ if (k == 0) {
+ return WaveData(msg = "Failed to read selected file")
+ }
+
+ val ans = FloatArray(k)
+ k = 0
+ for (s in samplesList) {
+ s.copyInto(ans, k)
+ k += s.size
+ }
+
+ return WaveData(sampleRate = sampleRate, samples = ans)
+ }
+
+ val buffer = outputBuffers[outputBufferIndex]
+ val chunk = ByteArray(info.size)
+ buffer[chunk]
+ buffer.clear()
+
+ val numSamples = info.size / 2
+
+ val samples = FloatArray(numSamples)
+ for (k in 0 until numSamples) {
+ // assume little endian; the low byte must be treated as unsigned
+ val s = (chunk[2 * k].toInt() and 0xff) + chunk[2 * k + 1] * 256.0f
+
+ samples[k] = s / 32768.0f
+ }
+ samplesList.add(samples)
+
+ decoder.releaseOutputBuffer(outputBufferIndex, false)
+ } else if (outputBufferIndex == MediaCodec.INFO_OUTPUT_BUFFERS_CHANGED) {
+ outputBuffers = decoder.outputBuffers
+ }
+ }
+ }
+ }
+
+ extractor.release()
+ return WaveData(msg = "not an audio file")
+}
\ No newline at end of file
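A short, hypothetical call-site sketch for `readUri()`; in the actual demo the `Uri` comes from a file picker in `Home.kt`, so the helper name and the logging tag below are illustrative only:

```kotlin
import android.content.Context
import android.net.Uri
import android.util.Log

// Hypothetical call site: read a user-selected file and report what was decoded.
fun loadSelectedFile(context: Context, uri: Uri) {
    val data = readUri(context, uri)
    val msg = data.msg
    if (msg != null) {
        // e.g. "We support only 16-bit encoded wave files"
        Log.w("sherpa-onnx-sd", msg)
        return
    }
    val sampleRate = data.sampleRate ?: return
    val samples = data.samples ?: return
    Log.i("sherpa-onnx-sd", "Read ${samples.size} samples at $sampleRate Hz")
}
```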
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/SpeakerDiarizationObject.kt b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/SpeakerDiarizationObject.kt
new file mode 100644
index 0000000000..9df6bd5616
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/SpeakerDiarizationObject.kt
@@ -0,0 +1,67 @@
+package com.k2fsa.sherpa.onnx.speaker.diarization
+
+import android.content.res.AssetManager
+import android.util.Log
+import com.k2fsa.sherpa.onnx.FastClusteringConfig
+import com.k2fsa.sherpa.onnx.OfflineSpeakerDiarization
+import com.k2fsa.sherpa.onnx.OfflineSpeakerDiarizationConfig
+import com.k2fsa.sherpa.onnx.OfflineSpeakerSegmentationModelConfig
+import com.k2fsa.sherpa.onnx.OfflineSpeakerSegmentationPyannoteModelConfig
+import com.k2fsa.sherpa.onnx.SpeakerEmbeddingExtractorConfig
+
+// Please download
+// https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+// then untar it, rename model.onnx to segmentation.onnx, and move
+// segmentation.onnx to the assets folder
+val segmentationModel = "segmentation.onnx"
+
+// please download it from
+// https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
+// and rename it to embedding.onnx
+// and move it to the assets folder
+val embeddingModel = "embedding.onnx"
+
+// in the end, your assets folder should look like below
+/*
+(py38) fangjuns-MacBook-Pro:assets fangjun$ pwd
+/Users/fangjun/open-source/sherpa-onnx/android/SherpaOnnxSpeakerDiarization/app/src/main/assets
+(py38) fangjuns-MacBook-Pro:assets fangjun$ ls -lh
+total 89048
+-rw-r--r-- 1 fangjun staff 38M Oct 12 20:28 embedding.onnx
+-rw-r--r-- 1 fangjun staff 5.7M Oct 12 20:28 segmentation.onnx
+ */
+
+object SpeakerDiarizationObject {
+ var _sd: OfflineSpeakerDiarization? = null
+ val sd: OfflineSpeakerDiarization
+ get() {
+ return _sd!!
+ }
+
+ fun initSpeakerDiarization(assetManager: AssetManager? = null) {
+ synchronized(this) {
+ if (_sd != null) {
+ return
+ }
+ Log.i(TAG, "Initializing sherpa-onnx speaker diarization")
+
+ val config = OfflineSpeakerDiarizationConfig(
+ segmentation = OfflineSpeakerSegmentationModelConfig(
+ pyannote = OfflineSpeakerSegmentationPyannoteModelConfig(
+ segmentationModel
+ ),
+ debug = true,
+ ),
+ embedding = SpeakerEmbeddingExtractorConfig(
+ model = embeddingModel,
+ debug = true,
+ numThreads = 2,
+ ),
+ clustering = FastClusteringConfig(numClusters = -1, threshold = 0.5f),
+ minDurationOn = 0.2f,
+ minDurationOff = 0.5f,
+ )
+ _sd = OfflineSpeakerDiarization(assetManager = assetManager, config = config)
+ }
+ }
+}
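A hedged usage sketch for the object above. It assumes the kotlin-api class symlinked into this project (`OfflineSpeakerDiarization.kt`) exposes a `process()` method that returns segments with `start`, `end`, and `speaker` fields, as the repo's other speaker-diarization examples do; treat the exact signatures as assumptions and verify them against that file:

```kotlin
// Assumed API: sd.process(FloatArray) returns segments with start/end/speaker fields.
// The samples are expected to be mono float audio at the diarization sample rate.
fun runDiarization(samples: FloatArray): String {
    val sd = SpeakerDiarizationObject.sd
    val segments = sd.process(samples)
    return segments.joinToString("\n") { s ->
        String.format("%.2f -- %.2f speaker_%02d", s.start, s.end, s.speaker)
    }
}
```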
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/SpeakerEmbeddingExtractorConfig.kt b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/SpeakerEmbeddingExtractorConfig.kt
new file mode 120000
index 0000000000..9bab8fe88a
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/SpeakerEmbeddingExtractorConfig.kt
@@ -0,0 +1 @@
+../../../../../../../../../../../../sherpa-onnx/kotlin-api/SpeakerEmbeddingExtractorConfig.kt
\ No newline at end of file
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/screens/Help.kt b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/screens/Help.kt
new file mode 100644
index 0000000000..b3640b9e97
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/screens/Help.kt
@@ -0,0 +1,38 @@
+package com.k2fsa.sherpa.onnx.speaker.diarization.screens
+
+import androidx.compose.foundation.layout.Box
+import androidx.compose.foundation.layout.Column
+import androidx.compose.foundation.layout.Spacer
+import androidx.compose.foundation.layout.fillMaxSize
+import androidx.compose.foundation.layout.height
+import androidx.compose.foundation.layout.padding
+import androidx.compose.material3.Text
+import androidx.compose.runtime.Composable
+import androidx.compose.ui.Modifier
+import androidx.compose.ui.unit.dp
+import androidx.compose.ui.unit.sp
+
+@Composable
+fun HelpScreen() {
+ Box(modifier = Modifier.fillMaxSize()) {
+ Column(
+ modifier = Modifier.padding(8.dp)
+ ) {
+ Text(
+                "This app accepts only 16kHz, 16-bit, single-channel *.wav files. " +
+                        "It has two settings: the number of speakers and the clustering threshold. " +
+                        "If you know the actual number of speakers in the file, please set it. " +
+                        "Otherwise, set it to 0; in that case you have to set the clustering threshold. " +
+                        "A larger threshold leads to fewer detected speakers."
+ )
+ Spacer(modifier = Modifier.height(5.dp))
+ Text("The speaker segmentation model is from " +
+ "pyannote-audio (https://huggingface.co/pyannote/segmentation-3.0), "+
+ "whereas the embedding extractor model is from 3D-Speaker (https://github.com/modelscope/3D-Speaker)")
+ Spacer(modifier = Modifier.height(5.dp))
+ Text("Please see http://github.com/k2-fsa/sherpa-onnx ")
+ Spacer(modifier = Modifier.height(5.dp))
+ Text("Everything is open-sourced!", fontSize = 20.sp)
+ }
+ }
+}
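
In code, the two settings described by this help text end up in FastClusteringConfig. A small hedged sketch of the mapping (the same assignments appear in Home.kt below; `knownSpeakers` is a hypothetical input, and "threshold is only relevant when the count is not fixed" is an inference from the help text, not something the code states):

```kotlin
// knownSpeakers > 0 when the user knows how many speakers are in the file.
val clustering = if (knownSpeakers > 0) {
    FastClusteringConfig(numClusters = knownSpeakers, threshold = 0.5f)
} else {
    // Unknown count: clustering is driven by the threshold; larger => fewer speakers.
    FastClusteringConfig(numClusters = 0, threshold = 0.7f)
}
```
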
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/screens/Home.kt b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/screens/Home.kt
new file mode 100644
index 0000000000..a5a9cd31c9
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/screens/Home.kt
@@ -0,0 +1,210 @@
+package com.k2fsa.sherpa.onnx.speaker.diarization.screens
+
+import android.util.Log
+import androidx.activity.compose.rememberLauncherForActivityResult
+import androidx.activity.result.contract.ActivityResultContracts
+import androidx.compose.foundation.layout.Arrangement
+import androidx.compose.foundation.layout.Column
+import androidx.compose.foundation.layout.Row
+import androidx.compose.foundation.layout.Spacer
+import androidx.compose.foundation.layout.fillMaxWidth
+import androidx.compose.foundation.layout.padding
+import androidx.compose.foundation.layout.size
+import androidx.compose.foundation.rememberScrollState
+import androidx.compose.foundation.verticalScroll
+import androidx.compose.material3.Button
+import androidx.compose.material3.OutlinedTextField
+import androidx.compose.material3.Text
+import androidx.compose.runtime.Composable
+import androidx.compose.runtime.getValue
+import androidx.compose.runtime.mutableStateOf
+import androidx.compose.runtime.remember
+import androidx.compose.runtime.setValue
+import androidx.compose.ui.Alignment
+import androidx.compose.ui.Modifier
+import androidx.compose.ui.platform.LocalClipboardManager
+import androidx.compose.ui.platform.LocalContext
+import androidx.compose.ui.text.AnnotatedString
+import androidx.compose.ui.unit.dp
+import androidx.compose.ui.unit.sp
+import androidx.documentfile.provider.DocumentFile
+import com.k2fsa.sherpa.onnx.speaker.diarization.SpeakerDiarizationObject
+import com.k2fsa.sherpa.onnx.speaker.diarization.TAG
+import kotlin.concurrent.thread
+
+
+private var samples: FloatArray? = null
+
+@Composable
+fun HomeScreen() {
+ val context = LocalContext.current
+
+ var sampleRate: Int
+ var filename by remember { mutableStateOf("") }
+ var status by remember { mutableStateOf("") }
+ var progress by remember { mutableStateOf("") }
+ val clipboardManager = LocalClipboardManager.current
+ var done by remember { mutableStateOf(false) }
+ var fileIsOk by remember { mutableStateOf(false) }
+ var started by remember { mutableStateOf(false) }
+ var numSpeakers by remember { mutableStateOf(0) }
+ var threshold by remember { mutableStateOf(0.5f) }
+
+
+    // Progress callback: reports the percentage of processed chunks and returns 0.
+    val callback = here@{ numProcessedChunks: Int, numTotalChunks: Int, arg: Long ->
+        val percent = 100.0 * numProcessedChunks / numTotalChunks
+        progress = "%.2f%%".format(percent)
+        Log.i(TAG, progress)
+        return@here 0
+    }
+
+ val launcher = rememberLauncherForActivityResult(ActivityResultContracts.OpenDocument()) {
+ it?.let {
+ val documentFile = DocumentFile.fromSingleUri(context, it)
+ filename = documentFile?.name ?: ""
+
+ progress = ""
+ done = false
+ fileIsOk = false
+
+ if (filename.isNotEmpty()) {
+ val data = readUri(context, it)
+ Log.i(TAG, "sample rate: ${data.sampleRate}")
+ Log.i(TAG, "numSamples: ${data.samples?.size ?: 0}")
+ if (data.msg != null) {
+ Log.i(TAG, "failed to read $filename")
+ status = data.msg
+ } else if (data.sampleRate != SpeakerDiarizationObject.sd.sampleRate()) {
+ status =
+ "Expected sample rate: ${SpeakerDiarizationObject.sd.sampleRate()}. Given wave file with sample rate: ${data.sampleRate}"
+ } else {
+ samples = data.samples!!
+ fileIsOk = true
+ }
+ }
+ }
+ }
+
+ Column(
+ modifier = Modifier.padding(10.dp),
+ verticalArrangement = Arrangement.Top,
+ ) {
+ Row(
+ modifier = Modifier.fillMaxWidth(),
+ horizontalArrangement = Arrangement.SpaceEvenly,
+ verticalAlignment = Alignment.CenterVertically
+ ) {
+
+ Button(onClick = {
+ launcher.launch(arrayOf("audio/*"))
+ }) {
+ Text("Select a .wav file")
+ }
+
+ Button(enabled = fileIsOk && !started,
+ onClick = {
+ Log.i(TAG, "started")
+ Log.i(TAG, "num samples: ${samples?.size}")
+ started = true
+ progress = ""
+
+ val config = SpeakerDiarizationObject.sd.config
+ config.clustering.numClusters = numSpeakers
+ config.clustering.threshold = threshold
+
+ SpeakerDiarizationObject.sd.setConfig(config)
+
+ thread(true) {
+ done = false
+ status = "Started! Please wait"
+ val segments = SpeakerDiarizationObject.sd.processWithCallback(
+ samples!!,
+ callback = callback,
+ )
+ done = true
+ started = false
+ status = ""
+ for (s in segments) {
+ val start = "%.2f".format(s.start)
+ val end = "%.2f".format(s.end)
+ val speaker = "speaker_%02d".format(s.speaker)
+ status += "$start -- $end $speaker\n"
+ Log.i(TAG, "$start -- $end $speaker")
+ }
+
+ Log.i(TAG, status)
+ }
+ }) {
+ Text("Start")
+ }
+ if (progress.isNotEmpty()) {
+ Text(progress, fontSize = 25.sp)
+ }
+ }
+
+ Row(
+ modifier = Modifier.fillMaxWidth(),
+ horizontalArrangement = Arrangement.SpaceEvenly,
+ verticalAlignment = Alignment.CenterVertically
+ ) {
+ OutlinedTextField(
+ value = numSpeakers.toString(),
+ onValueChange = {
+                    if (it.isBlank()) {
+ numSpeakers = 0
+ } else {
+ numSpeakers = it.toIntOrNull() ?: 0
+ }
+ },
+ label = {
+ Text("Number of Speakers")
+ },
+ )
+ }
+
+ Row(
+ modifier = Modifier.fillMaxWidth(),
+ horizontalArrangement = Arrangement.SpaceEvenly,
+ verticalAlignment = Alignment.CenterVertically
+ ) {
+ OutlinedTextField(
+ value = threshold.toString(),
+ onValueChange = {
+                    if (it.isBlank()) {
+ threshold = 0.5f
+ } else {
+ threshold = it.toFloatOrNull() ?: 0.5f
+ }
+ },
+ label = {
+ Text("Clustering threshold")
+ },
+ )
+ }
+
+ if (filename.isNotEmpty()) {
+ Text(text = "Selected $filename")
+ Spacer(Modifier.size(20.dp))
+ }
+
+ if (done) {
+ Button(onClick = {
+ clipboardManager.setText(AnnotatedString(status))
+ progress = "Copied!"
+ }) {
+ Text("Copy result")
+ }
+ Spacer(Modifier.size(20.dp))
+ }
+
+ if (status.isNotEmpty()) {
+ Text(
+ status,
+ modifier = Modifier.verticalScroll(rememberScrollState()),
+ )
+ }
+
+
+ }
+}
\ No newline at end of file
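
Home.kt calls a readUri() helper (defined elsewhere in this app, not in this diff) that returns the decoded samples, the sample rate, and an optional error message. Purely to illustrate the 16 kHz / 16-bit / mono requirement from the help screen, here is a hypothetical header check; the names and the helper itself are assumptions, not the app's actual reader.

```kotlin
import java.io.InputStream
import java.nio.ByteBuffer
import java.nio.ByteOrder

// Hypothetical sketch: reads just enough of a canonical RIFF/WAVE header to
// validate "16 kHz, 16-bit, mono". Real files may carry extra chunks before
// "fmt ", which this simplified check does not handle.
data class WavInfo(val channels: Int, val sampleRate: Int, val bitsPerSample: Int)

fun readWavInfo(stream: InputStream): WavInfo? {
    val header = ByteArray(36)
    if (stream.read(header) != header.size) return null
    if (header.copyOfRange(0, 4).decodeToString() != "RIFF") return null
    if (header.copyOfRange(8, 12).decodeToString() != "WAVE") return null
    val buf = ByteBuffer.wrap(header).order(ByteOrder.LITTLE_ENDIAN)
    return WavInfo(
        channels = buf.getShort(22).toInt(),      // 1 expected
        sampleRate = buf.getInt(24),              // compared against sd.sampleRate()
        bitsPerSample = buf.getShort(34).toInt(), // 16 expected
    )
}
```

The app itself rejects files whose sample rate differs from SpeakerDiarizationObject.sd.sampleRate(), which remains the authoritative check.
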
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/ui/theme/Color.kt b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/ui/theme/Color.kt
new file mode 100644
index 0000000000..a96515d3da
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/ui/theme/Color.kt
@@ -0,0 +1,11 @@
+package com.k2fsa.sherpa.onnx.speaker.diarization.ui.theme
+
+import androidx.compose.ui.graphics.Color
+
+val Purple80 = Color(0xFFD0BCFF)
+val PurpleGrey80 = Color(0xFFCCC2DC)
+val Pink80 = Color(0xFFEFB8C8)
+
+val Purple40 = Color(0xFF6650a4)
+val PurpleGrey40 = Color(0xFF625b71)
+val Pink40 = Color(0xFF7D5260)
\ No newline at end of file
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/ui/theme/Theme.kt b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/ui/theme/Theme.kt
new file mode 100644
index 0000000000..5dbbe7e59e
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/ui/theme/Theme.kt
@@ -0,0 +1,58 @@
+package com.k2fsa.sherpa.onnx.speaker.diarization.ui.theme
+
+import android.app.Activity
+import android.os.Build
+import androidx.compose.foundation.isSystemInDarkTheme
+import androidx.compose.material3.MaterialTheme
+import androidx.compose.material3.darkColorScheme
+import androidx.compose.material3.dynamicDarkColorScheme
+import androidx.compose.material3.dynamicLightColorScheme
+import androidx.compose.material3.lightColorScheme
+import androidx.compose.runtime.Composable
+import androidx.compose.ui.platform.LocalContext
+
+private val DarkColorScheme = darkColorScheme(
+ primary = Purple80,
+ secondary = PurpleGrey80,
+ tertiary = Pink80
+)
+
+private val LightColorScheme = lightColorScheme(
+ primary = Purple40,
+ secondary = PurpleGrey40,
+ tertiary = Pink40
+
+ /* Other default colors to override
+ background = Color(0xFFFFFBFE),
+ surface = Color(0xFFFFFBFE),
+ onPrimary = Color.White,
+ onSecondary = Color.White,
+ onTertiary = Color.White,
+ onBackground = Color(0xFF1C1B1F),
+ onSurface = Color(0xFF1C1B1F),
+ */
+)
+
+@Composable
+fun SherpaOnnxSpeakerDiarizationTheme(
+ darkTheme: Boolean = isSystemInDarkTheme(),
+ // Dynamic color is available on Android 12+
+ dynamicColor: Boolean = true,
+ content: @Composable () -> Unit
+) {
+ val colorScheme = when {
+ dynamicColor && Build.VERSION.SDK_INT >= Build.VERSION_CODES.S -> {
+ val context = LocalContext.current
+ if (darkTheme) dynamicDarkColorScheme(context) else dynamicLightColorScheme(context)
+ }
+
+ darkTheme -> DarkColorScheme
+ else -> LightColorScheme
+ }
+
+ MaterialTheme(
+ colorScheme = colorScheme,
+ typography = Typography,
+ content = content
+ )
+}
\ No newline at end of file
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/ui/theme/Type.kt b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/ui/theme/Type.kt
new file mode 100644
index 0000000000..39a81b9418
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/ui/theme/Type.kt
@@ -0,0 +1,34 @@
+package com.k2fsa.sherpa.onnx.speaker.diarization.ui.theme
+
+import androidx.compose.material3.Typography
+import androidx.compose.ui.text.TextStyle
+import androidx.compose.ui.text.font.FontFamily
+import androidx.compose.ui.text.font.FontWeight
+import androidx.compose.ui.unit.sp
+
+// Set of Material typography styles to start with
+val Typography = Typography(
+ bodyLarge = TextStyle(
+ fontFamily = FontFamily.Default,
+ fontWeight = FontWeight.Normal,
+ fontSize = 16.sp,
+ lineHeight = 24.sp,
+ letterSpacing = 0.5.sp
+ )
+ /* Other default text styles to override
+ titleLarge = TextStyle(
+ fontFamily = FontFamily.Default,
+ fontWeight = FontWeight.Normal,
+ fontSize = 22.sp,
+ lineHeight = 28.sp,
+ letterSpacing = 0.sp
+ ),
+ labelSmall = TextStyle(
+ fontFamily = FontFamily.Default,
+ fontWeight = FontWeight.Medium,
+ fontSize = 11.sp,
+ lineHeight = 16.sp,
+ letterSpacing = 0.5.sp
+ )
+ */
+)
\ No newline at end of file
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/jniLibs/arm64-v8a/.gitkeep b/android/SherpaOnnxSpeakerDiarization/app/src/main/jniLibs/arm64-v8a/.gitkeep
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/jniLibs/armeabi-v7a/.gitkeep b/android/SherpaOnnxSpeakerDiarization/app/src/main/jniLibs/armeabi-v7a/.gitkeep
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/jniLibs/x86/.gitkeep b/android/SherpaOnnxSpeakerDiarization/app/src/main/jniLibs/x86/.gitkeep
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/jniLibs/x86_64/.gitkeep b/android/SherpaOnnxSpeakerDiarization/app/src/main/jniLibs/x86_64/.gitkeep
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/res/drawable-v24/ic_launcher_foreground.xml b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/drawable-v24/ic_launcher_foreground.xml
new file mode 100644
index 0000000000..2b068d1146
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/drawable-v24/ic_launcher_foreground.xml
@@ -0,0 +1,30 @@
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/res/drawable/ic_launcher_background.xml b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/drawable/ic_launcher_background.xml
new file mode 100644
index 0000000000..07d5da9cbf
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/drawable/ic_launcher_background.xml
@@ -0,0 +1,170 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml
new file mode 100644
index 0000000000..6f3b755bf5
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="utf-8"?>
+<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
+    <background android:drawable="@drawable/ic_launcher_background" />
+    <foreground android:drawable="@drawable/ic_launcher_foreground" />
+    <monochrome android:drawable="@drawable/ic_launcher_foreground" />
+</adaptive-icon>
\ No newline at end of file
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml
new file mode 100644
index 0000000000..6f3b755bf5
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="utf-8"?>
+<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
+    <background android:drawable="@drawable/ic_launcher_background" />
+    <foreground android:drawable="@drawable/ic_launcher_foreground" />
+    <monochrome android:drawable="@drawable/ic_launcher_foreground" />
+</adaptive-icon>
\ No newline at end of file
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-hdpi/ic_launcher.webp b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-hdpi/ic_launcher.webp
new file mode 100644
index 0000000000..c209e78ecd
Binary files /dev/null and b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-hdpi/ic_launcher.webp differ
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-hdpi/ic_launcher_round.webp b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-hdpi/ic_launcher_round.webp
new file mode 100644
index 0000000000..b2dfe3d1ba
Binary files /dev/null and b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-hdpi/ic_launcher_round.webp differ
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-mdpi/ic_launcher.webp b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-mdpi/ic_launcher.webp
new file mode 100644
index 0000000000..4f0f1d64e5
Binary files /dev/null and b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-mdpi/ic_launcher.webp differ
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-mdpi/ic_launcher_round.webp b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-mdpi/ic_launcher_round.webp
new file mode 100644
index 0000000000..62b611da08
Binary files /dev/null and b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-mdpi/ic_launcher_round.webp differ
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-xhdpi/ic_launcher.webp b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-xhdpi/ic_launcher.webp
new file mode 100644
index 0000000000..948a3070fe
Binary files /dev/null and b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-xhdpi/ic_launcher.webp differ
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-xhdpi/ic_launcher_round.webp b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-xhdpi/ic_launcher_round.webp
new file mode 100644
index 0000000000..1b9a6956b3
Binary files /dev/null and b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-xhdpi/ic_launcher_round.webp differ
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-xxhdpi/ic_launcher.webp b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-xxhdpi/ic_launcher.webp
new file mode 100644
index 0000000000..28d4b77f9f
Binary files /dev/null and b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-xxhdpi/ic_launcher.webp differ
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-xxhdpi/ic_launcher_round.webp b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-xxhdpi/ic_launcher_round.webp
new file mode 100644
index 0000000000..9287f50836
Binary files /dev/null and b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-xxhdpi/ic_launcher_round.webp differ
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-xxxhdpi/ic_launcher.webp b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-xxxhdpi/ic_launcher.webp
new file mode 100644
index 0000000000..aa7d6427e6
Binary files /dev/null and b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-xxxhdpi/ic_launcher.webp differ
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-xxxhdpi/ic_launcher_round.webp b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-xxxhdpi/ic_launcher_round.webp
new file mode 100644
index 0000000000..9126ae37cb
Binary files /dev/null and b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-xxxhdpi/ic_launcher_round.webp differ
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/res/values/colors.xml b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/values/colors.xml
new file mode 100644
index 0000000000..f8c6127d32
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/values/colors.xml
@@ -0,0 +1,10 @@
+<?xml version="1.0" encoding="utf-8"?>
+<resources>
+    <color name="purple_200">#FFBB86FC</color>
+    <color name="purple_500">#FF6200EE</color>
+    <color name="purple_700">#FF3700B3</color>
+    <color name="teal_200">#FF03DAC5</color>
+    <color name="teal_700">#FF018786</color>
+    <color name="black">#FF000000</color>
+    <color name="white">#FFFFFFFF</color>
+</resources>
\ No newline at end of file
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/res/values/strings.xml b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/values/strings.xml
new file mode 100644
index 0000000000..05f2df0901
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/values/strings.xml
@@ -0,0 +1,3 @@
+<resources>
+    <string name="app_name">SherpaOnnxSpeakerDiarization</string>
+</resources>
\ No newline at end of file
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/res/values/themes.xml b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/values/themes.xml
new file mode 100644
index 0000000000..34d1d96ed3
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/values/themes.xml
@@ -0,0 +1,5 @@
+<?xml version="1.0" encoding="utf-8"?>
+<resources>
+
+    <style name="Theme.SherpaOnnxSpeakerDiarization" parent="android:Theme.Material.Light.NoActionBar" />
+</resources>
\ No newline at end of file
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/res/xml/backup_rules.xml b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/xml/backup_rules.xml
new file mode 100644
index 0000000000..fa0f996d2c
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/xml/backup_rules.xml
@@ -0,0 +1,13 @@
+
+
+
+
\ No newline at end of file
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/res/xml/data_extraction_rules.xml b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/xml/data_extraction_rules.xml
new file mode 100644
index 0000000000..9ee9997b0b
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/xml/data_extraction_rules.xml
@@ -0,0 +1,19 @@
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/test/java/com/k2fsa/sherpa/onnx/speaker/diarization/ExampleUnitTest.kt b/android/SherpaOnnxSpeakerDiarization/app/src/test/java/com/k2fsa/sherpa/onnx/speaker/diarization/ExampleUnitTest.kt
new file mode 100644
index 0000000000..5571dbb56c
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/app/src/test/java/com/k2fsa/sherpa/onnx/speaker/diarization/ExampleUnitTest.kt
@@ -0,0 +1,17 @@
+package com.k2fsa.sherpa.onnx.speaker.diarization
+
+import org.junit.Test
+
+import org.junit.Assert.*
+
+/**
+ * Example local unit test, which will execute on the development machine (host).
+ *
+ * See [testing documentation](http://d.android.com/tools/testing).
+ */
+class ExampleUnitTest {
+ @Test
+ fun addition_isCorrect() {
+ assertEquals(4, 2 + 2)
+ }
+}
\ No newline at end of file
diff --git a/android/SherpaOnnxSpeakerDiarization/build.gradle.kts b/android/SherpaOnnxSpeakerDiarization/build.gradle.kts
new file mode 100644
index 0000000000..f74b04bf26
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/build.gradle.kts
@@ -0,0 +1,5 @@
+// Top-level build file where you can add configuration options common to all sub-projects/modules.
+plugins {
+ alias(libs.plugins.android.application) apply false
+ alias(libs.plugins.jetbrains.kotlin.android) apply false
+}
\ No newline at end of file
diff --git a/android/SherpaOnnxSpeakerDiarization/gradle.properties b/android/SherpaOnnxSpeakerDiarization/gradle.properties
new file mode 100644
index 0000000000..20e2a01520
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/gradle.properties
@@ -0,0 +1,23 @@
+# Project-wide Gradle settings.
+# IDE (e.g. Android Studio) users:
+# Gradle settings configured through the IDE *will override*
+# any settings specified in this file.
+# For more details on how to configure your build environment visit
+# http://www.gradle.org/docs/current/userguide/build_environment.html
+# Specifies the JVM arguments used for the daemon process.
+# The setting is particularly useful for tweaking memory settings.
+org.gradle.jvmargs=-Xmx2048m -Dfile.encoding=UTF-8
+# When configured, Gradle will run in incubating parallel mode.
+# This option should only be used with decoupled projects. For more details, visit
+# https://developer.android.com/r/tools/gradle-multi-project-decoupled-projects
+# org.gradle.parallel=true
+# AndroidX package structure to make it clearer which packages are bundled with the
+# Android operating system, and which are packaged with your app's APK
+# https://developer.android.com/topic/libraries/support-library/androidx-rn
+android.useAndroidX=true
+# Kotlin code style for this project: "official" or "obsolete":
+kotlin.code.style=official
+# Enables namespacing of each library's R class so that its R class includes only the
+# resources declared in the library itself and none from the library's dependencies,
+# thereby reducing the size of the R class for that library
+android.nonTransitiveRClass=true
\ No newline at end of file
diff --git a/android/SherpaOnnxSpeakerDiarization/gradle/libs.versions.toml b/android/SherpaOnnxSpeakerDiarization/gradle/libs.versions.toml
new file mode 100644
index 0000000000..fe93be92df
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/gradle/libs.versions.toml
@@ -0,0 +1,35 @@
+[versions]
+agp = "8.4.0"
+kotlin = "1.9.0"
+coreKtx = "1.10.1"
+junit = "4.13.2"
+junitVersion = "1.1.5"
+espressoCore = "3.5.1"
+lifecycleRuntimeKtx = "2.6.1"
+activityCompose = "1.8.0"
+composeBom = "2023.08.00"
+navigationCompose = "2.8.2"
+documentfile = "1.0.1"
+
+[libraries]
+androidx-core-ktx = { group = "androidx.core", name = "core-ktx", version.ref = "coreKtx" }
+junit = { group = "junit", name = "junit", version.ref = "junit" }
+androidx-junit = { group = "androidx.test.ext", name = "junit", version.ref = "junitVersion" }
+androidx-espresso-core = { group = "androidx.test.espresso", name = "espresso-core", version.ref = "espressoCore" }
+androidx-lifecycle-runtime-ktx = { group = "androidx.lifecycle", name = "lifecycle-runtime-ktx", version.ref = "lifecycleRuntimeKtx" }
+androidx-activity-compose = { group = "androidx.activity", name = "activity-compose", version.ref = "activityCompose" }
+androidx-compose-bom = { group = "androidx.compose", name = "compose-bom", version.ref = "composeBom" }
+androidx-ui = { group = "androidx.compose.ui", name = "ui" }
+androidx-ui-graphics = { group = "androidx.compose.ui", name = "ui-graphics" }
+androidx-ui-tooling = { group = "androidx.compose.ui", name = "ui-tooling" }
+androidx-ui-tooling-preview = { group = "androidx.compose.ui", name = "ui-tooling-preview" }
+androidx-ui-test-manifest = { group = "androidx.compose.ui", name = "ui-test-manifest" }
+androidx-ui-test-junit4 = { group = "androidx.compose.ui", name = "ui-test-junit4" }
+androidx-material3 = { group = "androidx.compose.material3", name = "material3" }
+androidx-navigation-compose = { group = "androidx.navigation", name = "navigation-compose", version.ref = "navigationCompose" }
+androidx-documentfile = { group = "androidx.documentfile", name = "documentfile", version.ref = "documentfile" }
+
+[plugins]
+android-application = { id = "com.android.application", version.ref = "agp" }
+jetbrains-kotlin-android = { id = "org.jetbrains.kotlin.android", version.ref = "kotlin" }
+
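
For context on how this catalog is consumed: the alias() lines below match the root build.gradle.kts added in this PR, while the dependencies block is an illustrative sketch (the module's actual build.gradle.kts is not shown in this part of the diff), using the accessors Gradle derives from the keys above.

```kotlin
// Sketch of referencing the version catalog from a module's build.gradle.kts.
plugins {
    alias(libs.plugins.android.application)
    alias(libs.plugins.jetbrains.kotlin.android)
}

dependencies {
    implementation(libs.androidx.core.ktx)
    implementation(libs.androidx.activity.compose)
    implementation(platform(libs.androidx.compose.bom)) // the BOM pins the ui/material3 artifacts
    implementation(libs.androidx.material3)
    implementation(libs.androidx.navigation.compose)
    implementation(libs.androidx.documentfile)
    testImplementation(libs.junit)
}
```
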
diff --git a/android/SherpaOnnxSpeakerDiarization/gradle/wrapper/gradle-wrapper.jar b/android/SherpaOnnxSpeakerDiarization/gradle/wrapper/gradle-wrapper.jar
new file mode 100644
index 0000000000..e708b1c023
Binary files /dev/null and b/android/SherpaOnnxSpeakerDiarization/gradle/wrapper/gradle-wrapper.jar differ
diff --git a/android/SherpaOnnxSpeakerDiarization/gradle/wrapper/gradle-wrapper.properties b/android/SherpaOnnxSpeakerDiarization/gradle/wrapper/gradle-wrapper.properties
new file mode 100644
index 0000000000..a46693001c
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/gradle/wrapper/gradle-wrapper.properties
@@ -0,0 +1,6 @@
+#Sat Oct 12 14:27:04 CST 2024
+distributionBase=GRADLE_USER_HOME
+distributionPath=wrapper/dists
+distributionUrl=https\://services.gradle.org/distributions/gradle-8.6-bin.zip
+zipStoreBase=GRADLE_USER_HOME
+zipStorePath=wrapper/dists
diff --git a/android/SherpaOnnxSpeakerDiarization/gradlew b/android/SherpaOnnxSpeakerDiarization/gradlew
new file mode 100755
index 0000000000..4f906e0c81
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/gradlew
@@ -0,0 +1,185 @@
+#!/usr/bin/env sh
+
+#
+# Copyright 2015 the original author or authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+##############################################################################
+##
+## Gradle start up script for UN*X
+##
+##############################################################################
+
+# Attempt to set APP_HOME
+# Resolve links: $0 may be a link
+PRG="$0"
+# Need this for relative symlinks.
+while [ -h "$PRG" ] ; do
+ ls=`ls -ld "$PRG"`
+ link=`expr "$ls" : '.*-> \(.*\)$'`
+ if expr "$link" : '/.*' > /dev/null; then
+ PRG="$link"
+ else
+ PRG=`dirname "$PRG"`"/$link"
+ fi
+done
+SAVED="`pwd`"
+cd "`dirname \"$PRG\"`/" >/dev/null
+APP_HOME="`pwd -P`"
+cd "$SAVED" >/dev/null
+
+APP_NAME="Gradle"
+APP_BASE_NAME=`basename "$0"`
+
+# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
+DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'
+
+# Use the maximum available, or set MAX_FD != -1 to use that value.
+MAX_FD="maximum"
+
+warn () {
+ echo "$*"
+}
+
+die () {
+ echo
+ echo "$*"
+ echo
+ exit 1
+}
+
+# OS specific support (must be 'true' or 'false').
+cygwin=false
+msys=false
+darwin=false
+nonstop=false
+case "`uname`" in
+ CYGWIN* )
+ cygwin=true
+ ;;
+ Darwin* )
+ darwin=true
+ ;;
+ MINGW* )
+ msys=true
+ ;;
+ NONSTOP* )
+ nonstop=true
+ ;;
+esac
+
+CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
+
+
+# Determine the Java command to use to start the JVM.
+if [ -n "$JAVA_HOME" ] ; then
+ if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
+ # IBM's JDK on AIX uses strange locations for the executables
+ JAVACMD="$JAVA_HOME/jre/sh/java"
+ else
+ JAVACMD="$JAVA_HOME/bin/java"
+ fi
+ if [ ! -x "$JAVACMD" ] ; then
+ die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
+
+Please set the JAVA_HOME variable in your environment to match the
+location of your Java installation."
+ fi
+else
+ JAVACMD="java"
+ which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
+
+Please set the JAVA_HOME variable in your environment to match the
+location of your Java installation."
+fi
+
+# Increase the maximum file descriptors if we can.
+if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
+ MAX_FD_LIMIT=`ulimit -H -n`
+ if [ $? -eq 0 ] ; then
+ if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
+ MAX_FD="$MAX_FD_LIMIT"
+ fi
+ ulimit -n $MAX_FD
+ if [ $? -ne 0 ] ; then
+ warn "Could not set maximum file descriptor limit: $MAX_FD"
+ fi
+ else
+ warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
+ fi
+fi
+
+# For Darwin, add options to specify how the application appears in the dock
+if $darwin; then
+ GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
+fi
+
+# For Cygwin or MSYS, switch paths to Windows format before running java
+if [ "$cygwin" = "true" -o "$msys" = "true" ] ; then
+ APP_HOME=`cygpath --path --mixed "$APP_HOME"`
+ CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
+
+ JAVACMD=`cygpath --unix "$JAVACMD"`
+
+ # We build the pattern for arguments to be converted via cygpath
+ ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
+ SEP=""
+ for dir in $ROOTDIRSRAW ; do
+ ROOTDIRS="$ROOTDIRS$SEP$dir"
+ SEP="|"
+ done
+ OURCYGPATTERN="(^($ROOTDIRS))"
+ # Add a user-defined pattern to the cygpath arguments
+ if [ "$GRADLE_CYGPATTERN" != "" ] ; then
+ OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
+ fi
+ # Now convert the arguments - kludge to limit ourselves to /bin/sh
+ i=0
+ for arg in "$@" ; do
+ CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
+ CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option
+
+ if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition
+ eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
+ else
+ eval `echo args$i`="\"$arg\""
+ fi
+ i=`expr $i + 1`
+ done
+ case $i in
+ 0) set -- ;;
+ 1) set -- "$args0" ;;
+ 2) set -- "$args0" "$args1" ;;
+ 3) set -- "$args0" "$args1" "$args2" ;;
+ 4) set -- "$args0" "$args1" "$args2" "$args3" ;;
+ 5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
+ 6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
+ 7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
+ 8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
+ 9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
+ esac
+fi
+
+# Escape application args
+save () {
+ for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
+ echo " "
+}
+APP_ARGS=`save "$@"`
+
+# Collect all arguments for the java command, following the shell quoting and substitution rules
+eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"
+
+exec "$JAVACMD" "$@"
diff --git a/android/SherpaOnnxSpeakerDiarization/gradlew.bat b/android/SherpaOnnxSpeakerDiarization/gradlew.bat
new file mode 100644
index 0000000000..ac1b06f938
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/gradlew.bat
@@ -0,0 +1,89 @@
+@rem
+@rem Copyright 2015 the original author or authors.
+@rem
+@rem Licensed under the Apache License, Version 2.0 (the "License");
+@rem you may not use this file except in compliance with the License.
+@rem You may obtain a copy of the License at
+@rem
+@rem https://www.apache.org/licenses/LICENSE-2.0
+@rem
+@rem Unless required by applicable law or agreed to in writing, software
+@rem distributed under the License is distributed on an "AS IS" BASIS,
+@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@rem See the License for the specific language governing permissions and
+@rem limitations under the License.
+@rem
+
+@if "%DEBUG%" == "" @echo off
+@rem ##########################################################################
+@rem
+@rem Gradle startup script for Windows
+@rem
+@rem ##########################################################################
+
+@rem Set local scope for the variables with windows NT shell
+if "%OS%"=="Windows_NT" setlocal
+
+set DIRNAME=%~dp0
+if "%DIRNAME%" == "" set DIRNAME=.
+set APP_BASE_NAME=%~n0
+set APP_HOME=%DIRNAME%
+
+@rem Resolve any "." and ".." in APP_HOME to make it shorter.
+for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi
+
+@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
+set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m"
+
+@rem Find java.exe
+if defined JAVA_HOME goto findJavaFromJavaHome
+
+set JAVA_EXE=java.exe
+%JAVA_EXE% -version >NUL 2>&1
+if "%ERRORLEVEL%" == "0" goto execute
+
+echo.
+echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
+echo.
+echo Please set the JAVA_HOME variable in your environment to match the
+echo location of your Java installation.
+
+goto fail
+
+:findJavaFromJavaHome
+set JAVA_HOME=%JAVA_HOME:"=%
+set JAVA_EXE=%JAVA_HOME%/bin/java.exe
+
+if exist "%JAVA_EXE%" goto execute
+
+echo.
+echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
+echo.
+echo Please set the JAVA_HOME variable in your environment to match the
+echo location of your Java installation.
+
+goto fail
+
+:execute
+@rem Setup the command line
+
+set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
+
+
+@rem Execute Gradle
+"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %*
+
+:end
+@rem End local scope for the variables with windows NT shell
+if "%ERRORLEVEL%"=="0" goto mainEnd
+
+:fail
+rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
+rem the _cmd.exe /c_ return code!
+if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
+exit /b 1
+
+:mainEnd
+if "%OS%"=="Windows_NT" endlocal
+
+:omega
diff --git a/android/SherpaOnnxSpeakerDiarization/settings.gradle.kts b/android/SherpaOnnxSpeakerDiarization/settings.gradle.kts
new file mode 100644
index 0000000000..7226b5499a
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/settings.gradle.kts
@@ -0,0 +1,23 @@
+pluginManagement {
+ repositories {
+ google {
+ content {
+ includeGroupByRegex("com\\.android.*")
+ includeGroupByRegex("com\\.google.*")
+ includeGroupByRegex("androidx.*")
+ }
+ }
+ mavenCentral()
+ gradlePluginPortal()
+ }
+}
+dependencyResolutionManagement {
+ repositoriesMode.set(RepositoriesMode.FAIL_ON_PROJECT_REPOS)
+ repositories {
+ google()
+ mavenCentral()
+ }
+}
+
+rootProject.name = "SherpaOnnxSpeakerDiarization"
+include(":app")
diff --git a/android/SherpaOnnxSpeakerIdentification/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/identification/SpeakerEmbeddingExtractorConfig.kt b/android/SherpaOnnxSpeakerIdentification/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/identification/SpeakerEmbeddingExtractorConfig.kt
new file mode 120000
index 0000000000..9bab8fe88a
--- /dev/null
+++ b/android/SherpaOnnxSpeakerIdentification/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/identification/SpeakerEmbeddingExtractorConfig.kt
@@ -0,0 +1 @@
+../../../../../../../../../../../../sherpa-onnx/kotlin-api/SpeakerEmbeddingExtractorConfig.kt
\ No newline at end of file
diff --git a/android/SherpaOnnxTts/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt b/android/SherpaOnnxTts/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt
index b95ad7d787..99e49e782d 100644
--- a/android/SherpaOnnxTts/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt
+++ b/android/SherpaOnnxTts/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt
@@ -183,6 +183,9 @@ class MainActivity : AppCompatActivity() {
private fun initTts() {
var modelDir: String?
var modelName: String?
+ var acousticModelName: String?
+ var vocoder: String?
+ var voices: String?
var ruleFsts: String?
var ruleFars: String?
var lexicon: String?
@@ -193,8 +196,22 @@ class MainActivity : AppCompatActivity() {
// The purpose of such a design is to make the CI test easier
// Please see
// https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/apk/generate-tts-apk-script.py
- modelDir = null
+
+ // VITS -- begin
modelName = null
+ // VITS -- end
+
+ // Matcha -- begin
+ acousticModelName = null
+ vocoder = null
+ // Matcha -- end
+
+ // For Kokoro -- begin
+ voices = null
+ // For Kokoro -- end
+
+
+ modelDir = null
ruleFsts = null
ruleFars = null
lexicon = null
@@ -217,7 +234,6 @@ class MainActivity : AppCompatActivity() {
// https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
// modelDir = "vits-icefall-zh-aishell3"
// modelName = "model.onnx"
- // ruleFsts = "vits-icefall-zh-aishell3/phone.fst,vits-icefall-zh-aishell3/date.fst,vits-icefall-zh-aishell3/number.fst,vits-icefall-zh-aishell3/new_heteronym.fst"
// ruleFars = "vits-icefall-zh-aishell3/rule.far"
// lexicon = "lexicon.txt"
@@ -233,24 +249,67 @@ class MainActivity : AppCompatActivity() {
// modelDir = "vits-coqui-de-css10"
// modelName = "model.onnx"
+ // Example 6
+ // vits-melo-tts-zh_en
+ // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#vits-melo-tts-zh-en-chinese-english-1-speaker
+ // modelDir = "vits-melo-tts-zh_en"
+ // modelName = "model.onnx"
+ // lexicon = "lexicon.txt"
+ // dictDir = "vits-melo-tts-zh_en/dict"
+
+ // Example 7
+ // matcha-icefall-zh-baker
+ // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker
+ // modelDir = "matcha-icefall-zh-baker"
+ // acousticModelName = "model-steps-3.onnx"
+ // vocoder = "hifigan_v2.onnx"
+ // lexicon = "lexicon.txt"
+ // dictDir = "matcha-icefall-zh-baker/dict"
+
+ // Example 8
+ // matcha-icefall-en_US-ljspeech
+ // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
+ // modelDir = "matcha-icefall-en_US-ljspeech"
+ // acousticModelName = "model-steps-3.onnx"
+ // vocoder = "hifigan_v2.onnx"
+ // dataDir = "matcha-icefall-en_US-ljspeech/espeak-ng-data"
+
+ // Example 9
+ // kokoro-en-v0_19
+ // modelDir = "kokoro-en-v0_19"
+ // modelName = "model.onnx"
+ // voices = "voices.bin"
+ // dataDir = "kokoro-en-v0_19/espeak-ng-data"
+
+ // Example 10
+ // kokoro-multi-lang-v1_0
+ // modelDir = "kokoro-multi-lang-v1_0"
+ // modelName = "model.onnx"
+ // voices = "voices.bin"
+ // dataDir = "kokoro-multi-lang-v1_0/espeak-ng-data"
+ // dictDir = "kokoro-multi-lang-v1_0/dict"
+ // lexicon = "kokoro-multi-lang-v1_0/lexicon-us-en.txt,kokoro-multi-lang-v1_0/lexicon-zh.txt"
+ // ruleFsts = "$modelDir/phone-zh.fst,$modelDir/date-zh.fst,$modelDir/number-zh.fst"
+
if (dataDir != null) {
- val newDir = copyDataDir(modelDir!!)
- modelDir = newDir + "/" + modelDir
- dataDir = newDir + "/" + dataDir
- assets = null
+ val newDir = copyDataDir(dataDir!!)
+ dataDir = "$newDir/$dataDir"
}
if (dictDir != null) {
- val newDir = copyDataDir(modelDir!!)
- modelDir = newDir + "/" + modelDir
- dictDir = modelDir + "/" + "dict"
- ruleFsts = "$modelDir/phone.fst,$modelDir/date.fst,$modelDir/number.fst"
- assets = null
+ val newDir = copyDataDir(dictDir!!)
+ dictDir = "$newDir/$dictDir"
+ if (ruleFsts == null) {
+ ruleFsts = "$modelDir/phone.fst,$modelDir/date.fst,$modelDir/number.fst"
+ }
}
val config = getOfflineTtsConfig(
modelDir = modelDir!!,
- modelName = modelName!!,
+ modelName = modelName ?: "",
+ acousticModelName = acousticModelName ?: "",
+ vocoder = vocoder ?: "",
+ voices = voices ?: "",
lexicon = lexicon ?: "",
dataDir = dataDir ?: "",
dictDir = dictDir ?: "",
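
To make the selection above concrete: exactly one model family is enabled by filling in its variables and leaving the rest null (they are passed to getOfflineTtsConfig as empty strings). A sketch that enables Example 7, with values copied verbatim from the commented block above:

```kotlin
// Matcha example (values from Example 7); modelName and voices stay null,
// so the VITS and Kokoro fields end up as empty strings in the config.
modelDir = "matcha-icefall-zh-baker"
acousticModelName = "model-steps-3.onnx"
vocoder = "hifigan_v2.onnx"
lexicon = "lexicon.txt"
dictDir = "matcha-icefall-zh-baker/dict"
```
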
diff --git a/android/SherpaOnnxTts/app/src/main/java/com/k2fsa/sherpa/onnx/Tts.kt b/android/SherpaOnnxTts/app/src/main/java/com/k2fsa/sherpa/onnx/Tts.kt
deleted file mode 100644
index 4f9c4b6f6b..0000000000
--- a/android/SherpaOnnxTts/app/src/main/java/com/k2fsa/sherpa/onnx/Tts.kt
+++ /dev/null
@@ -1,187 +0,0 @@
-// Copyright (c) 2023 Xiaomi Corporation
-package com.k2fsa.sherpa.onnx
-
-import android.content.res.AssetManager
-
-data class OfflineTtsVitsModelConfig(
- var model: String,
- var lexicon: String = "",
- var tokens: String,
- var dataDir: String = "",
- var dictDir: String = "",
- var noiseScale: Float = 0.667f,
- var noiseScaleW: Float = 0.8f,
- var lengthScale: Float = 1.0f,
-)
-
-data class OfflineTtsModelConfig(
- var vits: OfflineTtsVitsModelConfig,
- var numThreads: Int = 1,
- var debug: Boolean = false,
- var provider: String = "cpu",
-)
-
-data class OfflineTtsConfig(
- var model: OfflineTtsModelConfig,
- var ruleFsts: String = "",
- var ruleFars: String = "",
- var maxNumSentences: Int = 1,
-)
-
-class GeneratedAudio(
- val samples: FloatArray,
- val sampleRate: Int,
-) {
- fun save(filename: String) =
- saveImpl(filename = filename, samples = samples, sampleRate = sampleRate)
-
- private external fun saveImpl(
- filename: String,
- samples: FloatArray,
- sampleRate: Int
- ): Boolean
-}
-
-class OfflineTts(
- assetManager: AssetManager? = null,
- var config: OfflineTtsConfig,
-) {
- private var ptr: Long
-
- init {
- ptr = if (assetManager != null) {
- newFromAsset(assetManager, config)
- } else {
- newFromFile(config)
- }
- }
-
- fun sampleRate() = getSampleRate(ptr)
-
- fun numSpeakers() = getNumSpeakers(ptr)
-
- fun generate(
- text: String,
- sid: Int = 0,
- speed: Float = 1.0f
- ): GeneratedAudio {
- val objArray = generateImpl(ptr, text = text, sid = sid, speed = speed)
- return GeneratedAudio(
- samples = objArray[0] as FloatArray,
- sampleRate = objArray[1] as Int
- )
- }
-
- fun generateWithCallback(
- text: String,
- sid: Int = 0,
- speed: Float = 1.0f,
- callback: (samples: FloatArray) -> Int
- ): GeneratedAudio {
- val objArray = generateWithCallbackImpl(
- ptr,
- text = text,
- sid = sid,
- speed = speed,
- callback = callback
- )
- return GeneratedAudio(
- samples = objArray[0] as FloatArray,
- sampleRate = objArray[1] as Int
- )
- }
-
- fun allocate(assetManager: AssetManager? = null) {
- if (ptr == 0L) {
- ptr = if (assetManager != null) {
- newFromAsset(assetManager, config)
- } else {
- newFromFile(config)
- }
- }
- }
-
- fun free() {
- if (ptr != 0L) {
- delete(ptr)
- ptr = 0
- }
- }
-
- protected fun finalize() {
- if (ptr != 0L) {
- delete(ptr)
- ptr = 0
- }
- }
-
- fun release() = finalize()
-
- private external fun newFromAsset(
- assetManager: AssetManager,
- config: OfflineTtsConfig,
- ): Long
-
- private external fun newFromFile(
- config: OfflineTtsConfig,
- ): Long
-
- private external fun delete(ptr: Long)
- private external fun getSampleRate(ptr: Long): Int
- private external fun getNumSpeakers(ptr: Long): Int
-
- // The returned array has two entries:
- // - the first entry is an 1-D float array containing audio samples.
- // Each sample is normalized to the range [-1, 1]
- // - the second entry is the sample rate
- private external fun generateImpl(
- ptr: Long,
- text: String,
- sid: Int = 0,
- speed: Float = 1.0f
-    ): Array<Any>
-
- private external fun generateWithCallbackImpl(
- ptr: Long,
- text: String,
- sid: Int = 0,
- speed: Float = 1.0f,
- callback: (samples: FloatArray) -> Int
-    ): Array<Any>
-
- companion object {
- init {
- System.loadLibrary("sherpa-onnx-jni")
- }
- }
-}
-
-// please refer to
-// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/index.html
-// to download models
-fun getOfflineTtsConfig(
- modelDir: String,
- modelName: String,
- lexicon: String,
- dataDir: String,
- dictDir: String,
- ruleFsts: String,
- ruleFars: String
-): OfflineTtsConfig {
- return OfflineTtsConfig(
- model = OfflineTtsModelConfig(
- vits = OfflineTtsVitsModelConfig(
- model = "$modelDir/$modelName",
- lexicon = "$modelDir/$lexicon",
- tokens = "$modelDir/tokens.txt",
- dataDir = dataDir,
- dictDir = dictDir,
- ),
- numThreads = 2,
- debug = true,
- provider = "cpu",
- ),
- ruleFsts = ruleFsts,
- ruleFars = ruleFars,
- )
-}
diff --git a/android/SherpaOnnxTts/app/src/main/java/com/k2fsa/sherpa/onnx/Tts.kt b/android/SherpaOnnxTts/app/src/main/java/com/k2fsa/sherpa/onnx/Tts.kt
new file mode 120000
index 0000000000..f1392e7712
--- /dev/null
+++ b/android/SherpaOnnxTts/app/src/main/java/com/k2fsa/sherpa/onnx/Tts.kt
@@ -0,0 +1 @@
+../../../../../../../../../../sherpa-onnx/kotlin-api/Tts.kt
\ No newline at end of file
diff --git a/android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/GetSampleText.kt b/android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/GetSampleText.kt
index a01e0a7b6d..e372be4329 100644
--- a/android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/GetSampleText.kt
+++ b/android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/GetSampleText.kt
@@ -47,7 +47,7 @@ fun getSampleText(lang: String): String {
}
"eng" -> {
- text = "This is a text-to-speech engine using next generation Kaldi"
+ text = "How are you doing today? This is a text-to-speech engine using next generation Kaldi"
}
"est" -> {
diff --git a/android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/MainActivity.kt b/android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/MainActivity.kt
index 9a6bd47aba..c96f9f0efc 100644
--- a/android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/MainActivity.kt
+++ b/android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/MainActivity.kt
@@ -3,6 +3,10 @@
package com.k2fsa.sherpa.onnx.tts.engine
import PreferenceHelper
+import android.media.AudioAttributes
+import android.media.AudioFormat
+import android.media.AudioManager
+import android.media.AudioTrack
import android.media.MediaPlayer
import android.net.Uri
import android.os.Bundle
@@ -18,7 +22,9 @@ import androidx.compose.foundation.layout.fillMaxSize
import androidx.compose.foundation.layout.fillMaxWidth
import androidx.compose.foundation.layout.padding
import androidx.compose.foundation.layout.wrapContentHeight
+import androidx.compose.foundation.rememberScrollState
import androidx.compose.foundation.text.KeyboardOptions
+import androidx.compose.foundation.verticalScroll
import androidx.compose.material3.Button
import androidx.compose.material3.ExperimentalMaterial3Api
import androidx.compose.material3.MaterialTheme
@@ -36,7 +42,13 @@ import androidx.compose.ui.Modifier
import androidx.compose.ui.text.input.KeyboardType
import androidx.compose.ui.unit.dp
import com.k2fsa.sherpa.onnx.tts.engine.ui.theme.SherpaOnnxTtsEngineTheme
+import kotlinx.coroutines.CoroutineScope
+import kotlinx.coroutines.Dispatchers
+import kotlinx.coroutines.channels.Channel
+import kotlinx.coroutines.launch
+import kotlinx.coroutines.withContext
import java.io.File
+import kotlin.time.TimeSource
const val TAG = "sherpa-onnx-tts-engine"
@@ -45,9 +57,26 @@ class MainActivity : ComponentActivity() {
private val ttsViewModel: TtsViewModel by viewModels()
private var mediaPlayer: MediaPlayer? = null
+
+ // see
+ // https://developer.android.com/reference/kotlin/android/media/AudioTrack
+ private lateinit var track: AudioTrack
+
+ private var stopped: Boolean = false
+
+    private var samplesChannel = Channel<FloatArray>()
+
override fun onCreate(savedInstanceState: Bundle?) {
super.onCreate(savedInstanceState)
+
+ Log.i(TAG, "Start to initialize TTS")
TtsEngine.createTts(this)
+ Log.i(TAG, "Finish initializing TTS")
+
+ Log.i(TAG, "Start to initialize AudioTrack")
+ initAudioTrack()
+ Log.i(TAG, "Finish initializing AudioTrack")
+
val preferenceHelper = PreferenceHelper(this)
setContent {
SherpaOnnxTtsEngineTheme {
@@ -57,7 +86,7 @@ class MainActivity : ComponentActivity() {
color = MaterialTheme.colorScheme.background
) {
Scaffold(topBar = {
- TopAppBar(title = { Text("Next-gen Kaldi: TTS") })
+ TopAppBar(title = { Text("Next-gen Kaldi: TTS Engine") })
}) {
Box(modifier = Modifier.padding(it)) {
Column(modifier = Modifier.padding(16.dp)) {
@@ -65,8 +94,8 @@ class MainActivity : ComponentActivity() {
Text("Speed " + String.format("%.1f", TtsEngine.speed))
Slider(
value = TtsEngine.speedState.value,
- onValueChange = {
- TtsEngine.speed = it
+ onValueChange = {
+ TtsEngine.speed = it
preferenceHelper.setSpeed(it)
},
valueRange = 0.2F..3.0F,
@@ -77,6 +106,12 @@ class MainActivity : ComponentActivity() {
val testTextContent = getSampleText(TtsEngine.lang ?: "")
var testText by remember { mutableStateOf(testTextContent) }
+ var startEnabled by remember { mutableStateOf(true) }
+ var playEnabled by remember { mutableStateOf(false) }
+ var rtfText by remember {
+ mutableStateOf("")
+ }
+ val scrollState = rememberScrollState(0)
val numSpeakers = TtsEngine.tts!!.numSpeakers()
if (numSpeakers > 1) {
@@ -110,59 +145,128 @@ class MainActivity : ComponentActivity() {
value = testText,
onValueChange = { testText = it },
label = { Text("Please input your text here") },
+ maxLines = 10,
modifier = Modifier
.fillMaxWidth()
.padding(bottom = 16.dp)
+ .verticalScroll(scrollState)
.wrapContentHeight(),
singleLine = false,
)
Row {
Button(
- modifier = Modifier.padding(20.dp),
+ enabled = startEnabled,
+ modifier = Modifier.padding(5.dp),
onClick = {
Log.i(TAG, "Clicked, text: $testText")
if (testText.isBlank() || testText.isEmpty()) {
Toast.makeText(
applicationContext,
- "Please input a test sentence",
+ "Please input some text to generate",
Toast.LENGTH_SHORT
).show()
} else {
- val audio = TtsEngine.tts!!.generate(
- text = testText,
- sid = TtsEngine.speakerId,
- speed = TtsEngine.speed,
- )
-
- val filename =
- application.filesDir.absolutePath + "/generated.wav"
- val ok =
- audio.samples.isNotEmpty() && audio.save(filename)
-
- if (ok) {
- stopMediaPlayer()
- mediaPlayer = MediaPlayer.create(
- applicationContext,
- Uri.fromFile(File(filename))
- )
- mediaPlayer?.start()
- } else {
- Log.i(TAG, "Failed to generate or save audio")
+ startEnabled = false
+ playEnabled = false
+ stopped = false
+
+ track.pause()
+ track.flush()
+ track.play()
+ rtfText = ""
+ Log.i(TAG, "Started with text $testText")
+
+                                            samplesChannel = Channel<FloatArray>()
+
+ CoroutineScope(Dispatchers.IO).launch {
+ for (samples in samplesChannel) {
+ track.write(
+ samples,
+ 0,
+ samples.size,
+ AudioTrack.WRITE_BLOCKING
+ )
+ if (stopped) {
+ break
+ }
+ }
}
+
+ CoroutineScope(Dispatchers.Default).launch {
+ val timeSource = TimeSource.Monotonic
+ val startTime = timeSource.markNow()
+
+ val audio =
+ TtsEngine.tts!!.generateWithCallback(
+ text = testText,
+ sid = TtsEngine.speakerId,
+ speed = TtsEngine.speed,
+ callback = ::callback,
+ )
+
+                                            val elapsed =
+                                                startTime.elapsedNow().inWholeMilliseconds.toFloat() / 1000
+                                            val audioDuration =
+                                                audio.samples.size / TtsEngine.tts!!.sampleRate()
+                                                    .toFloat()
+                                            val RTF = String.format(
+                                                "Number of threads: %d\nElapsed: %.3f s\nAudio duration: %.3f s\nRTF: %.3f/%.3f = %.3f",
+                                                TtsEngine.tts!!.config.model.numThreads,
+                                                elapsed,
+                                                audioDuration,
+                                                elapsed,
+                                                audioDuration,
+                                                elapsed / audioDuration
+                                            )
+ samplesChannel.close()
+
+ val filename =
+ application.filesDir.absolutePath + "/generated.wav"
+
+
+ val ok =
+ audio.samples.isNotEmpty() && audio.save(
+ filename
+ )
+
+ if (ok) {
+ withContext(Dispatchers.Main) {
+ startEnabled = true
+ playEnabled = true
+ rtfText = RTF
+ }
+ }
+ }.start()
}
}) {
- Text("Test")
+ Text("Start")
+ }
+
+ Button(
+ modifier = Modifier.padding(5.dp),
+ enabled = playEnabled,
+ onClick = {
+ stopped = true
+ track.pause()
+ track.flush()
+ onClickPlay()
+ }) {
+ Text("Play")
}
Button(
- modifier = Modifier.padding(20.dp),
+ modifier = Modifier.padding(5.dp),
onClick = {
- TtsEngine.speakerId = 0
- TtsEngine.speed = 1.0f
- testText = ""
+ onClickStop()
+ startEnabled = true
}) {
- Text("Reset")
+ Text("Stop")
+ }
+ }
+ if (rtfText.isNotEmpty()) {
+ Row {
+ Text(rtfText)
}
}
}
@@ -183,4 +287,63 @@ class MainActivity : ComponentActivity() {
mediaPlayer?.release()
mediaPlayer = null
}
+
+ private fun onClickPlay() {
+ val filename = application.filesDir.absolutePath + "/generated.wav"
+ stopMediaPlayer()
+ mediaPlayer = MediaPlayer.create(
+ applicationContext,
+ Uri.fromFile(File(filename))
+ )
+ mediaPlayer?.start()
+ }
+
+ private fun onClickStop() {
+ stopped = true
+ track.pause()
+ track.flush()
+
+ stopMediaPlayer()
+ }
+
+ // this function is called from C++
+ private fun callback(samples: FloatArray): Int {
+ if (!stopped) {
+ val samplesCopy = samples.copyOf()
+ CoroutineScope(Dispatchers.IO).launch {
+ samplesChannel.send(samplesCopy)
+ }
+ return 1
+ } else {
+ track.stop()
+            Log.i(TAG, "stopped; callback returns 0")
+ return 0
+ }
+ }
+
+ private fun initAudioTrack() {
+ val sampleRate = TtsEngine.tts!!.sampleRate()
+ val bufLength = AudioTrack.getMinBufferSize(
+ sampleRate,
+ AudioFormat.CHANNEL_OUT_MONO,
+ AudioFormat.ENCODING_PCM_FLOAT
+ )
+        Log.i(TAG, "sampleRate: $sampleRate, bufLength: $bufLength")
+
+ val attr = AudioAttributes.Builder().setContentType(AudioAttributes.CONTENT_TYPE_SPEECH)
+ .setUsage(AudioAttributes.USAGE_MEDIA)
+ .build()
+
+ val format = AudioFormat.Builder()
+ .setEncoding(AudioFormat.ENCODING_PCM_FLOAT)
+ .setChannelMask(AudioFormat.CHANNEL_OUT_MONO)
+ .setSampleRate(sampleRate)
+ .build()
+
+ track = AudioTrack(
+ attr, format, bufLength, AudioTrack.MODE_STREAM,
+ AudioManager.AUDIO_SESSION_ID_GENERATE
+ )
+ track.play()
+ }
}
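
The streaming changes above follow one pattern: the TTS engine invokes callback(samples) after each generated chunk, the callback forwards a copy of the chunk into a Channel, and a coroutine drains the channel into the AudioTrack; as used here, returning 1 keeps generation going and returning 0 (after Stop) ends it. A condensed sketch of that pattern, using only calls that appear in this diff and assuming `track` is the AudioTrack set up in initAudioTrack() and the same coroutine imports as MainActivity.kt:

```kotlin
// Condensed sketch of the producer/consumer flow added above.
val channel = Channel<FloatArray>()

// Consumer: track.write() blocks, so playback paces consumption.
CoroutineScope(Dispatchers.IO).launch {
    for (chunk in channel) {
        track.write(chunk, 0, chunk.size, AudioTrack.WRITE_BLOCKING)
    }
}

// Producer: the TTS engine calls the lambda once per generated chunk.
val audio = TtsEngine.tts!!.generateWithCallback(
    text = "How are you doing today?",
    sid = TtsEngine.speakerId,
    speed = TtsEngine.speed,
) { samples: FloatArray ->
    val copy = samples.copyOf() // copied before handing off, as the app's callback does
    CoroutineScope(Dispatchers.IO).launch { channel.send(copy) }
    1 // 1 = keep generating; return 0 (as after Stop) to end generation early
}
channel.close() // generation finished; the consumer loop then ends
```
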
diff --git a/android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/Tts.kt b/android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/Tts.kt
index bc6a22c571..b510f97d39 120000
--- a/android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/Tts.kt
+++ b/android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/Tts.kt
@@ -1 +1 @@
-../../../../../../../../../../../SherpaOnnxTts/app/src/main/java/com/k2fsa/sherpa/onnx/Tts.kt
\ No newline at end of file
+../../../../../../../../../../../../sherpa-onnx/kotlin-api/Tts.kt
\ No newline at end of file
diff --git a/android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/TtsEngine.kt b/android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/TtsEngine.kt
index 480f8a384e..2ae628c271 100644
--- a/android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/TtsEngine.kt
+++ b/android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/TtsEngine.kt
@@ -1,5 +1,6 @@
package com.k2fsa.sherpa.onnx.tts.engine
+import PreferenceHelper
import android.content.Context
import android.content.res.AssetManager
import android.util.Log
@@ -11,7 +12,6 @@ import com.k2fsa.sherpa.onnx.getOfflineTtsConfig
import java.io.File
import java.io.FileOutputStream
import java.io.IOException
-import PreferenceHelper
object TtsEngine {
var tts: OfflineTts? = null
@@ -41,6 +41,9 @@ object TtsEngine {
private var modelDir: String? = null
private var modelName: String? = null
+ private var acousticModelName: String? = null // for matcha tts
+ private var vocoder: String? = null // for matcha tts
+ private var voices: String? = null // for kokoro
private var ruleFsts: String? = null
private var ruleFars: String? = null
private var lexicon: String? = null
@@ -52,8 +55,21 @@ object TtsEngine {
// The purpose of such a design is to make the CI test easier
// Please see
// https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/apk/generate-tts-apk-script.py
- modelDir = null
+ //
+ // For VITS -- begin
modelName = null
+ // For VITS -- end
+
+ // For Matcha -- begin
+ acousticModelName = null
+ vocoder = null
+ // For Matcha -- end
+
+ // For Kokoro -- begin
+ voices = null
+ // For Kokoro -- end
+
+ modelDir = null
ruleFsts = null
ruleFars = null
lexicon = null
@@ -82,7 +98,6 @@ object TtsEngine {
// https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
// modelDir = "vits-icefall-zh-aishell3"
// modelName = "model.onnx"
- // ruleFsts = "vits-icefall-zh-aishell3/phone.fst,vits-icefall-zh-aishell3/date.fst,vits-icefall-zh-aishell3/number.fst,vits-icefall-zh-aishell3/new_heteronym.fst"
// ruleFars = "vits-icefall-zh-aishell3/rule.far"
// lexicon = "lexicon.txt"
// lang = "zho"
@@ -101,8 +116,57 @@ object TtsEngine {
// modelDir = "vits-coqui-de-css10"
// modelName = "model.onnx"
// lang = "deu"
- }
+ // Example 6
+ // vits-melo-tts-zh_en
+ // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#vits-melo-tts-zh-en-chinese-english-1-speaker
+ // modelDir = "vits-melo-tts-zh_en"
+ // modelName = "model.onnx"
+ // lexicon = "lexicon.txt"
+ // dictDir = "vits-melo-tts-zh_en/dict"
+ // lang = "zho"
+
+ // Example 7
+ // matcha-icefall-zh-baker
+ // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker
+ // modelDir = "matcha-icefall-zh-baker"
+ // acousticModelName = "model-steps-3.onnx"
+ // vocoder = "hifigan_v2.onnx"
+ // lexicon = "lexicon.txt"
+ // dictDir = "matcha-icefall-zh-baker/dict"
+ // lang = "zho"
+
+ // Example 8
+ // matcha-icefall-en_US-ljspeech
+ // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
+ // modelDir = "matcha-icefall-en_US-ljspeech"
+ // acousticModelName = "model-steps-3.onnx"
+ // vocoder = "hifigan_v2.onnx"
+ // dataDir = "matcha-icefall-en_US-ljspeech/espeak-ng-data"
+ // lang = "eng"
+
+ // Example 9
+ // kokoro-en-v0_19
+ // modelDir = "kokoro-en-v0_19"
+ // modelName = "model.onnx"
+ // voices = "voices.bin"
+ // dataDir = "kokoro-en-v0_19/espeak-ng-data"
+ // lang = "eng"
+
+ // Example 10
+ // kokoro-multi-lang-v1_0
+ // modelDir = "kokoro-multi-lang-v1_0"
+ // modelName = "model.onnx"
+ // voices = "voices.bin"
+ // dataDir = "kokoro-multi-lang-v1_0/espeak-ng-data"
+ // dictDir = "kokoro-multi-lang-v1_0/dict"
+ // lexicon = "kokoro-multi-lang-v1_0/lexicon-us-en.txt,kokoro-multi-lang-v1_0/lexicon-zh.txt"
+ // lang = "eng"
+ // ruleFsts = "$modelDir/phone-zh.fst,$modelDir/date-zh.fst,$modelDir/number-zh.fst"
+ //
+        // This model supports many languages, e.g., English and Chinese.
+ // We set lang to eng here.
+ }
fun createTts(context: Context) {
Log.i(TAG, "Init Next-gen Kaldi TTS")
@@ -115,22 +179,25 @@ object TtsEngine {
assets = context.assets
if (dataDir != null) {
- val newDir = copyDataDir(context, modelDir!!)
- modelDir = "$newDir/$modelDir"
+ val newDir = copyDataDir(context, dataDir!!)
dataDir = "$newDir/$dataDir"
- assets = null
}
if (dictDir != null) {
- val newDir = copyDataDir(context, modelDir!!)
- modelDir = "$newDir/$modelDir"
- dictDir = "$modelDir/dict"
- ruleFsts = "$modelDir/phone.fst,$modelDir/date.fst,$modelDir/number.fst"
- assets = null
+ val newDir = copyDataDir(context, dictDir!!)
+ dictDir = "$newDir/$dictDir"
+ if (ruleFsts == null) {
+ ruleFsts = "$modelDir/phone.fst,$modelDir/date.fst,$modelDir/number.fst"
+ }
}
val config = getOfflineTtsConfig(
- modelDir = modelDir!!, modelName = modelName!!, lexicon = lexicon ?: "",
+ modelDir = modelDir!!,
+ modelName = modelName ?: "",
+ acousticModelName = acousticModelName ?: "",
+ vocoder = vocoder ?: "",
+ voices = voices ?: "",
+ lexicon = lexicon ?: "",
dataDir = dataDir ?: "",
dictDir = dictDir ?: "",
ruleFsts = ruleFsts ?: "",
diff --git a/android/SherpaOnnxTtsEngine/app/src/main/res/values/strings.xml b/android/SherpaOnnxTtsEngine/app/src/main/res/values/strings.xml
index ac28473148..67518e0a38 100755
--- a/android/SherpaOnnxTtsEngine/app/src/main/res/values/strings.xml
+++ b/android/SherpaOnnxTtsEngine/app/src/main/res/values/strings.xml
@@ -1,3 +1,3 @@
- TTS Engine
+ TTS Engine: Next-gen Kaldi
\ No newline at end of file
diff --git a/android/SherpaOnnxVadAsr/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt b/android/SherpaOnnxVadAsr/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt
index fb14d072d3..2f65276153 100644
--- a/android/SherpaOnnxVadAsr/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt
+++ b/android/SherpaOnnxVadAsr/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt
@@ -19,6 +19,11 @@ import com.k2fsa.sherpa.onnx.Vad
import com.k2fsa.sherpa.onnx.getFeatureConfig
import com.k2fsa.sherpa.onnx.getOfflineModelConfig
import com.k2fsa.sherpa.onnx.getVadModelConfig
+import kotlinx.coroutines.CoroutineScope
+import kotlinx.coroutines.Dispatchers
+import kotlinx.coroutines.cancel
+import kotlinx.coroutines.launch
+import kotlinx.coroutines.withContext
import kotlin.concurrent.thread
@@ -166,6 +171,8 @@ class MainActivity : AppCompatActivity() {
val bufferSize = 512 // in samples
val buffer = ShortArray(bufferSize)
+ val coroutineScope = CoroutineScope(Dispatchers.IO)
+
while (isRecording) {
val ret = audioRecord?.read(buffer, 0, buffer.size)
@@ -175,11 +182,15 @@ class MainActivity : AppCompatActivity() {
vad.acceptWaveform(samples)
while(!vad.empty()) {
var segment = vad.front()
- val text = runSecondPass(segment.samples)
-
- if (text.isNotBlank()) {
- lastText = "${lastText}\n${idx}: ${text}"
- idx += 1
+ coroutineScope.launch {
+ val text = runSecondPass(segment.samples)
+ if (text.isNotBlank()) {
+ withContext(Dispatchers.Main) {
+ lastText = "${lastText}\n${idx}: ${text}"
+ idx += 1
+ textView.text = lastText.lowercase()
+ }
+ }
}
vad.pop();
@@ -192,6 +203,9 @@ class MainActivity : AppCompatActivity() {
}
}
}
+
+ // Clean up the coroutine scope when done
+ coroutineScope.cancel()
}
private fun initOfflineRecognizer() {
diff --git a/build-aarch64-linux-gnu.sh b/build-aarch64-linux-gnu.sh
index d9851fbe1b..cdc48e3729 100755
--- a/build-aarch64-linux-gnu.sh
+++ b/build-aarch64-linux-gnu.sh
@@ -1,4 +1,25 @@
#!/usr/bin/env bash
+#
+# Usage of this file
+#
+# (1) Build CPU version of sherpa-onnx
+# ./build-aarch64-linux-gnu.sh
+#
+# (2) Build GPU version of sherpa-onnx
+#
+# (a) Make sure your board has NVIDIA GPU(s)
+#
+# (b) For Jetson Nano B01 (using CUDA 10.2)
+#
+# export SHERPA_ONNX_ENABLE_GPU=ON
+# export SHERPA_ONNX_LINUX_ARM64_GPU_ONNXRUNTIME_VERSION=1.11.0
+# ./build-aarch64-linux-gnu.sh
+#
+# (c) For Jetson Orin NX (using CUDA 11.4)
+#
+# export SHERPA_ONNX_ENABLE_GPU=ON
+# export SHERPA_ONNX_LINUX_ARM64_GPU_ONNXRUNTIME_VERSION=1.16.0
+# ./build-aarch64-linux-gnu.sh
if command -v aarch64-none-linux-gnu-gcc &> /dev/null; then
ln -svf $(which aarch64-none-linux-gnu-gcc) ./aarch64-linux-gnu-gcc
@@ -44,6 +65,21 @@ if [[ x"$BUILD_SHARED_LIBS" == x"" ]]; then
BUILD_SHARED_LIBS=OFF
fi
+if [[ x"$SHERPA_ONNX_ENABLE_GPU" == x"" ]]; then
+ # By default, use CPU
+ SHERPA_ONNX_ENABLE_GPU=OFF
+fi
+
+if [[ x"$SHERPA_ONNX_ENABLE_GPU" == x"ON" ]]; then
+  # Build shared libs if GPU support is enabled.
+ BUILD_SHARED_LIBS=ON
+fi
+
+if [[ x"$SHERPA_ONNX_LINUX_ARM64_GPU_ONNXRUNTIME_VERSION" == x"" ]]; then
+ # Used only when SHERPA_ONNX_ENABLE_GPU is ON
+ SHERPA_ONNX_LINUX_ARM64_GPU_ONNXRUNTIME_VERSION="1.11.0"
+fi
+
cmake \
-DBUILD_PIPER_PHONMIZE_EXE=OFF \
-DBUILD_PIPER_PHONMIZE_TESTS=OFF \
@@ -51,6 +87,7 @@ cmake \
-DBUILD_ESPEAK_NG_TESTS=OFF \
-DCMAKE_INSTALL_PREFIX=./install \
-DCMAKE_BUILD_TYPE=Release \
+ -DSHERPA_ONNX_ENABLE_GPU=$SHERPA_ONNX_ENABLE_GPU \
-DBUILD_SHARED_LIBS=$BUILD_SHARED_LIBS \
-DSHERPA_ONNX_ENABLE_TESTS=OFF \
-DSHERPA_ONNX_ENABLE_PYTHON=OFF \
@@ -59,6 +96,7 @@ cmake \
-DSHERPA_ONNX_ENABLE_JNI=OFF \
-DSHERPA_ONNX_ENABLE_C_API=ON \
-DSHERPA_ONNX_ENABLE_WEBSOCKET=ON \
+ -DSHERPA_ONNX_LINUX_ARM64_GPU_ONNXRUNTIME_VERSION=$SHERPA_ONNX_LINUX_ARM64_GPU_ONNXRUNTIME_VERSION \
-DCMAKE_TOOLCHAIN_FILE=../toolchains/aarch64-linux-gnu.toolchain.cmake \
..
diff --git a/build-android-arm64-v8a.sh b/build-android-arm64-v8a.sh
index 7967af018e..88ba09ef03 100755
--- a/build-android-arm64-v8a.sh
+++ b/build-android-arm64-v8a.sh
@@ -1,7 +1,26 @@
#!/usr/bin/env bash
set -ex
-dir=$PWD/build-android-arm64-v8a
+# If BUILD_SHARED_LIBS is ON, we use libonnxruntime.so
+# If BUILD_SHARED_LIBS is OFF, we use libonnxruntime.a
+#
+# In any case, we will have libsherpa-onnx-jni.so
+#
+# If BUILD_SHARED_LIBS is OFF, then libonnxruntime.a is linked into libsherpa-onnx-jni.so
+# and you only need to copy libsherpa-onnx-jni.so to your Android projects.
+#
+# If BUILD_SHARED_LIBS is ON, then you need to copy both libsherpa-onnx-jni.so
+# and libonnxruntime.so to your Android projects
+#
+if [ -z $BUILD_SHARED_LIBS ]; then
+ BUILD_SHARED_LIBS=ON
+fi
+
+if [ $BUILD_SHARED_LIBS == ON ]; then
+ dir=$PWD/build-android-arm64-v8a
+else
+ dir=$PWD/build-android-arm64-v8a-static
+fi
mkdir -p $dir
cd $dir
@@ -21,6 +40,9 @@ cd $dir
if [ -z $ANDROID_NDK ]; then
ANDROID_NDK=/star-fj/fangjun/software/android-sdk/ndk/22.1.7171670
+ if [ $BUILD_SHARED_LIBS == OFF ]; then
+ ANDROID_NDK=/star-fj/fangjun/software/android-sdk/ndk/27.0.11718014
+ fi
# or use
# ANDROID_NDK=/star-fj/fangjun/software/android-ndk
#
@@ -32,6 +54,10 @@ if [ -z $ANDROID_NDK ]; then
# Tools -> SDK manager -> Android SDK
# and set "Android SDK location" to /Users/fangjun/software/my-android
ANDROID_NDK=/Users/fangjun/software/my-android/ndk/22.1.7171670
+
+ if [ $BUILD_SHARED_LIBS == OFF ]; then
+ ANDROID_NDK=/Users/fangjun/software/my-android/ndk/27.0.11718014
+ fi
fi
fi
@@ -44,17 +70,29 @@ echo "ANDROID_NDK: $ANDROID_NDK"
sleep 1
onnxruntime_version=1.17.1
-if [ ! -f $onnxruntime_version/jni/arm64-v8a/libonnxruntime.so ]; then
- mkdir -p $onnxruntime_version
- pushd $onnxruntime_version
- wget -c -q https://github.com/csukuangfj/onnxruntime-libs/releases/download/v${onnxruntime_version}/onnxruntime-android-${onnxruntime_version}.zip
- unzip onnxruntime-android-${onnxruntime_version}.zip
- rm onnxruntime-android-${onnxruntime_version}.zip
- popd
-fi
+if [ $BUILD_SHARED_LIBS == ON ]; then
+ if [ ! -f $onnxruntime_version/jni/arm64-v8a/libonnxruntime.so ]; then
+ mkdir -p $onnxruntime_version
+ pushd $onnxruntime_version
+ wget -c -q https://github.com/csukuangfj/onnxruntime-libs/releases/download/v${onnxruntime_version}/onnxruntime-android-${onnxruntime_version}.zip
+ unzip onnxruntime-android-${onnxruntime_version}.zip
+ rm onnxruntime-android-${onnxruntime_version}.zip
+ popd
+ fi
+
+ export SHERPA_ONNXRUNTIME_LIB_DIR=$dir/$onnxruntime_version/jni/arm64-v8a/
+ export SHERPA_ONNXRUNTIME_INCLUDE_DIR=$dir/$onnxruntime_version/headers/
+else
+ if [ ! -f ${onnxruntime_version}-static/lib/libonnxruntime.a ]; then
+ wget -c -q https://github.com/csukuangfj/onnxruntime-libs/releases/download/v${onnxruntime_version}/onnxruntime-android-arm64-v8a-static_lib-${onnxruntime_version}.zip
+ unzip onnxruntime-android-arm64-v8a-static_lib-${onnxruntime_version}.zip
+ rm onnxruntime-android-arm64-v8a-static_lib-${onnxruntime_version}.zip
+ mv onnxruntime-android-arm64-v8a-static_lib-${onnxruntime_version} ${onnxruntime_version}-static
+ fi
-export SHERPA_ONNXRUNTIME_LIB_DIR=$dir/$onnxruntime_version/jni/arm64-v8a/
-export SHERPA_ONNXRUNTIME_INCLUDE_DIR=$dir/$onnxruntime_version/headers/
+ export SHERPA_ONNXRUNTIME_LIB_DIR=$dir/$onnxruntime_version-static/lib/
+ export SHERPA_ONNXRUNTIME_INCLUDE_DIR=$dir/$onnxruntime_version-static/include/
+fi
echo "SHERPA_ONNXRUNTIME_LIB_DIR: $SHERPA_ONNXRUNTIME_LIB_DIR"
echo "SHERPA_ONNXRUNTIME_INCLUDE_DIR $SHERPA_ONNXRUNTIME_INCLUDE_DIR"
@@ -88,24 +126,46 @@ cmake -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK/build/cmake/android.toolchain.cmake"
-DBUILD_ESPEAK_NG_EXE=OFF \
-DBUILD_ESPEAK_NG_TESTS=OFF \
-DCMAKE_BUILD_TYPE=Release \
- -DBUILD_SHARED_LIBS=ON \
+ -DBUILD_SHARED_LIBS=$BUILD_SHARED_LIBS \
-DSHERPA_ONNX_ENABLE_PYTHON=OFF \
-DSHERPA_ONNX_ENABLE_TESTS=OFF \
-DSHERPA_ONNX_ENABLE_CHECK=OFF \
-DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
-DSHERPA_ONNX_ENABLE_JNI=$SHERPA_ONNX_ENABLE_JNI \
+ -DSHERPA_ONNX_LINK_LIBSTDCPP_STATICALLY=OFF \
-DSHERPA_ONNX_ENABLE_C_API=$SHERPA_ONNX_ENABLE_C_API \
-DCMAKE_INSTALL_PREFIX=./install \
-DANDROID_ABI="arm64-v8a" \
-DANDROID_PLATFORM=android-21 ..
+ # By default, it links to libc++_static.a
+ # -DANDROID_STL=c++_shared \
+
# Please use -DANDROID_PLATFORM=android-27 if you want to use Android NNAPI
# make VERBOSE=1 -j4
make -j4
make install/strip
-cp -fv $onnxruntime_version/jni/arm64-v8a/libonnxruntime.so install/lib
+cp -fv $onnxruntime_version/jni/arm64-v8a/libonnxruntime.so install/lib 2>/dev/null || true
+rm -rf install/share
rm -rf install/lib/pkgconfig
+rm -rf install/lib/lib*.a
+if [ -f install/lib/libsherpa-onnx-c-api.so ]; then
+ cat >install/lib/README.md < SDK manager -> Android SDK
# and set "Android SDK location" to /Users/fangjun/software/my-android
ANDROID_NDK=/Users/fangjun/software/my-android/ndk/22.1.7171670
+
+ if [ $BUILD_SHARED_LIBS == OFF ]; then
+ ANDROID_NDK=/Users/fangjun/software/my-android/ndk/27.0.11718014
+ fi
fi
fi
@@ -45,17 +71,29 @@ sleep 1
onnxruntime_version=1.17.1
-if [ ! -f $onnxruntime_version/jni/armeabi-v7a/libonnxruntime.so ]; then
- mkdir -p $onnxruntime_version
- pushd $onnxruntime_version
- wget -c -q https://github.com/csukuangfj/onnxruntime-libs/releases/download/v${onnxruntime_version}/onnxruntime-android-${onnxruntime_version}.zip
- unzip onnxruntime-android-${onnxruntime_version}.zip
- rm onnxruntime-android-${onnxruntime_version}.zip
- popd
-fi
+if [ $BUILD_SHARED_LIBS == ON ]; then
+ if [ ! -f $onnxruntime_version/jni/armeabi-v7a/libonnxruntime.so ]; then
+ mkdir -p $onnxruntime_version
+ pushd $onnxruntime_version
+ wget -c -q https://github.com/csukuangfj/onnxruntime-libs/releases/download/v${onnxruntime_version}/onnxruntime-android-${onnxruntime_version}.zip
+ unzip onnxruntime-android-${onnxruntime_version}.zip
+ rm onnxruntime-android-${onnxruntime_version}.zip
+ popd
+ fi
-export SHERPA_ONNXRUNTIME_LIB_DIR=$dir/$onnxruntime_version/jni/armeabi-v7a/
-export SHERPA_ONNXRUNTIME_INCLUDE_DIR=$dir/$onnxruntime_version/headers/
+ export SHERPA_ONNXRUNTIME_LIB_DIR=$dir/$onnxruntime_version/jni/armeabi-v7a/
+ export SHERPA_ONNXRUNTIME_INCLUDE_DIR=$dir/$onnxruntime_version/headers/
+else
+ if [ ! -f ${onnxruntime_version}-static/lib/libonnxruntime.a ]; then
+ wget -c -q https://github.com/csukuangfj/onnxruntime-libs/releases/download/v${onnxruntime_version}/onnxruntime-android-armeabi-v7a-static_lib-${onnxruntime_version}.zip
+ unzip onnxruntime-android-armeabi-v7a-static_lib-${onnxruntime_version}.zip
+ rm onnxruntime-android-armeabi-v7a-static_lib-${onnxruntime_version}.zip
+ mv onnxruntime-android-armeabi-v7a-static_lib-${onnxruntime_version} ${onnxruntime_version}-static
+ fi
+
+ export SHERPA_ONNXRUNTIME_LIB_DIR=$dir/$onnxruntime_version-static/lib/
+ export SHERPA_ONNXRUNTIME_INCLUDE_DIR=$dir/$onnxruntime_version-static/include/
+fi
echo "SHERPA_ONNXRUNTIME_LIB_DIR: $SHERPA_ONNXRUNTIME_LIB_DIR"
echo "SHERPA_ONNXRUNTIME_INCLUDE_DIR $SHERPA_ONNXRUNTIME_INCLUDE_DIR"
@@ -89,18 +127,42 @@ cmake -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK/build/cmake/android.toolchain.cmake"
-DBUILD_ESPEAK_NG_EXE=OFF \
-DBUILD_ESPEAK_NG_TESTS=OFF \
-DCMAKE_BUILD_TYPE=Release \
- -DBUILD_SHARED_LIBS=ON \
+ -DBUILD_SHARED_LIBS=$BUILD_SHARED_LIBS \
-DSHERPA_ONNX_ENABLE_PYTHON=OFF \
-DSHERPA_ONNX_ENABLE_TESTS=OFF \
-DSHERPA_ONNX_ENABLE_CHECK=OFF \
-DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
-DSHERPA_ONNX_ENABLE_JNI=$SHERPA_ONNX_ENABLE_JNI \
+ -DSHERPA_ONNX_LINK_LIBSTDCPP_STATICALLY=OFF \
-DSHERPA_ONNX_ENABLE_C_API=$SHERPA_ONNX_ENABLE_C_API \
-DCMAKE_INSTALL_PREFIX=./install \
-DANDROID_ABI="armeabi-v7a" -DANDROID_ARM_NEON=ON \
-DANDROID_PLATFORM=android-21 ..
+
+ # By default, it links to libc++_static.a
+ # -DANDROID_STL=c++_shared \
+
# make VERBOSE=1 -j4
make -j4
make install/strip
-cp -fv $onnxruntime_version/jni/armeabi-v7a/libonnxruntime.so install/lib
+cp -fv $onnxruntime_version/jni/armeabi-v7a/libonnxruntime.so install/lib 2>/dev/null || true
+rm -rf install/share
rm -rf install/lib/pkgconfig
+rm -rf install/lib/lib*.a
+
+if [ -f install/lib/libsherpa-onnx-c-api.so ]; then
+ cat >install/lib/README.md < SDK manager -> Android SDK
# and set "Android SDK location" to /Users/fangjun/software/my-android
ANDROID_NDK=/Users/fangjun/software/my-android/ndk/22.1.7171670
+
+ if [ $BUILD_SHARED_LIBS == OFF ]; then
+ ANDROID_NDK=/Users/fangjun/software/my-android/ndk/27.0.11718014
+ fi
fi
fi
@@ -45,17 +71,29 @@ sleep 1
onnxruntime_version=1.17.1
-if [ ! -f $onnxruntime_version/jni/x86_64/libonnxruntime.so ]; then
- mkdir -p $onnxruntime_version
- pushd $onnxruntime_version
- wget -c -q https://github.com/csukuangfj/onnxruntime-libs/releases/download/v${onnxruntime_version}/onnxruntime-android-${onnxruntime_version}.zip
- unzip onnxruntime-android-${onnxruntime_version}.zip
- rm onnxruntime-android-${onnxruntime_version}.zip
- popd
-fi
+if [ $BUILD_SHARED_LIBS == ON ]; then
+ if [ ! -f $onnxruntime_version/jni/x86_64/libonnxruntime.so ]; then
+ mkdir -p $onnxruntime_version
+ pushd $onnxruntime_version
+ wget -c -q https://github.com/csukuangfj/onnxruntime-libs/releases/download/v${onnxruntime_version}/onnxruntime-android-${onnxruntime_version}.zip
+ unzip onnxruntime-android-${onnxruntime_version}.zip
+ rm onnxruntime-android-${onnxruntime_version}.zip
+ popd
+ fi
-export SHERPA_ONNXRUNTIME_LIB_DIR=$dir/$onnxruntime_version/jni/x86_64/
-export SHERPA_ONNXRUNTIME_INCLUDE_DIR=$dir/$onnxruntime_version/headers/
+ export SHERPA_ONNXRUNTIME_LIB_DIR=$dir/$onnxruntime_version/jni/x86_64/
+ export SHERPA_ONNXRUNTIME_INCLUDE_DIR=$dir/$onnxruntime_version/headers/
+else
+ if [ ! -f ${onnxruntime_version}-static/lib/libonnxruntime.a ]; then
+ wget -c -q https://github.com/csukuangfj/onnxruntime-libs/releases/download/v${onnxruntime_version}/onnxruntime-android-x86_64-static_lib-${onnxruntime_version}.zip
+ unzip onnxruntime-android-x86_64-static_lib-${onnxruntime_version}.zip
+ rm onnxruntime-android-x86_64-static_lib-${onnxruntime_version}.zip
+ mv onnxruntime-android-x86_64-static_lib-${onnxruntime_version} ${onnxruntime_version}-static
+ fi
+
+ export SHERPA_ONNXRUNTIME_LIB_DIR=$dir/$onnxruntime_version-static/lib/
+ export SHERPA_ONNXRUNTIME_INCLUDE_DIR=$dir/$onnxruntime_version-static/include/
+fi
echo "SHERPA_ONNXRUNTIME_LIB_DIR: $SHERPA_ONNXRUNTIME_LIB_DIR"
echo "SHERPA_ONNXRUNTIME_INCLUDE_DIR $SHERPA_ONNXRUNTIME_INCLUDE_DIR"
@@ -89,20 +127,44 @@ cmake -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK/build/cmake/android.toolchain.cmake"
-DBUILD_ESPEAK_NG_EXE=OFF \
-DBUILD_ESPEAK_NG_TESTS=OFF \
-DCMAKE_BUILD_TYPE=Release \
- -DBUILD_SHARED_LIBS=ON \
+ -DBUILD_SHARED_LIBS=$BUILD_SHARED_LIBS \
-DSHERPA_ONNX_ENABLE_PYTHON=OFF \
-DSHERPA_ONNX_ENABLE_TESTS=OFF \
-DSHERPA_ONNX_ENABLE_CHECK=OFF \
-DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
-DSHERPA_ONNX_ENABLE_JNI=$SHERPA_ONNX_ENABLE_JNI \
+ -DSHERPA_ONNX_LINK_LIBSTDCPP_STATICALLY=OFF \
-DCMAKE_INSTALL_PREFIX=./install \
-DANDROID_ABI="x86_64" \
-DSHERPA_ONNX_ENABLE_C_API=$SHERPA_ONNX_ENABLE_C_API \
-DSHERPA_ONNX_ENABLE_WEBSOCKET=OFF \
-DANDROID_PLATFORM=android-21 ..
+ # By default, it links to libc++_static.a
+ # -DANDROID_STL=c++_shared \
+
# make VERBOSE=1 -j4
make -j4
make install/strip
-cp -fv $onnxruntime_version/jni/x86_64/libonnxruntime.so install/lib
+
+cp -fv $onnxruntime_version/jni/x86_64/libonnxruntime.so install/lib 2>/dev/null || true
+rm -rf install/share
rm -rf install/lib/pkgconfig
+rm -rf install/lib/lib*.a
+
+if [ -f install/lib/libsherpa-onnx-c-api.so ]; then
+ cat >install/lib/README.md <install/lib/README.md <$dst/Info.plist <
+<?xml version="1.0" encoding="UTF-8"?>
+<plist version="1.0">
+<dict>
+  <key>CFBundleName</key>
+  <string>sherpa_onnx</string>
+  <key>DTSDKName</key>
+  <string>iphoneos17.0</string>
+  <key>DTXcode</key>
+  <string>1501</string>
+  <key>DTSDKBuild</key>
+  <string>21A326</string>
+  <key>CFBundleDevelopmentRegion</key>
+  <string>en</string>
+  <key>CFBundleVersion</key>
+  <string>1</string>
+  <key>BuildMachineOSBuild</key>
+  <string>23B81</string>
+  <key>DTPlatformName</key>
+  <string>iphoneos</string>
+  <key>CFBundlePackageType</key>
+  <string>FMWK</string>
+  <key>CFBundleShortVersionString</key>
+  <string>1.10.42</string>
+  <key>CFBundleSupportedPlatforms</key>
+  <array>
+    <string>iPhoneOS</string>
+  </array>
+  <key>CFBundleInfoDictionaryVersion</key>
+  <string>6.0</string>
+  <key>CFBundleExecutable</key>
+  <string>sherpa_onnx</string>
+  <key>DTCompiler</key>
+  <string>com.apple.compilers.llvm.clang.1_0</string>
+  <key>UIRequiredDeviceCapabilities</key>
+  <array>
+    <string>arm64</string>
+  </array>
+  <key>MinimumOSVersion</key>
+  <string>13.0</string>
+  <key>CFBundleIdentifier</key>
+  <string>com.k2fsa.sherpa.onnx</string>
+  <key>UIDeviceFamily</key>
+  <array>
+    <integer>1</integer>
+    <integer>2</integer>
+  </array>
+  <key>CFBundleSignature</key>
+  <string>????</string>
+  <key>DTPlatformVersion</key>
+  <string>17.0</string>
+  <key>DTXcodeBuild</key>
+  <string>15A507</string>
+  <key>DTPlatformBuild</key>
+  <string>21A326</string>
+  <key>SupportedArchitectures</key>
+  <array>
+    <string>arm64</string>
+    <string>x86_64</string>
+  </array>
+  <key>SupportedPlatform</key>
+  <string>ios</string>
+</dict>
+</plist>
+EOF
+done
+
+rm -rf sherpa_onnx.xcframework
+xcodebuild -create-xcframework \
+ -framework ios-arm64/sherpa_onnx.framework \
+ -framework ios-arm64_x86_64-simulator/sherpa_onnx.framework \
+ -output sherpa_onnx.xcframework
+
+cd sherpa_onnx.xcframework
+echo "PWD: $PWD"
+ls -lh
+echo "---"
+ls -lh */*
diff --git a/build-ohos-arm64-v8a.sh b/build-ohos-arm64-v8a.sh
new file mode 100755
index 0000000000..4e0ecbb299
--- /dev/null
+++ b/build-ohos-arm64-v8a.sh
@@ -0,0 +1,142 @@
+#!/usr/bin/env bash
+set -ex
+
+dir=$PWD/build-ohos-arm64-v8a
+
+mkdir -p $dir
+cd $dir
+
+# Please first download the commandline tools from
+# https://developer.huawei.com/consumer/cn/download/
+#
+# Example filename on Linux: commandline-tools-linux-x64-5.0.5.200.zip
+# You can also download it from https://hf-mirror.com/csukuangfj/harmonyos-commandline-tools/tree/main
+
+# mkdir /star-fj/fangjun/software/huawei
+# cd /star-fj/fangjun/software/huawei
+# wget https://hf-mirror.com/csukuangfj/harmonyos-commandline-tools/resolve/main/commandline-tools-linux-x64-5.0.5.200.zip
+# unzip commandline-tools-linux-x64-5.0.5.200.zip
+# rm commandline-tools-linux-x64-5.0.5.200.zip
+if [ -z $OHOS_SDK_NATIVE_DIR ]; then
+ OHOS_SDK_NATIVE_DIR=/star-fj/fangjun/software/huawei/command-line-tools/sdk/default/openharmony/native/
+ # You can find the following content inside OHOS_SDK_NATIVE_DIR
+ # ls -lh /star-fj/fangjun/software/huawei/command-line-tools/sdk/default/openharmony/native/
+ # total 524K
+ # -rw-r--r-- 1 kuangfangjun root 501K Jan 1 2001 NOTICE.txt
+ # drwxr-xr-x 3 kuangfangjun root 0 Nov 6 22:36 build
+ # drwxr-xr-x 3 kuangfangjun root 0 Nov 6 22:36 build-tools
+ # -rw-r--r-- 1 kuangfangjun root 371 Jan 1 2001 compatible_config.json
+ # drwxr-xr-x 4 kuangfangjun root 0 Nov 6 22:36 docs
+ # drwxr-xr-x 10 kuangfangjun root 0 Nov 6 22:36 llvm
+ # -rw-r--r-- 1 kuangfangjun root 16K Jan 1 2001 nativeapi_syscap_config.json
+ # -rw-r--r-- 1 kuangfangjun root 5.9K Jan 1 2001 ndk_system_capability.json
+ # -rw-r--r-- 1 kuangfangjun root 167 Jan 1 2001 oh-uni-package.json
+ # drwxr-xr-x 3 kuangfangjun root 0 Nov 6 22:36 sysroot
+fi
+
+if [ ! -d $OHOS_SDK_NATIVE_DIR ]; then
+ OHOS_SDK_NATIVE_DIR=/Users/fangjun/software/command-line-tools/sdk/default/openharmony/native
+ # (py38) fangjuns-MacBook-Pro:software fangjun$ ls -lh command-line-tools/sdk/default/openharmony/native/
+ # total 752
+ # -rw-r--r-- 1 fangjun staff 341K Jan 1 2001 NOTICE.txt
+ # drwxr-xr-x 3 fangjun staff 96B Nov 6 21:17 build
+ # drwxr-xr-x 3 fangjun staff 96B Nov 6 21:18 build-tools
+ # -rw-r--r-- 1 fangjun staff 371B Jan 1 2001 compatible_config.json
+ # drwxr-xr-x 10 fangjun staff 320B Nov 6 21:18 llvm
+ # -rw-r--r-- 1 fangjun staff 16K Jan 1 2001 nativeapi_syscap_config.json
+ # -rw-r--r-- 1 fangjun staff 5.9K Jan 1 2001 ndk_system_capability.json
+ # -rw-r--r-- 1 fangjun staff 167B Jan 1 2001 oh-uni-package.json
+ # drwxr-xr-x 3 fangjun staff 96B Nov 6 21:17 sysroot
+fi
+
+if [ ! -d $OHOS_SDK_NATIVE_DIR ]; then
+ echo "Please first download Command Line Tools for HarmonyOS"
+ echo "See https://developer.huawei.com/consumer/cn/download/"
+ echo "or"
+ echo "https://hf-mirror.com/csukuangfj/harmonyos-commandline-tools/tree/main"
+ exit 1
+fi
+
+if [ ! -f $OHOS_SDK_NATIVE_DIR/llvm/bin/aarch64-unknown-linux-ohos-clang ]; then
+ echo "$OHOS_SDK_NATIVE_DIR/llvm/bin/aarch64-unknown-linux-ohos-clang does not exist"
+ echo "Please first download Command Line Tools for HarmonyOS"
+ echo "See https://developer.huawei.com/consumer/cn/download/"
+ echo "or"
+ echo "https://hf-mirror.com/csukuangfj/harmonyos-commandline-tools/tree/main"
+ exit 1
+fi
+
+export PATH=$OHOS_SDK_NATIVE_DIR/build-tools/cmake/bin:$PATH
+export PATH=$OHOS_SDK_NATIVE_DIR/llvm/bin:$PATH
+
+OHOS_TOOLCHAIN_FILE=$OHOS_SDK_NATIVE_DIR/build/cmake/ohos.toolchain.cmake
+
+if [ ! -f $OHOS_TOOLCHAIN_FILE ]; then
+ echo "$OHOS_TOOLCHAIN_FILE does not exist"
+ echo "Please first download Command Line Tools for HarmonyOS"
+ exit 1
+fi
+
+sleep 1
+onnxruntime_version=1.16.3
+onnxruntime_dir=onnxruntime-ohos-arm64-v8a-$onnxruntime_version
+
+if [ ! -f $onnxruntime_dir/lib/libonnxruntime.so ]; then
+ # wget -c https://github.com/csukuangfj/onnxruntime-libs/releases/download/v${onnxruntime_version}/$onnxruntime_dir.zip
+ wget -c https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/$onnxruntime_dir.zip
+ unzip $onnxruntime_dir.zip
+ rm $onnxruntime_dir.zip
+fi
+
+export SHERPA_ONNXRUNTIME_LIB_DIR=$dir/$onnxruntime_dir/lib
+export SHERPA_ONNXRUNTIME_INCLUDE_DIR=$dir/$onnxruntime_dir/include
+
+echo "SHERPA_ONNXRUNTIME_LIB_DIR: $SHERPA_ONNXRUNTIME_LIB_DIR"
+echo "SHERPA_ONNXRUNTIME_INCLUDE_DIR $SHERPA_ONNXRUNTIME_INCLUDE_DIR"
+
+if [ -z $SHERPA_ONNX_ENABLE_TTS ]; then
+ SHERPA_ONNX_ENABLE_TTS=ON
+fi
+
+if [ -z $SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION ]; then
+ SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION=ON
+fi
+
+if [ -z $SHERPA_ONNX_ENABLE_BINARY ]; then
+ SHERPA_ONNX_ENABLE_BINARY=OFF
+fi
+
+cmake \
+ -DOHOS_ARCH=arm64-v8a \
+ -DCMAKE_TOOLCHAIN_FILE=$OHOS_TOOLCHAIN_FILE \
+ -DSHERPA_ONNX_ENABLE_TTS=$SHERPA_ONNX_ENABLE_TTS \
+ -DSHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION=$SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION \
+ -DSHERPA_ONNX_ENABLE_BINARY=$SHERPA_ONNX_ENABLE_BINARY \
+ -DBUILD_PIPER_PHONMIZE_EXE=OFF \
+ -DBUILD_PIPER_PHONMIZE_TESTS=OFF \
+ -DBUILD_ESPEAK_NG_EXE=OFF \
+ -DBUILD_ESPEAK_NG_TESTS=OFF \
+ -DCMAKE_BUILD_TYPE=Release \
+ -DBUILD_SHARED_LIBS=ON \
+ -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
+ -DSHERPA_ONNX_ENABLE_TESTS=OFF \
+ -DSHERPA_ONNX_ENABLE_CHECK=OFF \
+ -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
+ -DSHERPA_ONNX_ENABLE_JNI=OFF \
+ -DSHERPA_ONNX_ENABLE_C_API=ON \
+ -DCMAKE_INSTALL_PREFIX=./install \
+ ..
+
+# make VERBOSE=1 -j4
+make -j2
+make install/strip
+cp -fv $onnxruntime_dir/lib/libonnxruntime.so install/lib
+
+rm -rf install/share
+rm -rf install/lib/pkgconfig
+
+d=../harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/libs/arm64-v8a
+if [ -d $d ]; then
+ cp -v install/lib/libsherpa-onnx-c-api.so $d/
+ cp -v install/lib/libonnxruntime.so $d/
+fi
diff --git a/build-ohos-armeabi-v7a.sh b/build-ohos-armeabi-v7a.sh
new file mode 100755
index 0000000000..e0a2ac8839
--- /dev/null
+++ b/build-ohos-armeabi-v7a.sh
@@ -0,0 +1,126 @@
+#!/usr/bin/env bash
+set -ex
+
+dir=$PWD/build-ohos-armeabi-v7a
+
+mkdir -p $dir
+cd $dir
+
+# Please first download the commandline tools from
+# https://developer.huawei.com/consumer/cn/download/
+#
+# Example filename on Linux: commandline-tools-linux-x64-5.0.5.200.zip
+# You can also download it from https://hf-mirror.com/csukuangfj/harmonyos-commandline-tools/tree/main
+
+# mkdir /star-fj/fangjun/software/huawei
+# cd /star-fj/fangjun/software/huawei
+# wget https://hf-mirror.com/csukuangfj/harmonyos-commandline-tools/resolve/main/commandline-tools-linux-x64-5.0.5.200.zip
+# unzip commandline-tools-linux-x64-5.0.5.200.zip
+# rm commandline-tools-linux-x64-5.0.5.200.zip
+if [ -z $OHOS_SDK_NATIVE_DIR ]; then
+ OHOS_SDK_NATIVE_DIR=/star-fj/fangjun/software/huawei/command-line-tools/sdk/default/openharmony/native/
+ export PATH=$OHOS_SDK_NATIVE_DIR/build-tools/cmake/bin:$PATH
+ # You can find the following content inside OHOS_SDK_NATIVE_DIR
+ # ls -lh /star-fj/fangjun/software/huawei/command-line-tools/sdk/default/openharmony/native/
+ # total 524K
+ # -rw-r--r-- 1 kuangfangjun root 501K Jan 1 2001 NOTICE.txt
+ # drwxr-xr-x 3 kuangfangjun root 0 Nov 6 22:36 build
+ # drwxr-xr-x 3 kuangfangjun root 0 Nov 6 22:36 build-tools
+ # -rw-r--r-- 1 kuangfangjun root 371 Jan 1 2001 compatible_config.json
+ # drwxr-xr-x 4 kuangfangjun root 0 Nov 6 22:36 docs
+ # drwxr-xr-x 10 kuangfangjun root 0 Nov 6 22:36 llvm
+ # -rw-r--r-- 1 kuangfangjun root 16K Jan 1 2001 nativeapi_syscap_config.json
+ # -rw-r--r-- 1 kuangfangjun root 5.9K Jan 1 2001 ndk_system_capability.json
+ # -rw-r--r-- 1 kuangfangjun root 167 Jan 1 2001 oh-uni-package.json
+ # drwxr-xr-x 3 kuangfangjun root 0 Nov 6 22:36 sysroot
+fi
+
+if [ ! -d $OHOS_SDK_NATIVE_DIR ]; then
+ echo "Please first download Command Line Tools for HarmonyOS"
+ echo "See https://developer.huawei.com/consumer/cn/download/"
+ echo "or"
+ echo "https://hf-mirror.com/csukuangfj/harmonyos-commandline-tools/tree/main"
+ exit 1
+fi
+
+if [ ! -f $OHOS_SDK_NATIVE_DIR/llvm/bin/armv7-unknown-linux-ohos-clang ]; then
+ echo "$OHOS_SDK_NATIVE_DIR/llvm/bin/armv7-unknown-linux-ohos-clang does not exist"
+ echo "Please first download Command Line Tools for HarmonyOS"
+ echo "See https://developer.huawei.com/consumer/cn/download/"
+ echo "or"
+ echo "https://hf-mirror.com/csukuangfj/harmonyos-commandline-tools/tree/main"
+ exit 1
+fi
+
+export PATH=$OHOS_SDK_NATIVE_DIR/build-tools/cmake/bin:$PATH
+export PATH=$OHOS_SDK_NATIVE_DIR/llvm/bin:$PATH
+
+OHOS_TOOLCHAIN_FILE=$OHOS_SDK_NATIVE_DIR/build/cmake/ohos.toolchain.cmake
+
+if [ ! -f $OHOS_TOOLCHAIN_FILE ]; then
+ echo "$OHOS_TOOLCHAIN_FILE does not exist"
+ echo "Please first download Command Line Tools for HarmonyOS"
+ exit 1
+fi
+
+sleep 1
+onnxruntime_version=1.16.3
+onnxruntime_dir=onnxruntime-ohos-armeabi-v7a-$onnxruntime_version
+
+if [ ! -f $onnxruntime_dir/lib/libonnxruntime.so ]; then
+ # wget -c https://github.com/csukuangfj/onnxruntime-libs/releases/download/v${onnxruntime_version}/$onnxruntime_dir.zip
+ wget -c https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/$onnxruntime_dir.zip
+ unzip $onnxruntime_dir.zip
+ rm $onnxruntime_dir.zip
+fi
+
+export SHERPA_ONNXRUNTIME_LIB_DIR=$dir/$onnxruntime_dir/lib
+export SHERPA_ONNXRUNTIME_INCLUDE_DIR=$dir/$onnxruntime_dir/include
+
+echo "SHERPA_ONNXRUNTIME_LIB_DIR: $SHERPA_ONNXRUNTIME_LIB_DIR"
+echo "SHERPA_ONNXRUNTIME_INCLUDE_DIR $SHERPA_ONNXRUNTIME_INCLUDE_DIR"
+
+if [ -z $SHERPA_ONNX_ENABLE_TTS ]; then
+ SHERPA_ONNX_ENABLE_TTS=ON
+fi
+
+if [ -z $SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION ]; then
+ SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION=ON
+fi
+
+if [ -z $SHERPA_ONNX_ENABLE_BINARY ]; then
+ SHERPA_ONNX_ENABLE_BINARY=OFF
+fi
+
+# See https://github.com/llvm/llvm-project/issues/57732
+# we need to use -mfloat-abi=hard
+cmake \
+ -DOHOS_ARCH=armeabi-v7a \
+ -DCMAKE_CXX_FLAGS="-mfloat-abi=hard" \
+ -DCMAKE_C_FLAGS="-mfloat-abi=hard" \
+ -DCMAKE_TOOLCHAIN_FILE=$OHOS_TOOLCHAIN_FILE \
+ -DSHERPA_ONNX_ENABLE_TTS=$SHERPA_ONNX_ENABLE_TTS \
+ -DSHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION=$SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION \
+ -DSHERPA_ONNX_ENABLE_BINARY=$SHERPA_ONNX_ENABLE_BINARY \
+ -DBUILD_PIPER_PHONMIZE_EXE=OFF \
+ -DBUILD_PIPER_PHONMIZE_TESTS=OFF \
+ -DBUILD_ESPEAK_NG_EXE=OFF \
+ -DBUILD_ESPEAK_NG_TESTS=OFF \
+ -DCMAKE_BUILD_TYPE=Release \
+ -DBUILD_SHARED_LIBS=ON \
+ -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
+ -DSHERPA_ONNX_ENABLE_TESTS=OFF \
+ -DSHERPA_ONNX_ENABLE_CHECK=OFF \
+ -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
+ -DSHERPA_ONNX_ENABLE_JNI=OFF \
+ -DSHERPA_ONNX_ENABLE_C_API=ON \
+ -DCMAKE_INSTALL_PREFIX=./install \
+ ..
+
+# make VERBOSE=1 -j4
+make -j2
+make install/strip
+cp -fv $onnxruntime_dir/lib/libonnxruntime.so install/lib
+
+rm -rf install/share
+rm -rf install/lib/pkgconfig
diff --git a/build-ohos-x86-64.sh b/build-ohos-x86-64.sh
new file mode 100755
index 0000000000..9584edafc3
--- /dev/null
+++ b/build-ohos-x86-64.sh
@@ -0,0 +1,142 @@
+#!/usr/bin/env bash
+set -ex
+
+dir=$PWD/build-ohos-x86-64
+
+mkdir -p $dir
+cd $dir
+
+# Please first download the commandline tools from
+# https://developer.huawei.com/consumer/cn/download/
+#
+# Example filename on Linux: commandline-tools-linux-x64-5.0.5.200.zip
+# You can also download it from https://hf-mirror.com/csukuangfj/harmonyos-commandline-tools/tree/main
+
+# mkdir /star-fj/fangjun/software/huawei
+# cd /star-fj/fangjun/software/huawei
+# wget https://hf-mirror.com/csukuangfj/harmonyos-commandline-tools/resolve/main/commandline-tools-linux-x64-5.0.5.200.zip
+# unzip commandline-tools-linux-x64-5.0.5.200.zip
+# rm commandline-tools-linux-x64-5.0.5.200.zip
+if [ -z $OHOS_SDK_NATIVE_DIR ]; then
+ OHOS_SDK_NATIVE_DIR=/star-fj/fangjun/software/huawei/command-line-tools/sdk/default/openharmony/native/
+ # You can find the following content inside OHOS_SDK_NATIVE_DIR
+ # ls -lh /star-fj/fangjun/software/huawei/command-line-tools/sdk/default/openharmony/native/
+ # total 524K
+ # -rw-r--r-- 1 kuangfangjun root 501K Jan 1 2001 NOTICE.txt
+ # drwxr-xr-x 3 kuangfangjun root 0 Nov 6 22:36 build
+ # drwxr-xr-x 3 kuangfangjun root 0 Nov 6 22:36 build-tools
+ # -rw-r--r-- 1 kuangfangjun root 371 Jan 1 2001 compatible_config.json
+ # drwxr-xr-x 4 kuangfangjun root 0 Nov 6 22:36 docs
+ # drwxr-xr-x 10 kuangfangjun root 0 Nov 6 22:36 llvm
+ # -rw-r--r-- 1 kuangfangjun root 16K Jan 1 2001 nativeapi_syscap_config.json
+ # -rw-r--r-- 1 kuangfangjun root 5.9K Jan 1 2001 ndk_system_capability.json
+ # -rw-r--r-- 1 kuangfangjun root 167 Jan 1 2001 oh-uni-package.json
+ # drwxr-xr-x 3 kuangfangjun root 0 Nov 6 22:36 sysroot
+fi
+
+if [ ! -d $OHOS_SDK_NATIVE_DIR ]; then
+ OHOS_SDK_NATIVE_DIR=/Users/fangjun/software/command-line-tools/sdk/default/openharmony/native
+ # (py38) fangjuns-MacBook-Pro:software fangjun$ ls -lh command-line-tools/sdk/default/openharmony/native/
+ # total 752
+ # -rw-r--r-- 1 fangjun staff 341K Jan 1 2001 NOTICE.txt
+ # drwxr-xr-x 3 fangjun staff 96B Nov 6 21:17 build
+ # drwxr-xr-x 3 fangjun staff 96B Nov 6 21:18 build-tools
+ # -rw-r--r-- 1 fangjun staff 371B Jan 1 2001 compatible_config.json
+ # drwxr-xr-x 10 fangjun staff 320B Nov 6 21:18 llvm
+ # -rw-r--r-- 1 fangjun staff 16K Jan 1 2001 nativeapi_syscap_config.json
+ # -rw-r--r-- 1 fangjun staff 5.9K Jan 1 2001 ndk_system_capability.json
+ # -rw-r--r-- 1 fangjun staff 167B Jan 1 2001 oh-uni-package.json
+ # drwxr-xr-x 3 fangjun staff 96B Nov 6 21:17 sysroot
+fi
+
+if [ ! -d $OHOS_SDK_NATIVE_DIR ]; then
+ echo "Please first download Command Line Tools for HarmonyOS"
+ echo "See https://developer.huawei.com/consumer/cn/download/"
+ echo "or"
+ echo "https://hf-mirror.com/csukuangfj/harmonyos-commandline-tools/tree/main"
+ exit 1
+fi
+
+if [ ! -f $OHOS_SDK_NATIVE_DIR/llvm/bin/x86_64-unknown-linux-ohos-clang ]; then
+ echo "$OHOS_SDK_NATIVE_DIR/llvm/bin/x86_64-unknown-linux-ohos-clang does not exist"
+ echo "Please first download Command Line Tools for HarmonyOS"
+ echo "See https://developer.huawei.com/consumer/cn/download/"
+ echo "or"
+ echo "https://hf-mirror.com/csukuangfj/harmonyos-commandline-tools/tree/main"
+ exit 1
+fi
+
+export PATH=$OHOS_SDK_NATIVE_DIR/build-tools/cmake/bin:$PATH
+export PATH=$OHOS_SDK_NATIVE_DIR/llvm/bin:$PATH
+
+OHOS_TOOLCHAIN_FILE=$OHOS_SDK_NATIVE_DIR/build/cmake/ohos.toolchain.cmake
+
+if [ ! -f $OHOS_TOOLCHAIN_FILE ]; then
+ echo "$OHOS_TOOLCHAIN_FILE does not exist"
+ echo "Please first download Command Line Tools for HarmonyOS"
+ exit 1
+fi
+
+sleep 1
+onnxruntime_version=1.16.3
+onnxruntime_dir=onnxruntime-ohos-x86_64-$onnxruntime_version
+
+if [ ! -f $onnxruntime_dir/lib/libonnxruntime.so ]; then
+ # wget -c https://github.com/csukuangfj/onnxruntime-libs/releases/download/v${onnxruntime_version}/$onnxruntime_dir.zip
+ wget -c https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/$onnxruntime_dir.zip
+ unzip $onnxruntime_dir.zip
+ rm $onnxruntime_dir.zip
+fi
+
+export SHERPA_ONNXRUNTIME_LIB_DIR=$dir/$onnxruntime_dir/lib
+export SHERPA_ONNXRUNTIME_INCLUDE_DIR=$dir/$onnxruntime_dir/include
+
+echo "SHERPA_ONNXRUNTIME_LIB_DIR: $SHERPA_ONNXRUNTIME_LIB_DIR"
+echo "SHERPA_ONNXRUNTIME_INCLUDE_DIR $SHERPA_ONNXRUNTIME_INCLUDE_DIR"
+
+if [ -z $SHERPA_ONNX_ENABLE_TTS ]; then
+ SHERPA_ONNX_ENABLE_TTS=ON
+fi
+
+if [ -z $SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION ]; then
+ SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION=ON
+fi
+
+if [ -z $SHERPA_ONNX_ENABLE_BINARY ]; then
+ SHERPA_ONNX_ENABLE_BINARY=OFF
+fi
+
+cmake \
+ -DOHOS_ARCH=x86_64 \
+ -DCMAKE_TOOLCHAIN_FILE=$OHOS_TOOLCHAIN_FILE \
+ -DSHERPA_ONNX_ENABLE_TTS=$SHERPA_ONNX_ENABLE_TTS \
+ -DSHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION=$SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION \
+ -DSHERPA_ONNX_ENABLE_BINARY=$SHERPA_ONNX_ENABLE_BINARY \
+ -DBUILD_PIPER_PHONMIZE_EXE=OFF \
+ -DBUILD_PIPER_PHONMIZE_TESTS=OFF \
+ -DBUILD_ESPEAK_NG_EXE=OFF \
+ -DBUILD_ESPEAK_NG_TESTS=OFF \
+ -DCMAKE_BUILD_TYPE=Release \
+ -DBUILD_SHARED_LIBS=ON \
+ -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
+ -DSHERPA_ONNX_ENABLE_TESTS=OFF \
+ -DSHERPA_ONNX_ENABLE_CHECK=OFF \
+ -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
+ -DSHERPA_ONNX_ENABLE_JNI=OFF \
+ -DSHERPA_ONNX_ENABLE_C_API=ON \
+ -DCMAKE_INSTALL_PREFIX=./install \
+ ..
+
+# make VERBOSE=1 -j4
+make -j2
+make install/strip
+cp -fv $onnxruntime_dir/lib/libonnxruntime.so install/lib
+
+rm -rf install/share
+rm -rf install/lib/pkgconfig
+
+d=../harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/libs/x86_64
+if [ -d $d ]; then
+ cp -v install/lib/libsherpa-onnx-c-api.so $d/
+ cp -v install/lib/libonnxruntime.so $d/
+fi
diff --git a/build-swift-macos.sh b/build-swift-macos.sh
index f41dd7d5cb..359ea93714 100755
--- a/build-swift-macos.sh
+++ b/build-swift-macos.sh
@@ -7,6 +7,9 @@ mkdir -p $dir
cd $dir
cmake \
+ -DSHERPA_ONNX_ENABLE_BINARY=OFF \
+ -DSHERPA_ONNX_BUILD_C_API_EXAMPLES=OFF \
+ -DCMAKE_OSX_ARCHITECTURES="arm64;x86_64" \
-DCMAKE_INSTALL_PREFIX=./install \
-DCMAKE_BUILD_TYPE=Release \
-DBUILD_SHARED_LIBS=OFF \
@@ -21,6 +24,7 @@ cmake \
make VERBOSE=1 -j4
make install
+rm -fv ./install/include/cargs.h
libtool -static -o ./install/lib/libsherpa-onnx.a \
./install/lib/libsherpa-onnx-c-api.a \
@@ -34,3 +38,8 @@ libtool -static -o ./install/lib/libsherpa-onnx.a \
./install/lib/libpiper_phonemize.a \
./install/lib/libespeak-ng.a \
./install/lib/libssentencepiece_core.a
+
+xcodebuild -create-xcframework \
+ -library install/lib/libsherpa-onnx.a \
+ -headers install/include \
+ -output sherpa-onnx.xcframework
diff --git a/build-wasm-simd-asr.sh b/build-wasm-simd-asr.sh
index eda18f74d7..c195393325 100755
--- a/build-wasm-simd-asr.sh
+++ b/build-wasm-simd-asr.sh
@@ -14,12 +14,13 @@ if [ x"$EMSCRIPTEN" == x"" ]; then
echo "git clone https://github.com/emscripten-core/emsdk.git"
echo "cd emsdk"
echo "git pull"
- echo "./emsdk install latest"
- echo "./emsdk activate latest"
+ echo "./emsdk install 3.1.53"
+ echo "./emsdk activate 3.1.53"
echo "source ./emsdk_env.sh"
exit 1
else
EMSCRIPTEN=$(dirname $(realpath $(which emcc)))
+ emcc --version
fi
fi
diff --git a/build-wasm-simd-kws.sh b/build-wasm-simd-kws.sh
index 6fdf8218f3..408fd75a8b 100755
--- a/build-wasm-simd-kws.sh
+++ b/build-wasm-simd-kws.sh
@@ -9,12 +9,13 @@ if [ x"$EMSCRIPTEN" == x"" ]; then
echo "git clone https://github.com/emscripten-core/emsdk.git"
echo "cd emsdk"
echo "git pull"
- echo "./emsdk install latest"
- echo "./emsdk activate latest"
+ echo "./emsdk install 3.1.53"
+ echo "./emsdk activate 3.1.53"
echo "source ./emsdk_env.sh"
exit 1
else
EMSCRIPTEN=$(dirname $(realpath $(which emcc)))
+ emcc --version
fi
fi
diff --git a/build-wasm-simd-nodejs.sh b/build-wasm-simd-nodejs.sh
index 3ad88d5d4c..43023cbedb 100755
--- a/build-wasm-simd-nodejs.sh
+++ b/build-wasm-simd-nodejs.sh
@@ -16,12 +16,13 @@ if [ x"$EMSCRIPTEN" == x"" ]; then
echo "git clone https://github.com/emscripten-core/emsdk.git"
echo "cd emsdk"
echo "git pull"
- echo "./emsdk install latest"
- echo "./emsdk activate latest"
+ echo "./emsdk install 3.1.53"
+ echo "./emsdk activate 3.1.53"
echo "source ./emsdk_env.sh"
exit 1
else
EMSCRIPTEN=$(dirname $(realpath $(which emcc)))
+ emcc --version
fi
fi
diff --git a/build-wasm-simd-speaker-diarization.sh b/build-wasm-simd-speaker-diarization.sh
new file mode 100755
index 0000000000..888abb566e
--- /dev/null
+++ b/build-wasm-simd-speaker-diarization.sh
@@ -0,0 +1,62 @@
+#!/usr/bin/env bash
+# Copyright (c) 2024 Xiaomi Corporation
+#
+# This script builds sherpa-onnx for WebAssembly (speaker diarization)
+
+set -ex
+
+if [ x"$EMSCRIPTEN" == x"" ]; then
+ if ! command -v emcc &> /dev/null; then
+ echo "Please install emscripten first"
+ echo ""
+ echo "You can use the following commands to install it:"
+ echo ""
+ echo "git clone https://github.com/emscripten-core/emsdk.git"
+ echo "cd emsdk"
+ echo "git pull"
+ echo "./emsdk install 3.1.53"
+ echo "./emsdk activate 3.1.53"
+ echo "source ./emsdk_env.sh"
+ exit 1
+ else
+ EMSCRIPTEN=$(dirname $(realpath $(which emcc)))
+ emcc --version
+ fi
+fi
+
+export EMSCRIPTEN=$EMSCRIPTEN
+echo "EMSCRIPTEN: $EMSCRIPTEN"
+if [ ! -f $EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake ]; then
+ echo "Cannot find $EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake"
+ echo "Please make sure you have installed emsdk correctly"
+ exit 1
+fi
+
+mkdir -p build-wasm-simd-speaker-diarization
+pushd build-wasm-simd-speaker-diarization
+
+export SHERPA_ONNX_IS_USING_BUILD_WASM_SH=ON
+
+cmake \
+ -DCMAKE_INSTALL_PREFIX=./install \
+ -DCMAKE_BUILD_TYPE=Release \
+ -DCMAKE_TOOLCHAIN_FILE=$EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake \
+ \
+ -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
+ -DSHERPA_ONNX_ENABLE_TESTS=OFF \
+ -DSHERPA_ONNX_ENABLE_CHECK=OFF \
+ -DBUILD_SHARED_LIBS=OFF \
+ -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
+ -DSHERPA_ONNX_ENABLE_JNI=OFF \
+ -DSHERPA_ONNX_ENABLE_C_API=ON \
+ -DSHERPA_ONNX_ENABLE_WEBSOCKET=OFF \
+ -DSHERPA_ONNX_ENABLE_GPU=OFF \
+ -DSHERPA_ONNX_ENABLE_WASM=ON \
+ -DSHERPA_ONNX_ENABLE_WASM_SPEAKER_DIARIZATION=ON \
+ -DSHERPA_ONNX_ENABLE_BINARY=OFF \
+ -DSHERPA_ONNX_LINK_LIBSTDCPP_STATICALLY=OFF \
+ ..
+make -j2
+make install
+
+ls -lh install/bin/wasm/speaker-diarization
diff --git a/build-wasm-simd-tts.sh b/build-wasm-simd-tts.sh
index 6835e4c433..c707bef6e3 100755
--- a/build-wasm-simd-tts.sh
+++ b/build-wasm-simd-tts.sh
@@ -14,12 +14,13 @@ if [ x"$EMSCRIPTEN" == x"" ]; then
echo "git clone https://github.com/emscripten-core/emsdk.git"
echo "cd emsdk"
echo "git pull"
- echo "./emsdk install latest"
- echo "./emsdk activate latest"
+ echo "./emsdk install 3.1.53"
+ echo "./emsdk activate 3.1.53"
echo "source ./emsdk_env.sh"
exit 1
else
EMSCRIPTEN=$(dirname $(realpath $(which emcc)))
+ emcc --version
fi
fi
diff --git a/build-wasm-simd-vad-asr.sh b/build-wasm-simd-vad-asr.sh
index 5d15cf6519..6219315500 100755
--- a/build-wasm-simd-vad-asr.sh
+++ b/build-wasm-simd-vad-asr.sh
@@ -15,12 +15,13 @@ if [ x"$EMSCRIPTEN" == x"" ]; then
echo "git clone https://github.com/emscripten-core/emsdk.git"
echo "cd emsdk"
echo "git pull"
- echo "./emsdk install latest"
- echo "./emsdk activate latest"
+ echo "./emsdk install 3.1.53"
+ echo "./emsdk activate 3.1.53"
echo "source ./emsdk_env.sh"
exit 1
else
EMSCRIPTEN=$(dirname $(realpath $(which emcc)))
+ emcc --version
fi
fi
diff --git a/build-wasm-simd-vad.sh b/build-wasm-simd-vad.sh
index c74f57d373..2ab11249db 100755
--- a/build-wasm-simd-vad.sh
+++ b/build-wasm-simd-vad.sh
@@ -14,12 +14,13 @@ if [ x"$EMSCRIPTEN" == x"" ]; then
echo "git clone https://github.com/emscripten-core/emsdk.git"
echo "cd emsdk"
echo "git pull"
- echo "./emsdk install latest"
- echo "./emsdk activate latest"
+ echo "./emsdk install 3.1.53"
+ echo "./emsdk activate 3.1.53"
echo "source ./emsdk_env.sh"
exit 1
else
EMSCRIPTEN=$(dirname $(realpath $(which emcc)))
+ emcc --version
fi
fi
diff --git a/c-api-examples/CMakeLists.txt b/c-api-examples/CMakeLists.txt
index 0bf5264503..44c5814a53 100644
--- a/c-api-examples/CMakeLists.txt
+++ b/c-api-examples/CMakeLists.txt
@@ -4,9 +4,29 @@ include_directories(${CMAKE_SOURCE_DIR})
add_executable(decode-file-c-api decode-file-c-api.c)
target_link_libraries(decode-file-c-api sherpa-onnx-c-api cargs)
+add_executable(kws-c-api kws-c-api.c)
+target_link_libraries(kws-c-api sherpa-onnx-c-api)
+
if(SHERPA_ONNX_ENABLE_TTS)
add_executable(offline-tts-c-api offline-tts-c-api.c)
target_link_libraries(offline-tts-c-api sherpa-onnx-c-api cargs)
+
+ add_executable(matcha-tts-zh-c-api matcha-tts-zh-c-api.c)
+ target_link_libraries(matcha-tts-zh-c-api sherpa-onnx-c-api)
+
+ add_executable(matcha-tts-en-c-api matcha-tts-en-c-api.c)
+ target_link_libraries(matcha-tts-en-c-api sherpa-onnx-c-api)
+
+ add_executable(kokoro-tts-en-c-api kokoro-tts-en-c-api.c)
+ target_link_libraries(kokoro-tts-en-c-api sherpa-onnx-c-api)
+
+ add_executable(kokoro-tts-zh-en-c-api kokoro-tts-zh-en-c-api.c)
+ target_link_libraries(kokoro-tts-zh-en-c-api sherpa-onnx-c-api)
+endif()
+
+if(SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION)
+ add_executable(offline-speaker-diarization-c-api offline-speaker-diarization-c-api.c)
+ target_link_libraries(offline-speaker-diarization-c-api sherpa-onnx-c-api)
endif()
add_executable(spoken-language-identification-c-api spoken-language-identification-c-api.c)
@@ -30,6 +50,9 @@ target_link_libraries(whisper-c-api sherpa-onnx-c-api)
add_executable(sense-voice-c-api sense-voice-c-api.c)
target_link_libraries(sense-voice-c-api sherpa-onnx-c-api)
+add_executable(moonshine-c-api moonshine-c-api.c)
+target_link_libraries(moonshine-c-api sherpa-onnx-c-api)
+
add_executable(zipformer-c-api zipformer-c-api.c)
target_link_libraries(zipformer-c-api sherpa-onnx-c-api)
@@ -48,6 +71,12 @@ target_link_libraries(telespeech-c-api sherpa-onnx-c-api)
add_executable(vad-sense-voice-c-api vad-sense-voice-c-api.c)
target_link_libraries(vad-sense-voice-c-api sherpa-onnx-c-api)
+add_executable(vad-whisper-c-api vad-whisper-c-api.c)
+target_link_libraries(vad-whisper-c-api sherpa-onnx-c-api)
+
+add_executable(vad-moonshine-c-api vad-moonshine-c-api.c)
+target_link_libraries(vad-moonshine-c-api sherpa-onnx-c-api)
+
add_executable(streaming-zipformer-buffered-tokens-hotwords-c-api
streaming-zipformer-buffered-tokens-hotwords-c-api.c)
target_link_libraries(streaming-zipformer-buffered-tokens-hotwords-c-api sherpa-onnx-c-api)
diff --git a/c-api-examples/keywords-spotter-buffered-tokens-keywords-c-api.c b/c-api-examples/keywords-spotter-buffered-tokens-keywords-c-api.c
index ec8be3b075..45a0bb87a0 100644
--- a/c-api-examples/keywords-spotter-buffered-tokens-keywords-c-api.c
+++ b/c-api-examples/keywords-spotter-buffered-tokens-keywords-c-api.c
@@ -36,7 +36,7 @@ static size_t ReadFile(const char *filename, const char **buffer_out) {
fprintf(stderr, "Memory error\n");
return -1;
}
- size_t read_bytes = fread(*buffer_out, 1, size, file);
+ size_t read_bytes = fread((void *)*buffer_out, 1, size, file);
if (read_bytes != size) {
printf("Errors occured in reading the file %s\n", filename);
free((void *)*buffer_out);
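The only change here is the `(void *)` cast on the fread destination: `buffer_out` is declared as `const char **` for callers, so the freshly malloc'ed buffer has to be cast back to a writable pointer before `fread` can fill it. A compressed sketch of the same pattern (names are illustrative, not from the patch):

```c
// Illustrative sketch of the ReadFile pattern: expose the buffer as const to
// callers, but cast away that const when fread() needs to write into it.
#include <stdio.h>
#include <stdlib.h>

static size_t ReadAll(const char *filename, const char **buffer_out) {
  FILE *file = fopen(filename, "rb");
  if (!file) return 0;

  fseek(file, 0, SEEK_END);
  long size = ftell(file);
  rewind(file);

  *buffer_out = malloc((size_t)size);
  if (!*buffer_out) {
    fclose(file);
    return 0;
  }

  // fread() takes void *, so drop the const that exists only for callers.
  size_t read_bytes = fread((void *)*buffer_out, 1, (size_t)size, file);
  fclose(file);
  return read_bytes;
}
```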
diff --git a/c-api-examples/kokoro-tts-en-c-api.c b/c-api-examples/kokoro-tts-en-c-api.c
new file mode 100644
index 0000000000..44e6c28d89
--- /dev/null
+++ b/c-api-examples/kokoro-tts-en-c-api.c
@@ -0,0 +1,84 @@
+// c-api-examples/kokoro-tts-en-c-api.c
+//
+// Copyright (c) 2025 Xiaomi Corporation
+
+// This file shows how to use sherpa-onnx C API
+// for English TTS with Kokoro.
+//
+// clang-format off
+/*
+Usage
+
+
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2
+tar xf kokoro-en-v0_19.tar.bz2
+rm kokoro-en-v0_19.tar.bz2
+
+./kokoro-tts-en-c-api
+
+ */
+// clang-format on
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "sherpa-onnx/c-api/c-api.h"
+
+static int32_t ProgressCallback(const float *samples, int32_t num_samples,
+ float progress) {
+ fprintf(stderr, "Progress: %.3f%%\n", progress * 100);
+ // return 1 to continue generating
+ // return 0 to stop generating
+ return 1;
+}
+
+int32_t main(int32_t argc, char *argv[]) {
+ SherpaOnnxOfflineTtsConfig config;
+ memset(&config, 0, sizeof(config));
+ config.model.kokoro.model = "./kokoro-en-v0_19/model.onnx";
+ config.model.kokoro.voices = "./kokoro-en-v0_19/voices.bin";
+ config.model.kokoro.tokens = "./kokoro-en-v0_19/tokens.txt";
+ config.model.kokoro.data_dir = "./kokoro-en-v0_19/espeak-ng-data";
+
+ config.model.num_threads = 2;
+
+ // If you don't want to see debug messages, please set it to 0
+ config.model.debug = 1;
+
+ const char *filename = "./generated-kokoro-en.wav";
+ const char *text =
+ "Today as always, men fall into two groups: slaves and free men. Whoever "
+ "does not have two-thirds of his day for himself, is a slave, whatever "
+ "he may be: a statesman, a businessman, an official, or a scholar. "
+ "Friends fell out often because life was changing so fast. The easiest "
+ "thing in the world was to lose touch with someone.";
+
+ const SherpaOnnxOfflineTts *tts = SherpaOnnxCreateOfflineTts(&config);
+ // mapping of sid to voice name
+ // 0->af, 1->af_bella, 2->af_nicole, 3->af_sarah, 4->af_sky, 5->am_adam
+ // 6->am_michael, 7->bf_emma, 8->bf_isabella, 9->bm_george, 10->bm_lewis
+ int32_t sid = 0;
+  float speed = 1.0;  // larger value -> faster speech
+
+#if 0
+ // If you don't want to use a callback, then please enable this branch
+ const SherpaOnnxGeneratedAudio *audio =
+ SherpaOnnxOfflineTtsGenerate(tts, text, sid, speed);
+#else
+ const SherpaOnnxGeneratedAudio *audio =
+ SherpaOnnxOfflineTtsGenerateWithProgressCallback(tts, text, sid, speed,
+ ProgressCallback);
+#endif
+
+ SherpaOnnxWriteWave(audio->samples, audio->n, audio->sample_rate, filename);
+
+ SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio);
+ SherpaOnnxDestroyOfflineTts(tts);
+
+ fprintf(stderr, "Input text is: %s\n", text);
+ fprintf(stderr, "Speaker ID is is: %d\n", sid);
+ fprintf(stderr, "Saved to: %s\n", filename);
+
+ return 0;
+}
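`ProgressCallback` above always returns 1. As its comments note, returning 0 stops generation; a hypothetical variation (not part of this patch) that caps the output length could look like the sketch below. The 24 kHz rate is an assumption for the Kokoro model; check `audio->sample_rate` for the real value.

```c
// Hypothetical callback: stop after roughly ten seconds of audio.
// Assumes ~24 kHz output; returning 0 asks the generator to stop early.
#include <stdint.h>
#include <stdio.h>

static int32_t g_generated_samples = 0;

static int32_t StopAfterTenSeconds(const float *samples, int32_t num_samples,
                                   float progress) {
  (void)samples;
  g_generated_samples += num_samples;
  fprintf(stderr, "Progress: %.3f%%, samples so far: %d\n", progress * 100,
          g_generated_samples);
  return g_generated_samples < 10 * 24000 ? 1 : 0;  // 0 => stop generating
}
```

Pass it in place of `ProgressCallback` when calling `SherpaOnnxOfflineTtsGenerateWithProgressCallback`.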
diff --git a/c-api-examples/kokoro-tts-zh-en-c-api.c b/c-api-examples/kokoro-tts-zh-en-c-api.c
new file mode 100644
index 0000000000..4d998fb71d
--- /dev/null
+++ b/c-api-examples/kokoro-tts-zh-en-c-api.c
@@ -0,0 +1,82 @@
+// c-api-examples/kokoro-tts-zh-en-c-api.c
+//
+// Copyright (c) 2025 Xiaomi Corporation
+
+// This file shows how to use sherpa-onnx C API
+// for English + Chinese TTS with Kokoro.
+//
+// clang-format off
+/*
+Usage
+
+
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2
+tar xf kokoro-multi-lang-v1_0.tar.bz2
+rm kokoro-multi-lang-v1_0.tar.bz2
+
+./kokoro-tts-zh-en-c-api
+
+ */
+// clang-format on
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "sherpa-onnx/c-api/c-api.h"
+
+static int32_t ProgressCallback(const float *samples, int32_t num_samples,
+ float progress) {
+ fprintf(stderr, "Progress: %.3f%%\n", progress * 100);
+ // return 1 to continue generating
+ // return 0 to stop generating
+ return 1;
+}
+
+int32_t main(int32_t argc, char *argv[]) {
+ SherpaOnnxOfflineTtsConfig config;
+ memset(&config, 0, sizeof(config));
+ config.model.kokoro.model = "./kokoro-multi-lang-v1_0/model.onnx";
+ config.model.kokoro.voices = "./kokoro-multi-lang-v1_0/voices.bin";
+ config.model.kokoro.tokens = "./kokoro-multi-lang-v1_0/tokens.txt";
+ config.model.kokoro.data_dir = "./kokoro-multi-lang-v1_0/espeak-ng-data";
+ config.model.kokoro.dict_dir = "./kokoro-multi-lang-v1_0/dict";
+ config.model.kokoro.lexicon =
+ "./kokoro-multi-lang-v1_0/lexicon-us-en.txt,./kokoro-multi-lang-v1_0/"
+ "lexicon-zh.txt";
+
+ config.model.num_threads = 2;
+
+ // If you don't want to see debug messages, please set it to 0
+ config.model.debug = 1;
+
+ const char *filename = "./generated-kokoro-zh-en.wav";
+ const char *text =
+ "中英文语音合成测试。This is generated by next generation Kaldi using "
+ "Kokoro without Misaki. 你觉得中英文说的如何呢?";
+
+ const SherpaOnnxOfflineTts *tts = SherpaOnnxCreateOfflineTts(&config);
+ int32_t sid = 0; // there are 53 speakers
+  float speed = 1.0;  // larger value -> faster speech
+
+#if 0
+ // If you don't want to use a callback, then please enable this branch
+ const SherpaOnnxGeneratedAudio *audio =
+ SherpaOnnxOfflineTtsGenerate(tts, text, sid, speed);
+#else
+ const SherpaOnnxGeneratedAudio *audio =
+ SherpaOnnxOfflineTtsGenerateWithProgressCallback(tts, text, sid, speed,
+ ProgressCallback);
+#endif
+
+ SherpaOnnxWriteWave(audio->samples, audio->n, audio->sample_rate, filename);
+
+ SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio);
+ SherpaOnnxDestroyOfflineTts(tts);
+
+ fprintf(stderr, "Input text is: %s\n", text);
+ fprintf(stderr, "Speaker ID is is: %d\n", sid);
+ fprintf(stderr, "Saved to: %s\n", filename);
+
+ return 0;
+}
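Per the comment above, kokoro-multi-lang-v1_0 has 53 speakers. To audition them, a small sketch (not part of this patch) can reuse the `tts` handle and render the same text once per speaker id, using only the calls already shown in this example:

```c
// Sketch: write one wav per speaker id, reusing a tts handle created as above.
#include <stdio.h>

#include "sherpa-onnx/c-api/c-api.h"

static void GenerateAllSpeakers(const SherpaOnnxOfflineTts *tts,
                                const char *text, int32_t num_speakers) {
  char filename[64];
  for (int32_t sid = 0; sid < num_speakers; ++sid) {
    const SherpaOnnxGeneratedAudio *audio =
        SherpaOnnxOfflineTtsGenerate(tts, text, sid, 1.0);

    snprintf(filename, sizeof(filename), "./generated-kokoro-sid-%d.wav", sid);
    SherpaOnnxWriteWave(audio->samples, audio->n, audio->sample_rate, filename);

    SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio);
  }
}

// Usage: GenerateAllSpeakers(tts, text, 53);
```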
diff --git a/c-api-examples/kws-c-api.c b/c-api-examples/kws-c-api.c
new file mode 100644
index 0000000000..8909809f43
--- /dev/null
+++ b/c-api-examples/kws-c-api.c
@@ -0,0 +1,152 @@
+// c-api-examples/kws-c-api.c
+//
+// Copyright (c) 2025 Xiaomi Corporation
+//
+// This file demonstrates how to use the keyword spotter with sherpa-onnx's C API.
+// clang-format off
+//
+// Usage
+//
+// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile.tar.bz2
+// tar xvf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile.tar.bz2
+// rm sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile.tar.bz2
+//
+// ./kws-c-api
+//
+// clang-format on
+#include <stdio.h>
+#include <stdlib.h>  // exit
+#include <string.h>  // memset
+
+#include "sherpa-onnx/c-api/c-api.h"
+
+int32_t main() {
+ SherpaOnnxKeywordSpotterConfig config;
+
+ memset(&config, 0, sizeof(config));
+ config.model_config.transducer.encoder =
+ "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile/"
+ "encoder-epoch-12-avg-2-chunk-16-left-64.int8.onnx";
+
+ config.model_config.transducer.decoder =
+ "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile/"
+ "decoder-epoch-12-avg-2-chunk-16-left-64.onnx";
+
+ config.model_config.transducer.joiner =
+ "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile/"
+ "joiner-epoch-12-avg-2-chunk-16-left-64.int8.onnx";
+
+ config.model_config.tokens =
+ "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile/"
+ "tokens.txt";
+
+ config.model_config.provider = "cpu";
+ config.model_config.num_threads = 1;
+ config.model_config.debug = 1;
+
+ config.keywords_file =
+ "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile/"
+ "test_wavs/test_keywords.txt";
+
+ const SherpaOnnxKeywordSpotter *kws = SherpaOnnxCreateKeywordSpotter(&config);
+ if (!kws) {
+ fprintf(stderr, "Please check your config");
+ exit(-1);
+ }
+
+ fprintf(stderr,
+ "--Test pre-defined keywords from test_wavs/test_keywords.txt--\n");
+
+ const char *wav_filename =
+ "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile/"
+ "test_wavs/3.wav";
+
+ float tail_paddings[8000] = {0}; // 0.5 seconds
+
+ const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
+ if (wave == NULL) {
+ fprintf(stderr, "Failed to read %s\n", wav_filename);
+ exit(-1);
+ }
+
+ const SherpaOnnxOnlineStream *stream = SherpaOnnxCreateKeywordStream(kws);
+ if (!stream) {
+ fprintf(stderr, "Failed to create stream\n");
+ exit(-1);
+ }
+
+ SherpaOnnxOnlineStreamAcceptWaveform(stream, wave->sample_rate, wave->samples,
+ wave->num_samples);
+
+ SherpaOnnxOnlineStreamAcceptWaveform(stream, wave->sample_rate, tail_paddings,
+ sizeof(tail_paddings) / sizeof(float));
+ SherpaOnnxOnlineStreamInputFinished(stream);
+ while (SherpaOnnxIsKeywordStreamReady(kws, stream)) {
+ SherpaOnnxDecodeKeywordStream(kws, stream);
+ const SherpaOnnxKeywordResult *r = SherpaOnnxGetKeywordResult(kws, stream);
+ if (r && r->json && strlen(r->keyword)) {
+ fprintf(stderr, "Detected keyword: %s\n", r->json);
+
+ // Remember to reset the keyword stream right after a keyword is detected
+ SherpaOnnxResetKeywordStream(kws, stream);
+ }
+ SherpaOnnxDestroyKeywordResult(r);
+ }
+ SherpaOnnxDestroyOnlineStream(stream);
+
+ // --------------------------------------------------------------------------
+
+ fprintf(stderr, "--Use pre-defined keywords + add a new keyword--\n");
+
+ stream = SherpaOnnxCreateKeywordStreamWithKeywords(kws, "y ǎn y uán @演员");
+
+ SherpaOnnxOnlineStreamAcceptWaveform(stream, wave->sample_rate, wave->samples,
+ wave->num_samples);
+
+ SherpaOnnxOnlineStreamAcceptWaveform(stream, wave->sample_rate, tail_paddings,
+ sizeof(tail_paddings) / sizeof(float));
+ SherpaOnnxOnlineStreamInputFinished(stream);
+ while (SherpaOnnxIsKeywordStreamReady(kws, stream)) {
+ SherpaOnnxDecodeKeywordStream(kws, stream);
+ const SherpaOnnxKeywordResult *r = SherpaOnnxGetKeywordResult(kws, stream);
+ if (r && r->json && strlen(r->keyword)) {
+ fprintf(stderr, "Detected keyword: %s\n", r->json);
+
+ // Remember to reset the keyword stream
+ SherpaOnnxResetKeywordStream(kws, stream);
+ }
+ SherpaOnnxDestroyKeywordResult(r);
+ }
+ SherpaOnnxDestroyOnlineStream(stream);
+
+ // --------------------------------------------------------------------------
+
+ fprintf(stderr, "--Use pre-defined keywords + add two new keywords--\n");
+
+ stream = SherpaOnnxCreateKeywordStreamWithKeywords(
+ kws, "y ǎn y uán @演员/zh ī m íng @知名");
+
+ SherpaOnnxOnlineStreamAcceptWaveform(stream, wave->sample_rate, wave->samples,
+ wave->num_samples);
+
+ SherpaOnnxOnlineStreamAcceptWaveform(stream, wave->sample_rate, tail_paddings,
+ sizeof(tail_paddings) / sizeof(float));
+ SherpaOnnxOnlineStreamInputFinished(stream);
+ while (SherpaOnnxIsKeywordStreamReady(kws, stream)) {
+ SherpaOnnxDecodeKeywordStream(kws, stream);
+ const SherpaOnnxKeywordResult *r = SherpaOnnxGetKeywordResult(kws, stream);
+ if (r && r->json && strlen(r->keyword)) {
+ fprintf(stderr, "Detected keyword: %s\n", r->json);
+
+ // Remember to reset the keyword stream
+ SherpaOnnxResetKeywordStream(kws, stream);
+ }
+ SherpaOnnxDestroyKeywordResult(r);
+ }
+ SherpaOnnxDestroyOnlineStream(stream);
+
+ SherpaOnnxFreeWave(wave);
+ SherpaOnnxDestroyKeywordSpotter(kws);
+
+ return 0;
+}
diff --git a/c-api-examples/matcha-tts-en-c-api.c b/c-api-examples/matcha-tts-en-c-api.c
new file mode 100644
index 0000000000..99b0a9742d
--- /dev/null
+++ b/c-api-examples/matcha-tts-en-c-api.c
@@ -0,0 +1,87 @@
+// c-api-examples/matcha-tts-en-c-api.c
+//
+// Copyright (c) 2025 Xiaomi Corporation
+
+// This file shows how to use sherpa-onnx C API
+// for English TTS with MatchaTTS.
+//
+// clang-format off
+/*
+Usage
+
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
+tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
+rm matcha-icefall-en_US-ljspeech.tar.bz2
+
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
+
+./matcha-tts-en-c-api
+
+ */
+// clang-format on
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "sherpa-onnx/c-api/c-api.h"
+
+static int32_t ProgressCallback(const float *samples, int32_t num_samples,
+ float progress) {
+ fprintf(stderr, "Progress: %.3f%%\n", progress * 100);
+ // return 1 to continue generating
+ // return 0 to stop generating
+ return 1;
+}
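+
+// A hypothetical variant (an assumption, not part of this example): with the
+// same callback signature, generation can be stopped early, e.g.
+//
+//   static int32_t StopHalfWay(const float *samples, int32_t num_samples,
+//                              float progress) {
+//     return progress < 0.5 ? 1 : 0;  // stop once progress reaches 50%
+//   }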
+
+int32_t main(int32_t argc, char *argv[]) {
+ SherpaOnnxOfflineTtsConfig config;
+ memset(&config, 0, sizeof(config));
+ config.model.matcha.acoustic_model =
+ "./matcha-icefall-en_US-ljspeech/model-steps-3.onnx";
+
+ config.model.matcha.vocoder = "./hifigan_v2.onnx";
+
+ config.model.matcha.tokens = "./matcha-icefall-en_US-ljspeech/tokens.txt";
+
+ config.model.matcha.data_dir =
+ "./matcha-icefall-en_US-ljspeech/espeak-ng-data";
+
+ config.model.num_threads = 1;
+
+ // If you don't want to see debug messages, please set it to 0
+ config.model.debug = 1;
+
+ const char *filename = "./generated-matcha-en.wav";
+ const char *text =
+ "Today as always, men fall into two groups: slaves and free men. Whoever "
+ "does not have two-thirds of his day for himself, is a slave, whatever "
+ "he may be: a statesman, a businessman, an official, or a scholar. "
+ "Friends fell out often because life was changing so fast. The easiest "
+ "thing in the world was to lose touch with someone.";
+
+ const SherpaOnnxOfflineTts *tts = SherpaOnnxCreateOfflineTts(&config);
+ int32_t sid = 0;
+  float speed = 1.0;  // larger -> faster speech
+
+#if 0
+ // If you don't want to use a callback, then please enable this branch
+ const SherpaOnnxGeneratedAudio *audio =
+ SherpaOnnxOfflineTtsGenerate(tts, text, sid, speed);
+#else
+ const SherpaOnnxGeneratedAudio *audio =
+ SherpaOnnxOfflineTtsGenerateWithProgressCallback(tts, text, sid, speed,
+ ProgressCallback);
+#endif
+
+ SherpaOnnxWriteWave(audio->samples, audio->n, audio->sample_rate, filename);
+
+ SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio);
+ SherpaOnnxDestroyOfflineTts(tts);
+
+ fprintf(stderr, "Input text is: %s\n", text);
+ fprintf(stderr, "Speaker ID is is: %d\n", sid);
+ fprintf(stderr, "Saved to: %s\n", filename);
+
+ return 0;
+}
diff --git a/c-api-examples/matcha-tts-zh-c-api.c b/c-api-examples/matcha-tts-zh-c-api.c
new file mode 100644
index 0000000000..9fb9f4597d
--- /dev/null
+++ b/c-api-examples/matcha-tts-zh-c-api.c
@@ -0,0 +1,87 @@
+// c-api-examples/matcha-tts-zh-c-api.c
+//
+// Copyright (c) 2025 Xiaomi Corporation
+
+// This file shows how to use sherpa-onnx C API
+// for Chinese TTS with MatchaTTS.
+//
+// clang-format off
+/*
+Usage
+
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
+tar xvf matcha-icefall-zh-baker.tar.bz2
+rm matcha-icefall-zh-baker.tar.bz2
+
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
+
+./matcha-tts-zh-c-api
+
+ */
+// clang-format on
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "sherpa-onnx/c-api/c-api.h"
+
+static int32_t ProgressCallback(const float *samples, int32_t num_samples,
+ float progress) {
+ fprintf(stderr, "Progress: %.3f%%\n", progress * 100);
+ // return 1 to continue generating
+ // return 0 to stop generating
+ return 1;
+}
+
+int32_t main(int32_t argc, char *argv[]) {
+ SherpaOnnxOfflineTtsConfig config;
+ memset(&config, 0, sizeof(config));
+ config.model.matcha.acoustic_model =
+ "./matcha-icefall-zh-baker/model-steps-3.onnx";
+ config.model.matcha.vocoder = "./hifigan_v2.onnx";
+ config.model.matcha.lexicon = "./matcha-icefall-zh-baker/lexicon.txt";
+ config.model.matcha.tokens = "./matcha-icefall-zh-baker/tokens.txt";
+ config.model.matcha.dict_dir = "./matcha-icefall-zh-baker/dict";
+ config.model.num_threads = 1;
+
+ // If you don't want to see debug messages, please set it to 0
+ config.model.debug = 1;
+
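+  // rule_fsts applies text-normalization FSTs before synthesis; the names
+  // below suggest they rewrite phone numbers, dates, and plain numbers in
+  // the input text (exercised by the test sentence further below).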
+ // clang-format off
+ config.rule_fsts = "./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst";
+ // clang-format on
+
+ const char *filename = "./generated-matcha-zh.wav";
+ const char *text =
+ "当夜幕降临,星光点点,伴随着微风拂面,我在静谧中感受着时光的流转,思念如"
+ "涟漪荡漾,梦境如画卷展开,我与自然融为一体,沉静在这片宁静的美丽之中,感"
+ "受着生命的奇迹与温柔."
+ "某某银行的副行长和一些行政领导表示,他们去过长江和长白山; "
+ "经济不断增长。2024年12月31号,拨打110或者18920240511。123456块钱。";
+
+ const SherpaOnnxOfflineTts *tts = SherpaOnnxCreateOfflineTts(&config);
+ int32_t sid = 0;
+  float speed = 1.0;  // larger -> faster speech
+
+#if 0
+ // If you don't want to use a callback, then please enable this branch
+ const SherpaOnnxGeneratedAudio *audio =
+ SherpaOnnxOfflineTtsGenerate(tts, text, sid, speed);
+#else
+ const SherpaOnnxGeneratedAudio *audio =
+ SherpaOnnxOfflineTtsGenerateWithProgressCallback(tts, text, sid, speed,
+ ProgressCallback);
+#endif
+
+ SherpaOnnxWriteWave(audio->samples, audio->n, audio->sample_rate, filename);
+
+ SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio);
+ SherpaOnnxDestroyOfflineTts(tts);
+
+ fprintf(stderr, "Input text is: %s\n", text);
+ fprintf(stderr, "Speaker ID is is: %d\n", sid);
+ fprintf(stderr, "Saved to: %s\n", filename);
+
+ return 0;
+}
diff --git a/c-api-examples/moonshine-c-api.c b/c-api-examples/moonshine-c-api.c
new file mode 100644
index 0000000000..775dd24c98
--- /dev/null
+++ b/c-api-examples/moonshine-c-api.c
@@ -0,0 +1,83 @@
+// c-api-examples/moonshine-c-api.c
+//
+// Copyright (c) 2024 Xiaomi Corporation
+
+//
+// This file demonstrates how to use Moonshine tiny with sherpa-onnx's C API.
+// clang-format off
+//
+// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+// tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+// rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+//
+// clang-format on
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "sherpa-onnx/c-api/c-api.h"
+
+int32_t main() {
+ const char *wav_filename =
+ "./sherpa-onnx-moonshine-tiny-en-int8/test_wavs/0.wav";
+ const char *preprocessor =
+ "./sherpa-onnx-moonshine-tiny-en-int8/preprocess.onnx";
+ const char *encoder = "./sherpa-onnx-moonshine-tiny-en-int8/encode.int8.onnx";
+ const char *uncached_decoder =
+ "./sherpa-onnx-moonshine-tiny-en-int8/uncached_decode.int8.onnx";
+ const char *cached_decoder =
+ "./sherpa-onnx-moonshine-tiny-en-int8/cached_decode.int8.onnx";
+ const char *tokens = "./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt";
+
+ const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
+ if (wave == NULL) {
+ fprintf(stderr, "Failed to read %s\n", wav_filename);
+ return -1;
+ }
+
+ // Offline model config
+ SherpaOnnxOfflineModelConfig offline_model_config;
+ memset(&offline_model_config, 0, sizeof(offline_model_config));
+ offline_model_config.debug = 1;
+ offline_model_config.num_threads = 1;
+ offline_model_config.provider = "cpu";
+ offline_model_config.tokens = tokens;
+ offline_model_config.moonshine.preprocessor = preprocessor;
+ offline_model_config.moonshine.encoder = encoder;
+ offline_model_config.moonshine.uncached_decoder = uncached_decoder;
+ offline_model_config.moonshine.cached_decoder = cached_decoder;
+
+ // Recognizer config
+ SherpaOnnxOfflineRecognizerConfig recognizer_config;
+ memset(&recognizer_config, 0, sizeof(recognizer_config));
+ recognizer_config.decoding_method = "greedy_search";
+ recognizer_config.model_config = offline_model_config;
+
+ const SherpaOnnxOfflineRecognizer *recognizer =
+ SherpaOnnxCreateOfflineRecognizer(&recognizer_config);
+
+ if (recognizer == NULL) {
+ fprintf(stderr, "Please check your config!\n");
+ SherpaOnnxFreeWave(wave);
+ return -1;
+ }
+
+ const SherpaOnnxOfflineStream *stream =
+ SherpaOnnxCreateOfflineStream(recognizer);
+
+ SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, wave->samples,
+ wave->num_samples);
+ SherpaOnnxDecodeOfflineStream(recognizer, stream);
+ const SherpaOnnxOfflineRecognizerResult *result =
+ SherpaOnnxGetOfflineStreamResult(stream);
+
+ fprintf(stderr, "Decoded text: %s\n", result->text);
+
+ SherpaOnnxDestroyOfflineRecognizerResult(result);
+ SherpaOnnxDestroyOfflineStream(stream);
+ SherpaOnnxDestroyOfflineRecognizer(recognizer);
+ SherpaOnnxFreeWave(wave);
+
+ return 0;
+}
diff --git a/c-api-examples/offline-speaker-diarization-c-api.c b/c-api-examples/offline-speaker-diarization-c-api.c
new file mode 100644
index 0000000000..d5a17dd0b7
--- /dev/null
+++ b/c-api-examples/offline-speaker-diarization-c-api.c
@@ -0,0 +1,131 @@
+// c-api-examples/offline-speaker-diarization-c-api.c
+//
+// Copyright (c) 2024 Xiaomi Corporation
+
+//
+// This file demonstrates how to implement speaker diarization with
+// sherpa-onnx's C API.
+
+// clang-format off
+/*
+Usage:
+
+Step 1: Download a speaker segmentation model
+
+Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-segmentation-models
+for a list of available models. The following is an example
+
+ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+ tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+ rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+
+Step 2: Download a speaker embedding extractor model
+
+Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
+for a list of available models. The following is an example
+
+ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
+
+Step 3. Download test wave files
+
+Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-segmentation-models
+for a list of available test wave files. The following is an example
+
+ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav
+
+Step 4. Run it
+
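+  ./offline-speaker-diarization-c-api
+
+  (The binary name above is an assumption; it follows the naming of the
+  other examples in this directory.)
+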
+ */
+// clang-format on
+
+#include <stdio.h>
+#include <string.h>
+
+#include "sherpa-onnx/c-api/c-api.h"
+
+static int32_t ProgressCallback(int32_t num_processed_chunks,
+ int32_t num_total_chunks, void *arg) {
+ float progress = 100.0 * num_processed_chunks / num_total_chunks;
+ fprintf(stderr, "progress %.2f%%\n", progress);
+
+ // the return value is currently ignored
+ return 0;
+}
+
+int main() {
+ // Please see the comments at the start of this file for how to download
+ // the .onnx file and .wav files below
+ const char *segmentation_model =
+ "./sherpa-onnx-pyannote-segmentation-3-0/model.onnx";
+
+ const char *embedding_extractor_model =
+ "./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx";
+
+ const char *wav_filename = "./0-four-speakers-zh.wav";
+
+ const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
+ if (wave == NULL) {
+ fprintf(stderr, "Failed to read %s\n", wav_filename);
+ return -1;
+ }
+
+ SherpaOnnxOfflineSpeakerDiarizationConfig config;
+ memset(&config, 0, sizeof(config));
+
+ config.segmentation.pyannote.model = segmentation_model;
+ config.embedding.model = embedding_extractor_model;
+
+ // the test wave ./0-four-speakers-zh.wav has 4 speakers, so
+ // we set num_clusters to 4
+ //
+ config.clustering.num_clusters = 4;
+ // If you don't know the number of speakers in the test wave file, please
+ // use
+ // config.clustering.threshold = 0.5; // You need to tune this threshold
+
+ const SherpaOnnxOfflineSpeakerDiarization *sd =
+ SherpaOnnxCreateOfflineSpeakerDiarization(&config);
+
+ if (!sd) {
+ fprintf(stderr, "Failed to initialize offline speaker diarization\n");
+ return -1;
+ }
+
+ if (SherpaOnnxOfflineSpeakerDiarizationGetSampleRate(sd) !=
+ wave->sample_rate) {
+ fprintf(
+ stderr,
+ "Expected sample rate: %d. Actual sample rate from the wave file: %d\n",
+ SherpaOnnxOfflineSpeakerDiarizationGetSampleRate(sd),
+ wave->sample_rate);
+    SherpaOnnxDestroyOfflineSpeakerDiarization(sd);
+    SherpaOnnxFreeWave(wave);
+    return -1;
+ }
+
+ const SherpaOnnxOfflineSpeakerDiarizationResult *result =
+ SherpaOnnxOfflineSpeakerDiarizationProcessWithCallback(
+ sd, wave->samples, wave->num_samples, ProgressCallback, NULL);
+  if (!result) {
+    fprintf(stderr, "Failed to do speaker diarization\n");
+    SherpaOnnxDestroyOfflineSpeakerDiarization(sd);
+    SherpaOnnxFreeWave(wave);
+    return -1;
+  }
+
+ int32_t num_segments =
+ SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments(result);
+
+ const SherpaOnnxOfflineSpeakerDiarizationSegment *segments =
+ SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime(result);
+
+ for (int32_t i = 0; i != num_segments; ++i) {
+ fprintf(stderr, "%.3f -- %.3f speaker_%02d\n", segments[i].start,
+ segments[i].end, segments[i].speaker);
+ }
+
+ SherpaOnnxOfflineSpeakerDiarizationDestroySegment(segments);
+ SherpaOnnxOfflineSpeakerDiarizationDestroyResult(result);
+ SherpaOnnxDestroyOfflineSpeakerDiarization(sd);
+ SherpaOnnxFreeWave(wave);
+
+ return 0;
+}
diff --git a/c-api-examples/offline-tts-c-api.c b/c-api-examples/offline-tts-c-api.c
index 7fbdb004ca..eaa25af392 100644
--- a/c-api-examples/offline-tts-c-api.c
+++ b/c-api-examples/offline-tts-c-api.c
@@ -229,7 +229,7 @@ int32_t main(int32_t argc, char *argv[]) {
ShowUsage();
}
- SherpaOnnxOfflineTts *tts = SherpaOnnxCreateOfflineTts(&config);
+ const SherpaOnnxOfflineTts *tts = SherpaOnnxCreateOfflineTts(&config);
const SherpaOnnxGeneratedAudio *audio =
SherpaOnnxOfflineTtsGenerate(tts, text, sid, 1.0);
diff --git a/c-api-examples/paraformer-c-api.c b/c-api-examples/paraformer-c-api.c
index 345aed5557..98d38c7898 100644
--- a/c-api-examples/paraformer-c-api.c
+++ b/c-api-examples/paraformer-c-api.c
@@ -54,7 +54,7 @@ int32_t main() {
recognizer_config.decoding_method = "greedy_search";
recognizer_config.model_config = offline_model_config;
- SherpaOnnxOfflineRecognizer *recognizer =
+ const SherpaOnnxOfflineRecognizer *recognizer =
SherpaOnnxCreateOfflineRecognizer(&recognizer_config);
if (recognizer == NULL) {
@@ -63,7 +63,8 @@ int32_t main() {
return -1;
}
- SherpaOnnxOfflineStream *stream = SherpaOnnxCreateOfflineStream(recognizer);
+ const SherpaOnnxOfflineStream *stream =
+ SherpaOnnxCreateOfflineStream(recognizer);
SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, wave->samples,
wave->num_samples);
diff --git a/c-api-examples/sense-voice-c-api.c b/c-api-examples/sense-voice-c-api.c
index 06e890636e..25d58219e7 100644
--- a/c-api-examples/sense-voice-c-api.c
+++ b/c-api-examples/sense-voice-c-api.c
@@ -56,7 +56,7 @@ int32_t main() {
recognizer_config.decoding_method = "greedy_search";
recognizer_config.model_config = offline_model_config;
- SherpaOnnxOfflineRecognizer *recognizer =
+ const SherpaOnnxOfflineRecognizer *recognizer =
SherpaOnnxCreateOfflineRecognizer(&recognizer_config);
if (recognizer == NULL) {
@@ -65,7 +65,8 @@ int32_t main() {
return -1;
}
- SherpaOnnxOfflineStream *stream = SherpaOnnxCreateOfflineStream(recognizer);
+ const SherpaOnnxOfflineStream *stream =
+ SherpaOnnxCreateOfflineStream(recognizer);
SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, wave->samples,
wave->num_samples);
diff --git a/c-api-examples/streaming-ctc-buffered-tokens-c-api.c b/c-api-examples/streaming-ctc-buffered-tokens-c-api.c
index 3223772a87..98f5b4a60a 100644
--- a/c-api-examples/streaming-ctc-buffered-tokens-c-api.c
+++ b/c-api-examples/streaming-ctc-buffered-tokens-c-api.c
@@ -36,7 +36,7 @@ static size_t ReadFile(const char *filename, const char **buffer_out) {
fprintf(stderr, "Memory error\n");
return -1;
}
- size_t read_bytes = fread(*buffer_out, 1, size, file);
+ size_t read_bytes = fread((void *)*buffer_out, 1, size, file);
if (read_bytes != size) {
printf("Errors occured in reading the file %s\n", filename);
free((void *)*buffer_out);
@@ -95,7 +95,7 @@ int32_t main() {
recognizer_config.decoding_method = "greedy_search";
recognizer_config.model_config = online_model_config;
- SherpaOnnxOnlineRecognizer *recognizer =
+ const SherpaOnnxOnlineRecognizer *recognizer =
SherpaOnnxCreateOnlineRecognizer(&recognizer_config);
free((void *)tokens_buf);
@@ -107,7 +107,8 @@ int32_t main() {
return -1;
}
- SherpaOnnxOnlineStream *stream = SherpaOnnxCreateOnlineStream(recognizer);
+ const SherpaOnnxOnlineStream *stream =
+ SherpaOnnxCreateOnlineStream(recognizer);
const SherpaOnnxDisplay *display = SherpaOnnxCreateDisplay(50);
int32_t segment_id = 0;
diff --git a/c-api-examples/streaming-paraformer-buffered-tokens-c-api.c b/c-api-examples/streaming-paraformer-buffered-tokens-c-api.c
index cd87177b58..0c382cc941 100644
--- a/c-api-examples/streaming-paraformer-buffered-tokens-c-api.c
+++ b/c-api-examples/streaming-paraformer-buffered-tokens-c-api.c
@@ -36,7 +36,7 @@ static size_t ReadFile(const char *filename, const char **buffer_out) {
fprintf(stderr, "Memory error\n");
return -1;
}
- size_t read_bytes = fread(*buffer_out, 1, size, file);
+ size_t read_bytes = fread((void *)*buffer_out, 1, size, file);
if (read_bytes != size) {
printf("Errors occured in reading the file %s\n", filename);
free((void *)*buffer_out);
@@ -96,7 +96,7 @@ int32_t main() {
recognizer_config.decoding_method = "greedy_search";
recognizer_config.model_config = online_model_config;
- SherpaOnnxOnlineRecognizer *recognizer =
+ const SherpaOnnxOnlineRecognizer *recognizer =
SherpaOnnxCreateOnlineRecognizer(&recognizer_config);
free((void *)tokens_buf);
@@ -108,7 +108,8 @@ int32_t main() {
return -1;
}
- SherpaOnnxOnlineStream *stream = SherpaOnnxCreateOnlineStream(recognizer);
+ const SherpaOnnxOnlineStream *stream =
+ SherpaOnnxCreateOnlineStream(recognizer);
const SherpaOnnxDisplay *display = SherpaOnnxCreateDisplay(50);
int32_t segment_id = 0;
diff --git a/c-api-examples/streaming-paraformer-c-api.c b/c-api-examples/streaming-paraformer-c-api.c
index b54116f083..384ea411b1 100644
--- a/c-api-examples/streaming-paraformer-c-api.c
+++ b/c-api-examples/streaming-paraformer-c-api.c
@@ -57,7 +57,7 @@ int32_t main() {
recognizer_config.decoding_method = "greedy_search";
recognizer_config.model_config = online_model_config;
- SherpaOnnxOnlineRecognizer *recognizer =
+ const SherpaOnnxOnlineRecognizer *recognizer =
SherpaOnnxCreateOnlineRecognizer(&recognizer_config);
if (recognizer == NULL) {
@@ -66,7 +66,8 @@ int32_t main() {
return -1;
}
- SherpaOnnxOnlineStream *stream = SherpaOnnxCreateOnlineStream(recognizer);
+ const SherpaOnnxOnlineStream *stream =
+ SherpaOnnxCreateOnlineStream(recognizer);
const SherpaOnnxDisplay *display = SherpaOnnxCreateDisplay(50);
int32_t segment_id = 0;
diff --git a/c-api-examples/streaming-zipformer-buffered-tokens-hotwords-c-api.c b/c-api-examples/streaming-zipformer-buffered-tokens-hotwords-c-api.c
index d5092c5cc2..bd76ea8abb 100644
--- a/c-api-examples/streaming-zipformer-buffered-tokens-hotwords-c-api.c
+++ b/c-api-examples/streaming-zipformer-buffered-tokens-hotwords-c-api.c
@@ -36,7 +36,7 @@ static size_t ReadFile(const char *filename, const char **buffer_out) {
fprintf(stderr, "Memory error\n");
return -1;
}
- size_t read_bytes = fread(*buffer_out, 1, size, file);
+ size_t read_bytes = fread((void *)*buffer_out, 1, size, file);
if (read_bytes != size) {
printf("Errors occured in reading the file %s\n", filename);
free((void *)*buffer_out);
@@ -116,7 +116,7 @@ int32_t main() {
recognizer_config.hotwords_buf = hotwords_buf;
recognizer_config.hotwords_buf_size = hotwords_buf_size;
- SherpaOnnxOnlineRecognizer *recognizer =
+ const SherpaOnnxOnlineRecognizer *recognizer =
SherpaOnnxCreateOnlineRecognizer(&recognizer_config);
free((void *)tokens_buf);
@@ -130,7 +130,8 @@ int32_t main() {
return -1;
}
- SherpaOnnxOnlineStream *stream = SherpaOnnxCreateOnlineStream(recognizer);
+ const SherpaOnnxOnlineStream *stream =
+ SherpaOnnxCreateOnlineStream(recognizer);
const SherpaOnnxDisplay *display = SherpaOnnxCreateDisplay(50);
int32_t segment_id = 0;
diff --git a/c-api-examples/streaming-zipformer-c-api.c b/c-api-examples/streaming-zipformer-c-api.c
index e1417639d9..6011186ea1 100644
--- a/c-api-examples/streaming-zipformer-c-api.c
+++ b/c-api-examples/streaming-zipformer-c-api.c
@@ -63,7 +63,7 @@ int32_t main() {
recognizer_config.decoding_method = "greedy_search";
recognizer_config.model_config = online_model_config;
- SherpaOnnxOnlineRecognizer *recognizer =
+ const SherpaOnnxOnlineRecognizer *recognizer =
SherpaOnnxCreateOnlineRecognizer(&recognizer_config);
if (recognizer == NULL) {
@@ -72,7 +72,8 @@ int32_t main() {
return -1;
}
- SherpaOnnxOnlineStream *stream = SherpaOnnxCreateOnlineStream(recognizer);
+ const SherpaOnnxOnlineStream *stream =
+ SherpaOnnxCreateOnlineStream(recognizer);
const SherpaOnnxDisplay *display = SherpaOnnxCreateDisplay(50);
int32_t segment_id = 0;
diff --git a/c-api-examples/telespeech-c-api.c b/c-api-examples/telespeech-c-api.c
index fa7824c3be..9bf34b1a87 100644
--- a/c-api-examples/telespeech-c-api.c
+++ b/c-api-examples/telespeech-c-api.c
@@ -49,7 +49,7 @@ int32_t main() {
recognizer_config.decoding_method = "greedy_search";
recognizer_config.model_config = offline_model_config;
- SherpaOnnxOfflineRecognizer *recognizer =
+ const SherpaOnnxOfflineRecognizer *recognizer =
SherpaOnnxCreateOfflineRecognizer(&recognizer_config);
if (recognizer == NULL) {
@@ -58,7 +58,8 @@ int32_t main() {
return -1;
}
- SherpaOnnxOfflineStream *stream = SherpaOnnxCreateOfflineStream(recognizer);
+ const SherpaOnnxOfflineStream *stream =
+ SherpaOnnxCreateOfflineStream(recognizer);
SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, wave->samples,
wave->num_samples);
diff --git a/c-api-examples/vad-moonshine-c-api.c b/c-api-examples/vad-moonshine-c-api.c
new file mode 100644
index 0000000000..2ad6f6d631
--- /dev/null
+++ b/c-api-examples/vad-moonshine-c-api.c
@@ -0,0 +1,146 @@
+// c-api-examples/vad-moonshine-c-api.c
+//
+// Copyright (c) 2024 Xiaomi Corporation
+
+//
+// This file demonstrates how to use VAD + Moonshine with sherpa-onnx's C API.
+// clang-format off
+//
+// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
+// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav
+//
+// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+// tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+// rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+//
+// clang-format on
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "sherpa-onnx/c-api/c-api.h"
+
+int32_t main() {
+ const char *wav_filename = "./Obama.wav";
+ const char *vad_filename = "./silero_vad.onnx";
+
+ const char *preprocessor =
+ "./sherpa-onnx-moonshine-tiny-en-int8/preprocess.onnx";
+ const char *encoder = "./sherpa-onnx-moonshine-tiny-en-int8/encode.int8.onnx";
+ const char *uncached_decoder =
+ "./sherpa-onnx-moonshine-tiny-en-int8/uncached_decode.int8.onnx";
+ const char *cached_decoder =
+ "./sherpa-onnx-moonshine-tiny-en-int8/cached_decode.int8.onnx";
+ const char *tokens = "./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt";
+
+ const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
+ if (wave == NULL) {
+ fprintf(stderr, "Failed to read %s\n", wav_filename);
+ return -1;
+ }
+
+ if (wave->sample_rate != 16000) {
+ fprintf(stderr, "Expect the sample rate to be 16000. Given: %d\n",
+ wave->sample_rate);
+ SherpaOnnxFreeWave(wave);
+ return -1;
+ }
+
+ // Offline model config
+ SherpaOnnxOfflineModelConfig offline_model_config;
+ memset(&offline_model_config, 0, sizeof(offline_model_config));
+ offline_model_config.debug = 0;
+ offline_model_config.num_threads = 1;
+ offline_model_config.provider = "cpu";
+ offline_model_config.tokens = tokens;
+ offline_model_config.moonshine.preprocessor = preprocessor;
+ offline_model_config.moonshine.encoder = encoder;
+ offline_model_config.moonshine.uncached_decoder = uncached_decoder;
+ offline_model_config.moonshine.cached_decoder = cached_decoder;
+
+ // Recognizer config
+ SherpaOnnxOfflineRecognizerConfig recognizer_config;
+ memset(&recognizer_config, 0, sizeof(recognizer_config));
+ recognizer_config.decoding_method = "greedy_search";
+ recognizer_config.model_config = offline_model_config;
+
+ const SherpaOnnxOfflineRecognizer *recognizer =
+ SherpaOnnxCreateOfflineRecognizer(&recognizer_config);
+
+ if (recognizer == NULL) {
+ fprintf(stderr, "Please check your recognizer config!\n");
+ SherpaOnnxFreeWave(wave);
+ return -1;
+ }
+
+ SherpaOnnxVadModelConfig vadConfig;
+ memset(&vadConfig, 0, sizeof(vadConfig));
+ vadConfig.silero_vad.model = vad_filename;
+ vadConfig.silero_vad.threshold = 0.5;
+ vadConfig.silero_vad.min_silence_duration = 0.5;
+ vadConfig.silero_vad.min_speech_duration = 0.5;
+ vadConfig.silero_vad.max_speech_duration = 10;
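+  // 512 samples at 16 kHz correspond to a 32 ms VAD window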
+ vadConfig.silero_vad.window_size = 512;
+ vadConfig.sample_rate = 16000;
+ vadConfig.num_threads = 1;
+ vadConfig.debug = 1;
+
+ SherpaOnnxVoiceActivityDetector *vad =
+ SherpaOnnxCreateVoiceActivityDetector(&vadConfig, 30);
+
+ if (vad == NULL) {
+ fprintf(stderr, "Please check your recognizer config!\n");
+ SherpaOnnxFreeWave(wave);
+ SherpaOnnxDestroyOfflineRecognizer(recognizer);
+ return -1;
+ }
+
+ int32_t window_size = vadConfig.silero_vad.window_size;
+ int32_t i = 0;
+ int is_eof = 0;
+
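+  // Feed the waveform to the VAD in window_size chunks. Whenever the VAD
+  // emits a complete speech segment, decode it with Moonshine on a fresh
+  // offline stream and print the text with its time range. The flush call
+  // in the else branch forces out any segment still pending at end of input.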
+ while (!is_eof) {
+ if (i + window_size < wave->num_samples) {
+ SherpaOnnxVoiceActivityDetectorAcceptWaveform(vad, wave->samples + i,
+ window_size);
+ } else {
+ SherpaOnnxVoiceActivityDetectorFlush(vad);
+ is_eof = 1;
+ }
+ while (!SherpaOnnxVoiceActivityDetectorEmpty(vad)) {
+ const SherpaOnnxSpeechSegment *segment =
+ SherpaOnnxVoiceActivityDetectorFront(vad);
+
+ const SherpaOnnxOfflineStream *stream =
+ SherpaOnnxCreateOfflineStream(recognizer);
+
+ SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate,
+ segment->samples, segment->n);
+
+ SherpaOnnxDecodeOfflineStream(recognizer, stream);
+
+ const SherpaOnnxOfflineRecognizerResult *result =
+ SherpaOnnxGetOfflineStreamResult(stream);
+
+ float start = segment->start / 16000.0f;
+ float duration = segment->n / 16000.0f;
+ float stop = start + duration;
+
+ fprintf(stderr, "%.3f -- %.3f: %s\n", start, stop, result->text);
+
+ SherpaOnnxDestroyOfflineRecognizerResult(result);
+ SherpaOnnxDestroyOfflineStream(stream);
+
+ SherpaOnnxDestroySpeechSegment(segment);
+ SherpaOnnxVoiceActivityDetectorPop(vad);
+ }
+ i += window_size;
+ }
+
+ SherpaOnnxDestroyOfflineRecognizer(recognizer);
+ SherpaOnnxDestroyVoiceActivityDetector(vad);
+ SherpaOnnxFreeWave(wave);
+
+ return 0;
+}
diff --git a/c-api-examples/vad-sense-voice-c-api.c b/c-api-examples/vad-sense-voice-c-api.c
index 172ec0a799..eeddfce883 100644
--- a/c-api-examples/vad-sense-voice-c-api.c
+++ b/c-api-examples/vad-sense-voice-c-api.c
@@ -66,7 +66,7 @@ int32_t main() {
recognizer_config.decoding_method = "greedy_search";
recognizer_config.model_config = offline_model_config;
- SherpaOnnxOfflineRecognizer *recognizer =
+ const SherpaOnnxOfflineRecognizer *recognizer =
SherpaOnnxCreateOfflineRecognizer(&recognizer_config);
if (recognizer == NULL) {
@@ -81,6 +81,7 @@ int32_t main() {
vadConfig.silero_vad.threshold = 0.5;
vadConfig.silero_vad.min_silence_duration = 0.5;
vadConfig.silero_vad.min_speech_duration = 0.5;
+ vadConfig.silero_vad.max_speech_duration = 5;
vadConfig.silero_vad.window_size = 512;
vadConfig.sample_rate = 16000;
vadConfig.num_threads = 1;
@@ -98,18 +99,24 @@ int32_t main() {
int32_t window_size = vadConfig.silero_vad.window_size;
int32_t i = 0;
-
- while (i + window_size < wave->num_samples) {
- SherpaOnnxVoiceActivityDetectorAcceptWaveform(vad, wave->samples + i,
- window_size);
- i += window_size;
+ int is_eof = 0;
+
+ while (!is_eof) {
+ if (i + window_size < wave->num_samples) {
+ SherpaOnnxVoiceActivityDetectorAcceptWaveform(vad, wave->samples + i,
+ window_size);
+ } else {
+ SherpaOnnxVoiceActivityDetectorFlush(vad);
+ is_eof = 1;
+ }
while (!SherpaOnnxVoiceActivityDetectorEmpty(vad)) {
const SherpaOnnxSpeechSegment *segment =
SherpaOnnxVoiceActivityDetectorFront(vad);
- SherpaOnnxOfflineStream *stream =
+ const SherpaOnnxOfflineStream *stream =
SherpaOnnxCreateOfflineStream(recognizer);
+
SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate,
segment->samples, segment->n);
@@ -130,34 +137,7 @@ int32_t main() {
SherpaOnnxDestroySpeechSegment(segment);
SherpaOnnxVoiceActivityDetectorPop(vad);
}
- }
-
- SherpaOnnxVoiceActivityDetectorFlush(vad);
-
- while (!SherpaOnnxVoiceActivityDetectorEmpty(vad)) {
- const SherpaOnnxSpeechSegment *segment =
- SherpaOnnxVoiceActivityDetectorFront(vad);
-
- SherpaOnnxOfflineStream *stream = SherpaOnnxCreateOfflineStream(recognizer);
- SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, segment->samples,
- segment->n);
-
- SherpaOnnxDecodeOfflineStream(recognizer, stream);
-
- const SherpaOnnxOfflineRecognizerResult *result =
- SherpaOnnxGetOfflineStreamResult(stream);
-
- float start = segment->start / 16000.0f;
- float duration = segment->n / 16000.0f;
- float stop = start + duration;
-
- fprintf(stderr, "%.3f -- %.3f: %s\n", start, stop, result->text);
-
- SherpaOnnxDestroyOfflineRecognizerResult(result);
- SherpaOnnxDestroyOfflineStream(stream);
-
- SherpaOnnxDestroySpeechSegment(segment);
- SherpaOnnxVoiceActivityDetectorPop(vad);
+ i += window_size;
}
SherpaOnnxDestroyOfflineRecognizer(recognizer);
diff --git a/c-api-examples/vad-whisper-c-api.c b/c-api-examples/vad-whisper-c-api.c
new file mode 100644
index 0000000000..169b4ef126
--- /dev/null
+++ b/c-api-examples/vad-whisper-c-api.c
@@ -0,0 +1,169 @@
+// c-api-examples/vad-whisper-c-api.c
+//
+// Copyright (c) 2024 Xiaomi Corporation
+
+//
+// This file demonstrates how to use VAD + Whisper tiny.en with
+// sherpa-onnx's C API.
+//
+// clang-format off
+//
+// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
+// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav
+//
+// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
+// tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
+// rm sherpa-onnx-whisper-tiny.en.tar.bz2
+//
+// clang-format on
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "sherpa-onnx/c-api/c-api.h"
+
+int32_t main() {
+ const char *wav_filename = "./Obama.wav";
+ const char *vad_filename = "./silero_vad.onnx";
+
+ const char *encoder = "sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx";
+ const char *decoder = "sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx";
+ const char *tokens = "sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt";
+
+ const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
+ if (wave == NULL) {
+ fprintf(stderr, "Failed to read %s\n", wav_filename);
+ return -1;
+ }
+
+ if (wave->sample_rate != 16000) {
+ fprintf(stderr, "Expect the sample rate to be 16000. Given: %d\n",
+ wave->sample_rate);
+ SherpaOnnxFreeWave(wave);
+ return -1;
+ }
+
+ // Offline model config
+ SherpaOnnxOfflineModelConfig offline_model_config;
+ memset(&offline_model_config, 0, sizeof(offline_model_config));
+ offline_model_config.debug = 0;
+ offline_model_config.num_threads = 1;
+ offline_model_config.provider = "cpu";
+ offline_model_config.tokens = tokens;
+ offline_model_config.whisper.encoder = encoder;
+ offline_model_config.whisper.decoder = decoder;
+ offline_model_config.whisper.language = "en";
+ offline_model_config.whisper.tail_paddings = 0;
+ offline_model_config.whisper.task = "transcribe";
+
+ // Recognizer config
+ SherpaOnnxOfflineRecognizerConfig recognizer_config;
+ memset(&recognizer_config, 0, sizeof(recognizer_config));
+ recognizer_config.decoding_method = "greedy_search";
+ recognizer_config.model_config = offline_model_config;
+
+ const SherpaOnnxOfflineRecognizer *recognizer =
+ SherpaOnnxCreateOfflineRecognizer(&recognizer_config);
+
+ if (recognizer == NULL) {
+ fprintf(stderr, "Please check your recognizer config!\n");
+ SherpaOnnxFreeWave(wave);
+ return -1;
+ }
+
+ SherpaOnnxVadModelConfig vadConfig;
+ memset(&vadConfig, 0, sizeof(vadConfig));
+ vadConfig.silero_vad.model = vad_filename;
+ vadConfig.silero_vad.threshold = 0.5;
+ vadConfig.silero_vad.min_silence_duration = 0.5;
+ vadConfig.silero_vad.min_speech_duration = 0.5;
+ vadConfig.silero_vad.max_speech_duration = 10;
+ vadConfig.silero_vad.window_size = 512;
+ vadConfig.sample_rate = 16000;
+ vadConfig.num_threads = 1;
+ vadConfig.debug = 1;
+
+ SherpaOnnxVoiceActivityDetector *vad =
+ SherpaOnnxCreateVoiceActivityDetector(&vadConfig, 30);
+
+ if (vad == NULL) {
+ fprintf(stderr, "Please check your recognizer config!\n");
+ SherpaOnnxFreeWave(wave);
+ SherpaOnnxDestroyOfflineRecognizer(recognizer);
+ return -1;
+ }
+
+ int32_t window_size = vadConfig.silero_vad.window_size;
+ int32_t i = 0;
+
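+  // Feed the waveform to the VAD window by window and decode each detected
+  // speech segment with Whisper as soon as it becomes available. The flush
+  // call and the second loop below handle segments emitted only at the end
+  // of the input.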
+ while (i + window_size < wave->num_samples) {
+ SherpaOnnxVoiceActivityDetectorAcceptWaveform(vad, wave->samples + i,
+ window_size);
+ i += window_size;
+
+ while (!SherpaOnnxVoiceActivityDetectorEmpty(vad)) {
+ const SherpaOnnxSpeechSegment *segment =
+ SherpaOnnxVoiceActivityDetectorFront(vad);
+
+ const SherpaOnnxOfflineStream *stream =
+ SherpaOnnxCreateOfflineStream(recognizer);
+
+ SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate,
+ segment->samples, segment->n);
+
+ SherpaOnnxDecodeOfflineStream(recognizer, stream);
+
+ const SherpaOnnxOfflineRecognizerResult *result =
+ SherpaOnnxGetOfflineStreamResult(stream);
+
+ float start = segment->start / 16000.0f;
+ float duration = segment->n / 16000.0f;
+ float stop = start + duration;
+
+ fprintf(stderr, "%.3f -- %.3f: %s\n", start, stop, result->text);
+
+ SherpaOnnxDestroyOfflineRecognizerResult(result);
+ SherpaOnnxDestroyOfflineStream(stream);
+
+ SherpaOnnxDestroySpeechSegment(segment);
+ SherpaOnnxVoiceActivityDetectorPop(vad);
+ }
+ }
+
+ SherpaOnnxVoiceActivityDetectorFlush(vad);
+
+ while (!SherpaOnnxVoiceActivityDetectorEmpty(vad)) {
+ const SherpaOnnxSpeechSegment *segment =
+ SherpaOnnxVoiceActivityDetectorFront(vad);
+
+ const SherpaOnnxOfflineStream *stream =
+ SherpaOnnxCreateOfflineStream(recognizer);
+
+ SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, segment->samples,
+ segment->n);
+
+ SherpaOnnxDecodeOfflineStream(recognizer, stream);
+
+ const SherpaOnnxOfflineRecognizerResult *result =
+ SherpaOnnxGetOfflineStreamResult(stream);
+
+ float start = segment->start / 16000.0f;
+ float duration = segment->n / 16000.0f;
+ float stop = start + duration;
+
+ fprintf(stderr, "%.3f -- %.3f: %s\n", start, stop, result->text);
+
+ SherpaOnnxDestroyOfflineRecognizerResult(result);
+ SherpaOnnxDestroyOfflineStream(stream);
+
+ SherpaOnnxDestroySpeechSegment(segment);
+ SherpaOnnxVoiceActivityDetectorPop(vad);
+ }
+
+ SherpaOnnxDestroyOfflineRecognizer(recognizer);
+ SherpaOnnxDestroyVoiceActivityDetector(vad);
+ SherpaOnnxFreeWave(wave);
+
+ return 0;
+}
diff --git a/c-api-examples/whisper-c-api.c b/c-api-examples/whisper-c-api.c
index 3a71bcb030..2e795b0253 100644
--- a/c-api-examples/whisper-c-api.c
+++ b/c-api-examples/whisper-c-api.c
@@ -58,7 +58,7 @@ int32_t main() {
recognizer_config.decoding_method = "greedy_search";
recognizer_config.model_config = offline_model_config;
- SherpaOnnxOfflineRecognizer *recognizer =
+ const SherpaOnnxOfflineRecognizer *recognizer =
SherpaOnnxCreateOfflineRecognizer(&recognizer_config);
if (recognizer == NULL) {
@@ -69,7 +69,8 @@ int32_t main() {
return -1;
}
- SherpaOnnxOfflineStream *stream = SherpaOnnxCreateOfflineStream(recognizer);
+ const SherpaOnnxOfflineStream *stream =
+ SherpaOnnxCreateOfflineStream(recognizer);
SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, wave->samples,
wave->num_samples);
diff --git a/c-api-examples/zipformer-c-api.c b/c-api-examples/zipformer-c-api.c
index 35393b19c5..4db22fc38e 100644
--- a/c-api-examples/zipformer-c-api.c
+++ b/c-api-examples/zipformer-c-api.c
@@ -60,7 +60,7 @@ int32_t main() {
recognizer_config.decoding_method = "greedy_search";
recognizer_config.model_config = offline_model_config;
- SherpaOnnxOfflineRecognizer *recognizer =
+ const SherpaOnnxOfflineRecognizer *recognizer =
SherpaOnnxCreateOfflineRecognizer(&recognizer_config);
if (recognizer == NULL) {
@@ -69,7 +69,8 @@ int32_t main() {
return -1;
}
- SherpaOnnxOfflineStream *stream = SherpaOnnxCreateOfflineStream(recognizer);
+ const SherpaOnnxOfflineStream *stream =
+ SherpaOnnxCreateOfflineStream(recognizer);
SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, wave->samples,
wave->num_samples);
diff --git a/cmake/asio.cmake b/cmake/asio.cmake
index eaa262acbe..9e3ce8d235 100644
--- a/cmake/asio.cmake
+++ b/cmake/asio.cmake
@@ -2,7 +2,7 @@ function(download_asio)
include(FetchContent)
set(asio_URL "https://github.com/chriskohlhoff/asio/archive/refs/tags/asio-1-24-0.tar.gz")
- set(asio_URL2 "https://hub.nuaa.cf/chriskohlhoff/asio/archive/refs/tags/asio-1-24-0.tar.gz")
+ set(asio_URL2 "https://hf-mirror.com/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/asio-asio-1-24-0.tar.gz")
set(asio_HASH "SHA256=cbcaaba0f66722787b1a7c33afe1befb3a012b5af3ad7da7ff0f6b8c9b7a8a5b")
# If you don't have access to the Internet,
diff --git a/cmake/cargs.cmake b/cmake/cargs.cmake
index 54487a6f0c..d7c6055087 100644
--- a/cmake/cargs.cmake
+++ b/cmake/cargs.cmake
@@ -2,7 +2,7 @@ function(download_cargs)
include(FetchContent)
set(cargs_URL "https://github.com/likle/cargs/archive/refs/tags/v1.0.3.tar.gz")
- set(cargs_URL2 "https://hub.nuaa.cf/likle/cargs/archive/refs/tags/v1.0.3.tar.gz")
+ set(cargs_URL2 "https://hf-mirror.com/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/cargs-1.0.3.tar.gz")
set(cargs_HASH "SHA256=ddba25bd35e9c6c75bc706c126001b8ce8e084d40ef37050e6aa6963e836eb8b")
# If you don't have access to the Internet,
diff --git a/cmake/cmake_extension.py b/cmake/cmake_extension.py
index 672e3d17ac..3d0dbef8e8 100644
--- a/cmake/cmake_extension.py
+++ b/cmake/cmake_extension.py
@@ -55,6 +55,7 @@ def get_binaries():
"sherpa-onnx-offline-audio-tagging",
"sherpa-onnx-offline-language-identification",
"sherpa-onnx-offline-punctuation",
+ "sherpa-onnx-offline-speaker-diarization",
"sherpa-onnx-offline-tts",
"sherpa-onnx-offline-tts-play",
"sherpa-onnx-offline-websocket-server",
@@ -79,6 +80,7 @@ def get_binaries():
binaries += [
"onnxruntime.dll",
"sherpa-onnx-c-api.dll",
+ "sherpa-onnx-cxx-api.dll",
]
return binaries
diff --git a/cmake/cppjieba.cmake b/cmake/cppjieba.cmake
index 9ad27d7b51..167da338f4 100644
--- a/cmake/cppjieba.cmake
+++ b/cmake/cppjieba.cmake
@@ -2,7 +2,7 @@ function(download_cppjieba)
include(FetchContent)
set(cppjieba_URL "https://github.com/csukuangfj/cppjieba/archive/refs/tags/sherpa-onnx-2024-04-19.tar.gz")
- set(cppjieba_URL2 "https://hub.nuaa.cf/csukuangfj/cppjieba/archive/refs/tags/sherpa-onnx-2024-04-19.tar.gz")
+ set(cppjieba_URL2 "https://hf-mirror.com/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/cppjieba-sherpa-onnx-2024-04-19.tar.gz")
set(cppjieba_HASH "SHA256=03e5264687f0efaef05487a07d49c3f4c0f743347bfbf825df4b30cc75ac5288")
# If you don't have access to the Internet,
diff --git a/cmake/eigen.cmake b/cmake/eigen.cmake
index 154cdd4c2b..9aef9abc88 100644
--- a/cmake/eigen.cmake
+++ b/cmake/eigen.cmake
@@ -2,7 +2,7 @@ function(download_eigen)
include(FetchContent)
set(eigen_URL "https://gitlab.com/libeigen/eigen/-/archive/3.4.0/eigen-3.4.0.tar.gz")
- set(eigen_URL2 "https://huggingface.co/csukuangfj/kaldi-hmm-gmm-cmake-deps/resolve/main/eigen-3.4.0.tar.gz")
+ set(eigen_URL2 "https://hf-mirror.com/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/eigen-3.4.0.tar.gz")
set(eigen_HASH "SHA256=8586084f71f9bde545ee7fa6d00288b264a2b7ac3607b974e54d13e7162c1c72")
# If you don't have access to the Internet,
diff --git a/cmake/espeak-ng-for-piper.cmake b/cmake/espeak-ng-for-piper.cmake
index b54a0a6bd3..0ef8253060 100644
--- a/cmake/espeak-ng-for-piper.cmake
+++ b/cmake/espeak-ng-for-piper.cmake
@@ -2,7 +2,7 @@ function(download_espeak_ng_for_piper)
include(FetchContent)
set(espeak_ng_URL "https://github.com/csukuangfj/espeak-ng/archive/f6fed6c58b5e0998b8e68c6610125e2d07d595a7.zip")
- set(espeak_ng_URL2 "https://hub.nuaa.cf/csukuangfj/espeak-ng/archive/f6fed6c58b5e0998b8e68c6610125e2d07d595a7.zip")
+ set(espeak_ng_URL2 "https://hf-mirror.com/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/espeak-ng-f6fed6c58b5e0998b8e68c6610125e2d07d595a7.zip")
set(espeak_ng_HASH "SHA256=70cbf4050e7a014aae19140b05e57249da4720f56128459fbe3a93beaf971ae6")
set(BUILD_ESPEAK_NG_TESTS OFF CACHE BOOL "" FORCE)
diff --git a/cmake/googletest.cmake b/cmake/googletest.cmake
index cf5fa10cc0..a9bfd443b0 100644
--- a/cmake/googletest.cmake
+++ b/cmake/googletest.cmake
@@ -2,7 +2,7 @@ function(download_googltest)
include(FetchContent)
set(googletest_URL "https://github.com/google/googletest/archive/refs/tags/v1.13.0.tar.gz")
- set(googletest_URL2 "https://hub.nuaa.cf/google/googletest/archive/refs/tags/v1.13.0.tar.gz")
+ set(googletest_URL2 "https://hf-mirror.com/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/googletest-1.13.0.tar.gz")
set(googletest_HASH "SHA256=ad7fdba11ea011c1d925b3289cf4af2c66a352e18d4c7264392fead75e919363")
# If you don't have access to the Internet,
diff --git a/cmake/hclust-cpp.cmake b/cmake/hclust-cpp.cmake
index 9040815255..c84ccafc83 100644
--- a/cmake/hclust-cpp.cmake
+++ b/cmake/hclust-cpp.cmake
@@ -3,6 +3,7 @@ function(download_hclust_cpp)
# The latest commit as of 2024.09.29
set(hclust_cpp_URL "https://github.com/csukuangfj/hclust-cpp/archive/refs/tags/2024-09-29.tar.gz")
+ set(hclust_cpp_URL2 "https://hf-mirror.com/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/hclust-cpp-2024-09-29.tar.gz")
set(hclust_cpp_HASH "SHA256=abab51448a3cb54272aae07522970306e0b2cc6479d59d7b19e7aee4d6cedd33")
# If you don't have access to the Internet,
@@ -20,6 +21,7 @@ function(download_hclust_cpp)
set(hclust_cpp_URL "${f}")
file(TO_CMAKE_PATH "${hclust_cpp_URL}" hclust_cpp_URL)
message(STATUS "Found local downloaded hclust_cpp: ${hclust_cpp_URL}")
+ set(hclust_cpp_URL2)
break()
endif()
endforeach()
diff --git a/cmake/kaldi-decoder.cmake b/cmake/kaldi-decoder.cmake
index d3d7ec2d56..91202342a5 100644
--- a/cmake/kaldi-decoder.cmake
+++ b/cmake/kaldi-decoder.cmake
@@ -2,7 +2,7 @@ function(download_kaldi_decoder)
include(FetchContent)
set(kaldi_decoder_URL "https://github.com/k2-fsa/kaldi-decoder/archive/refs/tags/v0.2.6.tar.gz")
- set(kaldi_decoder_URL2 "https://hub.nuaa.cf/k2-fsa/kaldi-decoder/archive/refs/tags/v0.2.6.tar.gz")
+ set(kaldi_decoder_URL2 "https://hf-mirror.com/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/kaldi-decoder-0.2.6.tar.gz")
set(kaldi_decoder_HASH "SHA256=b13c78b37495cafc6ef3f8a7b661b349c55a51abbd7f7f42f389408dcf86a463")
set(KALDI_DECODER_BUILD_PYTHON OFF CACHE BOOL "" FORCE)
diff --git a/cmake/kaldi-native-fbank.cmake b/cmake/kaldi-native-fbank.cmake
index 2d87b6a8b2..8f6803c888 100644
--- a/cmake/kaldi-native-fbank.cmake
+++ b/cmake/kaldi-native-fbank.cmake
@@ -2,7 +2,7 @@ function(download_kaldi_native_fbank)
include(FetchContent)
set(kaldi_native_fbank_URL "https://github.com/csukuangfj/kaldi-native-fbank/archive/refs/tags/v1.20.0.tar.gz")
- set(kaldi_native_fbank_URL2 "https://hub.nuaa.cf/csukuangfj/kaldi-native-fbank/archive/refs/tags/v1.20.0.tar.gz")
+ set(kaldi_native_fbank_URL2 "https://hf-mirror.com/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/kaldi-native-fbank-1.20.0.tar.gz")
set(kaldi_native_fbank_HASH "SHA256=c6195b3cf374eef824644061d3c04f6b2a9267ae554169cbaa9865c89c1fe4f9")
set(KALDI_NATIVE_FBANK_BUILD_TESTS OFF CACHE BOOL "" FORCE)
diff --git a/cmake/kaldifst.cmake b/cmake/kaldifst.cmake
index 765e2571a6..034d8c444a 100644
--- a/cmake/kaldifst.cmake
+++ b/cmake/kaldifst.cmake
@@ -2,7 +2,7 @@ function(download_kaldifst)
include(FetchContent)
set(kaldifst_URL "https://github.com/k2-fsa/kaldifst/archive/refs/tags/v1.7.11.tar.gz")
- set(kaldifst_URL2 "https://hub.nuaa.cf/k2-fsa/kaldifst/archive/refs/tags/v1.7.11.tar.gz")
+ set(kaldifst_URL2 "https://hf-mirror.com/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/kaldifst-1.7.11.tar.gz")
set(kaldifst_HASH "SHA256=b43b3332faa2961edc730e47995a58cd4e22ead21905d55b0c4a41375b4a525f")
# If you don't have access to the Internet,
diff --git a/cmake/onnxruntime-linux-aarch64-gpu.cmake b/cmake/onnxruntime-linux-aarch64-gpu.cmake
new file mode 100644
index 0000000000..5df32c996d
--- /dev/null
+++ b/cmake/onnxruntime-linux-aarch64-gpu.cmake
@@ -0,0 +1,119 @@
+# Copyright (c) 2022-2024 Xiaomi Corporation
+message(STATUS "CMAKE_SYSTEM_NAME: ${CMAKE_SYSTEM_NAME}")
+message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")
+
+if(NOT CMAKE_SYSTEM_NAME STREQUAL Linux)
+ message(FATAL_ERROR "This file is for Linux only. Given: ${CMAKE_SYSTEM_NAME}")
+endif()
+
+if(NOT CMAKE_SYSTEM_PROCESSOR STREQUAL aarch64)
+ message(FATAL_ERROR "This file is for aarch64 only. Given: ${CMAKE_SYSTEM_PROCESSOR}")
+endif()
+
+if(NOT BUILD_SHARED_LIBS)
+ message(FATAL_ERROR "This file is for building shared libraries. BUILD_SHARED_LIBS: ${BUILD_SHARED_LIBS}")
+endif()
+
+if(NOT SHERPA_ONNX_ENABLE_GPU)
+ message(FATAL_ERROR "This file is for NVIDIA GPU only. Given SHERPA_ONNX_ENABLE_GPU: ${SHERPA_ONNX_ENABLE_GPU}")
+endif()
+
+message(WARNING "\
+SHERPA_ONNX_LINUX_ARM64_GPU_ONNXRUNTIME_VERSION: ${SHERPA_ONNX_LINUX_ARM64_GPU_ONNXRUNTIME_VERSION}
+If you use Jetson Nano B01, then please pass
+ -DSHERPA_ONNX_LINUX_ARM64_GPU_ONNXRUNTIME_VERSION=1.11.0
+to cmake (You need to make sure CUDA 10.2 is available on your board).
+
+If you use Jetson Orin NX, then please pass
+ -DSHERPA_ONNX_LINUX_ARM64_GPU_ONNXRUNTIME_VERSION=1.16.0
+to cmake (You need to make sure CUDA 11.4 is available on your board).
+")
+
+set(v ${SHERPA_ONNX_LINUX_ARM64_GPU_ONNXRUNTIME_VERSION})
+
+set(onnxruntime_URL "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v${v}/onnxruntime-linux-aarch64-gpu-${v}.tar.bz2")
+set(onnxruntime_URL2 "https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-linux-aarch64-gpu-${v}.tar.bz2")
+
+if(v STREQUAL "1.11.0")
+ set(onnxruntime_HASH "SHA256=36eded935551e23aead09d4173bdf0bd1e7b01fdec15d77f97d6e34029aa60d7")
+else()
+ set(onnxruntime_HASH "SHA256=4c09d5acf2c2682b4eab1dc2f1ad98fc1fde5f5f1960063e337983ba59379a4b")
+endif()
+
+# If you don't have access to the Internet,
+# please download onnxruntime to one of the following locations.
+# You can add more if you want.
+set(possible_file_locations
+ $ENV{HOME}/Downloads/onnxruntime-linux-aarch64-gpu-${v}.tar.bz2
+ ${CMAKE_SOURCE_DIR}/onnxruntime-linux-aarch64-gpu-${v}.tar.bz2
+ ${CMAKE_BINARY_DIR}/onnxruntime-linux-aarch64-gpu-${v}.tar.bz2
+ /tmp/onnxruntime-linux-aarch64-gpu-${v}.tar.bz2
+ /star-fj/fangjun/download/github/onnxruntime-linux-aarch64-gpu-${v}.tar.bz2
+)
+
+foreach(f IN LISTS possible_file_locations)
+ if(EXISTS ${f})
+ set(onnxruntime_URL "${f}")
+ file(TO_CMAKE_PATH "${onnxruntime_URL}" onnxruntime_URL)
+ message(STATUS "Found local downloaded onnxruntime: ${onnxruntime_URL}")
+ set(onnxruntime_URL2)
+ break()
+ endif()
+endforeach()
+
+FetchContent_Declare(onnxruntime
+ URL
+ ${onnxruntime_URL}
+ ${onnxruntime_URL2}
+ URL_HASH ${onnxruntime_HASH}
+)
+
+FetchContent_GetProperties(onnxruntime)
+if(NOT onnxruntime_POPULATED)
+ message(STATUS "Downloading onnxruntime from ${onnxruntime_URL}")
+ FetchContent_Populate(onnxruntime)
+endif()
+message(STATUS "onnxruntime is downloaded to ${onnxruntime_SOURCE_DIR}")
+
+find_library(location_onnxruntime onnxruntime
+ PATHS
+ "${onnxruntime_SOURCE_DIR}/lib"
+ NO_CMAKE_SYSTEM_PATH
+)
+
+message(STATUS "location_onnxruntime: ${location_onnxruntime}")
+
+add_library(onnxruntime SHARED IMPORTED)
+
+set_target_properties(onnxruntime PROPERTIES
+ IMPORTED_LOCATION ${location_onnxruntime}
+ INTERFACE_INCLUDE_DIRECTORIES "${onnxruntime_SOURCE_DIR}/include"
+)
+
+find_library(location_onnxruntime_cuda_lib onnxruntime_providers_cuda
+ PATHS
+ "${onnxruntime_SOURCE_DIR}/lib"
+ NO_CMAKE_SYSTEM_PATH
+)
+
+add_library(onnxruntime_providers_cuda SHARED IMPORTED)
+set_target_properties(onnxruntime_providers_cuda PROPERTIES
+ IMPORTED_LOCATION ${location_onnxruntime_cuda_lib}
+)
+message(STATUS "location_onnxruntime_cuda_lib: ${location_onnxruntime_cuda_lib}")
+
+# for libonnxruntime_providers_shared.so
+find_library(location_onnxruntime_providers_shared_lib onnxruntime_providers_shared
+ PATHS
+ "${onnxruntime_SOURCE_DIR}/lib"
+ NO_CMAKE_SYSTEM_PATH
+)
+add_library(onnxruntime_providers_shared SHARED IMPORTED)
+set_target_properties(onnxruntime_providers_shared PROPERTIES
+ IMPORTED_LOCATION ${location_onnxruntime_providers_shared_lib}
+)
+message(STATUS "location_onnxruntime_providers_shared_lib: ${location_onnxruntime_providers_shared_lib}")
+
+file(GLOB onnxruntime_lib_files "${onnxruntime_SOURCE_DIR}/lib/libonnxruntime*")
+message(STATUS "onnxruntime lib files: ${onnxruntime_lib_files}")
+install(FILES ${onnxruntime_lib_files} DESTINATION lib)
diff --git a/cmake/onnxruntime-linux-aarch64-static.cmake b/cmake/onnxruntime-linux-aarch64-static.cmake
index 9606c79db7..4752e01020 100644
--- a/cmake/onnxruntime-linux-aarch64-static.cmake
+++ b/cmake/onnxruntime-linux-aarch64-static.cmake
@@ -15,7 +15,7 @@ if(BUILD_SHARED_LIBS)
endif()
set(onnxruntime_URL "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.17.1/onnxruntime-linux-aarch64-static_lib-1.17.1.zip")
-set(onnxruntime_URL2 "https://hub.nuaa.cf/csukuangfj/onnxruntime-libs/releases/download/v1.17.1/onnxruntime-linux-aarch64-static_lib-1.17.1.zip")
+set(onnxruntime_URL2 "https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-linux-aarch64-static_lib-1.17.1.zip")
set(onnxruntime_HASH "SHA256=831b9a3869501040b4399de85f34c4f170e2bcbd41881edaeb553f8dc4080985")
# If you don't have access to the Internet,
diff --git a/cmake/onnxruntime-linux-aarch64.cmake b/cmake/onnxruntime-linux-aarch64.cmake
index a6ef7ce122..a18f59e511 100644
--- a/cmake/onnxruntime-linux-aarch64.cmake
+++ b/cmake/onnxruntime-linux-aarch64.cmake
@@ -15,7 +15,7 @@ if(NOT BUILD_SHARED_LIBS)
endif()
set(onnxruntime_URL "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.17.1/onnxruntime-linux-aarch64-glibc2_17-Release-1.17.1-patched.zip")
-set(onnxruntime_URL2 "https://hub.nuaa.cf/csukuangfj/onnxruntime-libs/releases/download/v1.17.1/onnxruntime-linux-aarch64-glibc2_17-Release-1.17.1-patched.zip")
+set(onnxruntime_URL2 "https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-linux-aarch64-glibc2_17-Release-1.17.1-patched.zip")
set(onnxruntime_HASH "SHA256=6e0e68985f8dd1f643e5a4dbe7cd54c9e176a0cc62249c6bee0699b87fc6d4fb")
# If you don't have access to the Internet,
@@ -53,11 +53,7 @@ if(NOT onnxruntime_POPULATED)
endif()
message(STATUS "onnxruntime is downloaded to ${onnxruntime_SOURCE_DIR}")
-find_library(location_onnxruntime onnxruntime
- PATHS
- "${onnxruntime_SOURCE_DIR}/lib"
- NO_CMAKE_SYSTEM_PATH
-)
+set(location_onnxruntime "${onnxruntime_SOURCE_DIR}/lib/libonnxruntime.so")
message(STATUS "location_onnxruntime: ${location_onnxruntime}")
diff --git a/cmake/onnxruntime-linux-arm-static.cmake b/cmake/onnxruntime-linux-arm-static.cmake
index cf2269afbe..fa9170e34b 100644
--- a/cmake/onnxruntime-linux-arm-static.cmake
+++ b/cmake/onnxruntime-linux-arm-static.cmake
@@ -15,7 +15,7 @@ if(BUILD_SHARED_LIBS)
endif()
set(onnxruntime_URL "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.17.1/onnxruntime-linux-arm-static_lib-1.17.1.zip")
-set(onnxruntime_URL2 "https://hub.nuaa.cf/csukuangfj/onnxruntime-libs/releases/download/v1.17.1/onnxruntime-linux-arm-static_lib-1.17.1.zip")
+set(onnxruntime_URL2 "https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-linux-arm-static_lib-1.17.1.zip")
set(onnxruntime_HASH "SHA256=3f2ba38156d2facfb732c0fe53bc1eaaf2791d9a91dd240380e3d53716798b09")
# If you don't have access to the Internet,
diff --git a/cmake/onnxruntime-linux-arm.cmake b/cmake/onnxruntime-linux-arm.cmake
index a3adfaebd5..28bd426866 100644
--- a/cmake/onnxruntime-linux-arm.cmake
+++ b/cmake/onnxruntime-linux-arm.cmake
@@ -15,7 +15,7 @@ if(NOT BUILD_SHARED_LIBS)
endif()
set(onnxruntime_URL "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.17.1/onnxruntime-linux-arm-1.17.1-patched.zip")
-set(onnxruntime_URL2 "https://hub.nuaa.cf/csukuangfj/onnxruntime-libs/releases/download/v1.17.1/onnxruntime-linux-arm-1.17.1-patched.zip")
+set(onnxruntime_URL2 "https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-linux-arm-1.17.1-patched.zip")
set(onnxruntime_HASH "SHA256=4ec00f7adc7341c068babea3d0f607349655e598222d4212115ae4f52619efdb")
# If you don't have access to the Internet,
diff --git a/cmake/onnxruntime-linux-riscv64-static.cmake b/cmake/onnxruntime-linux-riscv64-static.cmake
index b400c4741a..dec7cf1bb3 100644
--- a/cmake/onnxruntime-linux-riscv64-static.cmake
+++ b/cmake/onnxruntime-linux-riscv64-static.cmake
@@ -15,7 +15,7 @@ if(BUILD_SHARED_LIBS)
endif()
set(onnxruntime_URL "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.18.0/onnxruntime-linux-riscv64-static_lib-1.18.0.zip")
-set(onnxruntime_URL2 "https://hub.nuaa.cf/csukuangfj/onnxruntime-libs/releases/download/v1.18.0/onnxruntime-linux-riscv64-static_lib-1.18.0.zip")
+set(onnxruntime_URL2 "https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-linux-riscv64-static_lib-1.18.0.zip")
set(onnxruntime_HASH "SHA256=77ecc51d8caf0953755db6edcdec2fc03bce3f6d379bedd635be50bb95f88da5")
# If you don't have access to the Internet,
diff --git a/cmake/onnxruntime-linux-riscv64.cmake b/cmake/onnxruntime-linux-riscv64.cmake
index c773e5ecb7..121459326c 100644
--- a/cmake/onnxruntime-linux-riscv64.cmake
+++ b/cmake/onnxruntime-linux-riscv64.cmake
@@ -15,7 +15,7 @@ if(NOT BUILD_SHARED_LIBS)
endif()
set(onnxruntime_URL "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.14.1/onnxruntime-linux-riscv64-glibc2_17-Release-1.14.1.zip")
-set(onnxruntime_URL2 "https://hub.nuaa.cf/csukuangfj/onnxruntime-libs/releases/download/v1.14.1/onnxruntime-linux-riscv64-glibc2_17-Release-1.14.1.zip")
+set(onnxruntime_URL2 "https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-linux-riscv64-glibc2_17-Release-1.14.1.zip")
set(onnxruntime_HASH "SHA256=c2cbc5af081ff82f46640befd85433811486daaf28e702163c6e4e75020fde81")
# If you don't have access to the Internet,
diff --git a/cmake/onnxruntime-linux-x86_64-gpu.cmake b/cmake/onnxruntime-linux-x86_64-gpu.cmake
index 5407a0b824..7aed2526f4 100644
--- a/cmake/onnxruntime-linux-x86_64-gpu.cmake
+++ b/cmake/onnxruntime-linux-x86_64-gpu.cmake
@@ -20,7 +20,7 @@ endif()
set(onnxruntime_URL "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.17.1/onnxruntime-linux-x64-gpu-1.17.1-patched.zip")
-set(onnxruntime_URL2 "https://hub.nuaa.cf/csukuangfj/onnxruntime-libs/releases/download/v1.17.1/onnxruntime-linux-x64-gpu-1.17.1-patched.zip")
+set(onnxruntime_URL2 "https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-linux-x64-gpu-1.17.1-patched.zip")
set(onnxruntime_HASH "SHA256=1261de176e8d9d4d2019f8fa8c732c6d11494f3c6e73168ab6d2cc0903f22551")
# If you don't have access to the Internet,
diff --git a/cmake/onnxruntime-linux-x86_64-static.cmake b/cmake/onnxruntime-linux-x86_64-static.cmake
index c6bb867b25..f72f9ad5ea 100644
--- a/cmake/onnxruntime-linux-x86_64-static.cmake
+++ b/cmake/onnxruntime-linux-x86_64-static.cmake
@@ -15,7 +15,7 @@ if(BUILD_SHARED_LIBS)
endif()
set(onnxruntime_URL "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.17.1/onnxruntime-linux-x64-static_lib-1.17.1-glibc2_17.zip")
-set(onnxruntime_URL2 "https://hub.nuaa.cf/csukuangfj/onnxruntime-libs/releases/download/v1.17.1/onnxruntime-linux-x64-static_lib-1.17.1-glibc2_17.zip")
+set(onnxruntime_URL2 "https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-linux-x64-static_lib-1.17.1-glibc2_17.zip")
set(onnxruntime_HASH "SHA256=b646beeb983de843a267096d4457d832f93089f5e7264fd54b48cff207cb2068")
# If you don't have access to the Internet,
diff --git a/cmake/onnxruntime-linux-x86_64.cmake b/cmake/onnxruntime-linux-x86_64.cmake
index eaa6f7608f..361f4d0d8f 100644
--- a/cmake/onnxruntime-linux-x86_64.cmake
+++ b/cmake/onnxruntime-linux-x86_64.cmake
@@ -15,7 +15,7 @@ if(NOT BUILD_SHARED_LIBS)
endif()
set(onnxruntime_URL "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.17.1/onnxruntime-linux-x64-glibc2_17-Release-1.17.1-patched.zip")
-set(onnxruntime_URL2 "https://hub.nuaa.cf/csukuangfj/onnxruntime-libs/releases/download/v1.17.1/onnxruntime-linux-x64-glibc2_17-Release-1.17.1-patched.zip")
+set(onnxruntime_URL2 "https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-linux-x64-glibc2_17-Release-1.17.1-patched.zip")
set(onnxruntime_HASH "SHA256=cb90c51a195bdd453aaf1582f3ef63b466dafbb15c4b8a552ca4dce3769e1d1e")
# If you don't have access to the Internet,
diff --git a/cmake/onnxruntime-osx-arm64-static.cmake b/cmake/onnxruntime-osx-arm64-static.cmake
index 494e263ff1..7cd5a63ba7 100644
--- a/cmake/onnxruntime-osx-arm64-static.cmake
+++ b/cmake/onnxruntime-osx-arm64-static.cmake
@@ -13,7 +13,7 @@ if(BUILD_SHARED_LIBS)
endif()
set(onnxruntime_URL "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.17.1/onnxruntime-osx-arm64-static_lib-1.17.1.zip")
-set(onnxruntime_URL2 "https://hub.nuaa.cf/csukuangfj/onnxruntime-libs/releases/download/v1.17.1/onnxruntime-osx-arm64-static_lib-1.17.1.zip")
+set(onnxruntime_URL2 "https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-osx-arm64-static_lib-1.17.1.zip")
set(onnxruntime_HASH "SHA256=b88a4017251c159fea005aefe836bd0cf4d0bc7454e2810784f84a42143f17eb")
# If you don't have access to the Internet,
diff --git a/cmake/onnxruntime-osx-arm64.cmake b/cmake/onnxruntime-osx-arm64.cmake
index 3998cc8b4d..e3c986a44d 100644
--- a/cmake/onnxruntime-osx-arm64.cmake
+++ b/cmake/onnxruntime-osx-arm64.cmake
@@ -13,7 +13,7 @@ if(NOT BUILD_SHARED_LIBS)
endif()
set(onnxruntime_URL "https://github.com/microsoft/onnxruntime/releases/download/v1.17.1/onnxruntime-osx-arm64-1.17.1.tgz")
-set(onnxruntime_URL2 "https://hub.nuaa.cf/microsoft/onnxruntime/releases/download/v1.17.1/onnxruntime-osx-arm64-1.17.1.tgz")
+set(onnxruntime_URL2 "https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-osx-arm64-1.17.1.tgz")
set(onnxruntime_HASH "SHA256=89566f424624a7ad9a7d9d5e413c44b9639a994d7171cf409901d125b16e2bb3")
# If you don't have access to the Internet,
diff --git a/cmake/onnxruntime-osx-universal-static.cmake b/cmake/onnxruntime-osx-universal-static.cmake
index 2abcf46b4f..5bf635b8e9 100644
--- a/cmake/onnxruntime-osx-universal-static.cmake
+++ b/cmake/onnxruntime-osx-universal-static.cmake
@@ -14,7 +14,7 @@ if(BUILD_SHARED_LIBS)
endif()
set(onnxruntime_URL "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.17.1/onnxruntime-osx-universal2-static_lib-1.17.1.zip")
-set(onnxruntime_URL2 "https://hub.nuaa.cf/csukuangfj/onnxruntime-libs/releases/download/v1.17.1/onnxruntime-osx-universal2-static_lib-1.17.1.zip")
+set(onnxruntime_URL2 "https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-osx-universal2-static_lib-1.17.1.zip")
set(onnxruntime_HASH "SHA256=45599dbd2fb9dd52d6505930c0e82ca165391e222a68f5606b9ea9d4f3922e15")
# If you don't have access to the Internet,
diff --git a/cmake/onnxruntime-osx-universal.cmake b/cmake/onnxruntime-osx-universal.cmake
index 2b0fbb1108..fe5a53a634 100644
--- a/cmake/onnxruntime-osx-universal.cmake
+++ b/cmake/onnxruntime-osx-universal.cmake
@@ -14,7 +14,7 @@ if(NOT BUILD_SHARED_LIBS)
endif()
set(onnxruntime_URL "https://github.com/microsoft/onnxruntime/releases/download/v1.17.1/onnxruntime-osx-universal2-1.17.1.tgz")
-set(onnxruntime_URL2 "https://hub.nuaa.cf/microsoft/onnxruntime/releases/download/v1.17.1/onnxruntime-osx-universal2-1.17.1.tgz")
+set(onnxruntime_URL2 "https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-osx-universal2-1.17.1.tgz")
set(onnxruntime_HASH "SHA256=9fa57fa6f202a373599377ef75064ae568fda8da838632b26a86024c7378d306")
# If you don't have access to the Internet,
diff --git a/cmake/onnxruntime-osx-x86_64-static.cmake b/cmake/onnxruntime-osx-x86_64-static.cmake
index 259ec4d014..a3c98e709b 100644
--- a/cmake/onnxruntime-osx-x86_64-static.cmake
+++ b/cmake/onnxruntime-osx-x86_64-static.cmake
@@ -13,7 +13,7 @@ if(BUILD_SHARED_LIBS)
endif()
set(onnxruntime_URL "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.17.1/onnxruntime-osx-x86_64-static_lib-1.17.1.zip")
-set(onnxruntime_URL2 "https://hub.nuaa.cf/csukuangfj/onnxruntime-libs/releases/download/v1.17.1/onnxruntime-osx-x86_64-static_lib-1.17.1.zip")
+set(onnxruntime_URL2 "https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-osx-x86_64-static_lib-1.17.1.zip")
set(onnxruntime_HASH "SHA256=5ff8efb97e50e257943c6c588328d2c57b649278098d3b468036f02755b60903")
# If you don't have access to the Internet,
diff --git a/cmake/onnxruntime-osx-x86_64.cmake b/cmake/onnxruntime-osx-x86_64.cmake
index 81b78991ac..4ca9674608 100644
--- a/cmake/onnxruntime-osx-x86_64.cmake
+++ b/cmake/onnxruntime-osx-x86_64.cmake
@@ -13,7 +13,7 @@ if(NOT BUILD_SHARED_LIBS)
endif()
set(onnxruntime_URL "https://github.com/microsoft/onnxruntime/releases/download/v1.17.1/onnxruntime-osx-x86_64-1.17.1.tgz")
-set(onnxruntime_URL2 "https://hub.nuaa.cf/microsoft/onnxruntime/releases/download/v1.17.1/onnxruntime-osx-x86_64-1.17.1.tgz")
+set(onnxruntime_URL2 "https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-osx-x86_64-1.17.1.tgz")
set(onnxruntime_HASH "SHA256=86c6b6896434084ff5086eebc4e9ea90be1ed4d46743f92864f46ee43e7b5059")
# If you don't have access to the Internet,
diff --git a/cmake/onnxruntime-wasm-simd.cmake b/cmake/onnxruntime-wasm-simd.cmake
index dcc8fb5dd6..19ac0411c0 100644
--- a/cmake/onnxruntime-wasm-simd.cmake
+++ b/cmake/onnxruntime-wasm-simd.cmake
@@ -11,7 +11,7 @@ if(BUILD_SHARED_LIBS)
endif()
set(onnxruntime_URL "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.17.1/onnxruntime-wasm-static_lib-simd-1.17.1.zip")
-set(onnxruntime_URL2 "https://hub.nuaa.cf/csukuangfj/onnxruntime-libs/releases/download/v1.17.1/onnxruntime-wasm-static_lib-simd-1.17.1.zip")
+set(onnxruntime_URL2 "https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-wasm-static_lib-simd-1.17.1.zip")
set(onnxruntime_HASH "SHA256=8f07778e4233cf5a61a9d0795d90c5497177fbe8a46b701fda2d8d4e2b11cef8")
# If you don't have access to the Internet,
diff --git a/cmake/onnxruntime-win-arm64-static.cmake b/cmake/onnxruntime-win-arm64-static.cmake
new file mode 100644
index 0000000000..0ebbfc29a9
--- /dev/null
+++ b/cmake/onnxruntime-win-arm64-static.cmake
@@ -0,0 +1,72 @@
+# Copyright (c) 2022-2023 Xiaomi Corporation
+message(STATUS "CMAKE_SYSTEM_NAME: ${CMAKE_SYSTEM_NAME}")
+message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")
+message(STATUS "CMAKE_VS_PLATFORM_NAME: ${CMAKE_VS_PLATFORM_NAME}")
+
+if(NOT CMAKE_SYSTEM_NAME STREQUAL Windows)
+ message(FATAL_ERROR "This file is for Windows only. Given: ${CMAKE_SYSTEM_NAME}")
+endif()
+
+if(NOT (CMAKE_VS_PLATFORM_NAME STREQUAL ARM64 OR CMAKE_VS_PLATFORM_NAME STREQUAL arm64))
+ message(FATAL_ERROR "This file is for Windows arm64 only. Given: ${CMAKE_VS_PLATFORM_NAME}")
+endif()
+
+if(BUILD_SHARED_LIBS)
+ message(FATAL_ERROR "This file is for building static libraries. BUILD_SHARED_LIBS: ${BUILD_SHARED_LIBS}")
+endif()
+
+if(NOT CMAKE_BUILD_TYPE STREQUAL Release)
+ message(FATAL_ERROR "This file is for building a release version on Windows arm64")
+endif()
+
+set(onnxruntime_URL "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.17.1/onnxruntime-win-arm64-static_lib-1.17.1.tar.bz2")
+set(onnxruntime_URL2 "https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-win-arm64-static_lib-1.17.1.tar.bz2")
+set(onnxruntime_HASH "SHA256=534ab5bb8b5495ce45fed866cf3ec9034f89f2057a0152e49120b1088003a17e")
+
+# If you don't have access to the Internet,
+# please download onnxruntime to one of the following locations.
+# You can add more if you want.
+set(possible_file_locations
+ $ENV{HOME}/Downloads/onnxruntime-win-arm64-static_lib-1.17.1.tar.bz2
+ ${CMAKE_SOURCE_DIR}/onnxruntime-win-arm64-static_lib-1.17.1.tar.bz2
+ ${CMAKE_BINARY_DIR}/onnxruntime-win-arm64-static_lib-1.17.1.tar.bz2
+ /tmp/onnxruntime-win-arm64-static_lib-1.17.1.tar.bz2
+)
+
+foreach(f IN LISTS possible_file_locations)
+ if(EXISTS ${f})
+ set(onnxruntime_URL "${f}")
+ file(TO_CMAKE_PATH "${onnxruntime_URL}" onnxruntime_URL)
+ message(STATUS "Found local downloaded onnxruntime: ${onnxruntime_URL}")
+ set(onnxruntime_URL2)
+ break()
+ endif()
+endforeach()
+
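+# Both URLs point to the same archive. They are tried in order, so the
+# hf-mirror.com mirror is used only when the GitHub download fails.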
+FetchContent_Declare(onnxruntime
+ URL
+ ${onnxruntime_URL}
+ ${onnxruntime_URL2}
+ URL_HASH ${onnxruntime_HASH}
+)
+
+FetchContent_GetProperties(onnxruntime)
+if(NOT onnxruntime_POPULATED)
+ message(STATUS "Downloading onnxruntime from ${onnxruntime_URL}")
+ FetchContent_Populate(onnxruntime)
+endif()
+message(STATUS "onnxruntime is downloaded to ${onnxruntime_SOURCE_DIR}")
+
+# for static libraries, we use onnxruntime_lib_files directly below
+include_directories(${onnxruntime_SOURCE_DIR}/include)
+
+file(GLOB onnxruntime_lib_files "${onnxruntime_SOURCE_DIR}/lib/*.lib")
+
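+# This file is include()d from the download_onnxruntime() function, so
+# PARENT_SCOPE exports the list of .lib files to that function's caller.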
+set(onnxruntime_lib_files ${onnxruntime_lib_files} PARENT_SCOPE)
+
+message(STATUS "onnxruntime lib files: ${onnxruntime_lib_files}")
+if(SHERPA_ONNX_ENABLE_PYTHON)
+ install(FILES ${onnxruntime_lib_files} DESTINATION ..)
+else()
+ install(FILES ${onnxruntime_lib_files} DESTINATION lib)
+endif()
diff --git a/cmake/onnxruntime-win-arm64.cmake b/cmake/onnxruntime-win-arm64.cmake
index 0705b6451a..a4f247e34d 100644
--- a/cmake/onnxruntime-win-arm64.cmake
+++ b/cmake/onnxruntime-win-arm64.cmake
@@ -16,7 +16,7 @@ if(NOT BUILD_SHARED_LIBS)
endif()
set(onnxruntime_URL "https://github.com/microsoft/onnxruntime/releases/download/v1.17.1/onnxruntime-win-arm64-1.17.1.zip")
-set(onnxruntime_URL2 "https://hub.nuaa.cf/microsoft/onnxruntime/releases/download/v1.17.1/onnxruntime-win-arm64-1.17.1.zip")
+set(onnxruntime_URL2 "https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-win-arm64-1.17.1.zip")
set(onnxruntime_HASH "SHA256=47782cebcab0fd7a1f0a3f0676b088c1bc0f4fbf21666f6fe57570dc362fa5a8")
# If you don't have access to the Internet,
diff --git a/cmake/onnxruntime-win-x64-directml.cmake b/cmake/onnxruntime-win-x64-directml.cmake
index 9648ffecce..a171a69a71 100644
--- a/cmake/onnxruntime-win-x64-directml.cmake
+++ b/cmake/onnxruntime-win-x64-directml.cmake
@@ -20,7 +20,7 @@ if(NOT SHERPA_ONNX_ENABLE_DIRECTML)
endif()
set(onnxruntime_URL "https://globalcdn.nuget.org/packages/microsoft.ml.onnxruntime.directml.1.14.1.nupkg")
-set(onnxruntime_URL2 "https://huggingface.co/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/microsoft.ml.onnxruntime.directml.1.14.1.nupkg")
+set(onnxruntime_URL2 "https://hf-mirror.com/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/microsoft.ml.onnxruntime.directml.1.14.1.nupkg")
set(onnxruntime_HASH "SHA256=c8ae7623385b19cd5de968d0df5383e13b97d1b3a6771c9177eac15b56013a5a")
# If you don't have access to the Internet,
@@ -158,4 +158,4 @@ file(GLOB directml_lib_files "${directml_SOURCE_DIR}/bin/x64-win/DirectML.*")
message(STATUS "DirectML lib files: ${directml_lib_files}")
install(FILES ${directml_lib_files} DESTINATION lib)
-install(FILES ${directml_lib_files} DESTINATION bin)
\ No newline at end of file
+install(FILES ${directml_lib_files} DESTINATION bin)
diff --git a/cmake/onnxruntime-win-x64-gpu.cmake b/cmake/onnxruntime-win-x64-gpu.cmake
index 18b64d01f7..5265653a57 100644
--- a/cmake/onnxruntime-win-x64-gpu.cmake
+++ b/cmake/onnxruntime-win-x64-gpu.cmake
@@ -20,7 +20,7 @@ if(NOT SHERPA_ONNX_ENABLE_GPU)
endif()
set(onnxruntime_URL "https://github.com/microsoft/onnxruntime/releases/download/v1.17.1/onnxruntime-win-x64-gpu-1.17.1.zip")
-set(onnxruntime_URL2 "https://hub.nuaa.cf/microsoft/onnxruntime/releases/download/v1.17.1/onnxruntime-win-x64-gpu-1.17.1.zip")
+set(onnxruntime_URL2 "https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-win-x64-gpu-1.17.1.zip")
set(onnxruntime_HASH "SHA256=b7a66f50ad146c2ccb43471d2d3b5ad78084c2d4ddbd3ea82d65f86c867408b2")
# If you don't have access to the Internet,
diff --git a/cmake/onnxruntime-win-x64-static-debug.cmake b/cmake/onnxruntime-win-x64-static-debug.cmake
index 3281f4989a..211873cf31 100644
--- a/cmake/onnxruntime-win-x64-static-debug.cmake
+++ b/cmake/onnxruntime-win-x64-static-debug.cmake
@@ -16,7 +16,7 @@ if(BUILD_SHARED_LIBS)
endif()
set(onnxruntime_URL "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.17.1/onnxruntime-win-x64-static_lib-${CMAKE_BUILD_TYPE}-1.17.1.tar.bz2")
-set(onnxruntime_URL2 "https://hub.nuaa.cf/csukuangfj/onnxruntime-libs/releases/download/v1.17.1/onnxruntime-win-x64-static_lib-${CMAKE_BUILD_TYPE}-1.17.1.tar.bz2")
+set(onnxruntime_URL2 "https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-win-x64-static_lib-${CMAKE_BUILD_TYPE}-1.17.1.tar.bz2")
if(CMAKE_BUILD_TYPE STREQUAL Debug)
set(onnxruntime_HASH "SHA256=ecc68d914541c3b6ebc36148af63fe2a6af0f4f955b35199d612698d23169fa5")
elseif(CMAKE_BUILD_TYPE STREQUAL RelWithDebInfo)
diff --git a/cmake/onnxruntime-win-x64-static.cmake b/cmake/onnxruntime-win-x64-static.cmake
index 009390872c..811d647536 100644
--- a/cmake/onnxruntime-win-x64-static.cmake
+++ b/cmake/onnxruntime-win-x64-static.cmake
@@ -20,7 +20,7 @@ if(NOT CMAKE_BUILD_TYPE STREQUAL Release)
endif()
set(onnxruntime_URL "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.17.1/onnxruntime-win-x64-static_lib-1.17.1.tar.bz2")
-set(onnxruntime_URL2 "https://hub.nuaa.cf/github.com/csukuangfj/onnxruntime-libs/releases/download/v1.17.1/onnxruntime-win-x64-static_lib-1.17.1.tar.bz2")
+set(onnxruntime_URL2 "https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-win-x64-static_lib-1.17.1.tar.bz2")
set(onnxruntime_HASH "SHA256=42a0c02fda945d1d72433b2a7cdb2187d51cb4d7f3af462c6ae07b25314d5fb3")
# If you don't have access to the Internet,
diff --git a/cmake/onnxruntime-win-x64.cmake b/cmake/onnxruntime-win-x64.cmake
index 26f96fdb0c..4dbe0caa6f 100644
--- a/cmake/onnxruntime-win-x64.cmake
+++ b/cmake/onnxruntime-win-x64.cmake
@@ -16,7 +16,7 @@ if(NOT BUILD_SHARED_LIBS)
endif()
set(onnxruntime_URL "https://github.com/microsoft/onnxruntime/releases/download/v1.17.1/onnxruntime-win-x64-1.17.1.zip")
-set(onnxruntime_URL2 "https://hub.nuaa.cf/microsoft/onnxruntime/releases/download/v1.17.1/onnxruntime-win-x64-1.17.1.zip")
+set(onnxruntime_URL2 "https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-win-x64-1.17.1.zip")
set(onnxruntime_HASH "SHA256=4802af9598db02153d7da39432a48823ff69b2fb4b59155461937f20782aa91c")
# If you don't have access to the Internet,
diff --git a/cmake/onnxruntime-win-x86-static-debug.cmake b/cmake/onnxruntime-win-x86-static-debug.cmake
index a8d6858c69..8f00f2a506 100644
--- a/cmake/onnxruntime-win-x86-static-debug.cmake
+++ b/cmake/onnxruntime-win-x86-static-debug.cmake
@@ -17,7 +17,7 @@ endif()
set(onnxruntime_URL "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.17.1/onnxruntime-win-x86-static_lib-${CMAKE_BUILD_TYPE}-1.17.1.tar.bz2")
-set(onnxruntime_URL2 "https://hub.nuaa.cf/csukuangfj/onnxruntime-libs/releases/download/v1.17.1/onnxruntime-win-x86-static_lib-${CMAKE_BUILD_TYPE}-1.17.1.tar.bz2")
+set(onnxruntime_URL2 "https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-win-x86-static_lib-${CMAKE_BUILD_TYPE}-1.17.1.tar.bz2")
if(CMAKE_BUILD_TYPE STREQUAL Debug)
set(onnxruntime_HASH "SHA256=b08b223fe09a5640472eec487ff42e4df6bf726e8aba9de40f443a1fabea3334")
elseif(CMAKE_BUILD_TYPE STREQUAL RelWithDebInfo)
diff --git a/cmake/onnxruntime-win-x86-static.cmake b/cmake/onnxruntime-win-x86-static.cmake
index 7e291a6164..ce424ee8cd 100644
--- a/cmake/onnxruntime-win-x86-static.cmake
+++ b/cmake/onnxruntime-win-x86-static.cmake
@@ -20,7 +20,7 @@ if(NOT CMAKE_BUILD_TYPE STREQUAL Release)
endif()
set(onnxruntime_URL "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.17.1/onnxruntime-win-x86-static_lib-1.17.1.tar.bz2")
-set(onnxruntime_URL2 "https://hub.nuaa.cf/csukuangfj/onnxruntime-libs/releases/download/v1.17.1/onnxruntime-win-x86-static_lib-1.17.1.tar.bz2")
+set(onnxruntime_URL2 "https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-win-x86-static_lib-1.17.1.tar.bz2")
set(onnxruntime_HASH "SHA256=52375d3fabc7b437c955a664bfeb9cb7a6391f5219c4b7d3b87ff690416d4b9e")
# If you don't have access to the Internet,
diff --git a/cmake/onnxruntime-win-x86.cmake b/cmake/onnxruntime-win-x86.cmake
index 99ed71653a..cd8248300f 100644
--- a/cmake/onnxruntime-win-x86.cmake
+++ b/cmake/onnxruntime-win-x86.cmake
@@ -16,7 +16,7 @@ if(NOT BUILD_SHARED_LIBS)
endif()
set(onnxruntime_URL "https://github.com/microsoft/onnxruntime/releases/download/v1.17.1/onnxruntime-win-x86-1.17.1.zip")
-set(onnxruntime_URL2 "https://hub.nuaa.cf/microsoft/onnxruntime/releases/download/v1.17.1/onnxruntime-win-x86-1.17.1.zip")
+set(onnxruntime_URL2 "https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-win-x86-1.17.1.zip")
set(onnxruntime_HASH "SHA256=9404130825474bd36b2538ed925d6b5f2cf1fb6a443f3e125054ae3470019291")
# If you don't have access to the Internet,
diff --git a/cmake/onnxruntime.cmake b/cmake/onnxruntime.cmake
index 6655b45cd3..6ed15c29cb 100644
--- a/cmake/onnxruntime.cmake
+++ b/cmake/onnxruntime.cmake
@@ -13,7 +13,9 @@ function(download_onnxruntime)
include(onnxruntime-linux-riscv64-static)
endif()
elseif(CMAKE_SYSTEM_NAME STREQUAL Linux AND CMAKE_SYSTEM_PROCESSOR STREQUAL aarch64)
- if(BUILD_SHARED_LIBS)
+ if(SHERPA_ONNX_ENABLE_GPU)
+ include(onnxruntime-linux-aarch64-gpu)
+ elseif(BUILD_SHARED_LIBS)
include(onnxruntime-linux-aarch64)
else()
include(onnxruntime-linux-aarch64-static)
@@ -89,10 +91,11 @@ function(download_onnxruntime)
endif()
elseif(CMAKE_VS_PLATFORM_NAME STREQUAL ARM64 OR CMAKE_VS_PLATFORM_NAME STREQUAL arm64)
# for 64-bit windows (arm64)
- if(NOT BUILD_SHARED_LIBS)
- message(FATAL_ERROR "Please pass -DBUILD_SHARED_LIBS=ON to cmake")
+ if(BUILD_SHARED_LIBS)
+ include(onnxruntime-win-arm64)
+ else()
+ include(onnxruntime-win-arm64-static)
endif()
- include(onnxruntime-win-arm64)
else()
# for 64-bit windows (x64)
if(SHERPA_ONNX_ENABLE_DIRECTML)
@@ -149,6 +152,8 @@ if(SHERPA_ONNX_USE_PRE_INSTALLED_ONNXRUNTIME_IF_AVAILABLE)
if(DEFINED ENV{SHERPA_ONNXRUNTIME_LIB_DIR})
if(APPLE)
set(location_onnxruntime_lib $ENV{SHERPA_ONNXRUNTIME_LIB_DIR}/libonnxruntime.dylib)
+ elseif(WIN32)
+ set(location_onnxruntime_lib $ENV{SHERPA_ONNXRUNTIME_LIB_DIR}/onnxruntime.lib)
else()
set(location_onnxruntime_lib $ENV{SHERPA_ONNXRUNTIME_LIB_DIR}/libonnxruntime.so)
endif()
@@ -195,6 +200,7 @@ if(location_onnxruntime_header_dir AND location_onnxruntime_lib)
add_library(onnxruntime SHARED IMPORTED)
set_target_properties(onnxruntime PROPERTIES
IMPORTED_LOCATION ${location_onnxruntime_lib}
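+    # On Windows, linking against an imported shared library also needs the
+    # import library (.lib); IMPORTED_IMPLIB points the linker at it.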
+ IMPORTED_IMPLIB ${location_onnxruntime_lib}
INTERFACE_INCLUDE_DIRECTORIES "${location_onnxruntime_header_dir}"
)
if(SHERPA_ONNX_ENABLE_GPU AND location_onnxruntime_cuda_lib)
diff --git a/cmake/openfst.cmake b/cmake/openfst.cmake
index 0f5863b7c9..2309c2fbe0 100644
--- a/cmake/openfst.cmake
+++ b/cmake/openfst.cmake
@@ -4,7 +4,7 @@ function(download_openfst)
include(FetchContent)
set(openfst_URL "https://github.com/csukuangfj/openfst/archive/refs/tags/sherpa-onnx-2024-06-19.tar.gz")
- set(openfst_URL2 "https://hub.nuaa.cf/csukuangfj/openfst/archive/refs/tags/sherpa-onnx-2024-06-19.tar.gz")
+ set(openfst_URL2 "https://hf-mirror.com/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/openfst-sherpa-onnx-2024-06-19.tar.gz")
set(openfst_HASH "SHA256=5c98e82cc509c5618502dde4860b8ea04d843850ed57e6d6b590b644b268853d")
# If you don't have access to the Internet,
diff --git a/cmake/piper-phonemize.cmake b/cmake/piper-phonemize.cmake
index 7ecf1791b1..0e11fd1762 100644
--- a/cmake/piper-phonemize.cmake
+++ b/cmake/piper-phonemize.cmake
@@ -1,18 +1,18 @@
function(download_piper_phonemize)
include(FetchContent)
- set(piper_phonemize_URL "https://github.com/csukuangfj/piper-phonemize/archive/dc6b5f4441bffe521047086930b0fc12686acd56.zip")
- set(piper_phonemize_URL2 "https://hub.nuaa.cf/csukuangfj/piper-phonemize/archive/dc6b5f4441bffe521047086930b0fc12686acd56.zip")
- set(piper_phonemize_HASH "SHA256=b9faa04204b1756fa455a962abb1f037041c040133d55be58d11f11ab9b3ce14")
+ set(piper_phonemize_URL "https://github.com/csukuangfj/piper-phonemize/archive/78a788e0b719013401572d70fef372e77bff8e43.zip")
+ set(piper_phonemize_URL2 "https://hf-mirror.com/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/piper-phonemize-78a788e0b719013401572d70fef372e77bff8e43.zip")
+ set(piper_phonemize_HASH "SHA256=89641a46489a4898754643ce57bda9c9b54b4ca46485fdc02bf0dc84b866645d")
# If you don't have access to the Internet,
# please pre-download piper-phonemize
set(possible_file_locations
- $ENV{HOME}/Downloads/piper-phonemize-dc6b5f4441bffe521047086930b0fc12686acd56.zip
- ${CMAKE_SOURCE_DIR}/piper-phonemize-dc6b5f4441bffe521047086930b0fc12686acd56.zip
- ${CMAKE_BINARY_DIR}/piper-phonemize-dc6b5f4441bffe521047086930b0fc12686acd56.zip
- /tmp/piper-phonemize-dc6b5f4441bffe521047086930b0fc12686acd56.zip
- /star-fj/fangjun/download/github/piper-phonemize-dc6b5f4441bffe521047086930b0fc12686acd56.zip
+ $ENV{HOME}/Downloads/piper-phonemize-78a788e0b719013401572d70fef372e77bff8e43.zip
+ ${CMAKE_SOURCE_DIR}/piper-phonemize-78a788e0b719013401572d70fef372e77bff8e43.zip
+ ${CMAKE_BINARY_DIR}/piper-phonemize-78a788e0b719013401572d70fef372e77bff8e43.zip
+ /tmp/piper-phonemize-78a788e0b719013401572d70fef372e77bff8e43.zip
+ /star-fj/fangjun/download/github/piper-phonemize-78a788e0b719013401572d70fef372e77bff8e43.zip
)
foreach(f IN LISTS possible_file_locations)
diff --git a/cmake/pybind11.cmake b/cmake/pybind11.cmake
index 0d4894eff6..bc06a3d1c8 100644
--- a/cmake/pybind11.cmake
+++ b/cmake/pybind11.cmake
@@ -1,18 +1,18 @@
function(download_pybind11)
include(FetchContent)
- set(pybind11_URL "https://github.com/pybind/pybind11/archive/refs/tags/v2.10.2.tar.gz")
- set(pybind11_URL2 "https://hub.nuaa.cf/pybind/pybind11/archive/refs/tags/v2.10.2.tar.gz")
- set(pybind11_HASH "SHA256=93bd1e625e43e03028a3ea7389bba5d3f9f2596abc074b068e70f4ef9b1314ae")
+ set(pybind11_URL "https://github.com/pybind/pybind11/archive/refs/tags/v2.12.0.tar.gz")
+ set(pybind11_URL2 "https://hf-mirror.com/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/pybind11-2.12.0.tar.gz")
+ set(pybind11_HASH "SHA256=bf8f242abd1abcd375d516a7067490fb71abd79519a282d22b6e4d19282185a7")
# If you don't have access to the Internet,
# please pre-download pybind11
set(possible_file_locations
- $ENV{HOME}/Downloads/pybind11-2.10.2.tar.gz
- ${CMAKE_SOURCE_DIR}/pybind11-2.10.2.tar.gz
- ${CMAKE_BINARY_DIR}/pybind11-2.10.2.tar.gz
- /tmp/pybind11-2.10.2.tar.gz
- /star-fj/fangjun/download/github/pybind11-2.10.2.tar.gz
+ $ENV{HOME}/Downloads/pybind11-2.12.0.tar.gz
+ ${CMAKE_SOURCE_DIR}/pybind11-2.12.0.tar.gz
+ ${CMAKE_BINARY_DIR}/pybind11-2.12.0.tar.gz
+ /tmp/pybind11-2.12.0.tar.gz
+ /star-fj/fangjun/download/github/pybind11-2.12.0.tar.gz
)
foreach(f IN LISTS possible_file_locations)
diff --git a/cmake/simple-sentencepiece.cmake b/cmake/simple-sentencepiece.cmake
index 09a640b11b..4b6750d0fc 100644
--- a/cmake/simple-sentencepiece.cmake
+++ b/cmake/simple-sentencepiece.cmake
@@ -2,7 +2,7 @@ function(download_simple_sentencepiece)
include(FetchContent)
set(simple-sentencepiece_URL "https://github.com/pkufool/simple-sentencepiece/archive/refs/tags/v0.7.tar.gz")
- set(simple-sentencepiece_URL2 "https://hub.nuaa.cf/pkufool/simple-sentencepiece/archive/refs/tags/v0.7.tar.gz")
+ set(simple-sentencepiece_URL2 "https://hf-mirror.com/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/simple-sentencepiece-0.7.tar.gz")
set(simple-sentencepiece_HASH "SHA256=1748a822060a35baa9f6609f84efc8eb54dc0e74b9ece3d82367b7119fdc75af")
# If you don't have access to the Internet,
diff --git a/cmake/websocketpp.cmake b/cmake/websocketpp.cmake
index 6ae9b89a3b..79b0585be4 100644
--- a/cmake/websocketpp.cmake
+++ b/cmake/websocketpp.cmake
@@ -3,7 +3,7 @@ function(download_websocketpp)
# The latest commit on the develop branch as of 2022-10-22
set(websocketpp_URL "https://github.com/zaphoyd/websocketpp/archive/b9aeec6eaf3d5610503439b4fae3581d9aff08e8.zip")
- set(websocketpp_URL2 "https://hub.nuaa.cf/zaphoyd/websocketpp/archive/b9aeec6eaf3d5610503439b4fae3581d9aff08e8.zip")
+ set(websocketpp_URL2 "https://hf-mirror.com/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/websocketpp-b9aeec6eaf3d5610503439b4fae3581d9aff08e8.zip")
set(websocketpp_HASH "SHA256=1385135ede8191a7fbef9ec8099e3c5a673d48df0c143958216cd1690567f583")
# If you don't have access to the Internet,
diff --git a/cxx-api-examples/CMakeLists.txt b/cxx-api-examples/CMakeLists.txt
new file mode 100644
index 0000000000..fe21d580c8
--- /dev/null
+++ b/cxx-api-examples/CMakeLists.txt
@@ -0,0 +1,33 @@
+include_directories(${CMAKE_SOURCE_DIR})
+
+add_executable(streaming-zipformer-cxx-api ./streaming-zipformer-cxx-api.cc)
+target_link_libraries(streaming-zipformer-cxx-api sherpa-onnx-cxx-api)
+
+add_executable(kws-cxx-api ./kws-cxx-api.cc)
+target_link_libraries(kws-cxx-api sherpa-onnx-cxx-api)
+
+add_executable(streaming-zipformer-rtf-cxx-api ./streaming-zipformer-rtf-cxx-api.cc)
+target_link_libraries(streaming-zipformer-rtf-cxx-api sherpa-onnx-cxx-api)
+
+add_executable(whisper-cxx-api ./whisper-cxx-api.cc)
+target_link_libraries(whisper-cxx-api sherpa-onnx-cxx-api)
+
+add_executable(moonshine-cxx-api ./moonshine-cxx-api.cc)
+target_link_libraries(moonshine-cxx-api sherpa-onnx-cxx-api)
+
+add_executable(sense-voice-cxx-api ./sense-voice-cxx-api.cc)
+target_link_libraries(sense-voice-cxx-api sherpa-onnx-cxx-api)
+
+if(SHERPA_ONNX_ENABLE_TTS)
+ add_executable(matcha-tts-zh-cxx-api ./matcha-tts-zh-cxx-api.cc)
+ target_link_libraries(matcha-tts-zh-cxx-api sherpa-onnx-cxx-api)
+
+ add_executable(matcha-tts-en-cxx-api ./matcha-tts-en-cxx-api.cc)
+ target_link_libraries(matcha-tts-en-cxx-api sherpa-onnx-cxx-api)
+
+ add_executable(kokoro-tts-en-cxx-api ./kokoro-tts-en-cxx-api.cc)
+ target_link_libraries(kokoro-tts-en-cxx-api sherpa-onnx-cxx-api)
+
+ add_executable(kokoro-tts-zh-en-cxx-api ./kokoro-tts-zh-en-cxx-api.cc)
+ target_link_libraries(kokoro-tts-zh-en-cxx-api sherpa-onnx-cxx-api)
+endif()
diff --git a/cxx-api-examples/kokoro-tts-en-cxx-api.cc b/cxx-api-examples/kokoro-tts-en-cxx-api.cc
new file mode 100644
index 0000000000..66b28f03ef
--- /dev/null
+++ b/cxx-api-examples/kokoro-tts-en-cxx-api.cc
@@ -0,0 +1,73 @@
+// cxx-api-examples/kokoro-tts-en-cxx-api.cc
+//
+// Copyright (c) 2025 Xiaomi Corporation
+
+// This file shows how to use sherpa-onnx CXX API
+// for English TTS with Kokoro.
+//
+// clang-format off
+/*
+Usage
+
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2
+tar xf kokoro-en-v0_19.tar.bz2
+rm kokoro-en-v0_19.tar.bz2
+
+./kokoro-tts-en-cxx-api
+
+ */
+// clang-format on
+
+#include <string>
+
+#include "sherpa-onnx/c-api/cxx-api.h"
+
+static int32_t ProgressCallback(const float *samples, int32_t num_samples,
+ float progress, void *arg) {
+ fprintf(stderr, "Progress: %.3f%%\n", progress * 100);
+ // return 1 to continue generating
+ // return 0 to stop generating
+ return 1;
+}
+
+int32_t main(int32_t argc, char *argv[]) {
+ using namespace sherpa_onnx::cxx; // NOLINT
+ OfflineTtsConfig config;
+
+ config.model.kokoro.model = "./kokoro-en-v0_19/model.onnx";
+ config.model.kokoro.voices = "./kokoro-en-v0_19/voices.bin";
+ config.model.kokoro.tokens = "./kokoro-en-v0_19/tokens.txt";
+ config.model.kokoro.data_dir = "./kokoro-en-v0_19/espeak-ng-data";
+
+ config.model.num_threads = 2;
+
+ // If you don't want to see debug messages, please set it to 0
+ config.model.debug = 1;
+
+ std::string filename = "./generated-kokoro-en-cxx.wav";
+ std::string text =
+ "Today as always, men fall into two groups: slaves and free men. Whoever "
+ "does not have two-thirds of his day for himself, is a slave, whatever "
+ "he may be: a statesman, a businessman, an official, or a scholar. "
+ "Friends fell out often because life was changing so fast. The easiest "
+ "thing in the world was to lose touch with someone.";
+
+ auto tts = OfflineTts::Create(config);
+ int32_t sid = 0;
+ float speed = 1.0; // larger -> faster in speech speed
+
+#if 0
+ // If you don't want to use a callback, then please enable this branch
+ GeneratedAudio audio = tts.Generate(text, sid, speed);
+#else
+ GeneratedAudio audio = tts.Generate(text, sid, speed, ProgressCallback);
+#endif
+
+ WriteWave(filename, {audio.samples, audio.sample_rate});
+
+ fprintf(stderr, "Input text is: %s\n", text.c_str());
+ fprintf(stderr, "Speaker ID is is: %d\n", sid);
+ fprintf(stderr, "Saved to: %s\n", filename.c_str());
+
+ return 0;
+}
diff --git a/cxx-api-examples/kokoro-tts-zh-en-cxx-api.cc b/cxx-api-examples/kokoro-tts-zh-en-cxx-api.cc
new file mode 100644
index 0000000000..c0228ad993
--- /dev/null
+++ b/cxx-api-examples/kokoro-tts-zh-en-cxx-api.cc
@@ -0,0 +1,74 @@
+// cxx-api-examples/kokoro-tts-zh-en-cxx-api.cc
+//
+// Copyright (c) 2025 Xiaomi Corporation
+
+// This file shows how to use sherpa-onnx CXX API
+// for Chinese + English TTS with Kokoro.
+//
+// clang-format off
+/*
+Usage
+
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2
+tar xf kokoro-multi-lang-v1_0.tar.bz2
+rm kokoro-multi-lang-v1_0.tar.bz2
+
+./kokoro-tts-zh-en-cxx-api
+
+ */
+// clang-format on
+
+#include <string>
+
+#include "sherpa-onnx/c-api/cxx-api.h"
+
+static int32_t ProgressCallback(const float *samples, int32_t num_samples,
+ float progress, void *arg) {
+ fprintf(stderr, "Progress: %.3f%%\n", progress * 100);
+ // return 1 to continue generating
+ // return 0 to stop generating
+ return 1;
+}
+
+int32_t main(int32_t argc, char *argv[]) {
+ using namespace sherpa_onnx::cxx; // NOLINT
+ OfflineTtsConfig config;
+
+ config.model.kokoro.model = "./kokoro-multi-lang-v1_0/model.onnx";
+ config.model.kokoro.voices = "./kokoro-multi-lang-v1_0/voices.bin";
+ config.model.kokoro.tokens = "./kokoro-multi-lang-v1_0/tokens.txt";
+ config.model.kokoro.data_dir = "./kokoro-multi-lang-v1_0/espeak-ng-data";
+ config.model.kokoro.dict_dir = "./kokoro-multi-lang-v1_0/dict";
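+  // 'lexicon' accepts a comma-separated list of files; the English and Chinese
+  // lexicons are combined below.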
+ config.model.kokoro.lexicon =
+ "./kokoro-multi-lang-v1_0/lexicon-us-en.txt,./kokoro-multi-lang-v1_0/"
+ "lexicon-zh.txt";
+
+ config.model.num_threads = 2;
+
+ // If you don't want to see debug messages, please set it to 0
+ config.model.debug = 1;
+
+ std::string filename = "./generated-kokoro-zh-en-cxx.wav";
+ std::string text =
+ "中英文语音合成测试。This is generated by next generation Kaldi using "
+ "Kokoro without Misaki. 你觉得中英文说的如何呢?";
+
+ auto tts = OfflineTts::Create(config);
+ int32_t sid = 50;
+ float speed = 1.0; // larger -> faster in speech speed
+
+#if 0
+ // If you don't want to use a callback, then please enable this branch
+ GeneratedAudio audio = tts.Generate(text, sid, speed);
+#else
+ GeneratedAudio audio = tts.Generate(text, sid, speed, ProgressCallback);
+#endif
+
+ WriteWave(filename, {audio.samples, audio.sample_rate});
+
+ fprintf(stderr, "Input text is: %s\n", text.c_str());
+ fprintf(stderr, "Speaker ID is is: %d\n", sid);
+ fprintf(stderr, "Saved to: %s\n", filename.c_str());
+
+ return 0;
+}
diff --git a/cxx-api-examples/kws-cxx-api.cc b/cxx-api-examples/kws-cxx-api.cc
new file mode 100644
index 0000000000..12dc8d9cf0
--- /dev/null
+++ b/cxx-api-examples/kws-cxx-api.cc
@@ -0,0 +1,143 @@
+// cxx-api-examples/kws-cxx-api.cc
+//
+// Copyright (c) 2025 Xiaomi Corporation
+//
+// This file demonstrates how to use the keyword spotter with sherpa-onnx's C++ API.
+// clang-format off
+//
+// Usage
+//
+// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile.tar.bz2
+// tar xvf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile.tar.bz2
+// rm sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile.tar.bz2
+//
+// ./kws-cxx-api
+//
+// clang-format on
+#include <array>
+#include <iostream>
+#include <string>
+
+#include "sherpa-onnx/c-api/cxx-api.h"
+
+int32_t main() {
+ using namespace sherpa_onnx::cxx; // NOLINT
+
+ KeywordSpotterConfig config;
+ config.model_config.transducer.encoder =
+ "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile/"
+ "encoder-epoch-12-avg-2-chunk-16-left-64.int8.onnx";
+
+ config.model_config.transducer.decoder =
+ "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile/"
+ "decoder-epoch-12-avg-2-chunk-16-left-64.onnx";
+
+ config.model_config.transducer.joiner =
+ "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile/"
+ "joiner-epoch-12-avg-2-chunk-16-left-64.int8.onnx";
+
+ config.model_config.tokens =
+ "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile/"
+ "tokens.txt";
+
+ config.model_config.provider = "cpu";
+ config.model_config.num_threads = 1;
+ config.model_config.debug = 1;
+
+ config.keywords_file =
+ "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile/"
+ "test_wavs/test_keywords.txt";
+
+ KeywordSpotter kws = KeywordSpotter::Create(config);
+ if (!kws.Get()) {
+ std::cerr << "Please check your config\n";
+ return -1;
+ }
+
+ std::cout
+ << "--Test pre-defined keywords from test_wavs/test_keywords.txt--\n";
+
+ std::string wave_filename =
+ "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile/"
+ "test_wavs/3.wav";
+
+  std::array<float, 8000> tail_paddings = {0};  // 0.5 seconds at 16 kHz
+
+ Wave wave = ReadWave(wave_filename);
+ if (wave.samples.empty()) {
+ std::cerr << "Failed to read: '" << wave_filename << "'\n";
+ return -1;
+ }
+
+ OnlineStream stream = kws.CreateStream();
+ if (!stream.Get()) {
+ std::cerr << "Failed to create stream\n";
+ return -1;
+ }
+
+ stream.AcceptWaveform(wave.sample_rate, wave.samples.data(),
+ wave.samples.size());
+
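+  // Feed a short stretch of trailing silence so frames still buffered inside
+  // the streaming model are flushed and decoded.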
+ stream.AcceptWaveform(wave.sample_rate, tail_paddings.data(),
+ tail_paddings.size());
+ stream.InputFinished();
+
+ while (kws.IsReady(&stream)) {
+ kws.Decode(&stream);
+ auto r = kws.GetResult(&stream);
+ if (!r.keyword.empty()) {
+ std::cout << "Detected keyword: " << r.json << "\n";
+
+ // Remember to reset the keyword stream right after a keyword is detected
+ kws.Reset(&stream);
+ }
+ }
+
+ // --------------------------------------------------------------------------
+
+ std::cout << "--Use pre-defined keywords + add a new keyword--\n";
+
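+  // A keyword is given as its token sequence followed by '@' and the display
+  // text shown in results; multiple keywords are separated by '/'.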
+ stream = kws.CreateStream("y ǎn y uán @演员");
+
+ stream.AcceptWaveform(wave.sample_rate, wave.samples.data(),
+ wave.samples.size());
+
+ stream.AcceptWaveform(wave.sample_rate, tail_paddings.data(),
+ tail_paddings.size());
+ stream.InputFinished();
+
+ while (kws.IsReady(&stream)) {
+ kws.Decode(&stream);
+ auto r = kws.GetResult(&stream);
+ if (!r.keyword.empty()) {
+ std::cout << "Detected keyword: " << r.json << "\n";
+
+ // Remember to reset the keyword stream right after a keyword is detected
+ kws.Reset(&stream);
+ }
+ }
+
+ // --------------------------------------------------------------------------
+
+ std::cout << "--Use pre-defined keywords + add two new keywords--\n";
+
+ stream = kws.CreateStream("y ǎn y uán @演员/zh ī m íng @知名");
+
+ stream.AcceptWaveform(wave.sample_rate, wave.samples.data(),
+ wave.samples.size());
+
+ stream.AcceptWaveform(wave.sample_rate, tail_paddings.data(),
+ tail_paddings.size());
+ stream.InputFinished();
+
+ while (kws.IsReady(&stream)) {
+ kws.Decode(&stream);
+ auto r = kws.GetResult(&stream);
+ if (!r.keyword.empty()) {
+ std::cout << "Detected keyword: " << r.json << "\n";
+
+ // Remember to reset the keyword stream right after a keyword is detected
+ kws.Reset(&stream);
+ }
+ }
+ return 0;
+}
diff --git a/cxx-api-examples/matcha-tts-en-cxx-api.cc b/cxx-api-examples/matcha-tts-en-cxx-api.cc
new file mode 100644
index 0000000000..ef4187d060
--- /dev/null
+++ b/cxx-api-examples/matcha-tts-en-cxx-api.cc
@@ -0,0 +1,80 @@
+// cxx-api-examples/matcha-tts-en-cxx-api.cc
+//
+// Copyright (c) 2025 Xiaomi Corporation
+
+// This file shows how to use sherpa-onnx CXX API
+// for English TTS with MatchaTTS.
+//
+// clang-format off
+/*
+Usage
+
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
+tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
+rm matcha-icefall-en_US-ljspeech.tar.bz2
+
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
+
+./matcha-tts-en-cxx-api
+
+ */
+// clang-format on
+
+#include <string>
+
+#include "sherpa-onnx/c-api/cxx-api.h"
+
+static int32_t ProgressCallback(const float *samples, int32_t num_samples,
+ float progress, void *arg) {
+ fprintf(stderr, "Progress: %.3f%%\n", progress * 100);
+ // return 1 to continue generating
+ // return 0 to stop generating
+ return 1;
+}
+
+int32_t main(int32_t argc, char *argv[]) {
+ using namespace sherpa_onnx::cxx; // NOLINT
+ OfflineTtsConfig config;
+
+ config.model.matcha.acoustic_model =
+ "./matcha-icefall-en_US-ljspeech/model-steps-3.onnx";
+
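+  // MatchaTTS predicts a mel spectrogram; the HiFi-GAN vocoder below converts
+  // it to waveform samples.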
+ config.model.matcha.vocoder = "./hifigan_v2.onnx";
+
+ config.model.matcha.tokens = "./matcha-icefall-en_US-ljspeech/tokens.txt";
+
+ config.model.matcha.data_dir =
+ "./matcha-icefall-en_US-ljspeech/espeak-ng-data";
+
+ config.model.num_threads = 1;
+
+ // If you don't want to see debug messages, please set it to 0
+ config.model.debug = 1;
+
+ std::string filename = "./generated-matcha-en-cxx.wav";
+ std::string text =
+ "Today as always, men fall into two groups: slaves and free men. Whoever "
+ "does not have two-thirds of his day for himself, is a slave, whatever "
+ "he may be: a statesman, a businessman, an official, or a scholar. "
+ "Friends fell out often because life was changing so fast. The easiest "
+ "thing in the world was to lose touch with someone.";
+
+ auto tts = OfflineTts::Create(config);
+ int32_t sid = 0;
+ float speed = 1.0; // larger -> faster in speech speed
+
+#if 0
+ // If you don't want to use a callback, then please enable this branch
+ GeneratedAudio audio = tts.Generate(text, sid, speed);
+#else
+ GeneratedAudio audio = tts.Generate(text, sid, speed, ProgressCallback);
+#endif
+
+ WriteWave(filename, {audio.samples, audio.sample_rate});
+
+ fprintf(stderr, "Input text is: %s\n", text.c_str());
+ fprintf(stderr, "Speaker ID is is: %d\n", sid);
+ fprintf(stderr, "Saved to: %s\n", filename.c_str());
+
+ return 0;
+}
diff --git a/cxx-api-examples/matcha-tts-zh-cxx-api.cc b/cxx-api-examples/matcha-tts-zh-cxx-api.cc
new file mode 100644
index 0000000000..f63065994f
--- /dev/null
+++ b/cxx-api-examples/matcha-tts-zh-cxx-api.cc
@@ -0,0 +1,79 @@
+// cxx-api-examples/matcha-tts-zh-cxx-api.cc
+//
+// Copyright (c) 2025 Xiaomi Corporation
+
+// This file shows how to use sherpa-onnx CXX API
+// for Chinese TTS with MatchaTTS.
+//
+// clang-format off
+/*
+Usage
+
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
+tar xvf matcha-icefall-zh-baker.tar.bz2
+rm matcha-icefall-zh-baker.tar.bz2
+
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
+
+./matcha-tts-zh-cxx-api
+
+ */
+// clang-format on
+
+#include <string>
+
+#include "sherpa-onnx/c-api/cxx-api.h"
+
+static int32_t ProgressCallback(const float *samples, int32_t num_samples,
+ float progress, void *arg) {
+ fprintf(stderr, "Progress: %.3f%%\n", progress * 100);
+ // return 1 to continue generating
+ // return 0 to stop generating
+ return 1;
+}
+
+int32_t main(int32_t argc, char *argv[]) {
+ using namespace sherpa_onnx::cxx; // NOLINT
+ OfflineTtsConfig config;
+ config.model.matcha.acoustic_model =
+ "./matcha-icefall-zh-baker/model-steps-3.onnx";
+ config.model.matcha.vocoder = "./hifigan_v2.onnx";
+ config.model.matcha.lexicon = "./matcha-icefall-zh-baker/lexicon.txt";
+ config.model.matcha.tokens = "./matcha-icefall-zh-baker/tokens.txt";
+ config.model.matcha.dict_dir = "./matcha-icefall-zh-baker/dict";
+ config.model.num_threads = 1;
+
+ // If you don't want to see debug messages, please set it to 0
+ config.model.debug = 1;
+
+ // clang-format off
+ config.rule_fsts = "./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst"; // NOLINT
+ // clang-format on
+
+ std::string filename = "./generated-matcha-zh-cxx.wav";
+ std::string text =
+ "当夜幕降临,星光点点,伴随着微风拂面,我在静谧中感受着时光的流转,思念如"
+ "涟漪荡漾,梦境如画卷展开,我与自然融为一体,沉静在这片宁静的美丽之中,感"
+ "受着生命的奇迹与温柔."
+ "某某银行的副行长和一些行政领导表示,他们去过长江和长白山; "
+ "经济不断增长。2024年12月31号,拨打110或者18920240511。123456块钱。";
+
+ auto tts = OfflineTts::Create(config);
+ int32_t sid = 0;
+ float speed = 1.0; // larger -> faster in speech speed
+
+#if 0
+ // If you don't want to use a callback, then please enable this branch
+ GeneratedAudio audio = tts.Generate(text, sid, speed);
+#else
+ GeneratedAudio audio = tts.Generate(text, sid, speed, ProgressCallback);
+#endif
+
+ WriteWave(filename, {audio.samples, audio.sample_rate});
+
+ fprintf(stderr, "Input text is: %s\n", text.c_str());
+ fprintf(stderr, "Speaker ID is is: %d\n", sid);
+ fprintf(stderr, "Saved to: %s\n", filename.c_str());
+
+ return 0;
+}
diff --git a/cxx-api-examples/moonshine-cxx-api.cc b/cxx-api-examples/moonshine-cxx-api.cc
new file mode 100644
index 0000000000..c2ce565c3b
--- /dev/null
+++ b/cxx-api-examples/moonshine-cxx-api.cc
@@ -0,0 +1,81 @@
+// cxx-api-examples/moonshine-cxx-api.cc
+// Copyright (c) 2024 Xiaomi Corporation
+
+//
+// This file demonstrates how to use Moonshine with sherpa-onnx's C++ API.
+//
+// clang-format off
+//
+// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+// tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+// rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+//
+// clang-format on
+
+#include <chrono>  // NOLINT
+#include <iostream>
+#include <string>
+
+#include "sherpa-onnx/c-api/cxx-api.h"
+
+int32_t main() {
+ using namespace sherpa_onnx::cxx; // NOLINT
+ OfflineRecognizerConfig config;
+
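+  // Moonshine is packaged as four ONNX files: a preprocessor, an encoder and
+  // two decoders (the cached decoder reuses attention state between steps).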
+ config.model_config.moonshine.preprocessor =
+ "./sherpa-onnx-moonshine-tiny-en-int8/preprocess.onnx";
+ config.model_config.moonshine.encoder =
+ "./sherpa-onnx-moonshine-tiny-en-int8/encode.int8.onnx";
+ config.model_config.moonshine.uncached_decoder =
+ "./sherpa-onnx-moonshine-tiny-en-int8/uncached_decode.int8.onnx";
+ config.model_config.moonshine.cached_decoder =
+ "./sherpa-onnx-moonshine-tiny-en-int8/cached_decode.int8.onnx";
+ config.model_config.tokens =
+ "./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt";
+
+ config.model_config.num_threads = 1;
+
+ std::cout << "Loading model\n";
+ OfflineRecognizer recongizer = OfflineRecognizer::Create(config);
+ if (!recongizer.Get()) {
+ std::cerr << "Please check your config\n";
+ return -1;
+ }
+ std::cout << "Loading model done\n";
+
+ std::string wave_filename =
+ "./sherpa-onnx-moonshine-tiny-en-int8/test_wavs/0.wav";
+ Wave wave = ReadWave(wave_filename);
+ if (wave.samples.empty()) {
+ std::cerr << "Failed to read: '" << wave_filename << "'\n";
+ return -1;
+ }
+
+ std::cout << "Start recognition\n";
+ const auto begin = std::chrono::steady_clock::now();
+
+ OfflineStream stream = recongizer.CreateStream();
+ stream.AcceptWaveform(wave.sample_rate, wave.samples.data(),
+ wave.samples.size());
+
+ recongizer.Decode(&stream);
+
+ OfflineRecognizerResult result = recongizer.GetResult(&stream);
+
+ const auto end = std::chrono::steady_clock::now();
+ const float elapsed_seconds =
+      std::chrono::duration_cast<std::chrono::milliseconds>(end - begin)
+ .count() /
+ 1000.;
+  float duration = wave.samples.size() / static_cast<float>(wave.sample_rate);
+ float rtf = elapsed_seconds / duration;
+
+ std::cout << "text: " << result.text << "\n";
+ printf("Number of threads: %d\n", config.model_config.num_threads);
+ printf("Duration: %.3fs\n", duration);
+ printf("Elapsed seconds: %.3fs\n", elapsed_seconds);
+ printf("(Real time factor) RTF = %.3f / %.3f = %.3f\n", elapsed_seconds,
+ duration, rtf);
+
+ return 0;
+}
diff --git a/cxx-api-examples/sense-voice-cxx-api.cc b/cxx-api-examples/sense-voice-cxx-api.cc
new file mode 100644
index 0000000000..ea642b9802
--- /dev/null
+++ b/cxx-api-examples/sense-voice-cxx-api.cc
@@ -0,0 +1,78 @@
+// cxx-api-examples/sense-voice-cxx-api.cc
+// Copyright (c) 2024 Xiaomi Corporation
+
+//
+// This file demonstrates how to use sense voice with sherpa-onnx's C++ API.
+//
+// clang-format off
+//
+// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
+// tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
+// rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
+//
+// clang-format on
+
+#include <chrono>  // NOLINT
+#include <iostream>
+#include <string>
+
+#include "sherpa-onnx/c-api/cxx-api.h"
+
+int32_t main() {
+ using namespace sherpa_onnx::cxx; // NOLINT
+ OfflineRecognizerConfig config;
+
+ config.model_config.sense_voice.model =
+ "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx";
+ config.model_config.sense_voice.use_itn = true;
+ config.model_config.sense_voice.language = "auto";
+ config.model_config.tokens =
+ "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt";
+
+ config.model_config.num_threads = 1;
+
+ std::cout << "Loading model\n";
+ OfflineRecognizer recongizer = OfflineRecognizer::Create(config);
+ if (!recongizer.Get()) {
+ std::cerr << "Please check your config\n";
+ return -1;
+ }
+ std::cout << "Loading model done\n";
+
+ std::string wave_filename =
+ "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/test_wavs/en.wav";
+
+ Wave wave = ReadWave(wave_filename);
+ if (wave.samples.empty()) {
+ std::cerr << "Failed to read: '" << wave_filename << "'\n";
+ return -1;
+ }
+
+ std::cout << "Start recognition\n";
+ const auto begin = std::chrono::steady_clock::now();
+
+ OfflineStream stream = recongizer.CreateStream();
+ stream.AcceptWaveform(wave.sample_rate, wave.samples.data(),
+ wave.samples.size());
+
+ recongizer.Decode(&stream);
+
+ OfflineRecognizerResult result = recongizer.GetResult(&stream);
+
+ const auto end = std::chrono::steady_clock::now();
+ const float elapsed_seconds =
+      std::chrono::duration_cast<std::chrono::milliseconds>(end - begin)
+ .count() /
+ 1000.;
+  float duration = wave.samples.size() / static_cast<float>(wave.sample_rate);
+ float rtf = elapsed_seconds / duration;
+
+ std::cout << "text: " << result.text << "\n";
+ printf("Number of threads: %d\n", config.model_config.num_threads);
+ printf("Duration: %.3fs\n", duration);
+ printf("Elapsed seconds: %.3fs\n", elapsed_seconds);
+ printf("(Real time factor) RTF = %.3f / %.3f = %.3f\n", elapsed_seconds,
+ duration, rtf);
+
+ return 0;
+}
diff --git a/cxx-api-examples/streaming-zipformer-cxx-api.cc b/cxx-api-examples/streaming-zipformer-cxx-api.cc
new file mode 100644
index 0000000000..ac4abc4796
--- /dev/null
+++ b/cxx-api-examples/streaming-zipformer-cxx-api.cc
@@ -0,0 +1,93 @@
+// cxx-api-examples/streaming-zipformer-cxx-api.cc
+// Copyright (c) 2024 Xiaomi Corporation
+
+//
+// This file demonstrates how to use streaming Zipformer
+// with sherpa-onnx's C++ API.
+//
+// clang-format off
+//
+// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
+// tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
+// rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
+//
+// clang-format on
+
+#include <chrono>  // NOLINT
+#include <iostream>
+#include <string>
+
+#include "sherpa-onnx/c-api/cxx-api.h"
+
+int32_t main() {
+ using namespace sherpa_onnx::cxx; // NOLINT
+ OnlineRecognizerConfig config;
+
+ // please see
+ // https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20-bilingual-chinese-english
+ config.model_config.transducer.encoder =
+ "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/"
+ "encoder-epoch-99-avg-1.int8.onnx";
+
+ // Note: We recommend not using int8.onnx for the decoder.
+ config.model_config.transducer.decoder =
+ "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/"
+ "decoder-epoch-99-avg-1.onnx";
+
+ config.model_config.transducer.joiner =
+ "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/"
+ "joiner-epoch-99-avg-1.int8.onnx";
+
+ config.model_config.tokens =
+ "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt";
+
+ config.model_config.num_threads = 1;
+
+ std::cout << "Loading model\n";
+ OnlineRecognizer recongizer = OnlineRecognizer::Create(config);
+ if (!recongizer.Get()) {
+ std::cerr << "Please check your config\n";
+ return -1;
+ }
+ std::cout << "Loading model done\n";
+
+ std::string wave_filename =
+ "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/"
+ "0.wav";
+ Wave wave = ReadWave(wave_filename);
+ if (wave.samples.empty()) {
+ std::cerr << "Failed to read: '" << wave_filename << "'\n";
+ return -1;
+ }
+
+ std::cout << "Start recognition\n";
+ const auto begin = std::chrono::steady_clock::now();
+
+ OnlineStream stream = recongizer.CreateStream();
+ stream.AcceptWaveform(wave.sample_rate, wave.samples.data(),
+ wave.samples.size());
+ stream.InputFinished();
+
+ while (recongizer.IsReady(&stream)) {
+ recongizer.Decode(&stream);
+ }
+
+ OnlineRecognizerResult result = recongizer.GetResult(&stream);
+
+ const auto end = std::chrono::steady_clock::now();
+ const float elapsed_seconds =
+      std::chrono::duration_cast<std::chrono::milliseconds>(end - begin)
+ .count() /
+ 1000.;
+  float duration = wave.samples.size() / static_cast<float>(wave.sample_rate);
+ float rtf = elapsed_seconds / duration;
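+  // RTF = processing time / audio duration; RTF < 1 means decoding ran faster
+  // than real time on this machine.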
+
+ std::cout << "text: " << result.text << "\n";
+ printf("Number of threads: %d\n", config.model_config.num_threads);
+ printf("Duration: %.3fs\n", duration);
+ printf("Elapsed seconds: %.3fs\n", elapsed_seconds);
+ printf("(Real time factor) RTF = %.3f / %.3f = %.3f\n", elapsed_seconds,
+ duration, rtf);
+
+ return 0;
+}
diff --git a/cxx-api-examples/streaming-zipformer-rtf-cxx-api.cc b/cxx-api-examples/streaming-zipformer-rtf-cxx-api.cc
new file mode 100644
index 0000000000..2e74d30bec
--- /dev/null
+++ b/cxx-api-examples/streaming-zipformer-rtf-cxx-api.cc
@@ -0,0 +1,132 @@
+// cxx-api-examples/streaming-zipformer-rtf-cxx-api.cc
+// Copyright (c) 2024 Xiaomi Corporation
+
+//
+// This file demonstrates how to use streaming Zipformer
+// with sherpa-onnx's C++ API.
+//
+// clang-format off
+//
+// cd /path/sherpa-onnx/
+// mkdir build
+// cd build
+// cmake ..
+// make
+//
+// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
+// tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
+// rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
+//
+// # 1. Test on CPU, run once
+//
+// ./bin/streaming-zipformer-rtf-cxx-api
+//
+// # 2. Test on CPU, run 10 times
+//
+// ./bin/streaming-zipformer-rtf-cxx-api 10
+//
+// # 3. Test on GPU, run 10 times
+//
+// ./bin/streaming-zipformer-rtf-cxx-api 10 cuda
+//
+// clang-format on
+
+#include <chrono>  // NOLINT
+#include <iostream>
+#include <string>
+
+#include "sherpa-onnx/c-api/cxx-api.h"
+
+int32_t main(int argc, char *argv[]) {
+ int32_t num_runs = 1;
+ if (argc >= 2) {
+ num_runs = atoi(argv[1]);
+ if (num_runs < 0) {
+ num_runs = 1;
+ }
+ }
+
+ bool use_gpu = (argc == 3);
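+  // Any third command-line argument switches the provider from CPU to CUDA.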
+
+ using namespace sherpa_onnx::cxx; // NOLINT
+ OnlineRecognizerConfig config;
+
+ // please see
+ // https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20-bilingual-chinese-english
+ config.model_config.transducer.encoder =
+ "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/"
+ "encoder-epoch-99-avg-1.int8.onnx";
+
+ // Note: We recommend not using int8.onnx for the decoder.
+ config.model_config.transducer.decoder =
+ "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/"
+ "decoder-epoch-99-avg-1.onnx";
+
+ config.model_config.transducer.joiner =
+ "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/"
+ "joiner-epoch-99-avg-1.int8.onnx";
+
+ config.model_config.tokens =
+ "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt";
+
+ config.model_config.num_threads = 1;
+ config.model_config.provider = use_gpu ? "cuda" : "cpu";
+
+ std::cout << "Loading model\n";
+ OnlineRecognizer recongizer = OnlineRecognizer::Create(config);
+ if (!recongizer.Get()) {
+ std::cerr << "Please check your config\n";
+ return -1;
+ }
+ std::cout << "Loading model done\n";
+
+ std::string wave_filename =
+ "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/"
+ "0.wav";
+ Wave wave = ReadWave(wave_filename);
+ if (wave.samples.empty()) {
+ std::cerr << "Failed to read: '" << wave_filename << "'\n";
+ return -1;
+ }
+
+ std::cout << "Start recognition\n";
+ float total_elapsed_seconds = 0;
+ OnlineRecognizerResult result;
+ for (int32_t i = 0; i < num_runs; ++i) {
+ const auto begin = std::chrono::steady_clock::now();
+
+ OnlineStream stream = recongizer.CreateStream();
+ stream.AcceptWaveform(wave.sample_rate, wave.samples.data(),
+ wave.samples.size());
+ stream.InputFinished();
+
+ while (recongizer.IsReady(&stream)) {
+ recongizer.Decode(&stream);
+ }
+
+ result = recongizer.GetResult(&stream);
+
+ auto end = std::chrono::steady_clock::now();
+ float elapsed_seconds =
+        std::chrono::duration_cast<std::chrono::milliseconds>(end - begin)
+ .count() /
+ 1000.;
+ printf("Run %d/%d, elapsed seconds: %.3f\n", i, num_runs, elapsed_seconds);
+ total_elapsed_seconds += elapsed_seconds;
+ }
+ float average_elapsed_secodns = total_elapsed_seconds / num_runs;
+  float duration = wave.samples.size() / static_cast<float>(wave.sample_rate);
+ float rtf = total_elapsed_seconds / num_runs / duration;
+
+ std::cout << "text: " << result.text << "\n";
+ printf("Number of threads: %d\n", config.model_config.num_threads);
+ printf("Duration: %.3fs\n", duration);
+ printf("Total Elapsed seconds: %.3fs\n", total_elapsed_seconds);
+ printf("Num runs: %d\n", num_runs);
+ printf("Elapsed seconds per run: %.3f/%d=%.3f\n", total_elapsed_seconds,
+ num_runs, average_elapsed_secodns);
+ printf("(Real time factor) RTF = %.3f / %.3f = %.3f\n",
+ average_elapsed_secodns, duration, rtf);
+
+ return 0;
+}
diff --git a/cxx-api-examples/whisper-cxx-api.cc b/cxx-api-examples/whisper-cxx-api.cc
new file mode 100644
index 0000000000..348d115bd3
--- /dev/null
+++ b/cxx-api-examples/whisper-cxx-api.cc
@@ -0,0 +1,76 @@
+// cxx-api-examples/whisper-cxx-api.cc
+// Copyright (c) 2024 Xiaomi Corporation
+
+//
+// This file demonstrates how to use whisper with sherpa-onnx's C++ API.
+//
+// clang-format off
+//
+// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
+// tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
+// rm sherpa-onnx-whisper-tiny.en.tar.bz2
+//
+// clang-format on
+
+#include <chrono>  // NOLINT
+#include <iostream>
+#include <string>
+
+#include "sherpa-onnx/c-api/cxx-api.h"
+
+int32_t main() {
+ using namespace sherpa_onnx::cxx; // NOLINT
+ OfflineRecognizerConfig config;
+
+ config.model_config.whisper.encoder =
+ "./sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx";
+ config.model_config.whisper.decoder =
+ "./sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx";
+ config.model_config.tokens =
+ "./sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt";
+
+ config.model_config.num_threads = 1;
+
+ std::cout << "Loading model\n";
+ OfflineRecognizer recongizer = OfflineRecognizer::Create(config);
+ if (!recongizer.Get()) {
+ std::cerr << "Please check your config\n";
+ return -1;
+ }
+ std::cout << "Loading model done\n";
+
+ std::string wave_filename = "./sherpa-onnx-whisper-tiny.en/test_wavs/0.wav";
+ Wave wave = ReadWave(wave_filename);
+ if (wave.samples.empty()) {
+ std::cerr << "Failed to read: '" << wave_filename << "'\n";
+ return -1;
+ }
+
+ std::cout << "Start recognition\n";
+ const auto begin = std::chrono::steady_clock::now();
+
+ OfflineStream stream = recongizer.CreateStream();
+ stream.AcceptWaveform(wave.sample_rate, wave.samples.data(),
+ wave.samples.size());
+
+ recongizer.Decode(&stream);
+
+ OfflineRecognizerResult result = recongizer.GetResult(&stream);
+
+ const auto end = std::chrono::steady_clock::now();
+ const float elapsed_seconds =
+      std::chrono::duration_cast<std::chrono::milliseconds>(end - begin)
+ .count() /
+ 1000.;
+  float duration = wave.samples.size() / static_cast<float>(wave.sample_rate);
+ float rtf = elapsed_seconds / duration;
+
+ std::cout << "text: " << result.text << "\n";
+ printf("Number of threads: %d\n", config.model_config.num_threads);
+ printf("Duration: %.3fs\n", duration);
+ printf("Elapsed seconds: %.3fs\n", elapsed_seconds);
+ printf("(Real time factor) RTF = %.3f / %.3f = %.3f\n", elapsed_seconds,
+ duration, rtf);
+
+ return 0;
+}
diff --git a/dart-api-examples/README.md b/dart-api-examples/README.md
index 9370372e79..3d66cb04ea 100644
--- a/dart-api-examples/README.md
+++ b/dart-api-examples/README.md
@@ -9,6 +9,7 @@ https://pub.dev/packages/sherpa_onnx
| Directory | Description |
|-----------|-------------|
+| [./speaker-diarization](./speaker-diarization)| Example for speaker diarization.|
| [./add-punctuations](./add-punctuations)| Example for adding punctuations to text.|
| [./audio-tagging](./audio-tagging)| Example for audio tagging.|
| [./keyword-spotter](./keyword-spotter)| Example for keyword spotting|
diff --git a/dart-api-examples/add-punctuations/pubspec.yaml b/dart-api-examples/add-punctuations/pubspec.yaml
index 9c03139c3b..801744d95d 100644
--- a/dart-api-examples/add-punctuations/pubspec.yaml
+++ b/dart-api-examples/add-punctuations/pubspec.yaml
@@ -9,7 +9,7 @@ environment:
sdk: ">=3.0.0 <4.0.0"
dependencies:
- sherpa_onnx: ^1.10.27
+ sherpa_onnx: ^1.10.42
path: ^1.9.0
args: ^2.5.0
diff --git a/dart-api-examples/audio-tagging/pubspec.yaml b/dart-api-examples/audio-tagging/pubspec.yaml
index bae6aa72cb..fb470975a1 100644
--- a/dart-api-examples/audio-tagging/pubspec.yaml
+++ b/dart-api-examples/audio-tagging/pubspec.yaml
@@ -9,7 +9,7 @@ environment:
sdk: ">=3.0.0 <4.0.0"
dependencies:
- sherpa_onnx: ^1.10.27
+ sherpa_onnx: ^1.10.42
path: ^1.9.0
args: ^2.5.0
diff --git a/dart-api-examples/keyword-spotter/bin/zipformer-transducer.dart b/dart-api-examples/keyword-spotter/bin/zipformer-transducer.dart
index ebef1fd7c5..47d587989f 100644
--- a/dart-api-examples/keyword-spotter/bin/zipformer-transducer.dart
+++ b/dart-api-examples/keyword-spotter/bin/zipformer-transducer.dart
@@ -73,6 +73,8 @@ void main(List<String> arguments) async {
spotter.decode(stream);
final result = spotter.getResult(stream);
if (result.keyword != '') {
+ // Remember to reset the stream right after detecting a keyword
+ spotter.reset(stream);
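+      // Resetting clears the spotter's internal state for this stream so
+      // that subsequent keywords are detected independently.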
print('Detected: ${result.keyword}');
}
}
diff --git a/dart-api-examples/keyword-spotter/pubspec.yaml b/dart-api-examples/keyword-spotter/pubspec.yaml
index eeae130c88..cd86f374d6 100644
--- a/dart-api-examples/keyword-spotter/pubspec.yaml
+++ b/dart-api-examples/keyword-spotter/pubspec.yaml
@@ -9,7 +9,7 @@ environment:
sdk: ">=3.0.0 <4.0.0"
dependencies:
- sherpa_onnx: ^1.10.27
+ sherpa_onnx: ^1.10.42
# sherpa_onnx:
# path: ../../flutter/sherpa_onnx
path: ^1.9.0
diff --git a/dart-api-examples/non-streaming-asr/bin/moonshine.dart b/dart-api-examples/non-streaming-asr/bin/moonshine.dart
new file mode 100644
index 0000000000..68b653648e
--- /dev/null
+++ b/dart-api-examples/non-streaming-asr/bin/moonshine.dart
@@ -0,0 +1,69 @@
+// Copyright (c) 2024 Xiaomi Corporation
+import 'dart:io';
+
+import 'package:args/args.dart';
+import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
+
+import './init.dart';
+
+void main(List<String> arguments) async {
+ await initSherpaOnnx();
+
+ final parser = ArgParser()
+ ..addOption('preprocessor',
+ help: 'Path to the moonshine preprocessor model')
+ ..addOption('encoder', help: 'Path to the moonshine encoder model')
+ ..addOption('uncached-decoder',
+ help: 'Path to moonshine uncached decoder model')
+ ..addOption('cached-decoder',
+ help: 'Path to moonshine cached decoder model')
+ ..addOption('tokens', help: 'Path to tokens.txt')
+ ..addOption('input-wav', help: 'Path to input.wav to transcribe');
+
+ final res = parser.parse(arguments);
+ if (res['preprocessor'] == null ||
+ res['encoder'] == null ||
+ res['uncached-decoder'] == null ||
+ res['cached-decoder'] == null ||
+ res['tokens'] == null ||
+ res['input-wav'] == null) {
+ print(parser.usage);
+ exit(1);
+ }
+
+ final preprocessor = res['preprocessor'] as String;
+ final encoder = res['encoder'] as String;
+ final uncachedDecoder = res['uncached-decoder'] as String;
+ final cachedDecoder = res['cached-decoder'] as String;
+ final tokens = res['tokens'] as String;
+ final inputWav = res['input-wav'] as String;
+
+ final moonshine = sherpa_onnx.OfflineMoonshineModelConfig(
+ preprocessor: preprocessor,
+ encoder: encoder,
+ uncachedDecoder: uncachedDecoder,
+ cachedDecoder: cachedDecoder,
+ );
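+  // Note: the uncached decoder handles the first decoding step (no attention
+  // cache exists yet); the cached decoder reuses the cache for later tokens.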
+
+ final modelConfig = sherpa_onnx.OfflineModelConfig(
+ moonshine: moonshine,
+ tokens: tokens,
+ debug: false,
+ numThreads: 1,
+ );
+ final config = sherpa_onnx.OfflineRecognizerConfig(model: modelConfig);
+ final recognizer = sherpa_onnx.OfflineRecognizer(config);
+
+ final waveData = sherpa_onnx.readWave(inputWav);
+ final stream = recognizer.createStream();
+
+ stream.acceptWaveform(
+ samples: waveData.samples, sampleRate: waveData.sampleRate);
+ recognizer.decode(stream);
+
+ final result = recognizer.getResult(stream);
+ print(result.text);
+
+ stream.free();
+ recognizer.free();
+}
diff --git a/dart-api-examples/non-streaming-asr/pubspec.yaml b/dart-api-examples/non-streaming-asr/pubspec.yaml
index d348b84515..fd93095daf 100644
--- a/dart-api-examples/non-streaming-asr/pubspec.yaml
+++ b/dart-api-examples/non-streaming-asr/pubspec.yaml
@@ -10,7 +10,7 @@ environment:
# Add regular dependencies here.
dependencies:
- sherpa_onnx: ^1.10.27
+ sherpa_onnx: ^1.10.42
path: ^1.9.0
args: ^2.5.0
diff --git a/dart-api-examples/non-streaming-asr/run-moonshine.sh b/dart-api-examples/non-streaming-asr/run-moonshine.sh
new file mode 100755
index 0000000000..213a230d0e
--- /dev/null
+++ b/dart-api-examples/non-streaming-asr/run-moonshine.sh
@@ -0,0 +1,20 @@
+#!/usr/bin/env bash
+
+set -ex
+
+dart pub get
+
+if [ ! -f ./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+ tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+ rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+fi
+
+dart run \
+ ./bin/moonshine.dart \
+ --preprocessor ./sherpa-onnx-moonshine-tiny-en-int8/preprocess.onnx \
+ --encoder ./sherpa-onnx-moonshine-tiny-en-int8/encode.int8.onnx \
+ --uncached-decoder ./sherpa-onnx-moonshine-tiny-en-int8/uncached_decode.int8.onnx \
+ --cached-decoder ./sherpa-onnx-moonshine-tiny-en-int8/cached_decode.int8.onnx \
+ --tokens ./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt \
+ --input-wav ./sherpa-onnx-moonshine-tiny-en-int8/test_wavs/0.wav
diff --git a/dart-api-examples/speaker-diarization/.gitignore b/dart-api-examples/speaker-diarization/.gitignore
new file mode 100644
index 0000000000..3a85790408
--- /dev/null
+++ b/dart-api-examples/speaker-diarization/.gitignore
@@ -0,0 +1,3 @@
+# https://dart.dev/guides/libraries/private-files
+# Created by `dart pub`
+.dart_tool/
diff --git a/dart-api-examples/speaker-diarization/CHANGELOG.md b/dart-api-examples/speaker-diarization/CHANGELOG.md
new file mode 100644
index 0000000000..effe43c82c
--- /dev/null
+++ b/dart-api-examples/speaker-diarization/CHANGELOG.md
@@ -0,0 +1,3 @@
+## 1.0.0
+
+- Initial version.
diff --git a/dart-api-examples/speaker-diarization/README.md b/dart-api-examples/speaker-diarization/README.md
new file mode 100644
index 0000000000..d4d8c4fd27
--- /dev/null
+++ b/dart-api-examples/speaker-diarization/README.md
@@ -0,0 +1,7 @@
+# Introduction
+
+This example shows how to use the Dart API from sherpa-onnx for speaker diarization.
+
+# Usage
+
+Please see [./run.sh](./run.sh)
diff --git a/dart-api-examples/speaker-diarization/analysis_options.yaml b/dart-api-examples/speaker-diarization/analysis_options.yaml
new file mode 100644
index 0000000000..dee8927aaf
--- /dev/null
+++ b/dart-api-examples/speaker-diarization/analysis_options.yaml
@@ -0,0 +1,30 @@
+# This file configures the static analysis results for your project (errors,
+# warnings, and lints).
+#
+# This enables the 'recommended' set of lints from `package:lints`.
+# This set helps identify many issues that may lead to problems when running
+# or consuming Dart code, and enforces writing Dart using a single, idiomatic
+# style and format.
+#
+# If you want a smaller set of lints you can change this to specify
+# 'package:lints/core.yaml'. These are just the most critical lints
+# (the recommended set includes the core lints).
+# The core lints are also what is used by pub.dev for scoring packages.
+
+include: package:lints/recommended.yaml
+
+# Uncomment the following section to specify additional rules.
+
+# linter:
+# rules:
+# - camel_case_types
+
+# analyzer:
+# exclude:
+# - path/to/excluded/files/**
+
+# For more information about the core and recommended set of lints, see
+# https://dart.dev/go/core-lints
+
+# For additional information about configuring this file, see
+# https://dart.dev/guides/language/analysis-options
diff --git a/dart-api-examples/speaker-diarization/bin/init.dart b/dart-api-examples/speaker-diarization/bin/init.dart
new file mode 120000
index 0000000000..48508cfd39
--- /dev/null
+++ b/dart-api-examples/speaker-diarization/bin/init.dart
@@ -0,0 +1 @@
+../../vad/bin/init.dart
\ No newline at end of file
diff --git a/dart-api-examples/speaker-diarization/bin/speaker-diarization.dart b/dart-api-examples/speaker-diarization/bin/speaker-diarization.dart
new file mode 100644
index 0000000000..760adc8680
--- /dev/null
+++ b/dart-api-examples/speaker-diarization/bin/speaker-diarization.dart
@@ -0,0 +1,100 @@
+// Copyright (c) 2024 Xiaomi Corporation
+import 'dart:io';
+import 'dart:typed_data';
+import 'dart:ffi';
+
+import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
+import './init.dart';
+
+void main(List<String> arguments) async {
+ await initSherpaOnnx();
+
+ /* Please use the following commands to download files used in this file
+ Step 1: Download a speaker segmentation model
+
+ Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-segmentation-models
+ for a list of available models. The following is an example
+
+ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+ tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+ rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+
+ Step 2: Download a speaker embedding extractor model
+
+ Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
+ for a list of available models. The following is an example
+
+ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
+
+ Step 3. Download test wave files
+
+ Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-segmentation-models
+ for a list of available test wave files. The following is an example
+
+ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav
+
+ Step 4. Run it
+ */
+
+ final segmentationModel =
+ "./sherpa-onnx-pyannote-segmentation-3-0/model.onnx";
+
+ final embeddingModel =
+ "./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx";
+
+ final waveFilename = "./0-four-speakers-zh.wav";
+
+ final segmentationConfig = sherpa_onnx.OfflineSpeakerSegmentationModelConfig(
+ pyannote: sherpa_onnx.OfflineSpeakerSegmentationPyannoteModelConfig(
+ model: segmentationModel),
+ );
+
+ final embeddingConfig =
+ sherpa_onnx.SpeakerEmbeddingExtractorConfig(model: embeddingModel);
+
+  // Since we know there are 4 speakers in ./0-four-speakers-zh.wav, we set
+  // numClusters to 4. If you don't know the exact number of speakers, set it
+  // to -1 and set threshold instead. A larger threshold leads to fewer
+  // clusters, i.e., fewer speakers.
+ final clusteringConfig =
+ sherpa_onnx.FastClusteringConfig(numClusters: 4, threshold: 0.5);
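+  // A sketch of the threshold-only setup when the speaker count is unknown
+  // (assuming the same FastClusteringConfig API):
+  //   final clusteringConfig =
+  //       sherpa_onnx.FastClusteringConfig(numClusters: -1, threshold: 0.5);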
+
+ var config = sherpa_onnx.OfflineSpeakerDiarizationConfig(
+ segmentation: segmentationConfig,
+ embedding: embeddingConfig,
+ clustering: clusteringConfig,
+ minDurationOn: 0.2,
+ minDurationOff: 0.5);
+
+ final sd = sherpa_onnx.OfflineSpeakerDiarization(config);
+ if (sd.ptr == nullptr) {
+ return;
+ }
+
+ final waveData = sherpa_onnx.readWave(waveFilename);
+ if (sd.sampleRate != waveData.sampleRate) {
+ print(
+ 'Expected sample rate: ${sd.sampleRate}, given: ${waveData.sampleRate}');
+ return;
+ }
+
+ print('started');
+
+ // Use the following statement if you don't want to use a callback
+ // final segments = sd.process(samples: waveData.samples);
+
+ final segments = sd.processWithCallback(
+ samples: waveData.samples,
+ callback: (int numProcessedChunk, int numTotalChunks) {
+ final progress = 100.0 * numProcessedChunk / numTotalChunks;
+
+ print('Progress ${progress.toStringAsFixed(2)}%');
+
+ return 0;
+ });
+
+ for (int i = 0; i < segments.length; ++i) {
+ print(
+ '${segments[i].start.toStringAsFixed(3)} -- ${segments[i].end.toStringAsFixed(3)} speaker_${segments[i].speaker}');
+ }
+}
diff --git a/dart-api-examples/speaker-diarization/pubspec.yaml b/dart-api-examples/speaker-diarization/pubspec.yaml
new file mode 100644
index 0000000000..7f18b469cb
--- /dev/null
+++ b/dart-api-examples/speaker-diarization/pubspec.yaml
@@ -0,0 +1,17 @@
+name: speaker_diarization
+description: >
+ This example demonstrates how to use the Dart API for speaker diarization.
+
+version: 1.0.0
+
+environment:
+ sdk: ">=3.0.0 <4.0.0"
+
+dependencies:
+ sherpa_onnx: ^1.10.42
+ # sherpa_onnx:
+ # path: ../../flutter/sherpa_onnx
+ path: ^1.9.0
+
+dev_dependencies:
+ lints: ^3.0.0
diff --git a/dart-api-examples/speaker-diarization/run.sh b/dart-api-examples/speaker-diarization/run.sh
new file mode 100755
index 0000000000..7717870dce
--- /dev/null
+++ b/dart-api-examples/speaker-diarization/run.sh
@@ -0,0 +1,21 @@
+#!/usr/bin/env bash
+
+set -ex
+
+dart pub get
+
+if [ ! -f ./sherpa-onnx-pyannote-segmentation-3-0/model.onnx ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+ tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+ rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+fi
+
+if [ ! -f ./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
+fi
+
+if [ ! -f ./0-four-speakers-zh.wav ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav
+fi
+
+dart run ./bin/speaker-diarization.dart
diff --git a/dart-api-examples/speaker-identification/pubspec.yaml b/dart-api-examples/speaker-identification/pubspec.yaml
index fa31c272e2..6608cf8215 100644
--- a/dart-api-examples/speaker-identification/pubspec.yaml
+++ b/dart-api-examples/speaker-identification/pubspec.yaml
@@ -9,7 +9,7 @@ environment:
sdk: ">=3.0.0 <4.0.0"
dependencies:
- sherpa_onnx: ^1.10.27
+ sherpa_onnx: ^1.10.42
path: ^1.9.0
args: ^2.5.0
diff --git a/dart-api-examples/streaming-asr/pubspec.yaml b/dart-api-examples/streaming-asr/pubspec.yaml
index 24bd1decca..6289e1acca 100644
--- a/dart-api-examples/streaming-asr/pubspec.yaml
+++ b/dart-api-examples/streaming-asr/pubspec.yaml
@@ -11,7 +11,7 @@ environment:
# Add regular dependencies here.
dependencies:
- sherpa_onnx: ^1.10.27
+ sherpa_onnx: ^1.10.42
path: ^1.9.0
args: ^2.5.0
diff --git a/dart-api-examples/tts/bin/kokoro-en.dart b/dart-api-examples/tts/bin/kokoro-en.dart
new file mode 100644
index 0000000000..b92d92883f
--- /dev/null
+++ b/dart-api-examples/tts/bin/kokoro-en.dart
@@ -0,0 +1,86 @@
+// Copyright (c) 2025 Xiaomi Corporation
+import 'dart:io';
+
+import 'package:args/args.dart';
+import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
+
+import './init.dart';
+
+void main(List<String> arguments) async {
+ await initSherpaOnnx();
+
+ final parser = ArgParser()
+ ..addOption('model', help: 'Path to the onnx model')
+ ..addOption('voices', help: 'Path to the voices.bin')
+ ..addOption('tokens', help: 'Path to tokens.txt')
+ ..addOption(
+ 'data-dir',
+ help: 'Path to espeak-ng-data directory',
+ defaultsTo: '',
+ )
+ ..addOption('rule-fsts', help: 'Path to rule fsts', defaultsTo: '')
+ ..addOption('rule-fars', help: 'Path to rule fars', defaultsTo: '')
+ ..addOption('text', help: 'Text to generate TTS for')
+ ..addOption('output-wav', help: 'Filename to save the generated audio')
+ ..addOption('speed', help: 'Speech speed', defaultsTo: '1.0')
+ ..addOption(
+ 'sid',
+ help: 'Speaker ID to select. Used only for multi-speaker TTS',
+ defaultsTo: '0',
+ );
+ final res = parser.parse(arguments);
+ if (res['model'] == null ||
+ res['voices'] == null ||
+ res['tokens'] == null ||
+ res['data-dir'] == null ||
+ res['output-wav'] == null ||
+ res['text'] == null) {
+ print(parser.usage);
+ exit(1);
+ }
+ final model = res['model'] as String;
+ final voices = res['voices'] as String;
+ final tokens = res['tokens'] as String;
+ final dataDir = res['data-dir'] as String;
+ final ruleFsts = res['rule-fsts'] as String;
+ final ruleFars = res['rule-fars'] as String;
+ final text = res['text'] as String;
+ final outputWav = res['output-wav'] as String;
+ var speed = double.tryParse(res['speed'] as String) ?? 1.0;
+ final sid = int.tryParse(res['sid'] as String) ?? 0;
+
+ if (speed == 0) {
+ speed = 1.0;
+ }
+
+ final kokoro = sherpa_onnx.OfflineTtsKokoroModelConfig(
+ model: model,
+ voices: voices,
+ tokens: tokens,
+ dataDir: dataDir,
+ lengthScale: 1 / speed,
+ );
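+  // lengthScale is the inverse of the speaking speed: values above 1 slow
+  // the speech down, values below 1 speed it up.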
+
+ final modelConfig = sherpa_onnx.OfflineTtsModelConfig(
+ kokoro: kokoro,
+ numThreads: 1,
+ debug: true,
+ );
+ final config = sherpa_onnx.OfflineTtsConfig(
+ model: modelConfig,
+ maxNumSenetences: 1,
+ ruleFsts: ruleFsts,
+ ruleFars: ruleFars,
+ );
+
+ final tts = sherpa_onnx.OfflineTts(config);
+ final audio = tts.generate(text: text, sid: sid, speed: speed);
+ tts.free();
+
+ sherpa_onnx.writeWave(
+ filename: outputWav,
+ samples: audio.samples,
+ sampleRate: audio.sampleRate,
+ );
+ print('Saved to $outputWav');
+}
diff --git a/dart-api-examples/tts/bin/kokoro-zh-en.dart b/dart-api-examples/tts/bin/kokoro-zh-en.dart
new file mode 100644
index 0000000000..31ee4c49fb
--- /dev/null
+++ b/dart-api-examples/tts/bin/kokoro-zh-en.dart
@@ -0,0 +1,102 @@
+// Copyright (c) 2025 Xiaomi Corporation
+import 'dart:io';
+
+import 'package:args/args.dart';
+import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
+
+import './init.dart';
+
+void main(List<String> arguments) async {
+ await initSherpaOnnx();
+
+ final parser = ArgParser()
+ ..addOption('model', help: 'Path to the onnx model')
+ ..addOption('voices', help: 'Path to the voices.bin')
+ ..addOption('tokens', help: 'Path to tokens.txt')
+ ..addOption(
+ 'data-dir',
+ help: 'Path to espeak-ng-data directory',
+ defaultsTo: '',
+ )
+ ..addOption(
+ 'dict-dir',
+ help: 'Path to dict directory',
+ defaultsTo: '',
+ )
+ ..addOption(
+ 'lexicon',
+ help: 'Path to lexicon files',
+ defaultsTo: '',
+ )
+ ..addOption('rule-fsts', help: 'Path to rule fsts', defaultsTo: '')
+ ..addOption('rule-fars', help: 'Path to rule fars', defaultsTo: '')
+ ..addOption('text', help: 'Text to generate TTS for')
+ ..addOption('output-wav', help: 'Filename to save the generated audio')
+ ..addOption('speed', help: 'Speech speed', defaultsTo: '1.0')
+ ..addOption(
+ 'sid',
+ help: 'Speaker ID to select. Used only for multi-speaker TTS',
+ defaultsTo: '0',
+ );
+ final res = parser.parse(arguments);
+ if (res['model'] == null ||
+ res['voices'] == null ||
+ res['tokens'] == null ||
+ res['data-dir'] == null ||
+ res['dict-dir'] == null ||
+ res['lexicon'] == null ||
+ res['output-wav'] == null ||
+ res['text'] == null) {
+ print(parser.usage);
+ exit(1);
+ }
+ final model = res['model'] as String;
+ final voices = res['voices'] as String;
+ final tokens = res['tokens'] as String;
+ final dataDir = res['data-dir'] as String;
+ final dictDir = res['dict-dir'] as String;
+ final lexicon = res['lexicon'] as String;
+ final ruleFsts = res['rule-fsts'] as String;
+ final ruleFars = res['rule-fars'] as String;
+ final text = res['text'] as String;
+ final outputWav = res['output-wav'] as String;
+ var speed = double.tryParse(res['speed'] as String) ?? 1.0;
+ final sid = int.tryParse(res['sid'] as String) ?? 0;
+
+ if (speed == 0) {
+ speed = 1.0;
+ }
+
+ final kokoro = sherpa_onnx.OfflineTtsKokoroModelConfig(
+ model: model,
+ voices: voices,
+ tokens: tokens,
+ dataDir: dataDir,
+ lengthScale: 1 / speed,
+ dictDir: dictDir,
+ lexicon: lexicon,
+ );
+
+ final modelConfig = sherpa_onnx.OfflineTtsModelConfig(
+ kokoro: kokoro,
+ numThreads: 1,
+ debug: true,
+ );
+ final config = sherpa_onnx.OfflineTtsConfig(
+ model: modelConfig,
+ maxNumSenetences: 1,
+ ruleFsts: ruleFsts,
+ ruleFars: ruleFars,
+ );
+
+ final tts = sherpa_onnx.OfflineTts(config);
+ final audio = tts.generate(text: text, sid: sid, speed: speed);
+ tts.free();
+
+ sherpa_onnx.writeWave(
+ filename: outputWav,
+ samples: audio.samples,
+ sampleRate: audio.sampleRate,
+ );
+ print('Saved to $outputWav');
+}
diff --git a/dart-api-examples/tts/bin/matcha-en.dart b/dart-api-examples/tts/bin/matcha-en.dart
new file mode 100644
index 0000000000..fa4c076530
--- /dev/null
+++ b/dart-api-examples/tts/bin/matcha-en.dart
@@ -0,0 +1,86 @@
+// Copyright (c) 2025 Xiaomi Corporation
+import 'dart:io';
+
+import 'package:args/args.dart';
+import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
+
+import './init.dart';
+
+void main(List<String> arguments) async {
+ await initSherpaOnnx();
+
+ final parser = ArgParser()
+ ..addOption('acoustic-model', help: 'Path to the acoustic model')
+ ..addOption('vocoder', help: 'Path to the vocoder model')
+ ..addOption('tokens', help: 'Path to tokens.txt')
+ ..addOption(
+ 'data-dir',
+ help: 'Path to espeak-ng-data directory',
+ defaultsTo: '',
+ )
+ ..addOption('rule-fsts', help: 'Path to rule fsts', defaultsTo: '')
+ ..addOption('rule-fars', help: 'Path to rule fars', defaultsTo: '')
+ ..addOption('text', help: 'Text to generate TTS for')
+ ..addOption('output-wav', help: 'Filename to save the generated audio')
+ ..addOption('speed', help: 'Speech speed', defaultsTo: '1.0')
+ ..addOption(
+ 'sid',
+ help: 'Speaker ID to select. Used only for multi-speaker TTS',
+ defaultsTo: '0',
+ );
+ final res = parser.parse(arguments);
+ if (res['acoustic-model'] == null ||
+ res['vocoder'] == null ||
+ res['tokens'] == null ||
+ res['data-dir'] == null ||
+ res['output-wav'] == null ||
+ res['text'] == null) {
+ print(parser.usage);
+ exit(1);
+ }
+ final acousticModel = res['acoustic-model'] as String;
+ final vocoder = res['vocoder'] as String;
+ final tokens = res['tokens'] as String;
+ final dataDir = res['data-dir'] as String;
+ final ruleFsts = res['rule-fsts'] as String;
+ final ruleFars = res['rule-fars'] as String;
+ final text = res['text'] as String;
+ final outputWav = res['output-wav'] as String;
+ var speed = double.tryParse(res['speed'] as String) ?? 1.0;
+ final sid = int.tryParse(res['sid'] as String) ?? 0;
+
+ if (speed == 0) {
+ speed = 1.0;
+ }
+
+ final matcha = sherpa_onnx.OfflineTtsMatchaModelConfig(
+ acousticModel: acousticModel,
+ vocoder: vocoder,
+ tokens: tokens,
+ dataDir: dataDir,
+ lengthScale: 1 / speed,
+ );
+
+ final modelConfig = sherpa_onnx.OfflineTtsModelConfig(
+ matcha: matcha,
+ numThreads: 1,
+ debug: true,
+ );
+ final config = sherpa_onnx.OfflineTtsConfig(
+ model: modelConfig,
+ maxNumSenetences: 1,
+ ruleFsts: ruleFsts,
+ ruleFars: ruleFars,
+ );
+
+ final tts = sherpa_onnx.OfflineTts(config);
+ final audio = tts.generate(text: text, sid: sid, speed: speed);
+ tts.free();
+
+ sherpa_onnx.writeWave(
+ filename: outputWav,
+ samples: audio.samples,
+ sampleRate: audio.sampleRate,
+ );
+ print('Saved to $outputWav');
+}
diff --git a/dart-api-examples/tts/bin/matcha-zh.dart b/dart-api-examples/tts/bin/matcha-zh.dart
new file mode 100644
index 0000000000..d52175e747
--- /dev/null
+++ b/dart-api-examples/tts/bin/matcha-zh.dart
@@ -0,0 +1,90 @@
+// Copyright (c) 2025 Xiaomi Corporation
+import 'dart:io';
+
+import 'package:args/args.dart';
+import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
+
+import './init.dart';
+
+void main(List<String> arguments) async {
+ await initSherpaOnnx();
+
+ final parser = ArgParser()
+ ..addOption('acoustic-model', help: 'Path to the acoustic model')
+ ..addOption('vocoder', help: 'Path to the vocoder model')
+ ..addOption('tokens', help: 'Path to tokens.txt')
+ ..addOption('lexicon', help: 'Path to lexicon.txt')
+ ..addOption(
+ 'dict-dir',
+ help: 'Path to jieba dict directory',
+ defaultsTo: '',
+ )
+ ..addOption('rule-fsts', help: 'Path to rule fsts', defaultsTo: '')
+ ..addOption('rule-fars', help: 'Path to rule fars', defaultsTo: '')
+ ..addOption('text', help: 'Text to generate TTS for')
+ ..addOption('output-wav', help: 'Filename to save the generated audio')
+ ..addOption('speed', help: 'Speech speed', defaultsTo: '1.0')
+ ..addOption(
+ 'sid',
+ help: 'Speaker ID to select. Used only for multi-speaker TTS',
+ defaultsTo: '0',
+ );
+ final res = parser.parse(arguments);
+ if (res['acoustic-model'] == null ||
+ res['vocoder'] == null ||
+ res['lexicon'] == null ||
+ res['tokens'] == null ||
+ res['dict-dir'] == null ||
+ res['output-wav'] == null ||
+ res['text'] == null) {
+ print(parser.usage);
+ exit(1);
+ }
+ final acousticModel = res['acoustic-model'] as String;
+ final vocoder = res['vocoder'] as String;
+ final lexicon = res['lexicon'] as String;
+ final tokens = res['tokens'] as String;
+ final dictDir = res['dict-dir'] as String;
+ final ruleFsts = res['rule-fsts'] as String;
+ final ruleFars = res['rule-fars'] as String;
+ final text = res['text'] as String;
+ final outputWav = res['output-wav'] as String;
+ var speed = double.tryParse(res['speed'] as String) ?? 1.0;
+ final sid = int.tryParse(res['sid'] as String) ?? 0;
+
+ if (speed == 0) {
+ speed = 1.0;
+ }
+
+ final matcha = sherpa_onnx.OfflineTtsMatchaModelConfig(
+ acousticModel: acousticModel,
+ vocoder: vocoder,
+ lexicon: lexicon,
+ tokens: tokens,
+ dictDir: dictDir,
+ lengthScale: 1 / speed,
+ );
+
+ final modelConfig = sherpa_onnx.OfflineTtsModelConfig(
+ matcha: matcha,
+ numThreads: 1,
+ debug: true,
+ );
+ final config = sherpa_onnx.OfflineTtsConfig(
+ model: modelConfig,
+ maxNumSenetences: 1,
+ ruleFsts: ruleFsts,
+ ruleFars: ruleFars,
+ );
+
+ final tts = sherpa_onnx.OfflineTts(config);
+ final audio = tts.generate(text: text, sid: sid, speed: speed);
+ tts.free();
+
+ sherpa_onnx.writeWave(
+ filename: outputWav,
+ samples: audio.samples,
+ sampleRate: audio.sampleRate,
+ );
+ print('Saved to $outputWav');
+}
diff --git a/dart-api-examples/tts/bin/zh.dart b/dart-api-examples/tts/bin/vits-zh.dart
similarity index 100%
rename from dart-api-examples/tts/bin/zh.dart
rename to dart-api-examples/tts/bin/vits-zh.dart
diff --git a/dart-api-examples/tts/pubspec.yaml b/dart-api-examples/tts/pubspec.yaml
index 51e7a9d9ec..860ed94404 100644
--- a/dart-api-examples/tts/pubspec.yaml
+++ b/dart-api-examples/tts/pubspec.yaml
@@ -8,7 +8,7 @@ environment:
# Add regular dependencies here.
dependencies:
- sherpa_onnx: ^1.10.27
+ sherpa_onnx: ^1.10.42
path: ^1.9.0
args: ^2.5.0
diff --git a/dart-api-examples/tts/run-kokoro-en.sh b/dart-api-examples/tts/run-kokoro-en.sh
new file mode 100755
index 0000000000..78e21a2860
--- /dev/null
+++ b/dart-api-examples/tts/run-kokoro-en.sh
@@ -0,0 +1,27 @@
+#!/usr/bin/env bash
+
+set -ex
+
+dart pub get
+
+# please visit
+# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/kokoro.html
+# to download more models
+if [ ! -f ./kokoro-en-v0_19/model.onnx ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2
+ tar xf kokoro-en-v0_19.tar.bz2
+ rm kokoro-en-v0_19.tar.bz2
+fi
+
+dart run \
+ ./bin/kokoro-en.dart \
+ --model ./kokoro-en-v0_19/model.onnx \
+ --voices ./kokoro-en-v0_19/voices.bin \
+ --tokens ./kokoro-en-v0_19/tokens.txt \
+ --data-dir ./kokoro-en-v0_19/espeak-ng-data \
+ --sid 9 \
+ --speed 1.0 \
+ --output-wav kokoro-en-9.wav \
+ --text "Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone."
+
+ls -lh *.wav
diff --git a/dart-api-examples/tts/run-kokoro-zh-en.sh b/dart-api-examples/tts/run-kokoro-zh-en.sh
new file mode 100755
index 0000000000..42e4851d88
--- /dev/null
+++ b/dart-api-examples/tts/run-kokoro-zh-en.sh
@@ -0,0 +1,29 @@
+#!/usr/bin/env bash
+
+set -ex
+
+dart pub get
+
+# please visit
+# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/kokoro.html
+# to download more models
+if [ ! -f ./kokoro-multi-lang-v1_0/model.onnx ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2
+ tar xf kokoro-multi-lang-v1_0.tar.bz2
+ rm kokoro-multi-lang-v1_0.tar.bz2
+fi
+
+dart run \
+ ./bin/kokoro-zh-en.dart \
+ --model ./kokoro-multi-lang-v1_0/model.onnx \
+ --voices ./kokoro-multi-lang-v1_0/voices.bin \
+ --tokens ./kokoro-multi-lang-v1_0/tokens.txt \
+ --data-dir ./kokoro-multi-lang-v1_0/espeak-ng-data \
+ --dict-dir ./kokoro-multi-lang-v1_0/dict \
+ --lexicon ./kokoro-multi-lang-v1_0/lexicon-us-en.txt,./kokoro-multi-lang-v1_0/lexicon-zh.txt \
+ --sid 45 \
+ --speed 1.0 \
+ --output-wav kokoro-zh-en-45.wav \
+ --text "中英文语音合成测试。This is generated by next generation Kaldi using Kokoro without Misaki. 你觉得中英文说的如何呢?"
+
+ls -lh *.wav
diff --git a/dart-api-examples/tts/run-matcha-en.sh b/dart-api-examples/tts/run-matcha-en.sh
new file mode 100755
index 0000000000..f727ee5c82
--- /dev/null
+++ b/dart-api-examples/tts/run-matcha-en.sh
@@ -0,0 +1,32 @@
+#!/usr/bin/env bash
+
+set -ex
+
+dart pub get
+
+# please visit
+# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
+# to download more models
+if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
+ tar xf matcha-icefall-en_US-ljspeech.tar.bz2
+ rm matcha-icefall-en_US-ljspeech.tar.bz2
+fi
+
+if [ ! -f ./hifigan_v2.onnx ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
+fi
+
+dart run \
+ ./bin/matcha-en.dart \
+ --acoustic-model ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \
+ --vocoder ./hifigan_v2.onnx \
+ --tokens ./matcha-icefall-en_US-ljspeech/tokens.txt \
+ --data-dir ./matcha-icefall-en_US-ljspeech/espeak-ng-data \
+ --sid 0 \
+ --speed 1.0 \
+ --output-wav matcha-en-1.wav \
+ --text "Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone." \
+
+ls -lh *.wav
diff --git a/dart-api-examples/tts/run-matcha-zh.sh b/dart-api-examples/tts/run-matcha-zh.sh
new file mode 100755
index 0000000000..be95a827aa
--- /dev/null
+++ b/dart-api-examples/tts/run-matcha-zh.sh
@@ -0,0 +1,45 @@
+#!/usr/bin/env bash
+
+set -ex
+
+dart pub get
+
+# please visit
+# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker
+# to download more models
+if [ ! -f ./matcha-icefall-zh-baker/model-steps-3.onnx ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
+ tar xvf matcha-icefall-zh-baker.tar.bz2
+ rm matcha-icefall-zh-baker.tar.bz2
+fi
+
+if [ ! -f ./hifigan_v2.onnx ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
+fi
+
+dart run \
+ ./bin/matcha-zh.dart \
+ --acoustic-model ./matcha-icefall-zh-baker/model-steps-3.onnx \
+ --vocoder ./hifigan_v2.onnx \
+ --lexicon ./matcha-icefall-zh-baker/lexicon.txt \
+ --tokens ./matcha-icefall-zh-baker/tokens.txt \
+ --dict-dir ./matcha-icefall-zh-baker/dict \
+ --rule-fsts ./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst \
+ --sid 0 \
+ --speed 1.0 \
+ --output-wav matcha-zh-1.wav \
+ --text "某某银行的副行长和一些行政领导表示,他们去过长江和长白山; 经济不断增长。2024年12月31号,拨打110或者18920240511。123456块钱。" \
+
+dart run \
+ ./bin/matcha-zh.dart \
+ --acoustic-model ./matcha-icefall-zh-baker/model-steps-3.onnx \
+ --vocoder ./hifigan_v2.onnx \
+ --lexicon ./matcha-icefall-zh-baker/lexicon.txt \
+ --tokens ./matcha-icefall-zh-baker/tokens.txt \
+ --dict-dir ./matcha-icefall-zh-baker/dict \
+ --sid 0 \
+ --speed 1.0 \
+ --output-wav matcha-zh-2.wav \
+ --text "当夜幕降临,星光点点,伴随着微风拂面,我在静谧中感受着时光的流转,思念如涟漪荡漾,梦境如画卷展开,我与自然融为一体,沉静在这片宁静的美丽之中,感受着生命的奇迹与温柔." \
+
+ls -lh *.wav
diff --git a/dart-api-examples/tts/run-zh.sh b/dart-api-examples/tts/run-vits-zh.sh
similarity index 92%
rename from dart-api-examples/tts/run-zh.sh
rename to dart-api-examples/tts/run-vits-zh.sh
index 057260b619..2298f9eb16 100755
--- a/dart-api-examples/tts/run-zh.sh
+++ b/dart-api-examples/tts/run-vits-zh.sh
@@ -16,7 +16,7 @@ if [[ ! -f ./sherpa-onnx-vits-zh-ll/tokens.txt ]]; then
fi
dart run \
- ./bin/zh.dart \
+ ./bin/vits-zh.dart \
--model ./sherpa-onnx-vits-zh-ll/model.onnx \
--lexicon ./sherpa-onnx-vits-zh-ll/lexicon.txt \
--tokens ./sherpa-onnx-vits-zh-ll/tokens.txt \
@@ -24,10 +24,10 @@ dart run \
--sid 2 \
--speed 1.0 \
--text '当夜幕降临,星光点点,伴随着微风拂面,我在静谧中感受着时光的流转,思念如涟漪荡漾,梦境如画卷展开,我与自然融为一体,沉静在这片宁静的美丽之中,感受着生命的奇迹与温柔。' \
- --output-wav zh-jieba-2.wav
+ --output-wav vits-zh-jieba-2.wav
dart run \
- ./bin/zh.dart \
+ ./bin/vits-zh.dart \
--model ./sherpa-onnx-vits-zh-ll/model.onnx \
--lexicon ./sherpa-onnx-vits-zh-ll/lexicon.txt \
--tokens ./sherpa-onnx-vits-zh-ll/tokens.txt \
@@ -36,6 +36,6 @@ dart run \
--sid 3 \
--speed 1.0 \
--text '今天是2024年6月15号,13点23分。如果有困难,请拨打110或者18920240511。123456块钱。' \
- --output-wav zh-jieba-3.wav
+ --output-wav vits-zh-jieba-3.wav
ls -lh *.wav
diff --git a/dart-api-examples/vad-with-non-streaming-asr/bin/moonshine.dart b/dart-api-examples/vad-with-non-streaming-asr/bin/moonshine.dart
new file mode 100644
index 0000000000..f9d96e694b
--- /dev/null
+++ b/dart-api-examples/vad-with-non-streaming-asr/bin/moonshine.dart
@@ -0,0 +1,134 @@
+// Copyright (c) 2024 Xiaomi Corporation
+import 'dart:io';
+import 'dart:typed_data';
+
+import 'package:args/args.dart';
+import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
+
+import './init.dart';
+
+void main(List<String> arguments) async {
+ await initSherpaOnnx();
+
+ final parser = ArgParser()
+ ..addOption('silero-vad', help: 'Path to silero_vad.onnx')
+ ..addOption('preprocessor',
+ help: 'Path to the moonshine preprocessor model')
+ ..addOption('encoder', help: 'Path to the moonshine encoder model')
+ ..addOption('uncached-decoder',
+ help: 'Path to moonshine uncached decoder model')
+ ..addOption('cached-decoder',
+ help: 'Path to moonshine cached decoder model')
+ ..addOption('tokens', help: 'Path to tokens.txt')
+ ..addOption('input-wav', help: 'Path to input.wav to transcribe');
+
+ final res = parser.parse(arguments);
+ if (res['silero-vad'] == null ||
+ res['preprocessor'] == null ||
+ res['encoder'] == null ||
+ res['uncached-decoder'] == null ||
+ res['cached-decoder'] == null ||
+ res['tokens'] == null ||
+ res['input-wav'] == null) {
+ print(parser.usage);
+ exit(1);
+ }
+
+ // create VAD
+ final sileroVad = res['silero-vad'] as String;
+
+ final sileroVadConfig = sherpa_onnx.SileroVadModelConfig(
+ model: sileroVad,
+ minSilenceDuration: 0.25,
+ minSpeechDuration: 0.5,
+ maxSpeechDuration: 5.0,
+ );
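+  // All durations are in seconds: a pause shorter than minSilenceDuration
+  // does not end a segment, and a segment longer than maxSpeechDuration is
+  // split into smaller ones.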
+
+ final vadConfig = sherpa_onnx.VadModelConfig(
+ sileroVad: sileroVadConfig,
+ numThreads: 1,
+ debug: true,
+ );
+
+ final vad = sherpa_onnx.VoiceActivityDetector(
+ config: vadConfig, bufferSizeInSeconds: 10);
+
+  // create moonshine recognizer
+ final preprocessor = res['preprocessor'] as String;
+ final encoder = res['encoder'] as String;
+ final uncachedDecoder = res['uncached-decoder'] as String;
+ final cachedDecoder = res['cached-decoder'] as String;
+ final tokens = res['tokens'] as String;
+ final inputWav = res['input-wav'] as String;
+
+ final moonshine = sherpa_onnx.OfflineMoonshineModelConfig(
+ preprocessor: preprocessor,
+ encoder: encoder,
+ uncachedDecoder: uncachedDecoder,
+ cachedDecoder: cachedDecoder,
+ );
+ final modelConfig = sherpa_onnx.OfflineModelConfig(
+ moonshine: moonshine,
+ tokens: tokens,
+ debug: false,
+ numThreads: 1,
+ );
+ final config = sherpa_onnx.OfflineRecognizerConfig(model: modelConfig);
+ final recognizer = sherpa_onnx.OfflineRecognizer(config);
+
+ final waveData = sherpa_onnx.readWave(inputWav);
+ if (waveData.sampleRate != 16000) {
+ print('Only 16000 Hz is supported. Given: ${waveData.sampleRate}');
+ exit(1);
+ }
+
+ int numSamples = waveData.samples.length;
+ int numIter = numSamples ~/ vadConfig.sileroVad.windowSize;
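+  // Feed the waveform to the VAD in windowSize-sample chunks; any remainder
+  // shorter than one window is handled by vad.flush() after the loop.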
+
+ for (int i = 0; i != numIter; ++i) {
+ int start = i * vadConfig.sileroVad.windowSize;
+ vad.acceptWaveform(Float32List.sublistView(
+ waveData.samples, start, start + vadConfig.sileroVad.windowSize));
+
+ while (!vad.isEmpty()) {
+ final samples = vad.front().samples;
+ final startTime = vad.front().start.toDouble() / waveData.sampleRate;
+ final endTime =
+ startTime + samples.length.toDouble() / waveData.sampleRate;
+
+ final stream = recognizer.createStream();
+ stream.acceptWaveform(samples: samples, sampleRate: waveData.sampleRate);
+ recognizer.decode(stream);
+
+ final result = recognizer.getResult(stream);
+ stream.free();
+ print(
+ '${startTime.toStringAsPrecision(5)} -- ${endTime.toStringAsPrecision(5)} : ${result.text}');
+
+ vad.pop();
+ }
+ }
+
+ vad.flush();
+
+ while (!vad.isEmpty()) {
+ final samples = vad.front().samples;
+ final startTime = vad.front().start.toDouble() / waveData.sampleRate;
+ final endTime = startTime + samples.length.toDouble() / waveData.sampleRate;
+
+ final stream = recognizer.createStream();
+ stream.acceptWaveform(samples: samples, sampleRate: waveData.sampleRate);
+ recognizer.decode(stream);
+
+ final result = recognizer.getResult(stream);
+ stream.free();
+ print(
+ '${startTime.toStringAsPrecision(5)} -- ${endTime.toStringAsPrecision(5)} : ${result.text}');
+
+ vad.pop();
+ }
+
+ vad.free();
+
+ recognizer.free();
+}
diff --git a/dart-api-examples/vad-with-non-streaming-asr/pubspec.yaml b/dart-api-examples/vad-with-non-streaming-asr/pubspec.yaml
index 66d8ca1127..0de40da6f0 100644
--- a/dart-api-examples/vad-with-non-streaming-asr/pubspec.yaml
+++ b/dart-api-examples/vad-with-non-streaming-asr/pubspec.yaml
@@ -10,7 +10,7 @@ environment:
sdk: ">=3.0.0 <4.0.0"
dependencies:
- sherpa_onnx: ^1.10.27
+ sherpa_onnx: ^1.10.42
path: ^1.9.0
args: ^2.5.0
diff --git a/dart-api-examples/vad-with-non-streaming-asr/run-moonshine.sh b/dart-api-examples/vad-with-non-streaming-asr/run-moonshine.sh
new file mode 100755
index 0000000000..cd531fec5a
--- /dev/null
+++ b/dart-api-examples/vad-with-non-streaming-asr/run-moonshine.sh
@@ -0,0 +1,29 @@
+#!/usr/bin/env bash
+
+set -ex
+
+dart pub get
+
+if [ ! -f ./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+ tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+ rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+fi
+
+if [ ! -f ./Obama.wav ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav
+fi
+
+if [[ ! -f ./silero_vad.onnx ]]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
+fi
+
+dart run \
+ ./bin/moonshine.dart \
+ --silero-vad ./silero_vad.onnx \
+ --preprocessor ./sherpa-onnx-moonshine-tiny-en-int8/preprocess.onnx \
+ --encoder ./sherpa-onnx-moonshine-tiny-en-int8/encode.int8.onnx \
+ --uncached-decoder ./sherpa-onnx-moonshine-tiny-en-int8/uncached_decode.int8.onnx \
+ --cached-decoder ./sherpa-onnx-moonshine-tiny-en-int8/cached_decode.int8.onnx \
+ --tokens ./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt \
+ --input-wav ./Obama.wav
diff --git a/dart-api-examples/vad/pubspec.yaml b/dart-api-examples/vad/pubspec.yaml
index 2535e60748..9063da114b 100644
--- a/dart-api-examples/vad/pubspec.yaml
+++ b/dart-api-examples/vad/pubspec.yaml
@@ -9,7 +9,7 @@ environment:
sdk: ">=3.0.0 <4.0.0"
dependencies:
- sherpa_onnx: ^1.10.27
+ sherpa_onnx: ^1.10.42
path: ^1.9.0
args: ^2.5.0
diff --git a/dotnet-examples/Common/Common.csproj b/dotnet-examples/Common/Common.csproj
index a9630614f4..57c0ff743f 100644
--- a/dotnet-examples/Common/Common.csproj
+++ b/dotnet-examples/Common/Common.csproj
@@ -1,7 +1,7 @@
-    <TargetFramework>net6.0</TargetFramework>
+    <TargetFramework>net8.0</TargetFramework>
true
diff --git a/dotnet-examples/Common/WaveHeader.cs b/dotnet-examples/Common/WaveHeader.cs
index 7d13b35537..0a6ca52845 100644
--- a/dotnet-examples/Common/WaveHeader.cs
+++ b/dotnet-examples/Common/WaveHeader.cs
@@ -4,171 +4,166 @@
using System.Runtime.InteropServices;
-namespace SherpaOnnx
-{
+namespace SherpaOnnx;
- [StructLayout(LayoutKind.Sequential)]
- public struct WaveHeader
+[StructLayout(LayoutKind.Sequential)]
+public struct WaveHeader
+{
+ public int ChunkID;
+ public int ChunkSize;
+ public int Format;
+ public int SubChunk1ID;
+ public int SubChunk1Size;
+ public short AudioFormat;
+ public short NumChannels;
+ public int SampleRate;
+ public int ByteRate;
+ public short BlockAlign;
+ public short BitsPerSample;
+ public int SubChunk2ID;
+ public int SubChunk2Size;
+
+ public bool Validate()
{
- public Int32 ChunkID;
- public Int32 ChunkSize;
- public Int32 Format;
- public Int32 SubChunk1ID;
- public Int32 SubChunk1Size;
- public Int16 AudioFormat;
- public Int16 NumChannels;
- public Int32 SampleRate;
- public Int32 ByteRate;
- public Int16 BlockAlign;
- public Int16 BitsPerSample;
- public Int32 SubChunk2ID;
- public Int32 SubChunk2Size;
-
- public bool Validate()
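+    // F F I R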
+ if (ChunkID != 0x46464952)
+ {
+ Console.WriteLine($"Invalid chunk ID: 0x{ChunkID:X}. Expect 0x46464952");
+ return false;
+ }
+
+ // E V A W
+ if (Format != 0x45564157)
+ {
+ Console.WriteLine($"Invalid format: 0x{Format:X}. Expect 0x45564157");
+ return false;
+ }
+
+ // t m f
+ if (SubChunk1ID != 0x20746d66)
+ {
+ Console.WriteLine($"Invalid SubChunk1ID: 0x{SubChunk1ID:X}. Expect 0x20746d66");
+ return false;
+ }
+
+ if (SubChunk1Size != 16)
+ {
+ Console.WriteLine($"Invalid SubChunk1Size: {SubChunk1Size}. Expect 16");
+ return false;
+ }
+
+ if (AudioFormat != 1)
+ {
+ Console.WriteLine($"Invalid AudioFormat: {AudioFormat}. Expect 1");
+ return false;
+ }
+
+ if (NumChannels != 1)
+ {
+ Console.WriteLine($"Invalid NumChannels: {NumChannels}. Expect 1");
+ return false;
+ }
+
+ if (ByteRate != (SampleRate * NumChannels * BitsPerSample / 8))
+ {
+ Console.WriteLine($"Invalid byte rate: {ByteRate}.");
+ return false;
+ }
+
+ if (BlockAlign != (NumChannels * BitsPerSample / 8))
{
- if (ChunkID != 0x46464952)
- {
- Console.WriteLine($"Invalid chunk ID: 0x{ChunkID:X}. Expect 0x46464952");
- return false;
- }
-
- // E V A W
- if (Format != 0x45564157)
- {
- Console.WriteLine($"Invalid format: 0x{Format:X}. Expect 0x45564157");
- return false;
- }
-
- // t m f
- if (SubChunk1ID != 0x20746d66)
- {
- Console.WriteLine($"Invalid SubChunk1ID: 0x{SubChunk1ID:X}. Expect 0x20746d66");
- return false;
- }
-
- if (SubChunk1Size != 16)
- {
- Console.WriteLine($"Invalid SubChunk1Size: {SubChunk1Size}. Expect 16");
- return false;
- }
-
- if (AudioFormat != 1)
- {
- Console.WriteLine($"Invalid AudioFormat: {AudioFormat}. Expect 1");
- return false;
- }
-
- if (NumChannels != 1)
- {
- Console.WriteLine($"Invalid NumChannels: {NumChannels}. Expect 1");
- return false;
- }
-
- if (ByteRate != (SampleRate * NumChannels * BitsPerSample / 8))
- {
- Console.WriteLine($"Invalid byte rate: {ByteRate}.");
- return false;
- }
-
- if (BlockAlign != (NumChannels * BitsPerSample / 8))
- {
- Console.WriteLine($"Invalid block align: {ByteRate}.");
- return false;
- }
-
- if (BitsPerSample != 16)
- { // we support only 16 bits per sample
- Console.WriteLine($"Invalid bits per sample: {BitsPerSample}. Expect 16");
- return false;
- }
-
- return true;
+ Console.WriteLine($"Invalid block align: {ByteRate}.");
+ return false;
}
+
+ if (BitsPerSample != 16)
+ { // we support only 16 bits per sample
+ Console.WriteLine($"Invalid bits per sample: {BitsPerSample}. Expect 16");
+ return false;
+ }
+
+ return true;
}
+}
- // It supports only 16-bit, single channel WAVE format.
- // The sample rate can be any value.
- public class WaveReader
+// It supports only 16-bit, single channel WAVE format.
+// The sample rate can be any value.
+public class WaveReader
+{
+ public WaveReader(string fileName)
{
- public WaveReader(String fileName)
+ if (!File.Exists(fileName))
{
- if (!File.Exists(fileName))
- {
- throw new ApplicationException($"{fileName} does not exist!");
- }
-
- using (var stream = File.Open(fileName, FileMode.Open))
- {
- using (var reader = new BinaryReader(stream))
- {
- _header = ReadHeader(reader);
-
- if (!_header.Validate())
- {
- throw new ApplicationException($"Invalid wave file ${fileName}");
- }
-
- SkipMetaData(reader);
-
- // now read samples
- // _header.SubChunk2Size contains number of bytes in total.
- // we assume each sample is of type int16
- byte[] buffer = reader.ReadBytes(_header.SubChunk2Size);
- short[] samples_int16 = new short[_header.SubChunk2Size / 2];
- Buffer.BlockCopy(buffer, 0, samples_int16, 0, buffer.Length);
-
- _samples = new float[samples_int16.Length];
-
- for (var i = 0; i < samples_int16.Length; ++i)
- {
- _samples[i] = samples_int16[i] / 32768.0F;
- }
- }
- }
+ throw new ApplicationException($"{fileName} does not exist!");
}
- private static WaveHeader ReadHeader(BinaryReader reader)
- {
- byte[] bytes = reader.ReadBytes(Marshal.SizeOf(typeof(WaveHeader)));
+ using var stream = File.Open(fileName, FileMode.Open);
+ using var reader = new BinaryReader(stream);
- GCHandle handle = GCHandle.Alloc(bytes, GCHandleType.Pinned);
- WaveHeader header = (WaveHeader)Marshal.PtrToStructure(handle.AddrOfPinnedObject(), typeof(WaveHeader))!;
- handle.Free();
+ _header = ReadHeader(reader);
- return header;
+ if (!_header.Validate())
+ {
+ throw new ApplicationException($"Invalid wave file ${fileName}");
}
- private void SkipMetaData(BinaryReader reader)
+ SkipMetaData(reader);
+
+ // now read samples
+ // _header.SubChunk2Size contains number of bytes in total.
+ // we assume each sample is of type int16
+ var buffer = reader.ReadBytes(_header.SubChunk2Size);
+ var samples_int16 = new short[_header.SubChunk2Size / 2];
+ Buffer.BlockCopy(buffer, 0, samples_int16, 0, buffer.Length);
+
+ _samples = new float[samples_int16.Length];
+
+ for (var i = 0; i < samples_int16.Length; ++i)
{
- var bs = reader.BaseStream;
-
- Int32 subChunk2ID = _header.SubChunk2ID;
- Int32 subChunk2Size = _header.SubChunk2Size;
-
- while (bs.Position != bs.Length && subChunk2ID != 0x61746164)
- {
- bs.Seek(subChunk2Size, SeekOrigin.Current);
- subChunk2ID = reader.ReadInt32();
- subChunk2Size = reader.ReadInt32();
- }
- _header.SubChunk2ID = subChunk2ID;
- _header.SubChunk2Size = subChunk2Size;
+ _samples[i] = samples_int16[i] / 32768.0F;
}
+ }
- private WaveHeader _header;
+ private static WaveHeader ReadHeader(BinaryReader reader)
+ {
+ var bytes = reader.ReadBytes(Marshal.SizeOf(typeof(WaveHeader)));
+
+ GCHandle handle = GCHandle.Alloc(bytes, GCHandleType.Pinned);
+ WaveHeader header = (WaveHeader)Marshal.PtrToStructure(handle.AddrOfPinnedObject(), typeof(WaveHeader))!;
+ handle.Free();
+
+ return header;
+ }
- // Samples are normalized to the range [-1, 1]
- private float[] _samples;
+ private void SkipMetaData(BinaryReader reader)
+ {
+ var bs = reader.BaseStream;
- public int SampleRate => _header.SampleRate;
- public float[] Samples => _samples;
+ var subChunk2ID = _header.SubChunk2ID;
+ var subChunk2Size = _header.SubChunk2Size;
- public static void Test(String fileName)
+ while (bs.Position != bs.Length && subChunk2ID != 0x61746164)
{
- WaveReader reader = new WaveReader(fileName);
- Console.WriteLine($"samples length: {reader.Samples.Length}");
- Console.WriteLine($"samples rate: {reader.SampleRate}");
+ bs.Seek(subChunk2Size, SeekOrigin.Current);
+ subChunk2ID = reader.ReadInt32();
+ subChunk2Size = reader.ReadInt32();
}
+ _header.SubChunk2ID = subChunk2ID;
+ _header.SubChunk2Size = subChunk2Size;
}
+ private WaveHeader _header;
+
+ // Samples are normalized to the range [-1, 1]
+ private float[] _samples;
+
+ public int SampleRate => _header.SampleRate;
+
+ public float[] Samples => _samples;
+
+ public static void Test(string fileName)
+ {
+ WaveReader reader = new WaveReader(fileName);
+ Console.WriteLine($"samples length: {reader.Samples.Length}");
+ Console.WriteLine($"samples rate: {reader.SampleRate}");
+ }
}
diff --git a/dotnet-examples/TTS/PlayAudioPartial/SherpaOnnxGeneratedAudioResultPlayAudio.cs b/dotnet-examples/TTS/PlayAudioPartial/SherpaOnnxGeneratedAudioResultPlayAudio.cs
deleted file mode 100644
index 1eb1e3568c..0000000000
--- a/dotnet-examples/TTS/PlayAudioPartial/SherpaOnnxGeneratedAudioResultPlayAudio.cs
+++ /dev/null
@@ -1,44 +0,0 @@
-using NAudio.Wave;
-
-namespace TTS.Struct
-{
- public sealed partial class SherpaOnnxGeneratedAudioResult
- {
- private WaveOutEvent waveOut;
- private WaveFormat waveFormat;
- private BufferedWaveProvider bufferedWaveProvider;
-
- private int bufferLength = 1;
-
- public TimeSpan? AudioDuration => bufferedWaveProvider?.BufferedDuration;
-
- public float PlayProgress => (waveOut?.GetPosition() * 1.0f / bufferLength).Value;
-
- public void Play()
- {
- waveOut ??= new WaveOutEvent();
-
- waveFormat ??= new WaveFormat(sample_rate, AudioDataBit, Channels); // 32-bit 浮点,单声道
-
- if (bufferedWaveProvider == null)
- {
- bufferedWaveProvider ??= new BufferedWaveProvider(waveFormat);
-
- var buffer = AudioByteData;
-
- bufferLength = buffer.Length;
-
- bufferedWaveProvider.AddSamples(buffer, 0, bufferLength);
- bufferedWaveProvider.BufferLength = bufferLength;
- waveOut.Init(bufferedWaveProvider);
- }
- waveOut.Play();
- }
-
- public void Stop()
- {
- waveOut?.Stop();
- }
-
- }
-}
diff --git a/dotnet-examples/TTS/Program.cs b/dotnet-examples/TTS/Program.cs
deleted file mode 100644
index 07bb1325ff..0000000000
--- a/dotnet-examples/TTS/Program.cs
+++ /dev/null
@@ -1,66 +0,0 @@
-using System.Text;
-using TTS;
-using TTS.Struct;
-
-internal class Program
-{
- private static void Main(string[] args)
- {
- SherpaOnnxOfflineTtsConfig sherpaOnnxOfflineTtsConfig = new SherpaOnnxOfflineTtsConfig();
- sherpaOnnxOfflineTtsConfig.model = new SherpaOnnxOfflineTtsModelConfig
- {
- debug = 0,
- num_threads = 4,
- provider = "cpu",
- vits = new SherpaOnnxOfflineTtsVitsModelConfig
- {
- //lexicon = "vits-zh-aishell3/lexicon.txt",
- //model = "vits-zh-aishell3/vits-aishell3.onnx",
- //tokens = "vits-zh-aishell3/tokens.txt",
- model = @"C:\Services\Sherpa\model.onnx",
- lexicon = "",
- tokens = @"C:\Services\Sherpa\tokens.txt",
- data_dir = @"C:\Services\Sherpa\espeak-ng-data",
-
- noise_scale = 0.667f,
- noise_scale_w = 0.8f,
- length_scale = 1,
- },
-
- };
-
- TTSCore i = new TTSCore(sherpaOnnxOfflineTtsConfig);
-
- Console.InputEncoding = Encoding.Unicode;
- Console.OutputEncoding = Encoding.UTF8;
-
- while (true)
- {
- var str = Console.ReadLine();
- var audioResult = i.ToSpeech(str, 40, 1f);
-
- // audioResult.WriteWAVFile("123.wav");保存本地
-
- audioResult.Play();
-
- int lastIndex = -1;
- while (audioResult.PlayProgress <= 1f)
- {
- int index = (int)(audioResult.PlayProgress * (str.Length - 1));
- if (lastIndex != index)
- {
- Console.Write(str[index]);
- lastIndex = index;
- }
- Thread.Sleep(100);
- }
-
- if (++lastIndex < str.Length)
- Console.Write(str[lastIndex]);
-
- Console.WriteLine();
-
- }
-
- }
-}
diff --git a/dotnet-examples/TTS/Struct/SherpaOnnxGeneratedAudio.cs b/dotnet-examples/TTS/Struct/SherpaOnnxGeneratedAudio.cs
deleted file mode 100644
index affc3a0347..0000000000
--- a/dotnet-examples/TTS/Struct/SherpaOnnxGeneratedAudio.cs
+++ /dev/null
@@ -1,198 +0,0 @@
-using System;
-using System.Collections.Generic;
-using System.Linq;
-using System.Runtime.InteropServices;
-using System.Text;
-using System.Threading.Tasks;
-
-namespace TTS.Struct
-{
- ///
- /// 生成语音结果
- ///
- public sealed partial class SherpaOnnxGeneratedAudioResult : IDisposable
- {
- public const string Filename = "sherpa-onnx-c-api";
-
- ///
- /// 销毁非托管内存
- ///
- ///
- [DllImport(Filename)]
- private static extern void SherpaOnnxDestroyOfflineTtsGeneratedAudio(IntPtr ttsGenerateIntptr);
-
- [DllImport(Filename)]
- private static extern int SherpaOnnxWriteWave(IntPtr q, int n, int sample_rate, string filename);
-
- ///
- /// 音频数据比特
- ///
- public const int AudioDataBit = 16;
- ///
- /// 单通道
- ///
- public const int Channels = 1;
-
- ///
- /// 原生句柄
- ///
- internal IntPtr thisHandle;
-
- internal readonly IntPtr audioData;
- internal readonly int dataSize;
-
- ///
- /// 采样率
- ///
- public readonly int sample_rate;
-
- ///
- /// 音频数据指针
- ///
- public IntPtr AudioDataIntPtr => audioData;
-
- ///
- /// 数据的大小
- ///
- public unsafe int AudioDataLength
- {
- get
- {
- return dataSize;
-
- //float* buffer = (float*)audioData;
- //while (*buffer != 0)
- // ++buffer;
- //return (int)(buffer - (float*)audioData);
- }
- }
-
- ///
- /// 获得音频数据 float[]
- /// 这个内部创建一个数组
- ///
- public unsafe float[] AudioFloatData
- {
- get
- {
- int length = AudioDataLength;
-
- float[] floatAudioData = new float[length];
- Marshal.Copy(audioData, floatAudioData, 0, floatAudioData.Length);
- return floatAudioData;
- }
- }
-
-
- ///
- /// 获得音频数据 byte[]
- /// 这个内部创建一个数组
- ///
- public byte[] AudioByteData
- {
- get
- {
- byte[] bytes = new byte[AudioDataLength * 2];
- ReadData(bytes, 0);
- return bytes;
- }
- }
-
- internal SherpaOnnxGeneratedAudioResult(IntPtr intPtr, SherpaOnnxGeneratedAudio sherpaOnnx)
- {
- this.thisHandle = intPtr;
- this.audioData = sherpaOnnx.audioData;
- this.dataSize = sherpaOnnx.dataSize;
- this.sample_rate = sherpaOnnx.sample_rate;
- }
-
- ~SherpaOnnxGeneratedAudioResult()
- {
- Dispose();
- }
-
- ///
- /// 读取数据
- /// 没有垃圾产生,自己传递数组进来
- ///
- /// 数组
- /// 数组那个位置写入
- /// 写入了多少个
- public int ReadData(float[] audioFloats, int offset)
- {
- int length = AudioDataLength;
-
- int c = audioFloats.Length - offset;
- length = c >= length ? length : c;
-
- Marshal.Copy(audioData, audioFloats, offset, length);
- return length;
- }
-
- ///
- /// 读取数据
- /// 这个内部转换成byte[] 音频数组
- /// 没有垃圾产生,自己传递数组进来
- ///
- /// 数组,这个长度需要是AudioDataLength*2大小
- /// 数组那个位置写入
- /// 写入了多少个
- public int ReadData(byte[] audioFloats, int offset)
- {
- //因为是16bit存储音频数据,所以float会转换成两个字节存储
- var audiodata = AudioFloatData;
-
- int length = audiodata.Length * 2;
-
- int c = audioFloats.Length - offset;
- c = c % 2 == 0 ? c : c - 1;
-
- length = c >= length ? length : c;
-
- int p = length / 2;
-
- for (int i = 0; i < p; i++)
- {
- short value = (short)(audiodata[i] * short.MaxValue);
-
- audioFloats[offset++] = (byte)value;
- audioFloats[offset++] = (byte)(value >> 8);
- }
-
- return length;
-
- }
-
- ///
- /// 写入WAV音频数据
- ///
- ///
- ///
- public bool WriteWAVFile(string filename)
- {
- return 1 == SherpaOnnxWriteWave(audioData, this.dataSize, this.sample_rate, filename);
- }
-
- public void Dispose()
- {
- if (this.thisHandle != IntPtr.Zero)
- {
- SherpaOnnxDestroyOfflineTtsGeneratedAudio(this.thisHandle);
- GC.SuppressFinalize(this);
- this.thisHandle = IntPtr.Zero;
- }
- }
- }
-
- [StructLayout(LayoutKind.Sequential)]
- internal struct SherpaOnnxGeneratedAudio
- {
- internal readonly IntPtr audioData;
- internal readonly int dataSize;
-
- ///
- /// 采样率
- ///
- public readonly int sample_rate;
- }
-}
diff --git a/dotnet-examples/TTS/Struct/SherpaOnnxOfflineTtsConfig.cs b/dotnet-examples/TTS/Struct/SherpaOnnxOfflineTtsConfig.cs
deleted file mode 100644
index f33e37dcd7..0000000000
--- a/dotnet-examples/TTS/Struct/SherpaOnnxOfflineTtsConfig.cs
+++ /dev/null
@@ -1,18 +0,0 @@
-using System.Runtime.InteropServices;
-
-namespace TTS.Struct
-{
- [StructLayout(LayoutKind.Sequential)]
- public struct SherpaOnnxOfflineTtsConfig
- {
- public SherpaOnnxOfflineTtsModelConfig model;
-
- [MarshalAs(UnmanagedType.LPStr)]
- public string rule_fsts;
-
- public int max_num_sentences;
-
- [MarshalAs(UnmanagedType.LPStr)]
- public string rule_fars;
- }
-}
diff --git a/dotnet-examples/TTS/Struct/SherpaOnnxOfflineTtsModelConfig.cs b/dotnet-examples/TTS/Struct/SherpaOnnxOfflineTtsModelConfig.cs
deleted file mode 100644
index 46dd558599..0000000000
--- a/dotnet-examples/TTS/Struct/SherpaOnnxOfflineTtsModelConfig.cs
+++ /dev/null
@@ -1,23 +0,0 @@
-using System.Runtime.InteropServices;
-
-namespace TTS.Struct
-{
- [StructLayout(LayoutKind.Sequential)]
- public struct SherpaOnnxOfflineTtsModelConfig
- {
- ///
- /// 模型配置
- ///
- public SherpaOnnxOfflineTtsVitsModelConfig vits;
- ///
- /// 线程数
- ///
- public int num_threads;
- public int debug;
- ///
- /// 使用cpu
- ///
- [MarshalAs(UnmanagedType.LPStr)]
- public string provider;
- }
-}
diff --git a/dotnet-examples/TTS/Struct/SherpaOnnxOfflineTtsVitsModelConfig.cs b/dotnet-examples/TTS/Struct/SherpaOnnxOfflineTtsVitsModelConfig.cs
deleted file mode 100644
index 266df5ae74..0000000000
--- a/dotnet-examples/TTS/Struct/SherpaOnnxOfflineTtsVitsModelConfig.cs
+++ /dev/null
@@ -1,56 +0,0 @@
-using System.Runtime.InteropServices;
-
-namespace TTS.Struct
-{
- [StructLayout(LayoutKind.Sequential)]
- public struct SherpaOnnxOfflineTtsVitsModelConfig
- {
- /// <summary>
- /// Model
- /// "vits-zh-aishell3/vits-aishell3.onnx"
- /// </summary>
- [MarshalAs(UnmanagedType.LPStr)]
- public string model;
- /// <summary>
- /// Lexicon file
- /// "vits-zh-aishell3/lexicon.txt"
- /// </summary>
- [MarshalAs(UnmanagedType.LPStr)]
- public string lexicon;
-
- [MarshalAs(UnmanagedType.LPStr)]
- public string tokens;
-
- [MarshalAs(UnmanagedType.LPStr)]
- public string data_dir;
-
- /// <summary>
- /// noise_scale for the VITS model (float, default = 0.667)
- /// </summary>
- public float noise_scale = 0.667f;
- /// <summary>
- /// noise_scale_w for the VITS model (float, default = 0.8)
- /// </summary>
- public float noise_scale_w = 0.8f;
- /// <summary>
- /// Speech speed. Larger -> slower; smaller -> faster. (float, default = 1)
- /// </summary>
- public float length_scale = 1f;
-
- [MarshalAs(UnmanagedType.LPStr)]
- public string dict_dir;
-
- public SherpaOnnxOfflineTtsVitsModelConfig()
- {
- noise_scale = 0.667f;
- noise_scale_w = 0.8f;
- length_scale = 1f;
-
- model = "vits-zh-aishell3/vits-aishell3.onnx";
- lexicon = "vits-zh-aishell3/lexicon.txt";
- tokens = "vits-zh-aishell3/tokens.txt";
- data_dir = "";
- dict_dir = "";
- }
- }
-}
diff --git a/dotnet-examples/TTS/TTS.csproj b/dotnet-examples/TTS/TTS.csproj
deleted file mode 100644
index cb1a419ea8..0000000000
--- a/dotnet-examples/TTS/TTS.csproj
+++ /dev/null
@@ -1,15 +0,0 @@
-
-
-
- Exe
- net6.0
- enable
- enable
- true
-
-
-
-
-
-
-
diff --git a/dotnet-examples/TTS/TTSCore.cs b/dotnet-examples/TTS/TTSCore.cs
deleted file mode 100644
index a15cb19e69..0000000000
--- a/dotnet-examples/TTS/TTSCore.cs
+++ /dev/null
@@ -1,75 +0,0 @@
-using System.Runtime.InteropServices;
-using TTS.Struct;
-
-namespace TTS
-{
- internal sealed class TTSCore : IDisposable
- {
- public const string Filename = "sherpa-onnx-c-api";
-
- [DllImport(Filename, CallingConvention = CallingConvention.Cdecl)]
- private static extern IntPtr SherpaOnnxCreateOfflineTts(SherpaOnnxOfflineTtsConfig handle);
-
- [DllImport(Filename)]
- private static extern IntPtr SherpaOnnxOfflineTtsGenerate(IntPtr createOfflineTtsIntptr, IntPtr text, int sid, float speed);
-
- [DllImport(Filename)]
- private static extern void SherpaOnnxDestroyOfflineTts(IntPtr intPtr);
-
- /// <summary>
- /// Native handle
- /// </summary>
- private IntPtr thisHandle;
-
- public TTSCore(SherpaOnnxOfflineTtsConfig modelConfig)
- {
- IntPtr ttsHandle = SherpaOnnxCreateOfflineTts(modelConfig);
- if (ttsHandle == IntPtr.Zero)
- {
- throw new InvalidOperationException("Failed to create SherpaOnnx TTS engine.");
- }
- thisHandle = ttsHandle;
- }
-
- /// <summary>
- /// Text to speech
- /// </summary>
- /// <param name="text">Text</param>
- /// <param name="sid">Speaker ID (voice)</param>
- /// <param name="speed">Speed</param>
- /// <returns></returns>
- public SherpaOnnxGeneratedAudioResult ToSpeech(string text, int sid, float speed = 1f)
- {
- var result = SherpaOnnxOfflineTtsGenerate(thisHandle, Marshal.StringToCoTaskMemUTF8(text), sid, speed);
- SherpaOnnxGeneratedAudio impl = (SherpaOnnxGeneratedAudio)Marshal.PtrToStructure(result, typeof(SherpaOnnxGeneratedAudio));
- return new SherpaOnnxGeneratedAudioResult(result, impl);
- }
-
- /// <summary>
- /// Text to speech
- /// </summary>
- /// <param name="text">Text</param>
- /// <param name="sid">Speaker ID (voice)</param>
- /// <param name="speed">Speed</param>
- /// <returns></returns>
- public Task ToSpeechAsync(string text, int sid, float speed = 1f)
- {
- return Task.Run(() => ToSpeech(text, sid, speed));
- }
-
- ~TTSCore()
- {
- Dispose();
- }
-
- public void Dispose()
- {
- if (this.thisHandle != IntPtr.Zero)
- {
- SherpaOnnxDestroyOfflineTts(this.thisHandle);
- GC.SuppressFinalize(this);
- this.thisHandle = IntPtr.Zero;
- }
- }
- }
-}
diff --git a/dotnet-examples/keyword-spotting-from-files/Program.cs b/dotnet-examples/keyword-spotting-from-files/Program.cs
index 2fea260d1d..7ab0da2fa6 100644
--- a/dotnet-examples/keyword-spotting-from-files/Program.cs
+++ b/dotnet-examples/keyword-spotting-from-files/Program.cs
@@ -13,8 +13,6 @@
// dotnet run
using SherpaOnnx;
-using System.Collections.Generic;
-using System;
class KeywordSpotterDemo
{
@@ -38,11 +36,11 @@ static void Main(string[] args)
var filename = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/3.wav";
- WaveReader waveReader = new WaveReader(filename);
+ var waveReader = new WaveReader(filename);
Console.WriteLine("----------Use pre-defined keywords----------");
- OnlineStream s = kws.CreateStream();
+ var s = kws.CreateStream();
s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples);
float[] tailPadding = new float[(int)(waveReader.SampleRate * 0.3)];
@@ -53,8 +51,10 @@ static void Main(string[] args)
{
kws.Decode(s);
var result = kws.GetResult(s);
- if (result.Keyword != "")
+ if (result.Keyword != string.Empty)
{
+ // Remember to call Reset() right after detecting a keyword
+ kws.Reset(s);
Console.WriteLine("Detected: {0}", result.Keyword);
}
}
@@ -70,8 +70,10 @@ static void Main(string[] args)
{
kws.Decode(s);
var result = kws.GetResult(s);
- if (result.Keyword != "")
+ if (result.Keyword != string.Empty)
{
+ // Remember to call Reset() right after detecting a keyword
+ kws.Reset(s);
Console.WriteLine("Detected: {0}", result.Keyword);
}
}
@@ -89,8 +91,10 @@ static void Main(string[] args)
{
kws.Decode(s);
var result = kws.GetResult(s);
- if (result.Keyword != "")
+ if (result.Keyword != string.Empty)
{
+ // Remember to call Reset() right after detecting a keyword
+ kws.Reset(s);
Console.WriteLine("Detected: {0}", result.Keyword);
}
}
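Note on the three identical hunks above: the keyword spotter keeps reporting a detected keyword until the stream is reset, so Reset() has to be called as soon as a detection is seen. A minimal sketch of the intended decode loop, assuming the spotter type is SherpaOnnx.KeywordSpotter (as used elsewhere in these examples) and that the OnlineStream has already been fed audio via AcceptWaveform:

using SherpaOnnx;

static void DrainStream(KeywordSpotter kws, OnlineStream s)
{
    // Decode whatever is buffered and reset as soon as a keyword fires,
    // so the same keyword is not reported again for the following frames.
    while (kws.IsReady(s))
    {
        kws.Decode(s);
        var result = kws.GetResult(s);
        if (result.Keyword != string.Empty)
        {
            kws.Reset(s); // clear the detection state before continuing
            Console.WriteLine($"Detected: {result.Keyword}");
        }
    }
}

The helper name DrainStream is only illustrative; the point is that Decode, GetResult, and Reset belong inside the same IsReady loop.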
diff --git a/dotnet-examples/keyword-spotting-from-files/keyword-spotting-from-files.csproj b/dotnet-examples/keyword-spotting-from-files/keyword-spotting-from-files.csproj
index 992f8e0e34..21b9d3ea56 100644
--- a/dotnet-examples/keyword-spotting-from-files/keyword-spotting-from-files.csproj
+++ b/dotnet-examples/keyword-spotting-from-files/keyword-spotting-from-files.csproj
@@ -2,7 +2,7 @@
  <OutputType>Exe</OutputType>
-  <TargetFramework>net6.0</TargetFramework>
+  <TargetFramework>net8.0</TargetFramework>
  <RootNamespace>keyword_spotting_from_files</RootNamespace>
  <ImplicitUsings>enable</ImplicitUsings>
  <Nullable>enable</Nullable>
diff --git a/dotnet-examples/keyword-spotting-from-microphone/Program.cs b/dotnet-examples/keyword-spotting-from-microphone/Program.cs
index cb0c922f49..140e6a40e5 100644
--- a/dotnet-examples/keyword-spotting-from-microphone/Program.cs
+++ b/dotnet-examples/keyword-spotting-from-microphone/Program.cs
@@ -12,12 +12,9 @@
//
// dotnet run
+using PortAudioSharp;
using SherpaOnnx;
-using System.Collections.Generic;
using System.Runtime.InteropServices;
-using System;
-
-using PortAudioSharp;
class KeywordSpotterDemo
{
@@ -41,11 +38,11 @@ static void Main(string[] args)
var filename = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/3.wav";
- WaveReader waveReader = new WaveReader(filename);
+ var waveReader = new WaveReader(filename);
Console.WriteLine("----------Use pre-defined keywords----------");
- OnlineStream s = kws.CreateStream();
+ var s = kws.CreateStream();
Console.WriteLine(PortAudio.VersionInfo.versionText);
PortAudio.Initialize();
@@ -54,7 +51,7 @@ static void Main(string[] args)
for (int i = 0; i != PortAudio.DeviceCount; ++i)
{
Console.WriteLine($" Device {i}");
- DeviceInfo deviceInfo = PortAudio.GetDeviceInfo(i);
+ var deviceInfo = PortAudio.GetDeviceInfo(i);
Console.WriteLine($" Name: {deviceInfo.name}");
Console.WriteLine($" Max input channels: {deviceInfo.maxInputChannels}");
Console.WriteLine($" Default sample rate: {deviceInfo.defaultSampleRate}");
@@ -66,12 +63,12 @@ static void Main(string[] args)
Environment.Exit(1);
}
- DeviceInfo info = PortAudio.GetDeviceInfo(deviceIndex);
+ var info = PortAudio.GetDeviceInfo(deviceIndex);
Console.WriteLine();
Console.WriteLine($"Use default device {deviceIndex} ({info.name})");
- StreamParameters param = new StreamParameters();
+ var param = new StreamParameters();
param.device = deviceIndex;
param.channelCount = 1;
param.sampleFormat = SampleFormat.Float32;
@@ -79,21 +76,21 @@ static void Main(string[] args)
param.hostApiSpecificStreamInfo = IntPtr.Zero;
PortAudioSharp.Stream.Callback callback = (IntPtr input, IntPtr output,
- UInt32 frameCount,
+ uint frameCount,
ref StreamCallbackTimeInfo timeInfo,
StreamCallbackFlags statusFlags,
IntPtr userData
) =>
{
- float[] samples = new float[frameCount];
- Marshal.Copy(input, samples, 0, (Int32)frameCount);
+ var samples = new float[frameCount];
+ Marshal.Copy(input, samples, 0, (int)frameCount);
s.AcceptWaveform(config.FeatConfig.SampleRate, samples);
return StreamCallbackResult.Continue;
};
- PortAudioSharp.Stream stream = new PortAudioSharp.Stream(inParams: param, outParams: null, sampleRate: config.FeatConfig.SampleRate,
+ var stream = new PortAudioSharp.Stream(inParams: param, outParams: null, sampleRate: config.FeatConfig.SampleRate,
framesPerBuffer: 0,
streamFlags: StreamFlags.ClipOff,
callback: callback,
@@ -110,18 +107,19 @@ IntPtr userData
while (kws.IsReady(s))
{
kws.Decode(s);
- }
- var result = kws.GetResult(s);
- if (result.Keyword != "")
- {
- Console.WriteLine("Detected: {0}", result.Keyword);
+ var result = kws.GetResult(s);
+ if (result.Keyword != string.Empty)
+ {
+ // Remember to call Reset() right after detecting a keyword
+ kws.Reset(s);
+
+ Console.WriteLine("Detected: {0}", result.Keyword);
+ }
}
Thread.Sleep(200); // ms
}
-
- PortAudio.Terminate();
}
}
diff --git a/dotnet-examples/keyword-spotting-from-microphone/keyword-spotting-from-microphone.csproj b/dotnet-examples/keyword-spotting-from-microphone/keyword-spotting-from-microphone.csproj
index b3afae784d..12415b81bc 100644
--- a/dotnet-examples/keyword-spotting-from-microphone/keyword-spotting-from-microphone.csproj
+++ b/dotnet-examples/keyword-spotting-from-microphone/keyword-spotting-from-microphone.csproj
@@ -2,7 +2,7 @@
  <OutputType>Exe</OutputType>
-  <TargetFramework>net6.0</TargetFramework>
+  <TargetFramework>net8.0</TargetFramework>
  <RootNamespace>keyword_spotting_from_microphone</RootNamespace>
  <ImplicitUsings>enable</ImplicitUsings>
  <Nullable>enable</Nullable>
diff --git a/dotnet-examples/kokoro-tts-play/Program.cs b/dotnet-examples/kokoro-tts-play/Program.cs
new file mode 100644
index 0000000000..eea22cc2fd
--- /dev/null
+++ b/dotnet-examples/kokoro-tts-play/Program.cs
@@ -0,0 +1,189 @@
+// Copyright (c) 2025 Xiaomi Corporation
+//
+// This file shows how to use a non-streaming Kokoro TTS model
+// for text-to-speech
+// Please refer to
+// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
+// and
+// https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
+// to download pre-trained models
+using PortAudioSharp;
+using SherpaOnnx;
+using System.Collections.Concurrent;
+using System.Runtime.InteropServices;
+
+class OfflineTtsDemo
+{
+ static void Main(string[] args)
+ {
+ var config = new OfflineTtsConfig();
+ config.Model.Kokoro.Model = "./kokoro-en-v0_19/model.onnx";
+ config.Model.Kokoro.Voices = "./kokoro-en-v0_19/voices.bin";
+ config.Model.Kokoro.Tokens = "./kokoro-en-v0_19/tokens.txt";
+ config.Model.Kokoro.DataDir = "./kokoro-en-v0_19/espeak-ng-data";
+
+ config.Model.NumThreads = 2;
+ config.Model.Debug = 1;
+ config.Model.Provider = "cpu";
+
+ var tts = new OfflineTts(config);
+ var speed = 1.0f;
+ var text = "Today as always, men fall into two groups: slaves and free men. Whoever " +
+ "does not have two-thirds of his day for himself, is a slave, whatever " +
+ "he may be: a statesman, a businessman, an official, or a scholar. " +
+ "Friends fell out often because life was changing so fast. The easiest " +
+ "thing in the world was to lose touch with someone.";
+
+ // mapping of sid to voice name
+ // 0->af, 1->af_bella, 2->af_nicole, 3->af_sarah, 4->af_sky, 5->am_adam
+ // 6->am_michael, 7->bf_emma, 8->bf_isabella, 9->bm_george, 10->bm_lewis
+ var sid = 0;
+
+
+ Console.WriteLine(PortAudio.VersionInfo.versionText);
+ PortAudio.Initialize();
+ Console.WriteLine($"Number of devices: {PortAudio.DeviceCount}");
+
+ for (int i = 0; i != PortAudio.DeviceCount; ++i)
+ {
+ Console.WriteLine($" Device {i}");
+ DeviceInfo deviceInfo = PortAudio.GetDeviceInfo(i);
+ Console.WriteLine($" Name: {deviceInfo.name}");
+ Console.WriteLine($" Max output channels: {deviceInfo.maxOutputChannels}");
+ Console.WriteLine($" Default sample rate: {deviceInfo.defaultSampleRate}");
+ }
+ int deviceIndex = PortAudio.DefaultOutputDevice;
+ if (deviceIndex == PortAudio.NoDevice)
+ {
+ Console.WriteLine("No default output device found. Please use ../offline-tts instead");
+ Environment.Exit(1);
+ }
+
+ var info = PortAudio.GetDeviceInfo(deviceIndex);
+ Console.WriteLine();
+ Console.WriteLine($"Use output default device {deviceIndex} ({info.name})");
+
+ var param = new StreamParameters();
+ param.device = deviceIndex;
+ param.channelCount = 1;
+ param.sampleFormat = SampleFormat.Float32;
+ param.suggestedLatency = info.defaultLowOutputLatency;
+ param.hostApiSpecificStreamInfo = IntPtr.Zero;
+
+ // https://learn.microsoft.com/en-us/dotnet/standard/collections/thread-safe/blockingcollection-overview
+ var dataItems = new BlockingCollection<float[]>();
+
+ var MyCallback = (IntPtr samples, int n, float progress) =>
+ {
+ Console.WriteLine($"Progress {progress*100}%");
+
+ float[] data = new float[n];
+
+ Marshal.Copy(samples, data, 0, n);
+
+ dataItems.Add(data);
+
+ // 1 means to keep generating
+ // 0 means to stop generating
+ return 1;
+ };
+
+ var playFinished = false;
+
+ float[]? lastSampleArray = null;
+ int lastIndex = 0; // not played
+
+ PortAudioSharp.Stream.Callback playCallback = (IntPtr input, IntPtr output,
+ UInt32 frameCount,
+ ref StreamCallbackTimeInfo timeInfo,
+ StreamCallbackFlags statusFlags,
+ IntPtr userData
+ ) =>
+ {
+ if (dataItems.IsCompleted && lastSampleArray == null && lastIndex == 0)
+ {
+ Console.WriteLine($"Finished playing");
+ playFinished = true;
+ return StreamCallbackResult.Complete;
+ }
+
+ int expected = Convert.ToInt32(frameCount);
+ int i = 0;
+
+ while ((lastSampleArray != null || dataItems.Count != 0) && (i < expected))
+ {
+ int needed = expected - i;
+
+ if (lastSampleArray != null)
+ {
+ int remaining = lastSampleArray.Length - lastIndex;
+ if (remaining >= needed)
+ {
+ float[] this_block = lastSampleArray.Skip(lastIndex).Take(needed).ToArray();
+ lastIndex += needed;
+ if (lastIndex == lastSampleArray.Length)
+ {
+ lastSampleArray = null;
+ lastIndex = 0;
+ }
+
+ Marshal.Copy(this_block, 0, IntPtr.Add(output, i * sizeof(float)), needed);
+ return StreamCallbackResult.Continue;
+ }
+
+ float[] this_block2 = lastSampleArray.Skip(lastIndex).Take(remaining).ToArray();
+ lastIndex = 0;
+ lastSampleArray = null;
+
+ Marshal.Copy(this_block2, 0, IntPtr.Add(output, i * sizeof(float)), remaining);
+ i += remaining;
+ continue;
+ }
+
+ if (dataItems.Count != 0)
+ {
+ lastSampleArray = dataItems.Take();
+ lastIndex = 0;
+ }
+ }
+
+ if (i < expected)
+ {
+ int sizeInBytes = (expected - i) * 4;
+ Marshal.Copy(new byte[sizeInBytes], 0, IntPtr.Add(output, i * sizeof(float)), sizeInBytes);
+ }
+
+ return StreamCallbackResult.Continue;
+ };
+
+ PortAudioSharp.Stream stream = new PortAudioSharp.Stream(inParams: null, outParams: param, sampleRate: tts.SampleRate,
+ framesPerBuffer: 0,
+ streamFlags: StreamFlags.ClipOff,
+ callback: playCallback,
+ userData: IntPtr.Zero
+ );
+
+ stream.Start();
+
+ var callback = new OfflineTtsCallbackProgress(MyCallback);
+
+ var audio = tts.GenerateWithCallbackProgress(text, speed, sid, callback);
+ var outputFilename = "./generated-kokoro-0.wav";
+ var ok = audio.SaveToWaveFile(outputFilename);
+
+ if (ok)
+ {
+ Console.WriteLine($"Wrote to {outputFilename} succeeded!");
+ }
+ else
+ {
+ Console.WriteLine($"Failed to write {outputFilename}");
+ }
+ dataItems.CompleteAdding();
+
+ while (!playFinished)
+ {
+ Thread.Sleep(100); // 100ms
+ }
+ }
+}
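The MyCallback lambda above is the producer half of the pipeline: each generated chunk is copied out of native memory, queued on dataItems for the PortAudio playback callback to consume, and the integer return value tells the engine whether to continue. A minimal sketch of a variant that stops generation early, assuming the same delegate shape accepted by OfflineTtsCallbackProgress; the name earlyStopCallback and the 50% cut-off are purely illustrative:

// Hypothetical early-stop variant of the progress callback shown above.
var earlyStopCallback = (IntPtr samples, int n, float progress) =>
{
    var data = new float[n];
    Marshal.Copy(samples, data, 0, n); // copy out of native memory
    dataItems.Add(data);               // hand the chunk to the playback side

    // 1 keeps generation going, 0 stops it; here we stop once half of the
    // text has been synthesized.
    return progress < 0.5f ? 1 : 0;
};
var callback = new OfflineTtsCallbackProgress(earlyStopCallback);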
diff --git a/dotnet-examples/kokoro-tts-play/kokoro-tts-play.csproj b/dotnet-examples/kokoro-tts-play/kokoro-tts-play.csproj
new file mode 100644
index 0000000000..6c725686cf
--- /dev/null
+++ b/dotnet-examples/kokoro-tts-play/kokoro-tts-play.csproj
@@ -0,0 +1,19 @@
+
+
+
+ Exe
+ net8.0
+ kokoro_tts_play
+ enable
+ enable
+
+
+
+
+
+
+
+
+
+
+
diff --git a/dotnet-examples/kokoro-tts-play/run-kokoro-en.sh b/dotnet-examples/kokoro-tts-play/run-kokoro-en.sh
new file mode 100755
index 0000000000..08bdc693a6
--- /dev/null
+++ b/dotnet-examples/kokoro-tts-play/run-kokoro-en.sh
@@ -0,0 +1,10 @@
+#!/usr/bin/env bash
+set -ex
+
+if [ ! -f ./kokoro-en-v0_19/model.onnx ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2
+ tar xf kokoro-en-v0_19.tar.bz2
+ rm kokoro-en-v0_19.tar.bz2
+fi
+
+dotnet run
diff --git a/dotnet-examples/kokoro-tts/Program.cs b/dotnet-examples/kokoro-tts/Program.cs
new file mode 100644
index 0000000000..b72f98fcbc
--- /dev/null
+++ b/dotnet-examples/kokoro-tts/Program.cs
@@ -0,0 +1,127 @@
+// Copyright (c) 2025 Xiaomi Corporation
+//
+// This file shows how to use a non-streaming Kokoro TTS model
+// for text-to-speech
+// Please refer to
+// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
+// and
+// https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
+// to download pre-trained models
+using SherpaOnnx;
+using System.Runtime.InteropServices;
+
+class OfflineTtsDemo
+{
+ static void Main(string[] args)
+ {
+
+ TestZhEn();
+ TestEn();
+ }
+
+ static void TestZhEn()
+ {
+ var config = new OfflineTtsConfig();
+ config.Model.Kokoro.Model = "./kokoro-multi-lang-v1_0/model.onnx";
+ config.Model.Kokoro.Voices = "./kokoro-multi-lang-v1_0/voices.bin";
+ config.Model.Kokoro.Tokens = "./kokoro-multi-lang-v1_0/tokens.txt";
+ config.Model.Kokoro.DataDir = "./kokoro-multi-lang-v1_0/espeak-ng-data";
+ config.Model.Kokoro.DictDir = "./kokoro-multi-lang-v1_0/dict";
+ config.Model.Kokoro.Lexicon = "./kokoro-multi-lang-v1_0/lexicon-us-en.txt,./kokoro-multi-lang-v1_0/lexicon-zh.txt";
+
+ config.Model.NumThreads = 2;
+ config.Model.Debug = 1;
+ config.Model.Provider = "cpu";
+
+ var tts = new OfflineTts(config);
+ var speed = 1.0f;
+ var text = "中英文语音合成测试。This is generated by next generation Kaldi using Kokoro without Misaki. 你觉得中英文说的如何呢?";
+
+ var sid = 50;
+
+ var MyCallback = (IntPtr samples, int n, float progress) =>
+ {
+ float[] data = new float[n];
+ Marshal.Copy(samples, data, 0, n);
+ // You can process samples here, e.g., play them.
+ // See ../kokoro-tts-play for how to play them
+ Console.WriteLine($"Progress {progress*100}%");
+
+ // 1 means to keep generating
+ // 0 means to stop generating
+ return 1;
+ };
+
+ var callback = new OfflineTtsCallbackProgress(MyCallback);
+
+ var audio = tts.GenerateWithCallbackProgress(text, speed, sid, callback);
+
+ var outputFilename = "./generated-kokoro-zh-en.wav";
+ var ok = audio.SaveToWaveFile(outputFilename);
+
+ if (ok)
+ {
+ Console.WriteLine($"Wrote to {outputFilename} succeeded!");
+ }
+ else
+ {
+ Console.WriteLine($"Failed to write {outputFilename}");
+ }
+ }
+
+ static void TestEn()
+ {
+ var config = new OfflineTtsConfig();
+ config.Model.Kokoro.Model = "./kokoro-en-v0_19/model.onnx";
+ config.Model.Kokoro.Voices = "./kokoro-en-v0_19/voices.bin";
+ config.Model.Kokoro.Tokens = "./kokoro-en-v0_19/tokens.txt";
+ config.Model.Kokoro.DataDir = "./kokoro-en-v0_19/espeak-ng-data";
+
+ config.Model.NumThreads = 2;
+ config.Model.Debug = 1;
+ config.Model.Provider = "cpu";
+
+ var tts = new OfflineTts(config);
+ var speed = 1.0f;
+ var text = "Today as always, men fall into two groups: slaves and free men. Whoever " +
+ "does not have two-thirds of his day for himself, is a slave, whatever " +
+ "he may be: a statesman, a businessman, an official, or a scholar. " +
+ "Friends fell out often because life was changing so fast. The easiest " +
+ "thing in the world was to lose touch with someone.";
+
+ // mapping of sid to voice name
+ // 0->af, 1->af_bella, 2->af_nicole, 3->af_sarah, 4->af_sky, 5->am_adam
+ // 6->am_michael, 7->bf_emma, 8->bf_isabella, 9->bm_george, 10->bm_lewis
+ var sid = 0;
+
+ var MyCallback = (IntPtr samples, int n, float progress) =>
+ {
+ float[] data = new float[n];
+ Marshal.Copy(samples, data, 0, n);
+ // You can process samples here, e.g., play them.
+ // See ../kokoro-tts-play for how to play them
+ Console.WriteLine($"Progress {progress*100}%");
+
+ // 1 means to keep generating
+ // 0 means to stop generating
+ return 1;
+ };
+
+ var callback = new OfflineTtsCallbackProgress(MyCallback);
+
+ var audio = tts.GenerateWithCallbackProgress(text, speed, sid, callback);
+
+ var outputFilename = "./generated-kokoro-en.wav";
+ var ok = audio.SaveToWaveFile(outputFilename);
+
+ if (ok)
+ {
+ Console.WriteLine($"Wrote to {outputFilename} succeeded!");
+ }
+ else
+ {
+ Console.WriteLine($"Failed to write {outputFilename}");
+ }
+ }
+}
+
diff --git a/dotnet-examples/kokoro-tts/kokoro-tts.csproj b/dotnet-examples/kokoro-tts/kokoro-tts.csproj
new file mode 100644
index 0000000000..132819c6fd
--- /dev/null
+++ b/dotnet-examples/kokoro-tts/kokoro-tts.csproj
@@ -0,0 +1,15 @@
+
+
+
+ Exe
+ net8.0
+ kokoro_tts
+ enable
+ enable
+
+
+
+
+
+
+
diff --git a/dotnet-examples/kokoro-tts/run-kokoro.sh b/dotnet-examples/kokoro-tts/run-kokoro.sh
new file mode 100755
index 0000000000..117a2fc1bc
--- /dev/null
+++ b/dotnet-examples/kokoro-tts/run-kokoro.sh
@@ -0,0 +1,16 @@
+#!/usr/bin/env bash
+set -ex
+
+if [ ! -f ./kokoro-multi-lang-v1_0/model.onnx ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2
+ tar xf kokoro-multi-lang-v1_0.tar.bz2
+ rm kokoro-multi-lang-v1_0.tar.bz2
+fi
+
+if [ ! -f ./kokoro-en-v0_19/model.onnx ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2
+ tar xf kokoro-en-v0_19.tar.bz2
+ rm kokoro-en-v0_19.tar.bz2
+fi
+
+dotnet run
diff --git a/dotnet-examples/offline-decode-files/Program.cs b/dotnet-examples/offline-decode-files/Program.cs
index d971becd3e..0d944e5a39 100644
--- a/dotnet-examples/offline-decode-files/Program.cs
+++ b/dotnet-examples/offline-decode-files/Program.cs
@@ -5,64 +5,73 @@
// Please refer to
// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
// to download non-streaming models
-using CommandLine.Text;
using CommandLine;
+using CommandLine.Text;
using SherpaOnnx;
-using System.Collections.Generic;
-using System;
class OfflineDecodeFiles
{
class Options
{
-
[Option("sample-rate", Required = false, Default = 16000, HelpText = "Sample rate of the data used to train the model")]
- public int SampleRate { get; set; } = 16000;
+ public int SampleRate { get; set; } = 16000;
[Option("feat-dim", Required = false, Default = 80, HelpText = "Dimension of the features used to train the model")]
public int FeatureDim { get; set; } = 80;
[Option(Required = false, HelpText = "Path to tokens.txt")]
- public string Tokens { get; set; } = "";
+ public string Tokens { get; set; } = string.Empty;
[Option(Required = false, Default = "", HelpText = "Path to transducer encoder.onnx. Used only for transducer models")]
- public string Encoder { get; set; } = "";
+ public string Encoder { get; set; } = string.Empty;
[Option(Required = false, Default = "", HelpText = "Path to transducer decoder.onnx. Used only for transducer models")]
- public string Decoder { get; set; } = "";
+ public string Decoder { get; set; } = string.Empty;
- [Option(Required = false, Default = "",HelpText = "Path to transducer joiner.onnx. Used only for transducer models")]
- public string Joiner { get; set; } = "";
+ [Option(Required = false, Default = "", HelpText = "Path to transducer joiner.onnx. Used only for transducer models")]
+ public string Joiner { get; set; } = string.Empty;
[Option("model-type", Required = false, Default = "", HelpText = "model type")]
- public string ModelType { get; set; } = "";
+ public string ModelType { get; set; } = string.Empty;
[Option("whisper-encoder", Required = false, Default = "", HelpText = "Path to whisper encoder.onnx. Used only for whisper models")]
- public string WhisperEncoder { get; set; } = "";
+ public string WhisperEncoder { get; set; } = string.Empty;
[Option("whisper-decoder", Required = false, Default = "", HelpText = "Path to whisper decoder.onnx. Used only for whisper models")]
- public string WhisperDecoder { get; set; } = "";
+ public string WhisperDecoder { get; set; } = string.Empty;
[Option("whisper-language", Required = false, Default = "", HelpText = "Language of the input file. Can be empty")]
- public string WhisperLanguage{ get; set; } = "";
+ public string WhisperLanguage { get; set; } = string.Empty;
[Option("whisper-task", Required = false, Default = "transcribe", HelpText = "transcribe or translate")]
- public string WhisperTask{ get; set; } = "transcribe";
+ public string WhisperTask { get; set; } = "transcribe";
+
+ [Option("moonshine-preprocessor", Required = false, Default = "", HelpText = "Path to preprocess.onnx. Used only for Moonshine models")]
+ public string MoonshinePreprocessor { get; set; } = string.Empty;
+
+ [Option("moonshine-encoder", Required = false, Default = "", HelpText = "Path to encode.onnx. Used only for Moonshine models")]
+ public string MoonshineEncoder { get; set; } = string.Empty;
+
+ [Option("moonshine-uncached-decoder", Required = false, Default = "", HelpText = "Path to uncached_decode.onnx. Used only for Moonshine models")]
+ public string MoonshineUncachedDecoder { get; set; } = string.Empty;
+
+ [Option("moonshine-cached-decoder", Required = false, Default = "", HelpText = "Path to cached_decode.onnx. Used only for Moonshine models")]
+ public string MoonshineCachedDecoder { get; set; } = string.Empty;
[Option("tdnn-model", Required = false, Default = "", HelpText = "Path to tdnn yesno model")]
- public string TdnnModel { get; set; } = "";
+ public string TdnnModel { get; set; } = string.Empty;
[Option(Required = false, HelpText = "Path to model.onnx. Used only for paraformer models")]
- public string Paraformer { get; set; } = "";
+ public string Paraformer { get; set; } = string.Empty;
[Option("nemo-ctc", Required = false, HelpText = "Path to model.onnx. Used only for NeMo CTC models")]
- public string NeMoCtc { get; set; } = "";
+ public string NeMoCtc { get; set; } = string.Empty;
[Option("telespeech-ctc", Required = false, HelpText = "Path to model.onnx. Used only for TeleSpeech CTC models")]
- public string TeleSpeechCtc { get; set; } = "";
+ public string TeleSpeechCtc { get; set; } = string.Empty;
[Option("sense-voice-model", Required = false, HelpText = "Path to model.onnx. Used only for SenseVoice CTC models")]
- public string SenseVoiceModel { get; set; } = "";
+ public string SenseVoiceModel { get; set; } = string.Empty;
[Option("sense-voice-use-itn", Required = false, HelpText = "1 to use inverse text normalization for sense voice.")]
public int SenseVoiceUseItn { get; set; } = 1;
@@ -76,7 +85,7 @@ class Options
[Option("rule-fsts", Required = false, Default = "",
HelpText = "If not empty, path to rule fst for inverse text normalization")]
- public string RuleFsts { get; set; } = "";
+ public string RuleFsts { get; set; } = string.Empty;
[Option("max-active-paths", Required = false, Default = 4,
HelpText = @"Used only when --decoding--method is modified_beam_search.
@@ -84,13 +93,13 @@ class Options
public int MaxActivePaths { get; set; } = 4;
[Option("hotwords-file", Required = false, Default = "", HelpText = "Path to hotwords.txt")]
- public string HotwordsFile { get; set; } = "";
+ public string HotwordsFile { get; set; } = string.Empty;
[Option("hotwords-score", Required = false, Default = 1.5F, HelpText = "hotwords score")]
public float HotwordsScore { get; set; } = 1.5F;
[Option("files", Required = true, HelpText = "Audio files for decoding")]
- public IEnumerable<string> Files { get; set; } = new string[] {};
+ public IEnumerable<string> Files { get; set; } = new string[] { };
}
static void Main(string[] args)
@@ -105,7 +114,7 @@ static void Main(string[] args)
private static void DisplayHelp(ParserResult<Options> result, IEnumerable<Error> errs)
{
- string usage = @"
+ var usage = @"
# Zipformer
dotnet run \
@@ -201,41 +210,48 @@ private static void Run(Options options)
config.ModelConfig.Tokens = options.Tokens;
- if (!String.IsNullOrEmpty(options.Encoder))
+ if (!string.IsNullOrEmpty(options.Encoder))
{
// this is a transducer model
config.ModelConfig.Transducer.Encoder = options.Encoder;
config.ModelConfig.Transducer.Decoder = options.Decoder;
config.ModelConfig.Transducer.Joiner = options.Joiner;
}
- else if (!String.IsNullOrEmpty(options.Paraformer))
+ else if (!string.IsNullOrEmpty(options.Paraformer))
{
config.ModelConfig.Paraformer.Model = options.Paraformer;
}
- else if (!String.IsNullOrEmpty(options.NeMoCtc))
+ else if (!string.IsNullOrEmpty(options.NeMoCtc))
{
config.ModelConfig.NeMoCtc.Model = options.NeMoCtc;
}
- else if (!String.IsNullOrEmpty(options.TeleSpeechCtc))
+ else if (!string.IsNullOrEmpty(options.TeleSpeechCtc))
{
config.ModelConfig.TeleSpeechCtc = options.TeleSpeechCtc;
}
- else if (!String.IsNullOrEmpty(options.WhisperEncoder))
+ else if (!string.IsNullOrEmpty(options.WhisperEncoder))
{
config.ModelConfig.Whisper.Encoder = options.WhisperEncoder;
config.ModelConfig.Whisper.Decoder = options.WhisperDecoder;
config.ModelConfig.Whisper.Language = options.WhisperLanguage;
config.ModelConfig.Whisper.Task = options.WhisperTask;
}
- else if (!String.IsNullOrEmpty(options.TdnnModel))
+ else if (!string.IsNullOrEmpty(options.TdnnModel))
{
config.ModelConfig.Tdnn.Model = options.TdnnModel;
}
- else if (!String.IsNullOrEmpty(options.SenseVoiceModel))
+ else if (!string.IsNullOrEmpty(options.SenseVoiceModel))
{
config.ModelConfig.SenseVoice.Model = options.SenseVoiceModel;
config.ModelConfig.SenseVoice.UseInverseTextNormalization = options.SenseVoiceUseItn;
}
+ else if (!string.IsNullOrEmpty(options.MoonshinePreprocessor))
+ {
+ config.ModelConfig.Moonshine.Preprocessor = options.MoonshinePreprocessor;
+ config.ModelConfig.Moonshine.Encoder = options.MoonshineEncoder;
+ config.ModelConfig.Moonshine.UncachedDecoder = options.MoonshineUncachedDecoder;
+ config.ModelConfig.Moonshine.CachedDecoder = options.MoonshineCachedDecoder;
+ }
else
{
Console.WriteLine("Please provide a model");
@@ -251,17 +267,17 @@ private static void Run(Options options)
config.ModelConfig.Debug = 0;
- OfflineRecognizer recognizer = new OfflineRecognizer(config);
+ var recognizer = new OfflineRecognizer(config);
- string[] files = options.Files.ToArray();
+ var files = options.Files.ToArray();
// We create a separate stream for each file
- List<OfflineStream> streams = new List<OfflineStream>();
+ var streams = new List<OfflineStream>();
streams.EnsureCapacity(files.Length);
for (int i = 0; i != files.Length; ++i)
{
- OfflineStream s = recognizer.CreateStream();
+ var s = recognizer.CreateStream();
WaveReader waveReader = new WaveReader(files[i]);
s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples);
@@ -273,10 +289,21 @@ private static void Run(Options options)
// display results
for (int i = 0; i != files.Length; ++i)
{
- var text = streams[i].Result.Text;
+ var r = streams[i].Result;
Console.WriteLine("--------------------");
Console.WriteLine(files[i]);
- Console.WriteLine(text);
+ Console.WriteLine("Text: {0}", r.Text);
+ Console.WriteLine("Tokens: [{0}]", string.Join(", ", r.Tokens));
+ if (r.Timestamps != null && r.Timestamps.Length > 0) {
+ Console.Write("Timestamps: [");
+ var sep = string.Empty;
+ for (int k = 0; k != r.Timestamps.Length; ++k)
+ {
+ Console.Write("{0}{1}", sep, r.Timestamps[k].ToString("0.00"));
+ sep = ", ";
+ }
+ Console.WriteLine("]");
+ }
}
Console.WriteLine("--------------------");
}
diff --git a/dotnet-examples/offline-decode-files/offline-decode-files.csproj b/dotnet-examples/offline-decode-files/offline-decode-files.csproj
index ffdfb6acee..5b28d48b72 100644
--- a/dotnet-examples/offline-decode-files/offline-decode-files.csproj
+++ b/dotnet-examples/offline-decode-files/offline-decode-files.csproj
@@ -2,7 +2,7 @@
  <OutputType>Exe</OutputType>
-  <TargetFramework>net6.0</TargetFramework>
+  <TargetFramework>net8.0</TargetFramework>
  <RootNamespace>offline_decode_files</RootNamespace>
  <ImplicitUsings>enable</ImplicitUsings>
  <Nullable>enable</Nullable>
diff --git a/dotnet-examples/offline-decode-files/run-moonshine.sh b/dotnet-examples/offline-decode-files/run-moonshine.sh
new file mode 100755
index 0000000000..025e0902db
--- /dev/null
+++ b/dotnet-examples/offline-decode-files/run-moonshine.sh
@@ -0,0 +1,18 @@
+#!/usr/bin/env bash
+
+set -ex
+
+if [ ! -f ./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+ tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+ rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+fi
+
+dotnet run \
+ --num-threads=2 \
+ --moonshine-preprocessor=./sherpa-onnx-moonshine-tiny-en-int8/preprocess.onnx \
+ --moonshine-encoder=./sherpa-onnx-moonshine-tiny-en-int8/encode.int8.onnx \
+ --moonshine-uncached-decoder=./sherpa-onnx-moonshine-tiny-en-int8/uncached_decode.int8.onnx \
+ --moonshine-cached-decoder=./sherpa-onnx-moonshine-tiny-en-int8/cached_decode.int8.onnx \
+ --tokens=./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt \
+ --files ./sherpa-onnx-moonshine-tiny-en-int8/test_wavs/0.wav
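The new Moonshine command-line options map one-to-one onto the Moonshine block of the recognizer's model config, as the branch added to Program.cs above shows. A minimal sketch of wiring it up directly in C#, assuming the config type is OfflineRecognizerConfig and that a single-stream Decode overload exists as in the other offline examples; the file paths are the ones downloaded by run-moonshine.sh:

using SherpaOnnx;

var config = new OfflineRecognizerConfig();
config.ModelConfig.Moonshine.Preprocessor = "./sherpa-onnx-moonshine-tiny-en-int8/preprocess.onnx";
config.ModelConfig.Moonshine.Encoder = "./sherpa-onnx-moonshine-tiny-en-int8/encode.int8.onnx";
config.ModelConfig.Moonshine.UncachedDecoder = "./sherpa-onnx-moonshine-tiny-en-int8/uncached_decode.int8.onnx";
config.ModelConfig.Moonshine.CachedDecoder = "./sherpa-onnx-moonshine-tiny-en-int8/cached_decode.int8.onnx";
config.ModelConfig.Tokens = "./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt";

var recognizer = new OfflineRecognizer(config);
var stream = recognizer.CreateStream();

// Feed the whole file at once; Moonshine is a non-streaming model.
var waveReader = new WaveReader("./sherpa-onnx-moonshine-tiny-en-int8/test_wavs/0.wav");
stream.AcceptWaveform(waveReader.SampleRate, waveReader.Samples);
recognizer.Decode(stream);
Console.WriteLine(stream.Result.Text);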
diff --git a/dotnet-examples/offline-punctuation/Program.cs b/dotnet-examples/offline-punctuation/Program.cs
index d299f8abcf..6f85237b64 100644
--- a/dotnet-examples/offline-punctuation/Program.cs
+++ b/dotnet-examples/offline-punctuation/Program.cs
@@ -12,8 +12,6 @@
// dotnet run
using SherpaOnnx;
-using System.Collections.Generic;
-using System;
class OfflinePunctuationDemo
{
@@ -25,14 +23,14 @@ static void Main(string[] args)
config.Model.NumThreads = 1;
var punct = new OfflinePunctuation(config);
- string[] textList = new string[] {
+ var textList = new string[] {
"这是一个测试你好吗How are you我很好thank you are you ok谢谢你",
"我们都是木头人不会说话不会动",
"The African blogosphere is rapidly expanding bringing more voices online in the form of commentaries opinions analyses rants and poetry",
};
Console.WriteLine("---------");
- foreach (string text in textList)
+ foreach (var text in textList)
{
string textWithPunct = punct.AddPunct(text);
Console.WriteLine("Input text: {0}", text);
diff --git a/dotnet-examples/offline-punctuation/offline-punctuation.csproj b/dotnet-examples/offline-punctuation/offline-punctuation.csproj
index 2d94fcb384..0e3ee42a9a 100644
--- a/dotnet-examples/offline-punctuation/offline-punctuation.csproj
+++ b/dotnet-examples/offline-punctuation/offline-punctuation.csproj
@@ -2,7 +2,7 @@
  <OutputType>Exe</OutputType>
-  <TargetFramework>net6.0</TargetFramework>
+  <TargetFramework>net8.0</TargetFramework>
  <RootNamespace>offline_punctuation</RootNamespace>
  <ImplicitUsings>enable</ImplicitUsings>
  <Nullable>enable</Nullable>
diff --git a/dotnet-examples/offline-speaker-diarization/Program.cs b/dotnet-examples/offline-speaker-diarization/Program.cs
new file mode 100644
index 0000000000..4d8d91b0e5
--- /dev/null
+++ b/dotnet-examples/offline-speaker-diarization/Program.cs
@@ -0,0 +1,82 @@
+// Copyright (c) 2024 Xiaomi Corporation
+//
+
+// This file shows how to use sherpa-onnx C# API for speaker diarization
+/*
+Usage:
+
+Step 1: Download a speaker segmentation model
+
+Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-segmentation-models
+for a list of available models. The following is an example
+
+ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+ tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+ rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+
+Step 2: Download a speaker embedding extractor model
+
+Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
+for a list of available models. The following is an example
+
+ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
+
+Step 3. Download test wave files
+
+Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-segmentation-models
+for a list of available test wave files. The following is an example
+
+ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav
+
+Step 4. Run it
+
+ dotnet run
+*/
+
+using SherpaOnnx;
+
+class OfflineSpeakerDiarizationDemo
+{
+ static void Main(string[] args)
+ {
+ var config = new OfflineSpeakerDiarizationConfig();
+ config.Segmentation.Pyannote.Model = "./sherpa-onnx-pyannote-segmentation-3-0/model.onnx";
+ config.Embedding.Model = "./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx";
+
+ // the test wave ./0-four-speakers-zh.wav has 4 speakers, so
+ // we set num_clusters to 4
+ //
+ config.Clustering.NumClusters = 4;
+ // If you don't know the number of speakers in the test wave file, please
+ // use
+ // config.Clustering.Threshold = 0.5; // You need to tune this threshold
+ var sd = new OfflineSpeakerDiarization(config);
+
+ var testWaveFile = "./0-four-speakers-zh.wav";
+ var waveReader = new WaveReader(testWaveFile);
+ if (sd.SampleRate != waveReader.SampleRate)
+ {
+ Console.WriteLine($"Expected sample rate: {sd.SampleRate}. Given: {waveReader.SampleRate}");
+ return;
+ }
+
+ Console.WriteLine("Started");
+
+ // var segments = sd.Process(waveReader.Samples); // this one is also ok
+
+ var progressCallback = (int numProcessedChunks, int numTotalChunks, IntPtr arg) =>
+ {
+ var progress = 100.0F * numProcessedChunks / numTotalChunks;
+ Console.WriteLine("Progress {0}%", string.Format("{0:0.00}", progress));
+ return 0;
+ };
+
+ var callback = new OfflineSpeakerDiarizationProgressCallback(progressCallback);
+ var segments = sd.ProcessWithCallback(waveReader.Samples, callback, IntPtr.Zero);
+
+ foreach (var s in segments)
+ {
+ Console.WriteLine("{0} -- {1} speaker_{2}", string.Format("{0:0.00}", s.Start), string.Format("{0:0.00}", s.End), s.Speaker);
+ }
+ }
+}
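As the comment above notes, when the number of speakers is not known in advance the fixed NumClusters can be replaced by a distance threshold. A minimal sketch of that variant, using only the types and fields already shown in this example; the 0.5 value is a starting point that typically needs tuning per embedding model:

using SherpaOnnx;

var config = new OfflineSpeakerDiarizationConfig();
config.Segmentation.Pyannote.Model = "./sherpa-onnx-pyannote-segmentation-3-0/model.onnx";
config.Embedding.Model = "./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx";

// Cluster by threshold instead of a known speaker count. A larger threshold
// merges embeddings more aggressively and tends to yield fewer speakers.
config.Clustering.Threshold = 0.5f;

var sd = new OfflineSpeakerDiarization(config);
var waveReader = new WaveReader("./0-four-speakers-zh.wav");
foreach (var s in sd.Process(waveReader.Samples))
{
    Console.WriteLine("{0:0.00} -- {1:0.00} speaker_{2}", s.Start, s.End, s.Speaker);
}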
diff --git a/dotnet-examples/offline-speaker-diarization/offline-speaker-diarization.csproj b/dotnet-examples/offline-speaker-diarization/offline-speaker-diarization.csproj
new file mode 100644
index 0000000000..c7b15faa53
--- /dev/null
+++ b/dotnet-examples/offline-speaker-diarization/offline-speaker-diarization.csproj
@@ -0,0 +1,15 @@
+
+
+
+ Exe
+ net8.0
+ offline_speaker_diarization
+ enable
+ enable
+
+
+
+
+
+
+
diff --git a/dotnet-examples/offline-speaker-diarization/run.sh b/dotnet-examples/offline-speaker-diarization/run.sh
new file mode 100755
index 0000000000..fe64412f96
--- /dev/null
+++ b/dotnet-examples/offline-speaker-diarization/run.sh
@@ -0,0 +1,18 @@
+#!/usr/bin/env bash
+
+
+if [ ! -f ./sherpa-onnx-pyannote-segmentation-3-0/model.onnx ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+ tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+ rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+fi
+
+if [ ! -f ./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
+fi
+
+if [ ! -f ./0-four-speakers-zh.wav ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav
+fi
+
+dotnet run
diff --git a/dotnet-examples/offline-tts-play/Program.cs b/dotnet-examples/offline-tts-play/Program.cs
index a142c127eb..543a50cdd6 100644
--- a/dotnet-examples/offline-tts-play/Program.cs
+++ b/dotnet-examples/offline-tts-play/Program.cs
@@ -10,62 +10,67 @@
// Note that you need a speaker to run this file since it will play
// the generated audio as it is generating.
-using CommandLine.Text;
using CommandLine;
+using CommandLine.Text;
using PortAudioSharp;
using SherpaOnnx;
using System.Collections.Concurrent;
-using System.Collections.Generic;
using System.Runtime.InteropServices;
-using System.Threading;
-using System;
class OfflineTtsPlayDemo
{
class Options
{
-
[Option("tts-rule-fsts", Required = false, Default = "", HelpText = "path to rule.fst")]
- public string RuleFsts { get; set; }
+ public string RuleFsts { get; set; } = string.Empty;
- [Option("vits-dict-dir", Required = false, Default = "", HelpText = "Path to the directory containing dict for jieba.")]
- public string DictDir { get; set; }
+ [Option("tts-rule-fars", Required = false, Default = "", HelpText = "path to rule.far")]
+ public string RuleFars { get; set; } = string.Empty;
- [Option("vits-data-dir", Required = false, Default = "", HelpText = "Path to the directory containing dict for espeak-ng.")]
- public string DataDir { get; set; }
+ [Option("dict-dir", Required = false, Default = "", HelpText = "Path to the directory containing dict for jieba.")]
+ public string DictDir { get; set; } = string.Empty;
- [Option("vits-length-scale", Required = false, Default = 1, HelpText = "speech speed. Larger->Slower; Smaller->faster")]
- public float LengthScale { get; set; }
+ [Option("data-dir", Required = false, Default = "", HelpText = "Path to the directory containing dict for espeak-ng.")]
+ public string DataDir { get; set; } = string.Empty;
- [Option("vits-noise-scale", Required = false, Default = 0.667f, HelpText = "noise_scale for VITS models")]
- public float NoiseScale { get; set; }
+ [Option("length-scale", Required = false, Default = 1, HelpText = "speech speed. Larger->Slower; Smaller->faster")]
+ public float LengthScale { get; set; } = 1;
- [Option("vits-noise-scale-w", Required = false, Default = 0.8f, HelpText = "noise_scale_w for VITS models")]
- public float NoiseScaleW { get; set; }
+ [Option("noise-scale", Required = false, Default = 0.667f, HelpText = "noise_scale for VITS or Matcha models")]
+ public float NoiseScale { get; set; } = 0.667F;
- [Option("vits-lexicon", Required = false, Default = "", HelpText = "Path to lexicon.txt")]
- public string Lexicon { get; set; }
+ [Option("vits-noise-scale-w", Required = false, Default = 0.8F, HelpText = "noise_scale_w for VITS models")]
+ public float NoiseScaleW { get; set; } = 0.8F;
- [Option("vits-tokens", Required = false, Default = "", HelpText = "Path to tokens.txt")]
- public string Tokens { get; set; }
+ [Option("lexicon", Required = false, Default = "", HelpText = "Path to lexicon.txt")]
+ public string Lexicon { get; set; } = string.Empty;
+
+ [Option("tokens", Required = true, Default = "", HelpText = "Path to tokens.txt")]
+ public string Tokens { get; set; } = string.Empty;
[Option("tts-max-num-sentences", Required = false, Default = 1, HelpText = "Maximum number of sentences that we process at a time.")]
- public int MaxNumSentences { get; set; }
+ public int MaxNumSentences { get; set; } = 1;
[Option(Required = false, Default = 0, HelpText = "1 to show debug messages.")]
- public int Debug { get; set; }
+ public int Debug { get; set; } = 0;
+
+ [Option("vits-model", Required = false, HelpText = "Path to VITS model")]
+ public string Model { get; set; } = string.Empty;
+
+ [Option("matcha-acoustic-model", Required = false, HelpText = "Path to the acoustic model of Matcha")]
+ public string AcousticModel { get; set; } = string.Empty;
- [Option("vits-model", Required = true, HelpText = "Path to VITS model")]
- public string Model { get; set; }
+ [Option("matcha-vocoder", Required = false, HelpText = "Path to the vocoder model of Matcha")]
+ public string Vocoder { get; set; } = string.Empty;
[Option("sid", Required = false, Default = 0, HelpText = "Speaker ID")]
- public int SpeakerId { get; set; }
+ public int SpeakerId { get; set; } = 0;
[Option("text", Required = true, HelpText = "Text to synthesize")]
- public string Text { get; set; }
+ public string Text { get; set; } = string.Empty;
[Option("output-filename", Required = true, Default = "./generated.wav", HelpText = "Path to save the generated audio")]
- public string OutputFilename { get; set; }
+ public string OutputFilename { get; set; } = "./generated.wav";
}
static void Main(string[] args)
@@ -81,6 +86,42 @@ static void Main(string[] args)
private static void DisplayHelp(ParserResult<Options> result, IEnumerable<Error> errs)
{
string usage = @"
+# matcha-icefall-zh-baker
+
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
+tar xvf matcha-icefall-zh-baker.tar.bz2
+rm matcha-icefall-zh-baker.tar.bz2
+
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
+
+dotnet run \
+ --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \
+ --matcha-vocoder=./hifigan_v2.onnx \
+ --lexicon=./matcha-icefall-zh-baker/lexicon.txt \
+ --tokens=./matcha-icefall-zh-baker/tokens.txt \
+ --dict-dir=./matcha-icefall-zh-baker/dict \
+ --tts-rule-fsts=./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst \
+ --debug=1 \
+ --output-filename=./matcha-zh.wav \
+ --text='某某银行的副行长和一些行政领导表示,他们去过长江和长白山; 经济不断增长。2024年12月31号,拨打110或者18920240511。123456块钱。'
+
+# matcha-icefall-en_US-ljspeech
+
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
+tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
+rm matcha-icefall-en_US-ljspeech.tar.bz2
+
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
+
+dotnet run \
+ --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \
+ --matcha-vocoder=./hifigan_v2.onnx \
+ --tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \
+ --data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \
+ --debug=1 \
+ --output-filename=./matcha-en.wav \
+ --text='Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar.'
+
# vits-aishell3
wget -qq https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-zh-aishell3.tar.bz2
@@ -88,8 +129,8 @@ tar xf vits-zh-aishell3.tar.bz2
dotnet run \
--vits-model=./vits-zh-aishell3/vits-aishell3.onnx \
- --vits-tokens=./vits-zh-aishell3/tokens.txt \
- --vits-lexicon=./vits-zh-aishell3/lexicon.txt \
+ --tokens=./vits-zh-aishell3/tokens.txt \
+ --lexicon=./vits-zh-aishell3/lexicon.txt \
--tts-rule-fsts=./vits-zh-aishell3/rule.fst \
--sid=66 \
--debug=1 \
@@ -103,8 +144,8 @@ tar xf vits-piper-en_US-amy-low.tar.bz2
dotnet run \
--vits-model=./vits-piper-en_US-amy-low/en_US-amy-low.onnx \
- --vits-tokens=./vits-piper-en_US-amy-low/tokens.txt \
- --vits-data-dir=./vits-piper-en_US-amy-low/espeak-ng-data \
+ --tokens=./vits-piper-en_US-amy-low/tokens.txt \
+ --data-dir=./vits-piper-en_US-amy-low/espeak-ng-data \
--debug=1 \
--output-filename=./amy.wav \
--text='This is a text to speech application in dotnet with Next Generation Kaldi'
@@ -124,10 +165,10 @@ to download more models.
Console.WriteLine(helpText);
}
-
private static void Run(Options options)
{
- OfflineTtsConfig config = new OfflineTtsConfig();
+ var config = new OfflineTtsConfig();
+
config.Model.Vits.Model = options.Model;
config.Model.Vits.Lexicon = options.Lexicon;
config.Model.Vits.Tokens = options.Tokens;
@@ -136,16 +177,25 @@ private static void Run(Options options)
config.Model.Vits.NoiseScale = options.NoiseScale;
config.Model.Vits.NoiseScaleW = options.NoiseScaleW;
config.Model.Vits.LengthScale = options.LengthScale;
+
+ config.Model.Matcha.AcousticModel = options.AcousticModel;
+ config.Model.Matcha.Vocoder = options.Vocoder;
+ config.Model.Matcha.Lexicon = options.Lexicon;
+ config.Model.Matcha.Tokens = options.Tokens;
+ config.Model.Matcha.DataDir = options.DataDir;
+ config.Model.Matcha.DictDir = options.DictDir;
+ config.Model.Matcha.NoiseScale = options.NoiseScale;
+ config.Model.Matcha.LengthScale = options.LengthScale;
+
config.Model.NumThreads = 1;
config.Model.Debug = options.Debug;
config.Model.Provider = "cpu";
config.RuleFsts = options.RuleFsts;
config.MaxNumSentences = options.MaxNumSentences;
- OfflineTts tts = new OfflineTts(config);
- float speed = 1.0f / options.LengthScale;
- int sid = options.SpeakerId;
-
+ var tts = new OfflineTts(config);
+ var speed = 1.0f / options.LengthScale;
+ var sid = options.SpeakerId;
Console.WriteLine(PortAudio.VersionInfo.versionText);
PortAudio.Initialize();
@@ -166,11 +216,11 @@ private static void Run(Options options)
Environment.Exit(1);
}
- DeviceInfo info = PortAudio.GetDeviceInfo(deviceIndex);
+ var info = PortAudio.GetDeviceInfo(deviceIndex);
Console.WriteLine();
Console.WriteLine($"Use output default device {deviceIndex} ({info.name})");
- StreamParameters param = new StreamParameters();
+ var param = new StreamParameters();
param.device = deviceIndex;
param.channelCount = 1;
param.sampleFormat = SampleFormat.Float32;
@@ -178,7 +228,7 @@ private static void Run(Options options)
param.hostApiSpecificStreamInfo = IntPtr.Zero;
// https://learn.microsoft.com/en-us/dotnet/standard/collections/thread-safe/blockingcollection-overview
- BlockingCollection<float[]> dataItems = new BlockingCollection<float[]>();
+ var dataItems = new BlockingCollection<float[]>();
var MyCallback = (IntPtr samples, int n) =>
{
@@ -193,9 +243,9 @@ private static void Run(Options options)
return 1;
};
- bool playFinished = false;
+ var playFinished = false;
- float[] lastSampleArray = null;
+ float[]? lastSampleArray = null;
int lastIndex = 0; // not played
PortAudioSharp.Stream.Callback playCallback = (IntPtr input, IntPtr output,
@@ -270,10 +320,10 @@ IntPtr userData
stream.Start();
- OfflineTtsCallback callback = new OfflineTtsCallback(MyCallback);
+ var callback = new OfflineTtsCallback(MyCallback);
- OfflineTtsGeneratedAudio audio = tts.GenerateWithCallback(options.Text, speed, sid, callback);
- bool ok = audio.SaveToWaveFile(options.OutputFilename);
+ var audio = tts.GenerateWithCallback(options.Text, speed, sid, callback);
+ var ok = audio.SaveToWaveFile(options.OutputFilename);
if (ok)
{
diff --git a/dotnet-examples/offline-tts-play/offline-tts-play.csproj b/dotnet-examples/offline-tts-play/offline-tts-play.csproj
index d28ae62c87..b777bcafe0 100644
--- a/dotnet-examples/offline-tts-play/offline-tts-play.csproj
+++ b/dotnet-examples/offline-tts-play/offline-tts-play.csproj
@@ -2,7 +2,7 @@
  <OutputType>Exe</OutputType>
-  <TargetFramework>net6.0</TargetFramework>
+  <TargetFramework>net8.0</TargetFramework>
  <RootNamespace>offline_tts_play</RootNamespace>
  <ImplicitUsings>enable</ImplicitUsings>
  <Nullable>enable</Nullable>
diff --git a/dotnet-examples/offline-tts-play/run-hf-fanchen.sh b/dotnet-examples/offline-tts-play/run-hf-fanchen.sh
index b16a3ca686..84e6685788 100755
--- a/dotnet-examples/offline-tts-play/run-hf-fanchen.sh
+++ b/dotnet-examples/offline-tts-play/run-hf-fanchen.sh
@@ -8,8 +8,8 @@ fi
dotnet run \
--vits-model=./vits-zh-hf-fanchen-C/vits-zh-hf-fanchen-C.onnx \
- --vits-tokens=./vits-zh-hf-fanchen-C/tokens.txt \
- --vits-lexicon=./vits-zh-hf-fanchen-C/lexicon.txt \
+ --tokens=./vits-zh-hf-fanchen-C/tokens.txt \
+ --lexicon=./vits-zh-hf-fanchen-C/lexicon.txt \
--tts-rule-fsts=./vits-zh-hf-fanchen-C/phone.fst,./vits-zh-hf-fanchen-C/date.fst,./vits-zh-hf-fanchen-C/number.fst \
--vits-dict-dir=./vits-zh-hf-fanchen-C/dict \
--sid=100 \
diff --git a/dotnet-examples/offline-tts-play/run-matcha-en.sh b/dotnet-examples/offline-tts-play/run-matcha-en.sh
new file mode 100755
index 0000000000..0f7caa2154
--- /dev/null
+++ b/dotnet-examples/offline-tts-play/run-matcha-en.sh
@@ -0,0 +1,26 @@
+#!/usr/bin/env bash
+set -ex
+
+
+# please visit
+# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
+# to download more models
+if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
+ tar xf matcha-icefall-en_US-ljspeech.tar.bz2
+ rm matcha-icefall-en_US-ljspeech.tar.bz2
+fi
+
+if [ ! -f ./hifigan_v2.onnx ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
+fi
+
+dotnet run \
+ --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \
+ --matcha-vocoder=./hifigan_v2.onnx \
+ --tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \
+ --data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \
+ --debug=1 \
+ --output-filename=./matcha-en.wav \
+ --text='Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar.'
diff --git a/dotnet-examples/offline-tts-play/run-matcha-zh.sh b/dotnet-examples/offline-tts-play/run-matcha-zh.sh
new file mode 100755
index 0000000000..e3b34268c7
--- /dev/null
+++ b/dotnet-examples/offline-tts-play/run-matcha-zh.sh
@@ -0,0 +1,27 @@
+#!/usr/bin/env bash
+set -ex
+
+# please visit
+# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker
+# to download more models
+if [ ! -f ./matcha-icefall-zh-baker/model-steps-3.onnx ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
+ tar xvf matcha-icefall-zh-baker.tar.bz2
+ rm matcha-icefall-zh-baker.tar.bz2
+fi
+
+if [ ! -f ./hifigan_v2.onnx ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
+fi
+
+
+dotnet run \
+ --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \
+ --matcha-vocoder=./hifigan_v2.onnx \
+ --lexicon=./matcha-icefall-zh-baker/lexicon.txt \
+ --tokens=./matcha-icefall-zh-baker/tokens.txt \
+ --dict-dir=./matcha-icefall-zh-baker/dict \
+ --tts-rule-fsts=./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst \
+ --debug=1 \
+ --output-filename=./matcha-zh.wav \
+ --text="某某银行的副行长和一些行政领导表示,他们去过长江和长白山; 经济不断增长。2024年12月31号,拨打110或者18920240511。123456块钱。"
diff --git a/dotnet-examples/offline-tts-play/run-piper.sh b/dotnet-examples/offline-tts-play/run-piper.sh
index 7c97498d21..1a4d108069 100755
--- a/dotnet-examples/offline-tts-play/run-piper.sh
+++ b/dotnet-examples/offline-tts-play/run-piper.sh
@@ -9,8 +9,8 @@ fi
dotnet run \
--vits-model=./vits-piper-en_US-amy-low/en_US-amy-low.onnx \
- --vits-tokens=./vits-piper-en_US-amy-low/tokens.txt \
- --vits-data-dir=./vits-piper-en_US-amy-low/espeak-ng-data \
+ --tokens=./vits-piper-en_US-amy-low/tokens.txt \
+ --data-dir=./vits-piper-en_US-amy-low/espeak-ng-data \
--debug=1 \
--output-filename=./amy.wav \
--text="This is a text to speech application in dotnet with Next Generation Kaldi"
diff --git a/dotnet-examples/offline-tts/Program.cs b/dotnet-examples/offline-tts/Program.cs
index 6216095f4f..21f90c525d 100644
--- a/dotnet-examples/offline-tts/Program.cs
+++ b/dotnet-examples/offline-tts/Program.cs
@@ -6,43 +6,40 @@
// and
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
// to download pre-trained models
-using CommandLine.Text;
using CommandLine;
+using CommandLine.Text;
using SherpaOnnx;
-using System.Collections.Generic;
-using System;
class OfflineTtsDemo
{
class Options
{
-
[Option("tts-rule-fsts", Required = false, Default = "", HelpText = "path to rule.fst")]
- public string RuleFsts { get; set; } = "";
+ public string RuleFsts { get; set; } = string.Empty;
[Option("tts-rule-fars", Required = false, Default = "", HelpText = "path to rule.far")]
- public string RuleFars { get; set; } = "";
+ public string RuleFars { get; set; } = string.Empty;
- [Option("vits-dict-dir", Required = false, Default = "", HelpText = "Path to the directory containing dict for jieba.")]
- public string DictDir { get; set; } = "";
+ [Option("dict-dir", Required = false, Default = "", HelpText = "Path to the directory containing dict for jieba.")]
+ public string DictDir { get; set; } = string.Empty;
- [Option("vits-data-dir", Required = false, Default = "", HelpText = "Path to the directory containing dict for espeak-ng.")]
- public string DataDir { get; set; } = "";
+ [Option("data-dir", Required = false, Default = "", HelpText = "Path to the directory containing dict for espeak-ng.")]
+ public string DataDir { get; set; } = string.Empty;
- [Option("vits-length-scale", Required = false, Default = 1, HelpText = "speech speed. Larger->Slower; Smaller->faster")]
+ [Option("length-scale", Required = false, Default = 1, HelpText = "speech speed. Larger->Slower; Smaller->faster")]
public float LengthScale { get; set; } = 1;
- [Option("vits-noise-scale", Required = false, Default = 0.667f, HelpText = "noise_scale for VITS models")]
+ [Option("noise-scale", Required = false, Default = 0.667f, HelpText = "noise_scale for VITS or Matcha models")]
public float NoiseScale { get; set; } = 0.667F;
[Option("vits-noise-scale-w", Required = false, Default = 0.8F, HelpText = "noise_scale_w for VITS models")]
public float NoiseScaleW { get; set; } = 0.8F;
- [Option("vits-lexicon", Required = false, Default = "", HelpText = "Path to lexicon.txt")]
- public string Lexicon { get; set; } = "";
+ [Option("lexicon", Required = false, Default = "", HelpText = "Path to lexicon.txt")]
+ public string Lexicon { get; set; } = string.Empty;
- [Option("vits-tokens", Required = false, Default = "", HelpText = "Path to tokens.txt")]
- public string Tokens { get; set; } = "";
+ [Option("tokens", Required = true, Default = "", HelpText = "Path to tokens.txt")]
+ public string Tokens { get; set; } = string.Empty;
[Option("tts-max-num-sentences", Required = false, Default = 1, HelpText = "Maximum number of sentences that we process at a time.")]
public int MaxNumSentences { get; set; } = 1;
@@ -50,14 +47,20 @@ class Options
[Option(Required = false, Default = 0, HelpText = "1 to show debug messages.")]
public int Debug { get; set; } = 0;
- [Option("vits-model", Required = true, HelpText = "Path to VITS model")]
- public string Model { get; set; } = "";
+ [Option("vits-model", Required = false, HelpText = "Path to VITS model")]
+ public string Model { get; set; } = string.Empty;
+
+ [Option("matcha-acoustic-model", Required = false, HelpText = "Path to the acoustic model of Matcha")]
+ public string AcousticModel { get; set; } = string.Empty;
+
+ [Option("matcha-vocoder", Required = false, HelpText = "Path to the vocoder model of Matcha")]
+ public string Vocoder { get; set; } = string.Empty;
[Option("sid", Required = false, Default = 0, HelpText = "Speaker ID")]
public int SpeakerId { get; set; } = 0;
[Option("text", Required = true, HelpText = "Text to synthesize")]
- public string Text { get; set; } = "";
+ public string Text { get; set; } = string.Empty;
[Option("output-filename", Required = true, Default = "./generated.wav", HelpText = "Path to save the generated audio")]
public string OutputFilename { get; set; } = "./generated.wav";
@@ -65,7 +68,7 @@ class Options
static void Main(string[] args)
{
- var parser = new CommandLine.Parser(with => with.HelpWriter = null);
+ var parser = new Parser(with => with.HelpWriter = null);
var parserResult = parser.ParseArguments<Options>(args);
parserResult
@@ -75,7 +78,43 @@ static void Main(string[] args)
private static void DisplayHelp(ParserResult<Options> result, IEnumerable<Error> errs)
{
- string usage = @"
+ var usage = @"
+# matcha-icefall-zh-baker
+
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
+tar xvf matcha-icefall-zh-baker.tar.bz2
+rm matcha-icefall-zh-baker.tar.bz2
+
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
+
+dotnet run \
+ --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \
+ --matcha-vocoder=./hifigan_v2.onnx \
+ --lexicon=./matcha-icefall-zh-baker/lexicon.txt \
+ --tokens=./matcha-icefall-zh-baker/tokens.txt \
+ --dict-dir=./matcha-icefall-zh-baker/dict \
+ --tts-rule-fsts=./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst \
+ --debug=1 \
+ --output-filename=./matcha-zh.wav \
+ --text='某某银行的副行长和一些行政领导表示,他们去过长江和长白山; 经济不断增长。2024年12月31号,拨打110或者18920240511。123456块钱。'
+
+# matcha-icefall-en_US-ljspeech
+
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
+tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
+rm matcha-icefall-en_US-ljspeech.tar.bz2
+
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
+
+dotnet run \
+ --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \
+ --matcha-vocoder=./hifigan_v2.onnx \
+ --tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \
+ --data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \
+ --debug=1 \
+ --output-filename=./matcha-en.wav \
+ --text='Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar.'
+
# vits-aishell3
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
@@ -83,8 +122,8 @@ tar xvf vits-icefall-zh-aishell3.tar.bz2
dotnet run \
--vits-model=./vits-icefall-zh-aishell3/model.onnx \
- --vits-tokens=./vits-icefall-zh-aishell3/tokens.txt \
- --vits-lexicon=./vits-icefall-zh-aishell3/lexicon.txt \
+ --tokens=./vits-icefall-zh-aishell3/tokens.txt \
+ --lexicon=./vits-icefall-zh-aishell3/lexicon.txt \
--tts-rule-fsts=./vits-icefall-zh-aishell3/phone.fst,./vits-icefall-zh-aishell3/date.fst,./vits-icefall-zh-aishell3/number.fst \
--tts-rule-fars=./vits-icefall-zh-aishell3/rule.far \
--sid=66 \
@@ -99,8 +138,8 @@ tar xf vits-piper-en_US-amy-low.tar.bz2
dotnet run \
--vits-model=./vits-piper-en_US-amy-low/en_US-amy-low.onnx \
- --vits-tokens=./vits-piper-en_US-amy-low/tokens.txt \
- --vits-data-dir=./vits-piper-en_US-amy-low/espeak-ng-data \
+ --tokens=./vits-piper-en_US-amy-low/tokens.txt \
+ --data-dir=./vits-piper-en_US-amy-low/espeak-ng-data \
--debug=1 \
--output-filename=./amy.wav \
--text='This is a text to speech application in dotnet with Next Generation Kaldi'
@@ -122,7 +161,7 @@ to download more models.
private static void Run(Options options)
{
- OfflineTtsConfig config = new OfflineTtsConfig();
+ var config = new OfflineTtsConfig();
config.Model.Vits.Model = options.Model;
config.Model.Vits.Lexicon = options.Lexicon;
config.Model.Vits.Tokens = options.Tokens;
@@ -131,6 +170,16 @@ private static void Run(Options options)
config.Model.Vits.NoiseScale = options.NoiseScale;
config.Model.Vits.NoiseScaleW = options.NoiseScaleW;
config.Model.Vits.LengthScale = options.LengthScale;
+
+ config.Model.Matcha.AcousticModel = options.AcousticModel;
+ config.Model.Matcha.Vocoder = options.Vocoder;
+ config.Model.Matcha.Lexicon = options.Lexicon;
+ config.Model.Matcha.Tokens = options.Tokens;
+ config.Model.Matcha.DataDir = options.DataDir;
+ config.Model.Matcha.DictDir = options.DictDir;
+ config.Model.Matcha.NoiseScale = options.NoiseScale;
+ config.Model.Matcha.LengthScale = options.LengthScale;
+
config.Model.NumThreads = 1;
config.Model.Debug = options.Debug;
config.Model.Provider = "cpu";
@@ -138,11 +187,11 @@ private static void Run(Options options)
config.RuleFars = options.RuleFars;
config.MaxNumSentences = options.MaxNumSentences;
- OfflineTts tts = new OfflineTts(config);
- float speed = 1.0f / options.LengthScale;
- int sid = options.SpeakerId;
- OfflineTtsGeneratedAudio audio = tts.Generate(options.Text, speed, sid);
- bool ok = audio.SaveToWaveFile(options.OutputFilename);
+ var tts = new OfflineTts(config);
+ var speed = 1.0f / options.LengthScale;
+ var sid = options.SpeakerId;
+ var audio = tts.Generate(options.Text, speed, sid);
+ var ok = audio.SaveToWaveFile(options.OutputFilename);
if (ok)
{
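
The Program.cs hunks above make `--vits-model` optional, add `--matcha-acoustic-model`/`--matcha-vocoder`, and fill both `config.Model.Vits` and `config.Model.Matcha` from the same options (the library presumably picks the back end from whichever model path is non-empty). Below is a minimal Matcha-only sketch distilled from those hunks; it is not part of the patch, the paths come from the usage string above, and `MatchaTtsSketch` is an illustrative name.

```csharp
// Minimal Matcha sketch distilled from the hunks above; not part of the patch.
// Every config member used here appears in this diff; file paths follow the
// matcha-icefall-zh-baker commands from the usage string.
using System;
using SherpaOnnx;

class MatchaTtsSketch
{
    static void Main()
    {
        var config = new OfflineTtsConfig();
        config.Model.Matcha.AcousticModel = "./matcha-icefall-zh-baker/model-steps-3.onnx";
        config.Model.Matcha.Vocoder = "./hifigan_v2.onnx";
        config.Model.Matcha.Lexicon = "./matcha-icefall-zh-baker/lexicon.txt";
        config.Model.Matcha.Tokens = "./matcha-icefall-zh-baker/tokens.txt";
        config.Model.Matcha.DictDir = "./matcha-icefall-zh-baker/dict";
        config.Model.NumThreads = 1;
        config.Model.Provider = "cpu";
        config.RuleFsts = "./matcha-icefall-zh-baker/phone.fst," +
                          "./matcha-icefall-zh-baker/date.fst," +
                          "./matcha-icefall-zh-baker/number.fst";
        config.MaxNumSentences = 1;

        var tts = new OfflineTts(config);
        // Generate(text, speed, speakerId); speed = 1 / length-scale, as in Run() above.
        var audio = tts.Generate("经济不断增长。", 1.0f, 0);
        var ok = audio.SaveToWaveFile("./matcha-zh.wav");
        Console.WriteLine(ok ? "Saved to ./matcha-zh.wav" : "Failed to save audio");
    }
}
```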
diff --git a/dotnet-examples/offline-tts/offline-tts.csproj b/dotnet-examples/offline-tts/offline-tts.csproj
index 48548fc4c5..20b048f198 100644
--- a/dotnet-examples/offline-tts/offline-tts.csproj
+++ b/dotnet-examples/offline-tts/offline-tts.csproj
@@ -2,7 +2,7 @@
Exe
- net6.0
+ net8.0
offline_tts
enable
enable
diff --git a/dotnet-examples/offline-tts/run-aishell3.sh b/dotnet-examples/offline-tts/run-aishell3.sh
index 02380f07c2..9a54df349d 100755
--- a/dotnet-examples/offline-tts/run-aishell3.sh
+++ b/dotnet-examples/offline-tts/run-aishell3.sh
@@ -8,8 +8,8 @@ fi
dotnet run \
--vits-model=./vits-icefall-zh-aishell3/model.onnx \
- --vits-tokens=./vits-icefall-zh-aishell3/tokens.txt \
- --vits-lexicon=./vits-icefall-zh-aishell3/lexicon.txt \
+ --tokens=./vits-icefall-zh-aishell3/tokens.txt \
+ --lexicon=./vits-icefall-zh-aishell3/lexicon.txt \
--tts-rule-fsts=./vits-icefall-zh-aishell3/phone.fst,./vits-icefall-zh-aishell3/date.fst,./vits-icefall-zh-aishell3/number.fst \
--tts-rule-fars=./vits-icefall-zh-aishell3/rule.far \
--sid=66 \
diff --git a/dotnet-examples/offline-tts/run-hf-fanchen.sh b/dotnet-examples/offline-tts/run-hf-fanchen.sh
index b16a3ca686..a7a52e7330 100755
--- a/dotnet-examples/offline-tts/run-hf-fanchen.sh
+++ b/dotnet-examples/offline-tts/run-hf-fanchen.sh
@@ -8,10 +8,10 @@ fi
dotnet run \
--vits-model=./vits-zh-hf-fanchen-C/vits-zh-hf-fanchen-C.onnx \
- --vits-tokens=./vits-zh-hf-fanchen-C/tokens.txt \
- --vits-lexicon=./vits-zh-hf-fanchen-C/lexicon.txt \
+ --tokens=./vits-zh-hf-fanchen-C/tokens.txt \
+ --lexicon=./vits-zh-hf-fanchen-C/lexicon.txt \
--tts-rule-fsts=./vits-zh-hf-fanchen-C/phone.fst,./vits-zh-hf-fanchen-C/date.fst,./vits-zh-hf-fanchen-C/number.fst \
- --vits-dict-dir=./vits-zh-hf-fanchen-C/dict \
+ --dict-dir=./vits-zh-hf-fanchen-C/dict \
--sid=100 \
--debug=1 \
--output-filename=./fanchen-100.wav \
diff --git a/dotnet-examples/offline-tts/run-matcha-en.sh b/dotnet-examples/offline-tts/run-matcha-en.sh
new file mode 100755
index 0000000000..0f7caa2154
--- /dev/null
+++ b/dotnet-examples/offline-tts/run-matcha-en.sh
@@ -0,0 +1,25 @@
+#!/usr/bin/env bash
+set -ex
+
+
+# please visit
+# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
+# to download more models
+if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
+ tar xf matcha-icefall-en_US-ljspeech.tar.bz2
+ rm matcha-icefall-en_US-ljspeech.tar.bz2
+fi
+
+if [ ! -f ./hifigan_v2.onnx ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
+fi
+
+dotnet run \
+ --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \
+ --matcha-vocoder=./hifigan_v2.onnx \
+ --tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \
+ --data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \
+ --debug=1 \
+ --output-filename=./matcha-en.wav \
+ --text='Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar.'
diff --git a/dotnet-examples/offline-tts/run-matcha-zh.sh b/dotnet-examples/offline-tts/run-matcha-zh.sh
new file mode 100755
index 0000000000..e3b34268c7
--- /dev/null
+++ b/dotnet-examples/offline-tts/run-matcha-zh.sh
@@ -0,0 +1,27 @@
+#!/usr/bin/env bash
+set -ex
+
+# please visit
+# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker
+# to download more models
+if [ ! -f ./matcha-icefall-zh-baker/model-steps-3.onnx ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
+ tar xvf matcha-icefall-zh-baker.tar.bz2
+ rm matcha-icefall-zh-baker.tar.bz2
+fi
+
+if [ ! -f ./hifigan_v2.onnx ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
+fi
+
+
+dotnet run \
+ --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \
+ --matcha-vocoder=./hifigan_v2.onnx \
+ --lexicon=./matcha-icefall-zh-baker/lexicon.txt \
+ --tokens=./matcha-icefall-zh-baker/tokens.txt \
+ --dict-dir=./matcha-icefall-zh-baker/dict \
+ --tts-rule-fsts=./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst \
+ --debug=1 \
+ --output-filename=./matcha-zh.wav \
+ --text="某某银行的副行长和一些行政领导表示,他们去过长江和长白山; 经济不断增长。2024年12月31号,拨打110或者18920240511。123456块钱。"
diff --git a/dotnet-examples/offline-tts/run-piper.sh b/dotnet-examples/offline-tts/run-piper.sh
index ff639c5707..273799bb3b 100755
--- a/dotnet-examples/offline-tts/run-piper.sh
+++ b/dotnet-examples/offline-tts/run-piper.sh
@@ -10,8 +10,8 @@ fi
dotnet run \
--vits-model=./vits-piper-en_US-amy-low/en_US-amy-low.onnx \
- --vits-tokens=./vits-piper-en_US-amy-low/tokens.txt \
- --vits-data-dir=./vits-piper-en_US-amy-low/espeak-ng-data \
+ --tokens=./vits-piper-en_US-amy-low/tokens.txt \
+ --data-dir=./vits-piper-en_US-amy-low/espeak-ng-data \
--debug=1 \
--output-filename=./amy.wav \
--text="This is a text to speech application in dotnet with Next Generation Kaldi"
diff --git a/dotnet-examples/online-decode-files/Program.cs b/dotnet-examples/online-decode-files/Program.cs
index ad53624deb..a1f01be57e 100644
--- a/dotnet-examples/online-decode-files/Program.cs
+++ b/dotnet-examples/online-decode-files/Program.cs
@@ -6,40 +6,37 @@
// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html
// to download streaming models
-using CommandLine.Text;
using CommandLine;
+using CommandLine.Text;
using SherpaOnnx;
-using System.Collections.Generic;
-using System.Linq;
-using System;
class OnlineDecodeFiles
{
class Options
{
[Option(Required = true, HelpText = "Path to tokens.txt")]
- public string Tokens { get; set; } = "";
+ public string Tokens { get; set; } = string.Empty;
[Option(Required = false, Default = "cpu", HelpText = "Provider, e.g., cpu, coreml")]
- public string Provider { get; set; } = "";
+ public string Provider { get; set; } = string.Empty;
[Option(Required = false, HelpText = "Path to transducer encoder.onnx")]
- public string Encoder { get; set; } = "";
+ public string Encoder { get; set; } = string.Empty;
[Option(Required = false, HelpText = "Path to transducer decoder.onnx")]
- public string Decoder { get; set; } = "";
+ public string Decoder { get; set; } = string.Empty;
[Option(Required = false, HelpText = "Path to transducer joiner.onnx")]
- public string Joiner { get; set; } = "";
+ public string Joiner { get; set; } = string.Empty;
[Option("paraformer-encoder", Required = false, HelpText = "Path to paraformer encoder.onnx")]
- public string ParaformerEncoder { get; set; } = "";
+ public string ParaformerEncoder { get; set; } = string.Empty;
[Option("paraformer-decoder", Required = false, HelpText = "Path to paraformer decoder.onnx")]
- public string ParaformerDecoder { get; set; } = "";
+ public string ParaformerDecoder { get; set; } = string.Empty;
[Option("zipformer2-ctc", Required = false, HelpText = "Path to zipformer2 CTC onnx model")]
- public string Zipformer2Ctc { get; set; } = "";
+ public string Zipformer2Ctc { get; set; } = string.Empty;
[Option("num-threads", Required = false, Default = 1, HelpText = "Number of threads for computation")]
public int NumThreads { get; set; } = 1;
@@ -80,15 +77,14 @@ larger than this value after something that is not blank has been decoded. Used
public float Rule3MinUtteranceLength { get; set; } = 20.0F;
[Option("hotwords-file", Required = false, Default = "", HelpText = "Path to hotwords.txt")]
- public string HotwordsFile { get; set; } = "";
+ public string HotwordsFile { get; set; } = string.Empty;
[Option("hotwords-score", Required = false, Default = 1.5F, HelpText = "hotwords score")]
public float HotwordsScore { get; set; } = 1.5F;
[Option("rule-fsts", Required = false, Default = "",
HelpText = "If not empty, path to rule fst for inverse text normalization")]
- public string RuleFsts { get; set; } = "";
-
+ public string RuleFsts { get; set; } = string.Empty;
[Option("files", Required = true, HelpText = "Audio files for decoding")]
public IEnumerable<string> Files { get; set; } = new string[] {};
@@ -162,7 +158,7 @@ to download pre-trained streaming models.
private static void Run(Options options)
{
- OnlineRecognizerConfig config = new OnlineRecognizerConfig();
+ var config = new OnlineRecognizerConfig();
config.FeatConfig.SampleRate = options.SampleRate;
// All models from icefall using feature dim 80.
@@ -194,22 +190,22 @@ private static void Run(Options options)
config.HotwordsScore = options.HotwordsScore;
config.RuleFsts = options.RuleFsts;
- OnlineRecognizer recognizer = new OnlineRecognizer(config);
+ var recognizer = new OnlineRecognizer(config);
- string[] files = options.Files.ToArray();
+ var files = options.Files.ToArray();
// We create a separate stream for each file
- List<OnlineStream> streams = new List<OnlineStream>();
+ var streams = new List<OnlineStream>();
streams.EnsureCapacity(files.Length);
for (int i = 0; i != files.Length; ++i)
{
- OnlineStream s = recognizer.CreateStream();
+ var s = recognizer.CreateStream();
- WaveReader waveReader = new WaveReader(files[i]);
+ var waveReader = new WaveReader(files[i]);
s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples);
- float[] tailPadding = new float[(int)(waveReader.SampleRate * 0.3)];
+ var tailPadding = new float[(int)(waveReader.SampleRate * 0.3)];
s.AcceptWaveform(waveReader.SampleRate, tailPadding);
s.InputFinished();
@@ -230,7 +226,7 @@ private static void Run(Options options)
// display results
for (int i = 0; i != files.Length; ++i)
{
- OnlineRecognizerResult r = recognizer.GetResult(streams[i]);
+ var r = recognizer.GetResult(streams[i]);
var text = r.Text;
var tokens = r.Tokens;
Console.WriteLine("--------------------");
@@ -238,7 +234,7 @@ private static void Run(Options options)
Console.WriteLine("text: {0}", text);
Console.WriteLine("tokens: [{0}]", string.Join(", ", tokens));
Console.Write("timestamps: [");
- r.Timestamps.ToList().ForEach(i => Console.Write(String.Format("{0:0.00}", i) + ", "));
+ r.Timestamps.ToList().ForEach(i => Console.Write(string.Format("{0:0.00}", i) + ", "));
Console.WriteLine("]");
}
Console.WriteLine("--------------------");
diff --git a/dotnet-examples/online-decode-files/online-decode-files.csproj b/dotnet-examples/online-decode-files/online-decode-files.csproj
index 0ff581102e..f1cc3baa72 100644
--- a/dotnet-examples/online-decode-files/online-decode-files.csproj
+++ b/dotnet-examples/online-decode-files/online-decode-files.csproj
@@ -2,7 +2,7 @@
Exe
- net6.0
+ net8.0
online_decode_files
enable
enable
diff --git a/dotnet-examples/sherpa-onnx.sln b/dotnet-examples/sherpa-onnx.sln
index 397fe99e5d..404c49762e 100644
--- a/dotnet-examples/sherpa-onnx.sln
+++ b/dotnet-examples/sherpa-onnx.sln
@@ -29,7 +29,11 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "keyword-spotting-from-files
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "keyword-spotting-from-microphone", "keyword-spotting-from-microphone\keyword-spotting-from-microphone.csproj", "{AEE0ED2B-C86F-4952-863C-EAD3219CB4EC}"
EndProject
-Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "TTS", "TTS\TTS.csproj", "{DACE4A18-4FC8-4437-92BF-5A90BA81286C}"
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "offline-speaker-diarization", "offline-speaker-diarization\offline-speaker-diarization.csproj", "{D3A1FF28-A77D-429D-AEAC-2BA77CA682BC}"
+EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "kokoro-tts", "kokoro-tts\kokoro-tts.csproj", "{9C0ABE6C-1F54-42B5-804E-C3FED6668F52}"
+EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "kokoro-tts-play", "kokoro-tts-play\kokoro-tts-play.csproj", "{EC0BCEAB-1B4E-4129-82CE-9880426AFA0B}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
@@ -89,10 +93,18 @@ Global
{AEE0ED2B-C86F-4952-863C-EAD3219CB4EC}.Debug|Any CPU.Build.0 = Debug|Any CPU
{AEE0ED2B-C86F-4952-863C-EAD3219CB4EC}.Release|Any CPU.ActiveCfg = Release|Any CPU
{AEE0ED2B-C86F-4952-863C-EAD3219CB4EC}.Release|Any CPU.Build.0 = Release|Any CPU
- {DACE4A18-4FC8-4437-92BF-5A90BA81286C}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
- {DACE4A18-4FC8-4437-92BF-5A90BA81286C}.Debug|Any CPU.Build.0 = Debug|Any CPU
- {DACE4A18-4FC8-4437-92BF-5A90BA81286C}.Release|Any CPU.ActiveCfg = Release|Any CPU
- {DACE4A18-4FC8-4437-92BF-5A90BA81286C}.Release|Any CPU.Build.0 = Release|Any CPU
+ {D3A1FF28-A77D-429D-AEAC-2BA77CA682BC}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {D3A1FF28-A77D-429D-AEAC-2BA77CA682BC}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {D3A1FF28-A77D-429D-AEAC-2BA77CA682BC}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {D3A1FF28-A77D-429D-AEAC-2BA77CA682BC}.Release|Any CPU.Build.0 = Release|Any CPU
+ {9C0ABE6C-1F54-42B5-804E-C3FED6668F52}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {9C0ABE6C-1F54-42B5-804E-C3FED6668F52}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {9C0ABE6C-1F54-42B5-804E-C3FED6668F52}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {9C0ABE6C-1F54-42B5-804E-C3FED6668F52}.Release|Any CPU.Build.0 = Release|Any CPU
+ {EC0BCEAB-1B4E-4129-82CE-9880426AFA0B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {EC0BCEAB-1B4E-4129-82CE-9880426AFA0B}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {EC0BCEAB-1B4E-4129-82CE-9880426AFA0B}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {EC0BCEAB-1B4E-4129-82CE-9880426AFA0B}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
diff --git a/dotnet-examples/speaker-identification/Program.cs b/dotnet-examples/speaker-identification/Program.cs
index aef53e8519..20ac703905 100644
--- a/dotnet-examples/speaker-identification/Program.cs
+++ b/dotnet-examples/speaker-identification/Program.cs
@@ -16,20 +16,18 @@
// dotnet run
using SherpaOnnx;
-using System.Collections.Generic;
-using System;
class SpeakerIdentificationDemo
{
- public static float[] ComputeEmbedding(SpeakerEmbeddingExtractor extractor, String filename)
+ public static float[] ComputeEmbedding(SpeakerEmbeddingExtractor extractor, string filename)
{
- WaveReader reader = new WaveReader(filename);
+ var reader = new WaveReader(filename);
- OnlineStream stream = extractor.CreateStream();
+ var stream = extractor.CreateStream();
stream.AcceptWaveform(reader.SampleRate, reader.Samples);
stream.InputFinished();
- float[] embedding = extractor.Compute(stream);
+ var embedding = extractor.Compute(stream);
return embedding;
}
@@ -43,25 +41,25 @@ static void Main(string[] args)
var manager = new SpeakerEmbeddingManager(extractor.Dim);
- string[] spk1Files =
+ var spk1Files =
new string[] {
"./sr-data/enroll/fangjun-sr-1.wav",
"./sr-data/enroll/fangjun-sr-2.wav",
"./sr-data/enroll/fangjun-sr-3.wav",
};
- float[][] spk1Vec = new float[spk1Files.Length][];
+ var spk1Vec = new float[spk1Files.Length][];
for (int i = 0; i < spk1Files.Length; ++i)
{
spk1Vec[i] = ComputeEmbedding(extractor, spk1Files[i]);
}
- string[] spk2Files =
+ var spk2Files =
new string[] {
"./sr-data/enroll/leijun-sr-1.wav", "./sr-data/enroll/leijun-sr-2.wav",
};
- float[][] spk2Vec = new float[spk2Files.Length][];
+ var spk2Vec = new float[spk2Files.Length][];
for (int i = 0; i < spk2Files.Length; ++i)
{
@@ -100,14 +98,14 @@ static void Main(string[] args)
Console.WriteLine("---All speakers---");
- string[] allSpeakers = manager.GetAllSpeakers();
+ var allSpeakers = manager.GetAllSpeakers();
foreach (var s in allSpeakers)
{
Console.WriteLine(s);
}
Console.WriteLine("------------");
- string[] testFiles =
+ var testFiles =
new string[] {
"./sr-data/test/fangjun-test-sr-1.wav",
"./sr-data/test/leijun-test-sr-1.wav",
@@ -117,9 +115,9 @@ static void Main(string[] args)
float threshold = 0.6f;
foreach (var file in testFiles)
{
- float[] embedding = ComputeEmbedding(extractor, file);
+ var embedding = ComputeEmbedding(extractor, file);
- String name = manager.Search(embedding, threshold);
+ var name = manager.Search(embedding, threshold);
if (name == "")
{
name = "";
diff --git a/dotnet-examples/speaker-identification/speaker-identification.csproj b/dotnet-examples/speaker-identification/speaker-identification.csproj
index 7c857fa54c..45a42f49e6 100644
--- a/dotnet-examples/speaker-identification/speaker-identification.csproj
+++ b/dotnet-examples/speaker-identification/speaker-identification.csproj
@@ -2,7 +2,7 @@
Exe
- net6.0
+ net8.0
speaker_identification
enable
enable
diff --git a/dotnet-examples/speech-recognition-from-microphone/Program.cs b/dotnet-examples/speech-recognition-from-microphone/Program.cs
index 586e3b1624..aa0e7803f4 100644
--- a/dotnet-examples/speech-recognition-from-microphone/Program.cs
+++ b/dotnet-examples/speech-recognition-from-microphone/Program.cs
@@ -6,47 +6,43 @@
// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html
// to download streaming models
-using CommandLine.Text;
using CommandLine;
+using CommandLine.Text;
using PortAudioSharp;
-using System.Threading;
using SherpaOnnx;
-using System.Collections.Generic;
using System.Runtime.InteropServices;
-using System;
-
class SpeechRecognitionFromMicrophone
{
class Options
{
[Option(Required = true, HelpText = "Path to tokens.txt")]
- public string Tokens { get; set; }
+ public string? Tokens { get; set; }
[Option(Required = false, Default = "cpu", HelpText = "Provider, e.g., cpu, coreml")]
- public string Provider { get; set; }
+ public string? Provider { get; set; }
[Option(Required = false, HelpText = "Path to transducer encoder.onnx")]
- public string Encoder { get; set; }
+ public string? Encoder { get; set; }
[Option(Required = false, HelpText = "Path to transducer decoder.onnx")]
- public string Decoder { get; set; }
+ public string? Decoder { get; set; }
[Option(Required = false, HelpText = "Path to transducer joiner.onnx")]
- public string Joiner { get; set; }
+ public string? Joiner { get; set; }
[Option("paraformer-encoder", Required = false, HelpText = "Path to paraformer encoder.onnx")]
- public string ParaformerEncoder { get; set; }
+ public string? ParaformerEncoder { get; set; }
[Option("paraformer-decoder", Required = false, HelpText = "Path to paraformer decoder.onnx")]
- public string ParaformerDecoder { get; set; }
+ public string? ParaformerDecoder { get; set; }
[Option("num-threads", Required = false, Default = 1, HelpText = "Number of threads for computation")]
public int NumThreads { get; set; }
[Option("decoding-method", Required = false, Default = "greedy_search",
HelpText = "Valid decoding methods are: greedy_search, modified_beam_search")]
- public string DecodingMethod { get; set; }
+ public string? DecodingMethod { get; set; }
[Option(Required = false, Default = false, HelpText = "True to show model info during loading")]
public bool Debug { get; set; }
@@ -126,7 +122,7 @@ to download pre-trained streaming models.
private static void Run(Options options)
{
- OnlineRecognizerConfig config = new OnlineRecognizerConfig();
+ var config = new OnlineRecognizerConfig();
config.FeatConfig.SampleRate = options.SampleRate;
// All models from icefall using feature dim 80.
@@ -153,9 +149,9 @@ private static void Run(Options options)
config.Rule2MinTrailingSilence = options.Rule2MinTrailingSilence;
config.Rule3MinUtteranceLength = options.Rule3MinUtteranceLength;
- OnlineRecognizer recognizer = new OnlineRecognizer(config);
+ var recognizer = new OnlineRecognizer(config);
- OnlineStream s = recognizer.CreateStream();
+ var s = recognizer.CreateStream();
Console.WriteLine(PortAudio.VersionInfo.versionText);
PortAudio.Initialize();
@@ -176,12 +172,12 @@ private static void Run(Options options)
Environment.Exit(1);
}
- DeviceInfo info = PortAudio.GetDeviceInfo(deviceIndex);
+ var info = PortAudio.GetDeviceInfo(deviceIndex);
Console.WriteLine();
Console.WriteLine($"Use default device {deviceIndex} ({info.name})");
- StreamParameters param = new StreamParameters();
+ var param = new StreamParameters();
param.device = deviceIndex;
param.channelCount = 1;
param.sampleFormat = SampleFormat.Float32;
@@ -189,14 +185,14 @@ private static void Run(Options options)
param.hostApiSpecificStreamInfo = IntPtr.Zero;
PortAudioSharp.Stream.Callback callback = (IntPtr input, IntPtr output,
- UInt32 frameCount,
+ uint frameCount,
ref StreamCallbackTimeInfo timeInfo,
StreamCallbackFlags statusFlags,
IntPtr userData
) =>
{
- float[] samples = new float[frameCount];
- Marshal.Copy(input, samples, 0, (Int32)frameCount);
+ var samples = new float[frameCount];
+ Marshal.Copy(input, samples, 0, (int)frameCount);
s.AcceptWaveform(options.SampleRate, samples);
@@ -215,7 +211,7 @@ IntPtr userData
stream.Start();
- String lastText = "";
+ var lastText = string.Empty;
int segmentIndex = 0;
while (true)
@@ -245,9 +241,5 @@ IntPtr userData
Thread.Sleep(200); // ms
}
-
- PortAudio.Terminate();
-
-
}
}
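
The removed `PortAudio.Terminate()` was unreachable behind the `while (true)` capture loop, which is presumably why this patch drops it. If the loop ever gains an exit condition, a hedged sketch of pairing it with `PortAudio.Initialize()` via `try`/`finally` could look like this; `RunWithPortAudio` is an illustrative helper, not part of the patch.

```csharp
// Sketch only: balance PortAudio.Initialize()/Terminate() once the capture
// loop can exit; RunWithPortAudio is an illustrative helper name.
using System;
using PortAudioSharp;

static class PortAudioLifetimeSketch
{
    public static void RunWithPortAudio(Action captureLoop)
    {
        PortAudio.Initialize();
        try
        {
            captureLoop(); // e.g. open the stream, Start(), and poll the recognizer
        }
        finally
        {
            PortAudio.Terminate();
        }
    }
}
```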
diff --git a/dotnet-examples/speech-recognition-from-microphone/speech-recognition-from-microphone.csproj b/dotnet-examples/speech-recognition-from-microphone/speech-recognition-from-microphone.csproj
index 901c8a1586..72b7b6c914 100644
--- a/dotnet-examples/speech-recognition-from-microphone/speech-recognition-from-microphone.csproj
+++ b/dotnet-examples/speech-recognition-from-microphone/speech-recognition-from-microphone.csproj
@@ -2,7 +2,7 @@
Exe
- net6.0
+ net8.0
speech_recognition_from_microphone
enable
enable
diff --git a/dotnet-examples/spoken-language-identification/Program.cs b/dotnet-examples/spoken-language-identification/Program.cs
index 05a785d7cf..d2f210e85f 100644
--- a/dotnet-examples/spoken-language-identification/Program.cs
+++ b/dotnet-examples/spoken-language-identification/Program.cs
@@ -15,12 +15,9 @@
// dotnet run
using SherpaOnnx;
-using System.Collections.Generic;
-using System;
class SpokenLanguageIdentificationDemo
{
-
static void Main(string[] args)
{
var config = new SpokenLanguageIdentificationConfig();
@@ -30,7 +27,7 @@ static void Main(string[] args)
var slid = new SpokenLanguageIdentification(config);
var filename = "./sherpa-onnx-whisper-tiny/test_wavs/0.wav";
- WaveReader waveReader = new WaveReader(filename);
+ var waveReader = new WaveReader(filename);
var s = slid.CreateStream();
s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples);
diff --git a/dotnet-examples/spoken-language-identification/spoken-language-identification.csproj b/dotnet-examples/spoken-language-identification/spoken-language-identification.csproj
index b8b431a484..e424b2d570 100644
--- a/dotnet-examples/spoken-language-identification/spoken-language-identification.csproj
+++ b/dotnet-examples/spoken-language-identification/spoken-language-identification.csproj
@@ -2,7 +2,7 @@
Exe
- net6.0
+ net8.0
spoken_language_identification
enable
enable
diff --git a/dotnet-examples/streaming-hlg-decoding/Program.cs b/dotnet-examples/streaming-hlg-decoding/Program.cs
index 6ac7c8c945..e522b81649 100644
--- a/dotnet-examples/streaming-hlg-decoding/Program.cs
+++ b/dotnet-examples/streaming-hlg-decoding/Program.cs
@@ -13,12 +13,9 @@
// dotnet run
using SherpaOnnx;
-using System.Collections.Generic;
-using System;
class StreamingHlgDecodingDemo
{
-
static void Main(string[] args)
{
var config = new OnlineRecognizerConfig();
@@ -32,15 +29,15 @@ static void Main(string[] args)
config.ModelConfig.Debug = 0;
config.CtcFstDecoderConfig.Graph = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst";
- OnlineRecognizer recognizer = new OnlineRecognizer(config);
+ var recognizer = new OnlineRecognizer(config);
var filename = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/test_wavs/8k.wav";
- WaveReader waveReader = new WaveReader(filename);
- OnlineStream s = recognizer.CreateStream();
+ var waveReader = new WaveReader(filename);
+ var s = recognizer.CreateStream();
s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples);
- float[] tailPadding = new float[(int)(waveReader.SampleRate * 0.3)];
+ var tailPadding = new float[(int)(waveReader.SampleRate * 0.3)];
s.AcceptWaveform(waveReader.SampleRate, tailPadding);
s.InputFinished();
@@ -49,7 +46,7 @@ static void Main(string[] args)
recognizer.Decode(s);
}
- OnlineRecognizerResult r = recognizer.GetResult(s);
+ var r = recognizer.GetResult(s);
var text = r.Text;
var tokens = r.Tokens;
Console.WriteLine("--------------------");
@@ -57,10 +54,8 @@ static void Main(string[] args)
Console.WriteLine("text: {0}", text);
Console.WriteLine("tokens: [{0}]", string.Join(", ", tokens));
Console.Write("timestamps: [");
- r.Timestamps.ToList().ForEach(i => Console.Write(String.Format("{0:0.00}", i) + ", "));
+ r.Timestamps.ToList().ForEach(i => Console.Write(string.Format("{0:0.00}", i) + ", "));
Console.WriteLine("]");
Console.WriteLine("--------------------");
}
}
-
-
diff --git a/dotnet-examples/streaming-hlg-decoding/streaming-hlg-decoding.csproj b/dotnet-examples/streaming-hlg-decoding/streaming-hlg-decoding.csproj
index 66e0401f13..6ed8fc6991 100644
--- a/dotnet-examples/streaming-hlg-decoding/streaming-hlg-decoding.csproj
+++ b/dotnet-examples/streaming-hlg-decoding/streaming-hlg-decoding.csproj
@@ -2,7 +2,7 @@
Exe
- net6.0
+ net8.0
streaming_hlg_decoding
enable
enable
diff --git a/dotnet-examples/vad-non-streaming-asr-paraformer/Program.cs b/dotnet-examples/vad-non-streaming-asr-paraformer/Program.cs
index abc080b882..e8dfbe6fa1 100644
--- a/dotnet-examples/vad-non-streaming-asr-paraformer/Program.cs
+++ b/dotnet-examples/vad-non-streaming-asr-paraformer/Program.cs
@@ -3,8 +3,6 @@
// This file shows how to use a silero_vad model with a non-streaming Paraformer
// for speech recognition.
using SherpaOnnx;
-using System.Collections.Generic;
-using System;
class VadNonStreamingAsrParaformer
{
@@ -12,45 +10,49 @@ static void Main(string[] args)
{
// please download model files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
- OfflineRecognizerConfig config = new OfflineRecognizerConfig();
+ var config = new OfflineRecognizerConfig();
config.ModelConfig.Paraformer.Model = "./sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx";
config.ModelConfig.Tokens = "./sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt";
config.ModelConfig.Debug = 0;
- OfflineRecognizer recognizer = new OfflineRecognizer(config);
+ var recognizer = new OfflineRecognizer(config);
- VadModelConfig vadModelConfig = new VadModelConfig();
+ var vadModelConfig = new VadModelConfig();
vadModelConfig.SileroVad.Model = "./silero_vad.onnx";
vadModelConfig.Debug = 0;
- VoiceActivityDetector vad = new VoiceActivityDetector(vadModelConfig, 60);
+ var vad = new VoiceActivityDetector(vadModelConfig, 60);
- string testWaveFilename = "./lei-jun-test.wav";
- WaveReader reader = new WaveReader(testWaveFilename);
+ var testWaveFilename = "./lei-jun-test.wav";
+ var reader = new WaveReader(testWaveFilename);
int numSamples = reader.Samples.Length;
int windowSize = vadModelConfig.SileroVad.WindowSize;
int sampleRate = vadModelConfig.SampleRate;
int numIter = numSamples / windowSize;
- for (int i = 0; i != numIter; ++i) {
+ for (int i = 0; i != numIter; ++i)
+ {
int start = i * windowSize;
- float[] samples = new float[windowSize];
+ var samples = new float[windowSize];
Array.Copy(reader.Samples, start, samples, 0, windowSize);
vad.AcceptWaveform(samples);
- if (vad.IsSpeechDetected()) {
- while (!vad.IsEmpty()) {
+ if (vad.IsSpeechDetected())
+ {
+ while (!vad.IsEmpty())
+ {
SpeechSegment segment = vad.Front();
- float startTime = segment.Start / (float)sampleRate;
- float duration = segment.Samples.Length / (float)sampleRate;
+ var startTime = segment.Start / (float)sampleRate;
+ var duration = segment.Samples.Length / (float)sampleRate;
OfflineStream stream = recognizer.CreateStream();
stream.AcceptWaveform(sampleRate, segment.Samples);
recognizer.Decode(stream);
- String text = stream.Result.Text;
+ var text = stream.Result.Text;
- if (!String.IsNullOrEmpty(text)) {
- Console.WriteLine("{0}--{1}: {2}", String.Format("{0:0.00}", startTime),
- String.Format("{0:0.00}", startTime+duration), text);
+ if (!string.IsNullOrEmpty(text))
+ {
+ Console.WriteLine("{0}--{1}: {2}", string.Format("{0:0.00}", startTime),
+ string.Format("{0:0.00}", startTime + duration), text);
}
vad.Pop();
@@ -60,19 +62,21 @@ static void Main(string[] args)
vad.Flush();
- while (!vad.IsEmpty()) {
- SpeechSegment segment = vad.Front();
+ while (!vad.IsEmpty())
+ {
+ var segment = vad.Front();
float startTime = segment.Start / (float)sampleRate;
float duration = segment.Samples.Length / (float)sampleRate;
- OfflineStream stream = recognizer.CreateStream();
+ var stream = recognizer.CreateStream();
stream.AcceptWaveform(sampleRate, segment.Samples);
recognizer.Decode(stream);
- String text = stream.Result.Text;
+ var text = stream.Result.Text;
- if (!String.IsNullOrEmpty(text)) {
- Console.WriteLine("{0}--{1}: {2}", String.Format("{0:0.00}", startTime),
- String.Format("{0:0.00}", startTime+duration), text);
+ if (!string.IsNullOrEmpty(text))
+ {
+ Console.WriteLine("{0}--{1}: {2}", string.Format("{0:0.00}", startTime),
+ string.Format("{0:0.00}", startTime + duration), text);
}
vad.Pop();
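
The same segment-draining loop now appears twice in this file, once inside the streaming loop and once after `vad.Flush()`. Below is a sketch of factoring it into one helper, using only calls that are visible in the hunks above; it is not part of the patch and `DrainSegments` is an illustrative name.

```csharp
// Sketch: the drain loop shared by the streaming and post-Flush() call sites
// above; not part of the patch.
using System;
using SherpaOnnx;

static class VadAsrSketch
{
    public static void DrainSegments(VoiceActivityDetector vad,
                                     OfflineRecognizer recognizer,
                                     int sampleRate)
    {
        while (!vad.IsEmpty())
        {
            var segment = vad.Front();
            var startTime = segment.Start / (float)sampleRate;
            var duration = segment.Samples.Length / (float)sampleRate;

            var stream = recognizer.CreateStream();
            stream.AcceptWaveform(sampleRate, segment.Samples);
            recognizer.Decode(stream);

            var text = stream.Result.Text;
            if (!string.IsNullOrEmpty(text))
            {
                Console.WriteLine("{0:0.00}--{1:0.00}: {2}", startTime, startTime + duration, text);
            }

            vad.Pop();
        }
    }
}
```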
diff --git a/dotnet-examples/vad-non-streaming-asr-paraformer/vad-non-streaming-asr-paraformer.csproj b/dotnet-examples/vad-non-streaming-asr-paraformer/vad-non-streaming-asr-paraformer.csproj
index a5c5f1022d..1736869a87 100644
--- a/dotnet-examples/vad-non-streaming-asr-paraformer/vad-non-streaming-asr-paraformer.csproj
+++ b/dotnet-examples/vad-non-streaming-asr-paraformer/vad-non-streaming-asr-paraformer.csproj
@@ -2,7 +2,7 @@
Exe
- net6.0
+ net8.0
vad_non_streaming_asr_paraformer
enable
enable
diff --git a/ffmpeg-examples/sherpa-onnx-ffmpeg.c b/ffmpeg-examples/sherpa-onnx-ffmpeg.c
index f99ac0bdc8..82cff11738 100644
--- a/ffmpeg-examples/sherpa-onnx-ffmpeg.c
+++ b/ffmpeg-examples/sherpa-onnx-ffmpeg.c
@@ -214,8 +214,8 @@ static int init_filters(const char *filters_descr) {
}
static void sherpa_decode_frame(const AVFrame *frame,
- SherpaOnnxOnlineRecognizer *recognizer,
- SherpaOnnxOnlineStream *stream,
+ const SherpaOnnxOnlineRecognizer *recognizer,
+ const SherpaOnnxOnlineStream *stream,
const SherpaOnnxDisplay *display,
int32_t *segment_id) {
#define N 3200 // 0.2 s. Sample rate is fixed to 16 kHz
@@ -290,7 +290,7 @@ int main(int argc, char **argv) {
}
SherpaOnnxOnlineRecognizerConfig config;
- memset(&config, 0, sizeof(config));
+ memset(&config, 0, sizeof(config));
config.model_config.tokens = argv[1];
config.model_config.transducer.encoder = argv[2];
config.model_config.transducer.decoder = argv[3];
@@ -318,9 +318,10 @@ int main(int argc, char **argv) {
config.rule2_min_trailing_silence = 1.2;
config.rule3_min_utterance_length = 300;
- SherpaOnnxOnlineRecognizer *recognizer =
+ const SherpaOnnxOnlineRecognizer *recognizer =
SherpaOnnxCreateOnlineRecognizer(&config);
- SherpaOnnxOnlineStream *stream = SherpaOnnxCreateOnlineStream(recognizer);
+ const SherpaOnnxOnlineStream *stream =
+ SherpaOnnxCreateOnlineStream(recognizer);
const SherpaOnnxDisplay *display = SherpaOnnxCreateDisplay(50);
int32_t segment_id = 0;
diff --git a/flutter-examples/README.md b/flutter-examples/README.md
index b7ed667156..3bb6b52c5a 100644
--- a/flutter-examples/README.md
+++ b/flutter-examples/README.md
@@ -136,14 +136,16 @@ flutter create --platforms ios ./
Connect your iPhone to the computer, and run `flutter devices`, which will print:
```bash
-Found 3 connected devices:
- iPhone (mobile) • 00008030-001064212E85802E • ios • iOS 16.3 20D47
- macOS (desktop) • macos • darwin-x64 • macOS 13.1 22C65 darwin-x64
- Chrome (web) • chrome • web-javascript • Google Chrome 126.0.6478.127
+Found 4 connected devices:
+ iPhone 14 (mobile) • 634110C4-168D-408F-A938-D7FC62222579 • ios • com.apple.CoreSimulator.SimRuntime.iOS-16-2 (simulator)
+ iPhone (mobile) • 00008030-001064212E85802E • ios • iOS 16.3 20D47
+ macOS (desktop) • macos • darwin-x64 • macOS 13.1 22C65 darwin-x64
+ Chrome (web) • chrome • web-javascript • Google Chrome 126.0.6478.127
No wireless devices were found.
Run "flutter emulators" to list and start any available device emulators.
+(E.g., flutter emulators --launch ios)
If you expected another device to be detected, please run "flutter doctor" to diagnose potential issues. You may also try increasing the time to wait
for connected devices with the "--device-timeout" flag. Visit https://flutter.dev/setup/ for troubleshooting tips.
diff --git a/flutter-examples/streaming_asr/pubspec.yaml b/flutter-examples/streaming_asr/pubspec.yaml
index a09ecdeed4..0fc3f36631 100644
--- a/flutter-examples/streaming_asr/pubspec.yaml
+++ b/flutter-examples/streaming_asr/pubspec.yaml
@@ -5,7 +5,7 @@ description: >
publish_to: 'none'
-version: 1.10.27
+version: 1.10.42
topics:
- speech-recognition
@@ -31,7 +31,7 @@ dependencies:
record: ^5.1.0
url_launcher: ^6.2.6
- sherpa_onnx: ^1.10.27
+ sherpa_onnx: ^1.10.42
# sherpa_onnx:
# path: ../../flutter/sherpa_onnx
diff --git a/flutter-examples/tts/lib/isolate_tts.dart b/flutter-examples/tts/lib/isolate_tts.dart
new file mode 100644
index 0000000000..950503c3c2
--- /dev/null
+++ b/flutter-examples/tts/lib/isolate_tts.dart
@@ -0,0 +1,246 @@
+import 'dart:io';
+import 'dart:isolate';
+
+import 'package:flutter/material.dart';
+import 'package:flutter/services.dart';
+import 'package:media_kit/media_kit.dart';
+import 'package:path/path.dart' as p;
+import 'package:path_provider/path_provider.dart';
+import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
+
+import 'utils.dart';
+
+class _IsolateTask {
+ final SendPort sendPort;
+
+ RootIsolateToken? rootIsolateToken;
+
+ _IsolateTask(this.sendPort, this.rootIsolateToken);
+}
+
+class _PortModel {
+ final String method;
+
+ final SendPort? sendPort;
+ dynamic data;
+
+ _PortModel({
+ required this.method,
+ this.sendPort,
+ this.data,
+ });
+}
+
+class _TtsManager {
+ /// Communication port of the main isolate
+ final ReceivePort receivePort;
+
+ final Isolate isolate;
+
+ final SendPort isolatePort;
+
+ _TtsManager({
+ required this.receivePort,
+ required this.isolate,
+ required this.isolatePort,
+ });
+}
+
+class IsolateTts {
+ static late final _TtsManager _ttsManager;
+
+ /// Get the SendPort of the worker isolate
+ static SendPort get _sendPort => _ttsManager.isolatePort;
+
+ static late sherpa_onnx.OfflineTts _tts;
+
+ static late Player _player;
+
+ static Future<void> init() async {
+ ReceivePort port = ReceivePort();
+ RootIsolateToken? rootIsolateToken = RootIsolateToken.instance;
+
+ Isolate isolate = await Isolate.spawn(
+ _isolateEntry,
+ _IsolateTask(port.sendPort, rootIsolateToken),
+ errorsAreFatal: false,
+ );
+ port.listen((msg) async {
+ if (msg is SendPort) {
+ print('IsolateTts: received SendPort from worker isolate');
+ _ttsManager = _TtsManager(receivePort: port, isolate: isolate, isolatePort: msg);
+ return;
+ }
+ });
+ }
+
+ static Future<void> _isolateEntry(_IsolateTask task) async {
+ if (task.rootIsolateToken != null) {
+ BackgroundIsolateBinaryMessenger.ensureInitialized(task.rootIsolateToken!);
+ }
+ MediaKit.ensureInitialized();
+ _player = Player();
+ sherpa_onnx.initBindings();
+ final receivePort = ReceivePort();
+ task.sendPort.send(receivePort.sendPort);
+
+ String modelDir = '';
+ String modelName = '';
+ String ruleFsts = '';
+ String ruleFars = '';
+ String lexicon = '';
+ String dataDir = '';
+ String dictDir = '';
+
+ // Example 7
+ // https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
+ // https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-melo-tts-zh_en.tar.bz2
+ modelDir = 'vits-melo-tts-zh_en';
+ modelName = 'model.onnx';
+ lexicon = 'lexicon.txt';
+ dictDir = 'vits-melo-tts-zh_en/dict';
+
+ if (modelName == '') {
+ throw Exception('You are supposed to select a model by changing the code before you run the app');
+ }
+
+ final Directory directory = await getApplicationDocumentsDirectory();
+ modelName = p.join(directory.path, modelDir, modelName);
+
+ if (ruleFsts != '') {
+ final all = ruleFsts.split(',');
+ var tmp = [];
+ for (final f in all) {
+ tmp.add(p.join(directory.path, f));
+ }
+ ruleFsts = tmp.join(',');
+ }
+
+ if (ruleFars != '') {
+ final all = ruleFars.split(',');
+ var tmp = [];
+ for (final f in all) {
+ tmp.add(p.join(directory.path, f));
+ }
+ ruleFars = tmp.join(',');
+ }
+
+ if (lexicon != '') {
+ lexicon = p.join(directory.path, modelDir, lexicon);
+ }
+
+ if (dataDir != '') {
+ dataDir = p.join(directory.path, dataDir);
+ }
+
+ if (dictDir != '') {
+ dictDir = p.join(directory.path, dictDir);
+ }
+
+ final tokens = p.join(directory.path, modelDir, 'tokens.txt');
+
+ final vits = sherpa_onnx.OfflineTtsVitsModelConfig(
+ model: modelName,
+ lexicon: lexicon,
+ tokens: tokens,
+ dataDir: dataDir,
+ dictDir: dictDir,
+ );
+
+ final modelConfig = sherpa_onnx.OfflineTtsModelConfig(
+ vits: vits,
+ numThreads: 2,
+ debug: true,
+ provider: 'cpu',
+ );
+
+ final config = sherpa_onnx.OfflineTtsConfig(
+ model: modelConfig,
+ ruleFsts: ruleFsts,
+ ruleFars: ruleFars,
+ maxNumSenetences: 1,
+ );
+ // print(config);
+ receivePort.listen((msg) async {
+ print(msg);
+ if (msg is _PortModel) {
+ switch (msg.method) {
+ case 'generate':
+ {
+ _PortModel _v = msg;
+ final stopwatch = Stopwatch();
+ stopwatch.start();
+ final audio = _tts.generate(text: _v.data['text'], sid: _v.data['sid'], speed: _v.data['speed']);
+ final suffix = '-sid-${_v.data['sid']}-speed-${_v.data['sid'].toStringAsPrecision(2)}';
+ final filename = await generateWaveFilename(suffix);
+
+ final ok = sherpa_onnx.writeWave(
+ filename: filename,
+ samples: audio.samples,
+ sampleRate: audio.sampleRate,
+ );
+
+ if (ok) {
+ stopwatch.stop();
+ double elapsed = stopwatch.elapsed.inMilliseconds.toDouble();
+
+ double waveDuration = audio.samples.length.toDouble() / audio.sampleRate.toDouble();
+
+ print('Saved to\n$filename\n'
+ 'Elapsed: ${(elapsed / 1000).toStringAsPrecision(4)} s\n'
+ 'Wave duration: ${waveDuration.toStringAsPrecision(4)} s\n'
+ 'RTF: ${(elapsed / 1000).toStringAsPrecision(4)}/${waveDuration.toStringAsPrecision(4)} '
+ '= ${(elapsed / 1000 / waveDuration).toStringAsPrecision(3)} ');
+
+ await _player.open(Media('file:///$filename'));
+ await _player.play();
+ }
+ }
+ break;
+ }
+ }
+ });
+ _tts = sherpa_onnx.OfflineTts(config);
+ }
+
+ static Future<void> generate({required String text, int sid = 0, double speed = 1.0}) async {
+ ReceivePort receivePort = ReceivePort();
+ _sendPort.send(_PortModel(
+ method: 'generate',
+ data: {'text': text, 'sid': sid, 'speed': speed},
+ sendPort: receivePort.sendPort,
+ ));
+ await receivePort.first;
+ receivePort.close();
+ }
+}
+
+/// The demo page for isolate-based TTS
+class IsolateTtsView extends StatefulWidget {
+ const IsolateTtsView({super.key});
+
+ @override
+ State<IsolateTtsView> createState() => _IsolateTtsViewState();
+}
+
+class _IsolateTtsViewState extends State<IsolateTtsView> {
+ @override
+ void initState() {
+ super.initState();
+ IsolateTts.init();
+ }
+
+ @override
+ Widget build(BuildContext context) {
+ return Scaffold(
+ body: Center(
+ child: ElevatedButton(
+ onPressed: () {
+ IsolateTts.generate(text: '这是已退出的 isolate TTS');
+ },
+ child: Text('Isolate TTS'),
+ ),
+ ),
+ );
+ }
+}
diff --git a/flutter-examples/tts/lib/main.dart b/flutter-examples/tts/lib/main.dart
index 91bc120e87..78042254ab 100644
--- a/flutter-examples/tts/lib/main.dart
+++ b/flutter-examples/tts/lib/main.dart
@@ -1,8 +1,9 @@
// Copyright (c) 2024 Xiaomi Corporation
import 'package:flutter/material.dart';
-import './tts.dart';
import './info.dart';
+import './tts.dart';
+import 'isolate_tts.dart';
void main() {
runApp(const MyApp());
@@ -38,6 +39,7 @@ class _MyHomePageState extends State {
final List<Widget> _tabs = [
TtsScreen(),
InfoScreen(),
+ IsolateTtsView(),
];
@override
Widget build(BuildContext context) {
@@ -62,6 +64,10 @@ class _MyHomePageState extends State {
icon: Icon(Icons.info),
label: 'Info',
),
+ BottomNavigationBarItem(
+ icon: Icon(Icons.multiline_chart),
+ label: 'isolate',
+ ),
],
),
);
diff --git a/flutter-examples/tts/lib/model.dart b/flutter-examples/tts/lib/model.dart
index 16ada98c38..b95ebca53f 100644
--- a/flutter-examples/tts/lib/model.dart
+++ b/flutter-examples/tts/lib/model.dart
@@ -24,13 +24,14 @@ Future createOfflineTts() async {
String modelDir = '';
String modelName = '';
+ String voices = ''; // for Kokoro only
String ruleFsts = '';
String ruleFars = '';
String lexicon = '';
String dataDir = '';
String dictDir = '';
- // You can select an example below and change it according to match your
+ // You can select an example below and change it to match your
// selected tts model
// ============================================================
@@ -84,6 +85,22 @@ Future createOfflineTts() async {
// lexicon = 'lexicon.txt';
// dictDir = 'vits-melo-tts-zh_en/dict';
+ // Example 8
+ // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/kokoro.html#kokoro-en-v0-19-english-11-speakers
+ // modelDir = 'kokoro-en-v0_19';
+ // modelName = 'model.onnx';
+ // voices = 'voices.bin';
+ // dataDir = 'kokoro-en-v0_19/espeak-ng-data';
+
+ // Example 9
+ // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/kokoro.html
+ // modelDir = 'kokoro-multi-lang-v1_0';
+ // modelName = 'model.onnx';
+ // voices = 'voices.bin';
+ // dataDir = 'kokoro-multi-lang-v1_0/espeak-ng-data';
+ // dictDir = 'kokoro-multi-lang-v1_0/dict';
+ // lexicon = 'kokoro-multi-lang-v1_0/lexicon-us-en.txt,kokoro-multi-lang-v1_0/lexicon-zh.txt';
+
// ============================================================
// Please don't change the remaining part of this function
// ============================================================
@@ -113,7 +130,14 @@ Future createOfflineTts() async {
ruleFars = tmp.join(',');
}
- if (lexicon != '') {
+ if (lexicon.contains(',')) {
+ final all = lexicon.split(',');
+ var tmp = [];
+ for (final f in all) {
+ tmp.add(p.join(directory.path, f));
+ }
+ lexicon = tmp.join(',');
+ } else if (lexicon != '') {
lexicon = p.join(directory.path, modelDir, lexicon);
}
@@ -126,17 +150,38 @@ Future createOfflineTts() async {
}
final tokens = p.join(directory.path, modelDir, 'tokens.txt');
+ if (voices != '') {
+ voices = p.join(directory.path, modelDir, voices);
+ }
- final vits = sherpa_onnx.OfflineTtsVitsModelConfig(
- model: modelName,
- lexicon: lexicon,
- tokens: tokens,
- dataDir: dataDir,
- dictDir: dictDir,
- );
+ late final sherpa_onnx.OfflineTtsVitsModelConfig vits;
+ late final sherpa_onnx.OfflineTtsKokoroModelConfig kokoro;
+
+ if (voices != '') {
+ vits = sherpa_onnx.OfflineTtsVitsModelConfig();
+ kokoro = sherpa_onnx.OfflineTtsKokoroModelConfig(
+ model: modelName,
+ voices: voices,
+ tokens: tokens,
+ dataDir: dataDir,
+ dictDir: dictDir,
+ lexicon: lexicon,
+ );
+ } else {
+ vits = sherpa_onnx.OfflineTtsVitsModelConfig(
+ model: modelName,
+ lexicon: lexicon,
+ tokens: tokens,
+ dataDir: dataDir,
+ dictDir: dictDir,
+ );
+
+ kokoro = sherpa_onnx.OfflineTtsKokoroModelConfig();
+ }
final modelConfig = sherpa_onnx.OfflineTtsModelConfig(
vits: vits,
+ kokoro: kokoro,
numThreads: 2,
debug: true,
provider: 'cpu',
diff --git a/flutter-examples/tts/lib/tts.dart b/flutter-examples/tts/lib/tts.dart
index 342bf070b4..cdf799612e 100644
--- a/flutter-examples/tts/lib/tts.dart
+++ b/flutter-examples/tts/lib/tts.dart
@@ -77,9 +77,7 @@ class _TtsScreenState extends State {
onTapOutside: (PointerDownEvent event) {
FocusManager.instance.primaryFocus?.unfocus();
},
- inputFormatters: [
- FilteringTextInputFormatter.digitsOnly
- ]),
+ inputFormatters: [FilteringTextInputFormatter.digitsOnly]),
Slider(
// decoration: InputDecoration(
// labelText: "speech speed",
@@ -108,125 +106,117 @@ class _TtsScreenState extends State {
},
),
const SizedBox(height: 5),
- Row(
- mainAxisAlignment: MainAxisAlignment.center,
- children: [
- OutlinedButton(
- child: Text("Generate"),
- onPressed: () async {
- await _init();
- await _player?.stop();
-
- setState(() {
- _maxSpeakerID = _tts?.numSpeakers ?? 0;
- if (_maxSpeakerID > 0) {
- _maxSpeakerID -= 1;
- }
- });
-
- if (_tts == null) {
- _controller_hint.value = TextEditingValue(
- text: 'Failed to initialize tts',
- );
- return;
- }
-
- _controller_hint.value = TextEditingValue(
- text: '',
- );
-
- final text = _controller_text_input.text.trim();
- if (text == '') {
- _controller_hint.value = TextEditingValue(
- text: 'Please first input your text to generate',
- );
- return;
- }
-
- final sid =
- int.tryParse(_controller_sid.text.trim()) ?? 0;
-
- final stopwatch = Stopwatch();
- stopwatch.start();
- final audio =
- _tts!.generate(text: text, sid: sid, speed: _speed);
- final suffix =
- '-sid-$sid-speed-${_speed.toStringAsPrecision(2)}';
- final filename = await generateWaveFilename(suffix);
-
- final ok = sherpa_onnx.writeWave(
- filename: filename,
- samples: audio.samples,
- sampleRate: audio.sampleRate,
- );
-
- if (ok) {
- stopwatch.stop();
- double elapsed =
- stopwatch.elapsed.inMilliseconds.toDouble();
-
- double waveDuration =
- audio.samples.length.toDouble() /
- audio.sampleRate.toDouble();
-
- _controller_hint.value = TextEditingValue(
- text: 'Saved to\n$filename\n'
- 'Elapsed: ${(elapsed / 1000).toStringAsPrecision(4)} s\n'
- 'Wave duration: ${waveDuration.toStringAsPrecision(4)} s\n'
- 'RTF: ${(elapsed / 1000).toStringAsPrecision(4)}/${waveDuration.toStringAsPrecision(4)} '
- '= ${(elapsed / 1000 / waveDuration).toStringAsPrecision(3)} ',
- );
- _lastFilename = filename;
-
- await _player?.play(DeviceFileSource(_lastFilename));
- } else {
- _controller_hint.value = TextEditingValue(
- text: 'Failed to save generated audio',
- );
- }
- },
- ),
- const SizedBox(width: 5),
- OutlinedButton(
- child: Text("Clear"),
- onPressed: () {
- _controller_text_input.value = TextEditingValue(
- text: '',
- );
-
- _controller_hint.value = TextEditingValue(
- text: '',
- );
- },
- ),
- const SizedBox(width: 5),
- OutlinedButton(
- child: Text("Play"),
- onPressed: () async {
- if (_lastFilename == '') {
- _controller_hint.value = TextEditingValue(
- text: 'No generated wave file found',
- );
- return;
- }
- await _player?.stop();
- await _player?.play(DeviceFileSource(_lastFilename));
- _controller_hint.value = TextEditingValue(
- text: 'Playing\n$_lastFilename',
- );
- },
- ),
- const SizedBox(width: 5),
- OutlinedButton(
- child: Text("Stop"),
- onPressed: () async {
- await _player?.stop();
- _controller_hint.value = TextEditingValue(
- text: '',
- );
- },
- ),
- ]),
+ Row(mainAxisAlignment: MainAxisAlignment.center, children: [
+ OutlinedButton(
+ child: Text("Generate"),
+ onPressed: () async {
+ await _init();
+ await _player?.stop();
+
+ setState(() {
+ _maxSpeakerID = _tts?.numSpeakers ?? 0;
+ if (_maxSpeakerID > 0) {
+ _maxSpeakerID -= 1;
+ }
+ });
+
+ if (_tts == null) {
+ _controller_hint.value = TextEditingValue(
+ text: 'Failed to initialize tts',
+ );
+ return;
+ }
+
+ _controller_hint.value = TextEditingValue(
+ text: '',
+ );
+
+ final text = _controller_text_input.text.trim();
+ if (text == '') {
+ _controller_hint.value = TextEditingValue(
+ text: 'Please first input your text to generate',
+ );
+ return;
+ }
+
+ final sid = int.tryParse(_controller_sid.text.trim()) ?? 0;
+
+ final stopwatch = Stopwatch();
+ stopwatch.start();
+ final audio = _tts!.generate(text: text, sid: sid, speed: _speed);
+ final suffix = '-sid-$sid-speed-${_speed.toStringAsPrecision(2)}';
+ final filename = await generateWaveFilename(suffix);
+
+ final ok = sherpa_onnx.writeWave(
+ filename: filename,
+ samples: audio.samples,
+ sampleRate: audio.sampleRate,
+ );
+
+ if (ok) {
+ stopwatch.stop();
+ double elapsed = stopwatch.elapsed.inMilliseconds.toDouble();
+
+ double waveDuration = audio.samples.length.toDouble() / audio.sampleRate.toDouble();
+
+ _controller_hint.value = TextEditingValue(
+ text: 'Saved to\n$filename\n'
+ 'Elapsed: ${(elapsed / 1000).toStringAsPrecision(4)} s\n'
+ 'Wave duration: ${waveDuration.toStringAsPrecision(4)} s\n'
+ 'RTF: ${(elapsed / 1000).toStringAsPrecision(4)}/${waveDuration.toStringAsPrecision(4)} '
+ '= ${(elapsed / 1000 / waveDuration).toStringAsPrecision(3)} ',
+ );
+ _lastFilename = filename;
+
+ await _player?.play(DeviceFileSource(_lastFilename));
+ } else {
+ _controller_hint.value = TextEditingValue(
+ text: 'Failed to save generated audio',
+ );
+ }
+ },
+ ),
+ const SizedBox(width: 5),
+ OutlinedButton(
+ child: Text("Clear"),
+ onPressed: () {
+ _controller_text_input.value = TextEditingValue(
+ text: '',
+ );
+
+ _controller_hint.value = TextEditingValue(
+ text: '',
+ );
+ },
+ ),
+ const SizedBox(width: 5),
+ OutlinedButton(
+ child: Text("Play"),
+ onPressed: () async {
+ if (_lastFilename == '') {
+ _controller_hint.value = TextEditingValue(
+ text: 'No generated wave file found',
+ );
+ return;
+ }
+ await _player?.stop();
+ await _player?.play(DeviceFileSource(_lastFilename));
+ _controller_hint.value = TextEditingValue(
+ text: 'Playing\n$_lastFilename',
+ );
+ },
+ ),
+ const SizedBox(width: 5),
+ OutlinedButton(
+ child: Text("Stop"),
+ onPressed: () async {
+ await _player?.stop();
+ _controller_hint.value = TextEditingValue(
+ text: '',
+ );
+ },
+ ),
+ ]),
const SizedBox(height: 5),
TextField(
decoration: InputDecoration(
diff --git a/flutter-examples/tts/pubspec.yaml b/flutter-examples/tts/pubspec.yaml
index 975c0330d3..6e54bd734c 100644
--- a/flutter-examples/tts/pubspec.yaml
+++ b/flutter-examples/tts/pubspec.yaml
@@ -5,7 +5,7 @@ description: >
publish_to: 'none' # Remove this line if you wish to publish to pub.dev
-version: 1.10.27
+version: 1.10.42
environment:
sdk: ">=2.17.0 <4.0.0"
@@ -18,12 +18,18 @@ dependencies:
cupertino_icons: ^1.0.6
path_provider: ^2.1.3
path: ^1.9.0
- sherpa_onnx: ^1.10.27
+ sherpa_onnx: ^1.10.42
# sherpa_onnx:
# path: ../../flutter/sherpa_onnx
url_launcher: 6.2.6
url_launcher_linux: 3.1.0
audioplayers: ^5.0.0
+ media_kit:
+ media_kit_libs_video:
flutter:
uses-material-design: true
+
+ assets:
+ - assets/vits-melo-tts-zh_en/
+ - assets/vits-melo-tts-zh_en/dict/
\ No newline at end of file
diff --git a/flutter/sherpa_onnx/example/example.md b/flutter/sherpa_onnx/example/example.md
index 7e7e8031d0..9255b7ee92 100644
--- a/flutter/sherpa_onnx/example/example.md
+++ b/flutter/sherpa_onnx/example/example.md
@@ -4,13 +4,14 @@
| Functions | URL | Supported Platforms|
|---|---|---|
-|Streaming speech recognition| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/flutter-examples/streaming_asr)| Android, macOS, Windows|
+|Streaming speech recognition| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/flutter-examples/streaming_asr)| Android, iOS, macOS, Windows|
|Speech synthesis| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/flutter-examples/tts)| Android, iOS, Linux, macOS, Windows|
## Pure dart-examples
| Functions | URL | Supported Platforms|
|---|---|---|
+|Speaker diarization| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/speaker-diarization)| macOS, Windows, Linux|
|Streaming speech recognition| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/streaming-asr)| macOS, Windows, Linux|
|Non-Streaming speech recognition| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/non-streaming-asr)| macOS, Windows, Linux|
|Text to speech| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/tts)| macOS, Windows, Linux|
diff --git a/flutter/sherpa_onnx/lib/sherpa_onnx.dart b/flutter/sherpa_onnx/lib/sherpa_onnx.dart
index b15e675329..b9fb7dd53e 100644
--- a/flutter/sherpa_onnx/lib/sherpa_onnx.dart
+++ b/flutter/sherpa_onnx/lib/sherpa_onnx.dart
@@ -6,6 +6,7 @@ export 'src/audio_tagging.dart';
export 'src/feature_config.dart';
export 'src/keyword_spotter.dart';
export 'src/offline_recognizer.dart';
+export 'src/offline_speaker_diarization.dart';
export 'src/offline_stream.dart';
export 'src/online_recognizer.dart';
export 'src/online_stream.dart';
@@ -24,7 +25,7 @@ String? _path;
// https://github.com/flutter/codelabs/blob/main/ffigen_codelab/step_05/lib/ffigen_app.dart
// https://api.flutter.dev/flutter/dart-io/Platform-class.html
final DynamicLibrary _dylib = () {
- if (Platform.isMacOS || Platform.isIOS) {
+ if (Platform.isMacOS) {
if (_path == null) {
return DynamicLibrary.open('libsherpa-onnx-c-api.dylib');
} else {
@@ -32,6 +33,14 @@ final DynamicLibrary _dylib = () {
}
}
+ if (Platform.isIOS) {
+ if (_path == null) {
+ return DynamicLibrary.open('sherpa_onnx.framework/sherpa_onnx');
+ } else {
+ return DynamicLibrary.open('$_path/sherpa_onnx.framework/sherpa_onnx');
+ }
+ }
+
if (Platform.isAndroid || Platform.isLinux) {
if (_path == null) {
return DynamicLibrary.open('libsherpa-onnx-c-api.so');
diff --git a/flutter/sherpa_onnx/lib/src/audio_tagging.dart b/flutter/sherpa_onnx/lib/src/audio_tagging.dart
index 6c650b30cc..3e3dbed2f4 100644
--- a/flutter/sherpa_onnx/lib/src/audio_tagging.dart
+++ b/flutter/sherpa_onnx/lib/src/audio_tagging.dart
@@ -62,6 +62,8 @@ class AudioEvent {
}
class AudioTagging {
+ AudioTagging.fromPtr({required this.ptr, required this.config});
+
AudioTagging._({required this.ptr, required this.config});
// The user has to invoke AudioTagging.free() to avoid memory leak.
diff --git a/flutter/sherpa_onnx/lib/src/keyword_spotter.dart b/flutter/sherpa_onnx/lib/src/keyword_spotter.dart
index c098679954..310657d1a0 100644
--- a/flutter/sherpa_onnx/lib/src/keyword_spotter.dart
+++ b/flutter/sherpa_onnx/lib/src/keyword_spotter.dart
@@ -53,6 +53,8 @@ class KeywordResult {
}
class KeywordSpotter {
+ KeywordSpotter.fromPtr({required this.ptr, required this.config});
+
KeywordSpotter._({required this.ptr, required this.config});
/// The user is responsible to call the OnlineRecognizer.free()
@@ -166,6 +168,10 @@ class KeywordSpotter {
SherpaOnnxBindings.decodeKeywordStream?.call(ptr, stream.ptr);
}
+ void reset(OnlineStream stream) {
+ SherpaOnnxBindings.resetKeywordStream?.call(ptr, stream.ptr);
+ }
+
Pointer<SherpaOnnxKeywordSpotter> ptr;
KeywordSpotterConfig config;
}
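
For reference, a minimal Dart sketch (not part of the diff) of how the new KeywordSpotter.reset() is intended to be used, mirroring the Go keyword-spotting example added later in this patch. The spotter/stream setup, the isReady/decode/getResult calls, and the keyword field on KeywordResult are assumed from the existing Dart API:

// Sketch only: `spotter` is an initialized KeywordSpotter and `stream`
// was created with spotter.createStream() and fed via acceptWaveform().
while (spotter.isReady(stream)) {
  spotter.decode(stream);
  final result = spotter.getResult(stream);
  if (result.keyword != '') {
    // Reset the stream right after a keyword is detected.
    spotter.reset(stream);
    print('Detected: ${result.keyword}');
  }
}
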
diff --git a/flutter/sherpa_onnx/lib/src/offline_recognizer.dart b/flutter/sherpa_onnx/lib/src/offline_recognizer.dart
index 749ffb316d..01bceccceb 100644
--- a/flutter/sherpa_onnx/lib/src/offline_recognizer.dart
+++ b/flutter/sherpa_onnx/lib/src/offline_recognizer.dart
@@ -68,6 +68,24 @@ class OfflineWhisperModelConfig {
final int tailPaddings;
}
+class OfflineMoonshineModelConfig {
+ const OfflineMoonshineModelConfig(
+ {this.preprocessor = '',
+ this.encoder = '',
+ this.uncachedDecoder = '',
+ this.cachedDecoder = ''});
+
+ @override
+ String toString() {
+ return 'OfflineMoonshineModelConfig(preprocessor: $preprocessor, encoder: $encoder, uncachedDecoder: $uncachedDecoder, cachedDecoder: $cachedDecoder)';
+ }
+
+ final String preprocessor;
+ final String encoder;
+ final String uncachedDecoder;
+ final String cachedDecoder;
+}
+
class OfflineTdnnModelConfig {
const OfflineTdnnModelConfig({this.model = ''});
@@ -116,6 +134,7 @@ class OfflineModelConfig {
this.whisper = const OfflineWhisperModelConfig(),
this.tdnn = const OfflineTdnnModelConfig(),
this.senseVoice = const OfflineSenseVoiceModelConfig(),
+ this.moonshine = const OfflineMoonshineModelConfig(),
required this.tokens,
this.numThreads = 1,
this.debug = true,
@@ -128,7 +147,7 @@ class OfflineModelConfig {
@override
String toString() {
- return 'OfflineModelConfig(transducer: $transducer, paraformer: $paraformer, nemoCtc: $nemoCtc, whisper: $whisper, tdnn: $tdnn, senseVoice: $senseVoice, tokens: $tokens, numThreads: $numThreads, debug: $debug, provider: $provider, modelType: $modelType, modelingUnit: $modelingUnit, bpeVocab: $bpeVocab, telespeechCtc: $telespeechCtc)';
+ return 'OfflineModelConfig(transducer: $transducer, paraformer: $paraformer, nemoCtc: $nemoCtc, whisper: $whisper, tdnn: $tdnn, senseVoice: $senseVoice, moonshine: $moonshine, tokens: $tokens, numThreads: $numThreads, debug: $debug, provider: $provider, modelType: $modelType, modelingUnit: $modelingUnit, bpeVocab: $bpeVocab, telespeechCtc: $telespeechCtc)';
}
final OfflineTransducerModelConfig transducer;
@@ -137,6 +156,7 @@ class OfflineModelConfig {
final OfflineWhisperModelConfig whisper;
final OfflineTdnnModelConfig tdnn;
final OfflineSenseVoiceModelConfig senseVoice;
+ final OfflineMoonshineModelConfig moonshine;
final String tokens;
final int numThreads;
@@ -207,6 +227,8 @@ class OfflineRecognizerResult {
}
class OfflineRecognizer {
+ OfflineRecognizer.fromPtr({required this.ptr, required this.config});
+
OfflineRecognizer._({required this.ptr, required this.config});
void free() {
@@ -257,6 +279,15 @@ class OfflineRecognizer {
c.ref.model.senseVoice.useInverseTextNormalization =
config.model.senseVoice.useInverseTextNormalization ? 1 : 0;
+ c.ref.model.moonshine.preprocessor =
+ config.model.moonshine.preprocessor.toNativeUtf8();
+ c.ref.model.moonshine.encoder =
+ config.model.moonshine.encoder.toNativeUtf8();
+ c.ref.model.moonshine.uncachedDecoder =
+ config.model.moonshine.uncachedDecoder.toNativeUtf8();
+ c.ref.model.moonshine.cachedDecoder =
+ config.model.moonshine.cachedDecoder.toNativeUtf8();
+
c.ref.model.tokens = config.model.tokens.toNativeUtf8();
c.ref.model.numThreads = config.model.numThreads;
@@ -294,6 +325,10 @@ class OfflineRecognizer {
calloc.free(c.ref.model.modelType);
calloc.free(c.ref.model.provider);
calloc.free(c.ref.model.tokens);
+ calloc.free(c.ref.model.moonshine.cachedDecoder);
+ calloc.free(c.ref.model.moonshine.uncachedDecoder);
+ calloc.free(c.ref.model.moonshine.encoder);
+ calloc.free(c.ref.model.moonshine.preprocessor);
calloc.free(c.ref.model.senseVoice.language);
calloc.free(c.ref.model.senseVoice.model);
calloc.free(c.ref.model.tdnn.model);
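
For reference, a minimal Dart sketch (not part of the diff) of how the new moonshine field might be plugged into an OfflineRecognizer. The model file names follow the run-moonshine.sh script added later in this patch; the OfflineRecognizerConfig/createStream/acceptWaveform/decode/getResult calls are assumed from the existing offline-recognizer API:

import 'dart:typed_data';

import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

void decodeWithMoonshine(Float32List samples, int sampleRate) {
  final recognizer = sherpa_onnx.OfflineRecognizer(
    sherpa_onnx.OfflineRecognizerConfig(
      model: sherpa_onnx.OfflineModelConfig(
        moonshine: sherpa_onnx.OfflineMoonshineModelConfig(
          preprocessor: './sherpa-onnx-moonshine-tiny-en-int8/preprocess.onnx',
          encoder: './sherpa-onnx-moonshine-tiny-en-int8/encode.int8.onnx',
          uncachedDecoder: './sherpa-onnx-moonshine-tiny-en-int8/uncached_decode.int8.onnx',
          cachedDecoder: './sherpa-onnx-moonshine-tiny-en-int8/cached_decode.int8.onnx',
        ),
        tokens: './sherpa-onnx-moonshine-tiny-en-int8/tokens.txt',
      ),
    ),
  );

  // Non-streaming decoding: feed the whole utterance, then decode once.
  final stream = recognizer.createStream();
  stream.acceptWaveform(samples: samples, sampleRate: sampleRate);
  recognizer.decode(stream);
  print(recognizer.getResult(stream).text);

  stream.free();
  recognizer.free();
}
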
diff --git a/flutter/sherpa_onnx/lib/src/offline_speaker_diarization.dart b/flutter/sherpa_onnx/lib/src/offline_speaker_diarization.dart
new file mode 100644
index 0000000000..fe046a1668
--- /dev/null
+++ b/flutter/sherpa_onnx/lib/src/offline_speaker_diarization.dart
@@ -0,0 +1,246 @@
+// Copyright (c) 2024 Xiaomi Corporation
+import 'dart:ffi';
+import 'dart:typed_data';
+
+import 'package:ffi/ffi.dart';
+
+import './sherpa_onnx_bindings.dart';
+import './speaker_identification.dart';
+
+class OfflineSpeakerDiarizationSegment {
+ const OfflineSpeakerDiarizationSegment({
+ required this.start,
+ required this.end,
+ required this.speaker,
+ });
+
+ @override
+ String toString() {
+ return 'OfflineSpeakerDiarizationSegment(start: $start, end: $end, speaker: $speaker)';
+ }
+
+ final double start;
+ final double end;
+ final int speaker;
+}
+
+class OfflineSpeakerSegmentationPyannoteModelConfig {
+ const OfflineSpeakerSegmentationPyannoteModelConfig({
+ this.model = '',
+ });
+
+ @override
+ String toString() {
+ return 'OfflineSpeakerSegmentationPyannoteModelConfig(model: $model)';
+ }
+
+ final String model;
+}
+
+class OfflineSpeakerSegmentationModelConfig {
+ const OfflineSpeakerSegmentationModelConfig({
+ this.pyannote = const OfflineSpeakerSegmentationPyannoteModelConfig(),
+ this.numThreads = 1,
+ this.debug = true,
+ this.provider = 'cpu',
+ });
+
+ @override
+ String toString() {
+ return 'OfflineSpeakerSegmentationModelConfig(pyannote: $pyannote, numThreads: $numThreads, debug: $debug, provider: $provider)';
+ }
+
+ final OfflineSpeakerSegmentationPyannoteModelConfig pyannote;
+
+ final int numThreads;
+ final bool debug;
+ final String provider;
+}
+
+class FastClusteringConfig {
+ const FastClusteringConfig({
+ this.numClusters = -1,
+ this.threshold = 0.5,
+ });
+
+ @override
+ String toString() {
+ return 'FastClusteringConfig(numClusters: $numClusters, threshold: $threshold)';
+ }
+
+ final int numClusters;
+ final double threshold;
+}
+
+class OfflineSpeakerDiarizationConfig {
+ const OfflineSpeakerDiarizationConfig({
+ this.segmentation = const OfflineSpeakerSegmentationModelConfig(),
+ this.embedding = const SpeakerEmbeddingExtractorConfig(model: ''),
+ this.clustering = const FastClusteringConfig(),
+ this.minDurationOn = 0.2,
+ this.minDurationOff = 0.5,
+ });
+
+ @override
+ String toString() {
+ return 'OfflineSpeakerDiarizationConfig(segmentation: $segmentation, embedding: $embedding, clustering: $clustering, minDurationOn: $minDurationOn, minDurationOff: $minDurationOff)';
+ }
+
+ final OfflineSpeakerSegmentationModelConfig segmentation;
+ final SpeakerEmbeddingExtractorConfig embedding;
+ final FastClusteringConfig clustering;
+ final double minDurationOff; // in seconds
+ final double minDurationOn; // in seconds
+}
+
+class OfflineSpeakerDiarization {
+ OfflineSpeakerDiarization.fromPtr(
+ {required this.ptr, required this.config, required this.sampleRate});
+
+ OfflineSpeakerDiarization._(
+ {required this.ptr, required this.config, required this.sampleRate});
+
+ void free() {
+ SherpaOnnxBindings.sherpaOnnxDestroyOfflineSpeakerDiarization?.call(ptr);
+ ptr = nullptr;
+ }
+
+ /// The user is responsible to call the OfflineSpeakerDiarization.free()
+ /// method of the returned instance to avoid memory leak.
+ factory OfflineSpeakerDiarization(OfflineSpeakerDiarizationConfig config) {
+    final c = calloc<SherpaOnnxOfflineSpeakerDiarizationConfig>();
+
+ c.ref.segmentation.pyannote.model =
+ config.segmentation.pyannote.model.toNativeUtf8();
+ c.ref.segmentation.numThreads = config.segmentation.numThreads;
+ c.ref.segmentation.debug = config.segmentation.debug ? 1 : 0;
+ c.ref.segmentation.provider = config.segmentation.provider.toNativeUtf8();
+
+ c.ref.embedding.model = config.embedding.model.toNativeUtf8();
+ c.ref.embedding.numThreads = config.embedding.numThreads;
+ c.ref.embedding.debug = config.embedding.debug ? 1 : 0;
+ c.ref.embedding.provider = config.embedding.provider.toNativeUtf8();
+
+ c.ref.clustering.numClusters = config.clustering.numClusters;
+ c.ref.clustering.threshold = config.clustering.threshold;
+
+ c.ref.minDurationOn = config.minDurationOn;
+ c.ref.minDurationOff = config.minDurationOff;
+
+ final ptr =
+ SherpaOnnxBindings.sherpaOnnxCreateOfflineSpeakerDiarization?.call(c) ??
+ nullptr;
+
+ calloc.free(c.ref.embedding.provider);
+ calloc.free(c.ref.embedding.model);
+ calloc.free(c.ref.segmentation.provider);
+ calloc.free(c.ref.segmentation.pyannote.model);
+
+ int sampleRate = 0;
+ if (ptr != nullptr) {
+ sampleRate = SherpaOnnxBindings
+ .sherpaOnnxOfflineSpeakerDiarizationGetSampleRate
+ ?.call(ptr) ??
+ 0;
+ }
+ return OfflineSpeakerDiarization._(
+ ptr: ptr, config: config, sampleRate: sampleRate);
+ }
+
+  List<OfflineSpeakerDiarizationSegment> process(
+      {required Float32List samples}) {
+ if (ptr == nullptr) {
+ return [];
+ }
+
+ final n = samples.length;
+    final Pointer<Float> p = calloc<Float>(n);
+
+ final pList = p.asTypedList(n);
+ pList.setAll(0, samples);
+
+ final r = SherpaOnnxBindings.sherpaOnnxOfflineSpeakerDiarizationProcess
+ ?.call(ptr, p, n) ??
+ nullptr;
+
+ final ans = _processImpl(r);
+
+ SherpaOnnxBindings.sherpaOnnxOfflineSpeakerDiarizationDestroyResult
+ ?.call(r);
+
+ return ans;
+ }
+
+  List<OfflineSpeakerDiarizationSegment> processWithCallback({
+ required Float32List samples,
+ required int Function(int numProcessedChunks, int numTotalChunks) callback,
+ }) {
+ if (ptr == nullptr) {
+ return [];
+ }
+
+ final n = samples.length;
+    final Pointer<Float> p = calloc<Float>(n);
+
+ final pList = p.asTypedList(n);
+ pList.setAll(0, samples);
+
+ final wrapper = NativeCallable<
+ SherpaOnnxOfflineSpeakerDiarizationProgressCallbackNoArgNative>.isolateLocal(
+ (int numProcessedChunks, int numTotalChunks) {
+ return callback(numProcessedChunks, numTotalChunks);
+ }, exceptionalReturn: 0);
+
+ final r = SherpaOnnxBindings
+ .sherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg
+ ?.call(ptr, p, n, wrapper.nativeFunction) ??
+ nullptr;
+
+ wrapper.close();
+
+ final ans = _processImpl(r);
+
+ SherpaOnnxBindings.sherpaOnnxOfflineSpeakerDiarizationDestroyResult
+ ?.call(r);
+
+ return ans;
+ }
+
+  List<OfflineSpeakerDiarizationSegment> _processImpl(
+      Pointer<SherpaOnnxOfflineSpeakerDiarizationResult> r) {
+ if (r == nullptr) {
+ return [];
+ }
+
+ final numSegments = SherpaOnnxBindings
+ .sherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments
+ ?.call(r) ??
+ 0;
+ final segments = SherpaOnnxBindings
+ .sherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime
+ ?.call(r) ??
+ nullptr;
+
+ if (segments == nullptr) {
+ return [];
+ }
+
+    final ans = <OfflineSpeakerDiarizationSegment>[];
+ for (int i = 0; i != numSegments; ++i) {
+ final s = segments + i;
+
+ final tmp = OfflineSpeakerDiarizationSegment(
+ start: s.ref.start, end: s.ref.end, speaker: s.ref.speaker);
+ ans.add(tmp);
+ }
+
+ SherpaOnnxBindings.sherpaOnnxOfflineSpeakerDiarizationDestroySegment
+ ?.call(segments);
+
+ return ans;
+ }
+
+  Pointer<SherpaOnnxOfflineSpeakerDiarization> ptr;
+ OfflineSpeakerDiarizationConfig config;
+ final int sampleRate;
+}
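
For reference, a hedged Dart usage sketch (not part of the diff) for the new OfflineSpeakerDiarization class. Model and wave file names follow the Go non-streaming-speaker-diarization example added later in this patch; readWave() is assumed from the existing Dart API:

import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

void main() {
  final sd = sherpa_onnx.OfflineSpeakerDiarization(
    sherpa_onnx.OfflineSpeakerDiarizationConfig(
      segmentation: sherpa_onnx.OfflineSpeakerSegmentationModelConfig(
        pyannote: sherpa_onnx.OfflineSpeakerSegmentationPyannoteModelConfig(
          model: './sherpa-onnx-pyannote-segmentation-3-0/model.onnx',
        ),
      ),
      embedding: sherpa_onnx.SpeakerEmbeddingExtractorConfig(
        model: './3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx',
      ),
      // The test wave contains 4 speakers. If the speaker count is unknown,
      // leave numClusters at -1 and tune threshold instead.
      clustering: sherpa_onnx.FastClusteringConfig(numClusters: 4),
    ),
  );

  final wave = sherpa_onnx.readWave('./0-four-speakers-zh.wav');
  // wave.sampleRate should match sd.sampleRate before calling process().
  final segments = sd.process(samples: wave.samples);
  for (final s in segments) {
    print('${s.start.toStringAsFixed(3)} -- ${s.end.toStringAsFixed(3)} '
        'speaker_${s.speaker}');
  }

  sd.free();
}
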
diff --git a/flutter/sherpa_onnx/lib/src/online_recognizer.dart b/flutter/sherpa_onnx/lib/src/online_recognizer.dart
index 18d5a60006..69ed93894d 100644
--- a/flutter/sherpa_onnx/lib/src/online_recognizer.dart
+++ b/flutter/sherpa_onnx/lib/src/online_recognizer.dart
@@ -162,6 +162,8 @@ class OnlineRecognizerResult {
}
class OnlineRecognizer {
+ OnlineRecognizer.fromPtr({required this.ptr, required this.config});
+
OnlineRecognizer._({required this.ptr, required this.config});
/// The user is responsible to call the OnlineRecognizer.free()
diff --git a/flutter/sherpa_onnx/lib/src/punctuation.dart b/flutter/sherpa_onnx/lib/src/punctuation.dart
index b4197fa46b..dd38a2445b 100644
--- a/flutter/sherpa_onnx/lib/src/punctuation.dart
+++ b/flutter/sherpa_onnx/lib/src/punctuation.dart
@@ -36,6 +36,8 @@ class OfflinePunctuationConfig {
}
class OfflinePunctuation {
+ OfflinePunctuation.fromPtr({required this.ptr, required this.config});
+
OfflinePunctuation._({required this.ptr, required this.config});
// The user has to invoke OfflinePunctuation.free() to avoid memory leak.
diff --git a/flutter/sherpa_onnx/lib/src/sherpa_onnx_bindings.dart b/flutter/sherpa_onnx/lib/src/sherpa_onnx_bindings.dart
index 42294c2d4a..c22c2a528c 100644
--- a/flutter/sherpa_onnx/lib/src/sherpa_onnx_bindings.dart
+++ b/flutter/sherpa_onnx/lib/src/sherpa_onnx_bindings.dart
@@ -2,6 +2,66 @@
import 'dart:ffi';
import 'package:ffi/ffi.dart';
+final class SherpaOnnxSpeakerEmbeddingExtractorConfig extends Struct {
+  external Pointer<Utf8> model;
+
+ @Int32()
+ external int numThreads;
+
+ @Int32()
+ external int debug;
+
+  external Pointer<Utf8> provider;
+}
+
+final class SherpaOnnxOfflineSpeakerDiarizationSegment extends Struct {
+ @Float()
+ external double start;
+
+ @Float()
+ external double end;
+
+ @Int32()
+ external int speaker;
+}
+
+final class SherpaOnnxOfflineSpeakerSegmentationPyannoteModelConfig
+ extends Struct {
+  external Pointer<Utf8> model;
+}
+
+final class SherpaOnnxOfflineSpeakerSegmentationModelConfig extends Struct {
+ external SherpaOnnxOfflineSpeakerSegmentationPyannoteModelConfig pyannote;
+
+ @Int32()
+ external int numThreads;
+
+ @Int32()
+ external int debug;
+
+  external Pointer<Utf8> provider;
+}
+
+final class SherpaOnnxFastClusteringConfig extends Struct {
+ @Int32()
+ external int numClusters;
+
+ @Float()
+ external double threshold;
+}
+
+final class SherpaOnnxOfflineSpeakerDiarizationConfig extends Struct {
+ external SherpaOnnxOfflineSpeakerSegmentationModelConfig segmentation;
+ external SherpaOnnxSpeakerEmbeddingExtractorConfig embedding;
+ external SherpaOnnxFastClusteringConfig clustering;
+
+ @Float()
+ external double minDurationOn;
+
+ @Float()
+ external double minDurationOff;
+}
+
final class SherpaOnnxOfflinePunctuationModelConfig extends Struct {
external Pointer<Utf8> ctTransformer;
@@ -71,6 +131,34 @@ final class SherpaOnnxOfflineTtsVitsModelConfig extends Struct {
external Pointer<Utf8> dictDir;
}
+final class SherpaOnnxOfflineTtsMatchaModelConfig extends Struct {
+  external Pointer<Utf8> acousticModel;
+  external Pointer<Utf8> vocoder;
+  external Pointer<Utf8> lexicon;
+  external Pointer<Utf8> tokens;
+  external Pointer<Utf8> dataDir;
+
+ @Float()
+ external double noiseScale;
+
+ @Float()
+ external double lengthScale;
+
+  external Pointer<Utf8> dictDir;
+}
+
+final class SherpaOnnxOfflineTtsKokoroModelConfig extends Struct {
+  external Pointer<Utf8> model;
+  external Pointer<Utf8> voices;
+  external Pointer<Utf8> tokens;
+  external Pointer<Utf8> dataDir;
+
+ @Float()
+ external double lengthScale;
+  external Pointer<Utf8> dictDir;
+  external Pointer<Utf8> lexicon;
+}
+
final class SherpaOnnxOfflineTtsModelConfig extends Struct {
external SherpaOnnxOfflineTtsVitsModelConfig vits;
@Int32()
@@ -80,6 +168,8 @@ final class SherpaOnnxOfflineTtsModelConfig extends Struct {
external int debug;
external Pointer<Utf8> provider;
+ external SherpaOnnxOfflineTtsMatchaModelConfig matcha;
+ external SherpaOnnxOfflineTtsKokoroModelConfig kokoro;
}
final class SherpaOnnxOfflineTtsConfig extends Struct {
@@ -134,6 +224,13 @@ final class SherpaOnnxOfflineWhisperModelConfig extends Struct {
external int tailPaddings;
}
+final class SherpaOnnxOfflineMoonshineModelConfig extends Struct {
+  external Pointer<Utf8> preprocessor;
+  external Pointer<Utf8> encoder;
+  external Pointer<Utf8> uncachedDecoder;
+  external Pointer<Utf8> cachedDecoder;
+}
+
final class SherpaOnnxOfflineTdnnModelConfig extends Struct {
external Pointer<Utf8> model;
}
@@ -176,6 +273,7 @@ final class SherpaOnnxOfflineModelConfig extends Struct {
external Pointer<Utf8> telespeechCtc;
external SherpaOnnxOfflineSenseVoiceModelConfig senseVoice;
+ external SherpaOnnxOfflineMoonshineModelConfig moonshine;
}
final class SherpaOnnxOfflineRecognizerConfig extends Struct {
@@ -341,18 +439,6 @@ final class SherpaOnnxWave extends Struct {
external int numSamples;
}
-final class SherpaOnnxSpeakerEmbeddingExtractorConfig extends Struct {
-  external Pointer<Utf8> model;
-
- @Int32()
- external int numThreads;
-
- @Int32()
- external int debug;
-
-  external Pointer<Utf8> provider;
-}
-
final class SherpaOnnxKeywordSpotterConfig extends Struct {
external SherpaOnnxFeatureConfig feat;
@@ -402,10 +488,101 @@ final class SherpaOnnxSpeakerEmbeddingExtractor extends Opaque {}
final class SherpaOnnxSpeakerEmbeddingManager extends Opaque {}
+final class SherpaOnnxOfflineSpeakerDiarization extends Opaque {}
+
+final class SherpaOnnxOfflineSpeakerDiarizationResult extends Opaque {}
+
+typedef SherpaOnnxCreateOfflineSpeakerDiarizationNative
+    = Pointer<SherpaOnnxOfflineSpeakerDiarization> Function(
+        Pointer<SherpaOnnxOfflineSpeakerDiarizationConfig>);
+
+typedef SherpaOnnxCreateOfflineSpeakerDiarization
+ = SherpaOnnxCreateOfflineSpeakerDiarizationNative;
+
+typedef SherpaOnnxDestroyOfflineSpeakerDiarizationNative = Void Function(
+    Pointer<SherpaOnnxOfflineSpeakerDiarization>);
+
+typedef SherpaOnnxDestroyOfflineSpeakerDiarization = void Function(
+    Pointer<SherpaOnnxOfflineSpeakerDiarization>);
+
typedef SherpaOnnxCreateOfflinePunctuationNative
    = Pointer<SherpaOnnxOfflinePunctuation> Function(
        Pointer<SherpaOnnxOfflinePunctuationConfig>);
+typedef SherpaOnnxOfflineSpeakerDiarizationGetSampleRateNative = Int32 Function(
+    Pointer<SherpaOnnxOfflineSpeakerDiarization>);
+
+typedef SherpaOnnxOfflineSpeakerDiarizationGetSampleRate = int Function(
+    Pointer<SherpaOnnxOfflineSpeakerDiarization>);
+
+typedef SherpaOnnxOfflineSpeakerDiarizationSetConfigNative = Void Function(
+    Pointer<SherpaOnnxOfflineSpeakerDiarization>,
+    Pointer<SherpaOnnxOfflineSpeakerDiarizationConfig>);
+
+typedef SherpaOnnxOfflineSpeakerDiarizationResultGetNumSpeakersNative = Int32
+    Function(Pointer<SherpaOnnxOfflineSpeakerDiarizationResult>);
+
+typedef SherpaOnnxOfflineSpeakerDiarizationResultGetNumSpeakers = int Function(
+    Pointer<SherpaOnnxOfflineSpeakerDiarizationResult>);
+
+typedef SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegmentsNative = Int32
+    Function(Pointer<SherpaOnnxOfflineSpeakerDiarizationResult>);
+
+typedef SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments = int Function(
+    Pointer<SherpaOnnxOfflineSpeakerDiarizationResult>);
+
+typedef SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTimeNative
+    = Pointer<SherpaOnnxOfflineSpeakerDiarizationSegment> Function(
+        Pointer<SherpaOnnxOfflineSpeakerDiarizationResult>);
+
+typedef SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime
+ = SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTimeNative;
+
+typedef SherpaOnnxOfflineSpeakerDiarizationDestroySegmentNative = Void Function(
+    Pointer<SherpaOnnxOfflineSpeakerDiarizationSegment>);
+
+typedef SherpaOnnxOfflineSpeakerDiarizationDestroySegment = void Function(
+    Pointer<SherpaOnnxOfflineSpeakerDiarizationSegment>);
+
+typedef SherpaOnnxOfflineSpeakerDiarizationProcessNative
+    = Pointer<SherpaOnnxOfflineSpeakerDiarizationResult> Function(
+        Pointer<SherpaOnnxOfflineSpeakerDiarization>, Pointer<Float>, Int32);
+
+typedef SherpaOnnxOfflineSpeakerDiarizationProcess
+    = Pointer<SherpaOnnxOfflineSpeakerDiarizationResult> Function(
+        Pointer<SherpaOnnxOfflineSpeakerDiarization>, Pointer<Float>, int);
+
+typedef SherpaOnnxOfflineSpeakerDiarizationProgressCallbackNoArgNative = Int32
+ Function(Int32, Int32);
+
+typedef SherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArgNative
+    = Pointer<SherpaOnnxOfflineSpeakerDiarizationResult> Function(
+        Pointer<SherpaOnnxOfflineSpeakerDiarization>,
+        Pointer<Float>,
+ Int32,
+ Pointer<
+ NativeFunction<
+ SherpaOnnxOfflineSpeakerDiarizationProgressCallbackNoArgNative>>);
+
+typedef SherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg
+    = Pointer<SherpaOnnxOfflineSpeakerDiarizationResult> Function(
+        Pointer<SherpaOnnxOfflineSpeakerDiarization>,
+        Pointer<Float>,
+ int,
+ Pointer<
+ NativeFunction<
+ SherpaOnnxOfflineSpeakerDiarizationProgressCallbackNoArgNative>>);
+
+typedef SherpaOnnxOfflineSpeakerDiarizationDestroyResultNative = Void Function(
+    Pointer<SherpaOnnxOfflineSpeakerDiarizationResult>);
+
+typedef SherpaOnnxOfflineSpeakerDiarizationDestroyResult = void Function(
+    Pointer<SherpaOnnxOfflineSpeakerDiarizationResult>);
+
+typedef SherpaOnnxOfflineSpeakerDiarizationSetConfig = void Function(
+    Pointer<SherpaOnnxOfflineSpeakerDiarization>,
+    Pointer<SherpaOnnxOfflineSpeakerDiarizationConfig>);
+
typedef SherpaOnnxCreateOfflinePunctuation
= SherpaOnnxCreateOfflinePunctuationNative;
@@ -492,6 +669,12 @@ typedef DecodeKeywordStreamNative = Void Function(
typedef DecodeKeywordStream = void Function(
Pointer<SherpaOnnxKeywordSpotter>, Pointer<SherpaOnnxOnlineStream>);
+typedef ResetKeywordStreamNative = Void Function(
+    Pointer<SherpaOnnxKeywordSpotter>, Pointer<SherpaOnnxOnlineStream>);
+
+typedef ResetKeywordStream = void Function(
+    Pointer<SherpaOnnxKeywordSpotter>, Pointer<SherpaOnnxOnlineStream>);
+
typedef GetKeywordResultAsJsonNative = Pointer<Utf8> Function(
    Pointer<SherpaOnnxKeywordSpotter>, Pointer<SherpaOnnxOnlineStream>);
@@ -940,6 +1123,29 @@ typedef SherpaOnnxFreeWaveNative = Void Function(Pointer<SherpaOnnxWave>);
typedef SherpaOnnxFreeWave = void Function(Pointer<SherpaOnnxWave>);
class SherpaOnnxBindings {
+ static SherpaOnnxCreateOfflineSpeakerDiarization?
+ sherpaOnnxCreateOfflineSpeakerDiarization;
+ static SherpaOnnxDestroyOfflineSpeakerDiarization?
+ sherpaOnnxDestroyOfflineSpeakerDiarization;
+ static SherpaOnnxOfflineSpeakerDiarizationGetSampleRate?
+ sherpaOnnxOfflineSpeakerDiarizationGetSampleRate;
+ static SherpaOnnxOfflineSpeakerDiarizationSetConfig?
+ sherpaOnnxOfflineSpeakerDiarizationSetConfig;
+ static SherpaOnnxOfflineSpeakerDiarizationResultGetNumSpeakers?
+ sherpaOnnxOfflineSpeakerDiarizationResultGetNumSpeakers;
+ static SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments?
+ sherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments;
+ static SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime?
+ sherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime;
+ static SherpaOnnxOfflineSpeakerDiarizationDestroySegment?
+ sherpaOnnxOfflineSpeakerDiarizationDestroySegment;
+ static SherpaOnnxOfflineSpeakerDiarizationProcess?
+ sherpaOnnxOfflineSpeakerDiarizationProcess;
+ static SherpaOnnxOfflineSpeakerDiarizationDestroyResult?
+ sherpaOnnxOfflineSpeakerDiarizationDestroyResult;
+ static SherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg?
+ sherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg;
+
static SherpaOnnxCreateOfflinePunctuation? sherpaOnnxCreateOfflinePunctuation;
static SherpaOnnxDestroyOfflinePunctuation?
sherpaOnnxDestroyOfflinePunctuation;
@@ -959,6 +1165,7 @@ class SherpaOnnxBindings {
static CreateKeywordStreamWithKeywords? createKeywordStreamWithKeywords;
static IsKeywordStreamReady? isKeywordStreamReady;
static DecodeKeywordStream? decodeKeywordStream;
+ static ResetKeywordStream? resetKeywordStream;
static GetKeywordResultAsJson? getKeywordResultAsJson;
static FreeKeywordResultJson? freeKeywordResultJson;
@@ -1107,6 +1314,83 @@ class SherpaOnnxBindings {
static SherpaOnnxFreeWave? freeWave;
static void init(DynamicLibrary dynamicLibrary) {
+ sherpaOnnxCreateOfflineSpeakerDiarization ??= dynamicLibrary
+ .lookup<
+ NativeFunction<
+ SherpaOnnxCreateOfflineSpeakerDiarizationNative>>(
+ 'SherpaOnnxCreateOfflineSpeakerDiarization')
+ .asFunction();
+
+ sherpaOnnxDestroyOfflineSpeakerDiarization ??= dynamicLibrary
+ .lookup<
+ NativeFunction<
+ SherpaOnnxDestroyOfflineSpeakerDiarizationNative>>(
+ 'SherpaOnnxDestroyOfflineSpeakerDiarization')
+ .asFunction();
+
+ sherpaOnnxOfflineSpeakerDiarizationGetSampleRate ??= dynamicLibrary
+ .lookup<
+ NativeFunction<
+ SherpaOnnxOfflineSpeakerDiarizationGetSampleRateNative>>(
+ 'SherpaOnnxOfflineSpeakerDiarizationGetSampleRate')
+ .asFunction();
+
+ sherpaOnnxOfflineSpeakerDiarizationSetConfig ??= dynamicLibrary
+ .lookup<
+ NativeFunction<
+ SherpaOnnxOfflineSpeakerDiarizationSetConfigNative>>(
+ 'SherpaOnnxOfflineSpeakerDiarizationSetConfig')
+ .asFunction();
+
+ sherpaOnnxOfflineSpeakerDiarizationResultGetNumSpeakers ??= dynamicLibrary
+ .lookup<
+ NativeFunction<
+ SherpaOnnxOfflineSpeakerDiarizationResultGetNumSpeakersNative>>(
+ 'SherpaOnnxOfflineSpeakerDiarizationResultGetNumSpeakers')
+ .asFunction();
+
+ sherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments ??= dynamicLibrary
+ .lookup<
+ NativeFunction<
+ SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegmentsNative>>(
+ 'SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments')
+ .asFunction();
+
+ sherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime ??= dynamicLibrary
+ .lookup<
+ NativeFunction<
+ SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTimeNative>>(
+ 'SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime')
+ .asFunction();
+
+ sherpaOnnxOfflineSpeakerDiarizationDestroySegment ??= dynamicLibrary
+ .lookup<
+ NativeFunction<
+ SherpaOnnxOfflineSpeakerDiarizationDestroySegmentNative>>(
+ 'SherpaOnnxOfflineSpeakerDiarizationDestroySegment')
+ .asFunction();
+
+ sherpaOnnxOfflineSpeakerDiarizationProcess ??= dynamicLibrary
+ .lookup<
+ NativeFunction<
+ SherpaOnnxOfflineSpeakerDiarizationProcessNative>>(
+ 'SherpaOnnxOfflineSpeakerDiarizationProcess')
+ .asFunction();
+
+ sherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg ??= dynamicLibrary
+ .lookup<
+ NativeFunction<
+ SherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArgNative>>(
+ 'SherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg')
+ .asFunction();
+
+ sherpaOnnxOfflineSpeakerDiarizationDestroyResult ??= dynamicLibrary
+ .lookup<
+ NativeFunction<
+ SherpaOnnxOfflineSpeakerDiarizationDestroyResultNative>>(
+ 'SherpaOnnxOfflineSpeakerDiarizationDestroyResult')
+ .asFunction();
+
sherpaOnnxCreateOfflinePunctuation ??= dynamicLibrary
.lookup<NativeFunction<SherpaOnnxCreateOfflinePunctuationNative>>(
'SherpaOnnxCreateOfflinePunctuation')
@@ -1184,6 +1468,11 @@ class SherpaOnnxBindings {
'SherpaOnnxDecodeKeywordStream')
.asFunction();
+ resetKeywordStream ??= dynamicLibrary
+        .lookup<NativeFunction<ResetKeywordStreamNative>>(
+ 'SherpaOnnxResetKeywordStream')
+ .asFunction();
+
getKeywordResultAsJson ??= dynamicLibrary
.lookup<NativeFunction<GetKeywordResultAsJsonNative>>(
'SherpaOnnxGetKeywordResultAsJson')
diff --git a/flutter/sherpa_onnx/lib/src/speaker_identification.dart b/flutter/sherpa_onnx/lib/src/speaker_identification.dart
index 5c2e10744a..8b27dbc69b 100644
--- a/flutter/sherpa_onnx/lib/src/speaker_identification.dart
+++ b/flutter/sherpa_onnx/lib/src/speaker_identification.dart
@@ -25,6 +25,8 @@ class SpeakerEmbeddingExtractorConfig {
}
class SpeakerEmbeddingExtractor {
+ SpeakerEmbeddingExtractor.fromPtr({required this.ptr, required this.dim});
+
SpeakerEmbeddingExtractor._({required this.ptr, required this.dim});
/// The user is responsible to call the SpeakerEmbeddingExtractor.free()
@@ -101,6 +103,8 @@ class SpeakerEmbeddingExtractor {
}
class SpeakerEmbeddingManager {
+ SpeakerEmbeddingManager.fromPtr({required this.ptr, required this.dim});
+
SpeakerEmbeddingManager._({required this.ptr, required this.dim});
// The user has to use SpeakerEmbeddingManager.free() to avoid memory leak
diff --git a/flutter/sherpa_onnx/lib/src/tts.dart b/flutter/sherpa_onnx/lib/src/tts.dart
index f779188b72..e03126d0ec 100644
--- a/flutter/sherpa_onnx/lib/src/tts.dart
+++ b/flutter/sherpa_onnx/lib/src/tts.dart
@@ -8,9 +8,9 @@ import './sherpa_onnx_bindings.dart';
class OfflineTtsVitsModelConfig {
const OfflineTtsVitsModelConfig({
- required this.model,
+ this.model = '',
this.lexicon = '',
- required this.tokens,
+ this.tokens = '',
this.dataDir = '',
this.noiseScale = 0.667,
this.noiseScaleW = 0.8,
@@ -33,9 +33,63 @@ class OfflineTtsVitsModelConfig {
final String dictDir;
}
+class OfflineTtsMatchaModelConfig {
+ const OfflineTtsMatchaModelConfig({
+ this.acousticModel = '',
+ this.vocoder = '',
+ this.lexicon = '',
+ this.tokens = '',
+ this.dataDir = '',
+ this.noiseScale = 0.667,
+ this.lengthScale = 1.0,
+ this.dictDir = '',
+ });
+
+ @override
+ String toString() {
+ return 'OfflineTtsMatchaModelConfig(acousticModel: $acousticModel, vocoder: $vocoder, lexicon: $lexicon, tokens: $tokens, dataDir: $dataDir, noiseScale: $noiseScale, lengthScale: $lengthScale, dictDir: $dictDir)';
+ }
+
+ final String acousticModel;
+ final String vocoder;
+ final String lexicon;
+ final String tokens;
+ final String dataDir;
+ final double noiseScale;
+ final double lengthScale;
+ final String dictDir;
+}
+
+class OfflineTtsKokoroModelConfig {
+ const OfflineTtsKokoroModelConfig({
+ this.model = '',
+ this.voices = '',
+ this.tokens = '',
+ this.dataDir = '',
+ this.lengthScale = 1.0,
+ this.dictDir = '',
+ this.lexicon = '',
+ });
+
+ @override
+ String toString() {
+ return 'OfflineTtsKokoroModelConfig(model: $model, voices: $voices, tokens: $tokens, dataDir: $dataDir, lengthScale: $lengthScale, dictDir: $dictDir, lexicon: $lexicon)';
+ }
+
+ final String model;
+ final String voices;
+ final String tokens;
+ final String dataDir;
+ final double lengthScale;
+ final String dictDir;
+ final String lexicon;
+}
+
class OfflineTtsModelConfig {
const OfflineTtsModelConfig({
- required this.vits,
+ this.vits = const OfflineTtsVitsModelConfig(),
+ this.matcha = const OfflineTtsMatchaModelConfig(),
+ this.kokoro = const OfflineTtsKokoroModelConfig(),
this.numThreads = 1,
this.debug = true,
this.provider = 'cpu',
@@ -43,10 +97,12 @@ class OfflineTtsModelConfig {
@override
String toString() {
- return 'OfflineTtsModelConfig(vits: $vits, numThreads: $numThreads, debug: $debug, provider: $provider)';
+ return 'OfflineTtsModelConfig(vits: $vits, matcha: $matcha, kokoro: $kokoro, numThreads: $numThreads, debug: $debug, provider: $provider)';
}
final OfflineTtsVitsModelConfig vits;
+ final OfflineTtsMatchaModelConfig matcha;
+ final OfflineTtsKokoroModelConfig kokoro;
final int numThreads;
final bool debug;
final String provider;
@@ -82,6 +138,8 @@ class GeneratedAudio {
}
class OfflineTts {
+ OfflineTts.fromPtr({required this.ptr, required this.config});
+
OfflineTts._({required this.ptr, required this.config});
/// The user is responsible to call the OfflineTts.free()
@@ -97,6 +155,24 @@ class OfflineTts {
c.ref.model.vits.lengthScale = config.model.vits.lengthScale;
c.ref.model.vits.dictDir = config.model.vits.dictDir.toNativeUtf8();
+ c.ref.model.matcha.acousticModel =
+ config.model.matcha.acousticModel.toNativeUtf8();
+ c.ref.model.matcha.vocoder = config.model.matcha.vocoder.toNativeUtf8();
+ c.ref.model.matcha.lexicon = config.model.matcha.lexicon.toNativeUtf8();
+ c.ref.model.matcha.tokens = config.model.matcha.tokens.toNativeUtf8();
+ c.ref.model.matcha.dataDir = config.model.matcha.dataDir.toNativeUtf8();
+ c.ref.model.matcha.noiseScale = config.model.matcha.noiseScale;
+ c.ref.model.matcha.lengthScale = config.model.matcha.lengthScale;
+ c.ref.model.matcha.dictDir = config.model.matcha.dictDir.toNativeUtf8();
+
+ c.ref.model.kokoro.model = config.model.kokoro.model.toNativeUtf8();
+ c.ref.model.kokoro.voices = config.model.kokoro.voices.toNativeUtf8();
+ c.ref.model.kokoro.tokens = config.model.kokoro.tokens.toNativeUtf8();
+ c.ref.model.kokoro.dataDir = config.model.kokoro.dataDir.toNativeUtf8();
+ c.ref.model.kokoro.lengthScale = config.model.kokoro.lengthScale;
+ c.ref.model.kokoro.dictDir = config.model.kokoro.dictDir.toNativeUtf8();
+ c.ref.model.kokoro.lexicon = config.model.kokoro.lexicon.toNativeUtf8();
+
c.ref.model.numThreads = config.model.numThreads;
c.ref.model.debug = config.model.debug ? 1 : 0;
c.ref.model.provider = config.model.provider.toNativeUtf8();
@@ -110,6 +186,21 @@ class OfflineTts {
calloc.free(c.ref.ruleFars);
calloc.free(c.ref.ruleFsts);
calloc.free(c.ref.model.provider);
+
+ calloc.free(c.ref.model.kokoro.lexicon);
+ calloc.free(c.ref.model.kokoro.dictDir);
+ calloc.free(c.ref.model.kokoro.dataDir);
+ calloc.free(c.ref.model.kokoro.tokens);
+ calloc.free(c.ref.model.kokoro.voices);
+ calloc.free(c.ref.model.kokoro.model);
+
+ calloc.free(c.ref.model.matcha.dictDir);
+ calloc.free(c.ref.model.matcha.dataDir);
+ calloc.free(c.ref.model.matcha.tokens);
+ calloc.free(c.ref.model.matcha.lexicon);
+ calloc.free(c.ref.model.matcha.vocoder);
+ calloc.free(c.ref.model.matcha.acousticModel);
+
calloc.free(c.ref.model.vits.dictDir);
calloc.free(c.ref.model.vits.dataDir);
calloc.free(c.ref.model.vits.tokens);
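
For reference, a hedged Dart sketch (not part of the diff) of how the new kokoro model config might be used. The file names follow run-kokoro-en.sh added later in this patch; OfflineTtsConfig, generate(), and writeWave() are assumed from the existing TTS API (writeWave() is already used by the Flutter TTS example near the top of this patch):

import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

void main() {
  final tts = sherpa_onnx.OfflineTts(
    sherpa_onnx.OfflineTtsConfig(
      model: sherpa_onnx.OfflineTtsModelConfig(
        kokoro: sherpa_onnx.OfflineTtsKokoroModelConfig(
          model: './kokoro-en-v0_19/model.onnx',
          voices: './kokoro-en-v0_19/voices.bin',
          tokens: './kokoro-en-v0_19/tokens.txt',
          dataDir: './kokoro-en-v0_19/espeak-ng-data',
        ),
      ),
    ),
  );

  // Generate speech for speaker 0 at normal speed and save it to a wave file.
  final audio = tts.generate(text: 'Hello from Kokoro.', sid: 0, speed: 1.0);
  sherpa_onnx.writeWave(
    filename: './test-kokoro-en.wav',
    samples: audio.samples,
    sampleRate: audio.sampleRate,
  );

  tts.free();
}
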
diff --git a/flutter/sherpa_onnx/lib/src/vad.dart b/flutter/sherpa_onnx/lib/src/vad.dart
index 10fac5a45a..7db0e55e05 100644
--- a/flutter/sherpa_onnx/lib/src/vad.dart
+++ b/flutter/sherpa_onnx/lib/src/vad.dart
@@ -54,6 +54,8 @@ class SpeechSegment {
}
class CircularBuffer {
+ CircularBuffer.fromPtr({required this.ptr});
+
CircularBuffer._({required this.ptr});
/// The user has to invoke CircularBuffer.free() on the returned instance
@@ -115,6 +117,8 @@ class CircularBuffer {
}
class VoiceActivityDetector {
+ VoiceActivityDetector.fromPtr({required this.ptr, required this.config});
+
VoiceActivityDetector._({required this.ptr, required this.config});
// The user has to invoke VoiceActivityDetector.free() to avoid memory leak.
diff --git a/flutter/sherpa_onnx/pubspec.yaml b/flutter/sherpa_onnx/pubspec.yaml
index 5b693ef0bf..b0b4c94b51 100644
--- a/flutter/sherpa_onnx/pubspec.yaml
+++ b/flutter/sherpa_onnx/pubspec.yaml
@@ -1,8 +1,8 @@
name: sherpa_onnx
description: >
- Speech recognition, speech synthesis, and speaker recognition using next-gen Kaldi
- with onnxruntime without Internet connection.
+ Speech recognition, speech synthesis, speaker diarization, and speaker recognition
+ using next-gen Kaldi with onnxruntime without Internet connection.
repository: https://github.com/k2-fsa/sherpa-onnx/tree/master/flutter
@@ -12,12 +12,12 @@ documentation: https://k2-fsa.github.io/sherpa/onnx/
topics:
- speech-recognition
- speech-synthesis
- - speaker-identification
+ - speaker-diarization
- audio-tagging
- voice-activity-detection
# remember to change the version in ../sherpa_onnx_macos/macos/sherpa_onnx_macos.podspec
-version: 1.10.27
+version: 1.10.42
homepage: https://github.com/k2-fsa/sherpa-onnx
@@ -30,23 +30,23 @@ dependencies:
flutter:
sdk: flutter
- sherpa_onnx_android: ^1.10.27
+ sherpa_onnx_android: ^1.10.42
# sherpa_onnx_android:
# path: ../sherpa_onnx_android
- sherpa_onnx_macos: ^1.10.27
+ sherpa_onnx_macos: ^1.10.42
# sherpa_onnx_macos:
# path: ../sherpa_onnx_macos
- sherpa_onnx_linux: ^1.10.27
+ sherpa_onnx_linux: ^1.10.42
# sherpa_onnx_linux:
# path: ../sherpa_onnx_linux
- #
- sherpa_onnx_windows: ^1.10.27
+
+ sherpa_onnx_windows: ^1.10.42
# sherpa_onnx_windows:
# path: ../sherpa_onnx_windows
- sherpa_onnx_ios: ^1.10.27
+ sherpa_onnx_ios: ^1.10.42
# sherpa_onnx_ios:
# path: ../sherpa_onnx_ios
diff --git a/flutter/sherpa_onnx_ios/README.md b/flutter/sherpa_onnx_ios/README.md
index 1334c25649..974250c30d 100644
--- a/flutter/sherpa_onnx_ios/README.md
+++ b/flutter/sherpa_onnx_ios/README.md
@@ -1,4 +1,4 @@
-# sherpa_onnx_linux
+# sherpa_onnx_ios
This is a sub project of [sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx).
diff --git a/flutter/sherpa_onnx_ios/ios/sherpa_onnx_ios.podspec b/flutter/sherpa_onnx_ios/ios/sherpa_onnx_ios.podspec
index ab4b74b97c..e4fa2e09ac 100644
--- a/flutter/sherpa_onnx_ios/ios/sherpa_onnx_ios.podspec
+++ b/flutter/sherpa_onnx_ios/ios/sherpa_onnx_ios.podspec
@@ -7,7 +7,7 @@
# https://groups.google.com/g/dart-ffi/c/nUATMBy7r0c
Pod::Spec.new do |s|
s.name = 'sherpa_onnx_ios'
- s.version = '1.10.27'
+ s.version = '1.10.42'
s.summary = 'A new Flutter FFI plugin project.'
s.description = <<-DESC
A new Flutter FFI plugin project.
@@ -22,8 +22,9 @@ A new Flutter FFI plugin project.
# `../src/*` so that the C sources can be shared among all target platforms.
s.source = { :path => '.' }
s.dependency 'Flutter'
- s.platform = :ios, '12.0'
- s.ios.vendored_libraries = '*.dylib', '*.a'
+ s.platform = :ios, '13.0'
+ s.preserve_paths = 'sherpa_onnx.xcframework/**/*'
+ s.vendored_frameworks = 'sherpa_onnx.xcframework'
# Flutter.framework does not contain a i386 slice.
s.pod_target_xcconfig = {
diff --git a/flutter/sherpa_onnx_macos/README.md b/flutter/sherpa_onnx_macos/README.md
index 1334c25649..171c76752b 100644
--- a/flutter/sherpa_onnx_macos/README.md
+++ b/flutter/sherpa_onnx_macos/README.md
@@ -1,4 +1,4 @@
-# sherpa_onnx_linux
+# sherpa_onnx_macos
This is a sub project of [sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx).
diff --git a/flutter/sherpa_onnx_macos/macos/sherpa_onnx_macos.podspec b/flutter/sherpa_onnx_macos/macos/sherpa_onnx_macos.podspec
index 880e0217ea..ae701baedb 100644
--- a/flutter/sherpa_onnx_macos/macos/sherpa_onnx_macos.podspec
+++ b/flutter/sherpa_onnx_macos/macos/sherpa_onnx_macos.podspec
@@ -4,7 +4,7 @@
#
Pod::Spec.new do |s|
s.name = 'sherpa_onnx_macos'
- s.version = '1.10.27'
+ s.version = '1.10.42'
s.summary = 'sherpa-onnx Flutter FFI plugin project.'
s.description = <<-DESC
sherpa-onnx Flutter FFI plugin project.
diff --git a/flutter/sherpa_onnx_windows/README.md b/flutter/sherpa_onnx_windows/README.md
index 1334c25649..71c9109827 100644
--- a/flutter/sherpa_onnx_windows/README.md
+++ b/flutter/sherpa_onnx_windows/README.md
@@ -1,4 +1,4 @@
-# sherpa_onnx_linux
+# sherpa_onnx_windows
This is a sub project of [sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx).
diff --git a/go-api-examples/README.md b/go-api-examples/README.md
index 91f2c76e19..e16dab690e 100644
--- a/go-api-examples/README.md
+++ b/go-api-examples/README.md
@@ -6,28 +6,41 @@ Please refer to the documentation
https://k2-fsa.github.io/sherpa/onnx/go-api/index.html
for details.
+- [./add-punctuation](./add-punctuation) It shows how to use
+  a punctuation model to add punctuation to text
+
- [./non-streaming-decode-files](./non-streaming-decode-files) It shows how to use
a non-streaming ASR model to decode files
+- [./non-streaming-speaker-diarization](./non-streaming-speaker-diarization) It shows how to use
+ a speaker segmentation model and a speaker embedding model for speaker diarization.
+
- [./non-streaming-tts](./non-streaming-tts) It shows how to use a non-streaming TTS
model to convert text to speech
- [./real-time-speech-recognition-from-microphone](./real-time-speech-recognition-from-microphone)
It shows how to use a streaming ASR model to recognize speech from a microphone in real-time
+- [./speaker-identification](./speaker-identification) It shows how to use a speaker
+ embedding model for speaker identification.
+
+- [./streaming-decode-files](./streaming-decode-files) It shows how to use a streaming
+ model for streaming speech recognition
+
+- [./streaming-hlg-decoding](./streaming-hlg-decoding) It shows how to use a streaming
+ model for streaming speech recognition with HLG decoding
+
- [./vad](./vad) It shows how to use silero VAD with Golang.
-- [./vad-asr-whisper](./vad-asr-whisper) It shows how to use silero VAD + Whisper
+- [./vad-asr-paraformer](./vad-asr-paraformer) It shows how to use silero VAD + Paraformer
for speech recognition.
-- [./vad-asr-paraformer](./vad-asr-paraformer) It shows how to use silero VAD + Paraformer
+- [./vad-asr-whisper](./vad-asr-whisper) It shows how to use silero VAD + Whisper
for speech recognition.
+
+- [./vad-speaker-identification](./vad-speaker-identification) It shows how to use Go API for VAD + speaker identification.
- [./vad-spoken-language-identification](./vad-spoken-language-identification) It shows how to use silero VAD + Whisper
for spoken language identification.
-- [./speaker-identification](./speaker-identification) It shows how to use Go API for speaker identification.
-
-- [./vad-speaker-identification](./vad-speaker-identification) It shows how to use Go API for VAD + speaker identification.
-
[sherpa-onnx]: https://github.com/k2-fsa/sherpa-onnx
diff --git a/go-api-examples/add-punctuation/go.mod b/go-api-examples/add-punctuation/go.mod
new file mode 100644
index 0000000000..ec6d75805b
--- /dev/null
+++ b/go-api-examples/add-punctuation/go.mod
@@ -0,0 +1,3 @@
+module add-punctuation
+
+go 1.12
diff --git a/go-api-examples/add-punctuation/main.go b/go-api-examples/add-punctuation/main.go
new file mode 100644
index 0000000000..055748ea81
--- /dev/null
+++ b/go-api-examples/add-punctuation/main.go
@@ -0,0 +1,31 @@
+package main
+
+import (
+ sherpa "github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx"
+ "log"
+)
+
+func main() {
+ log.SetFlags(log.LstdFlags | log.Lmicroseconds)
+
+ config := sherpa.OfflinePunctuationConfig{}
+ config.Model.CtTransformer = "./sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12/model.onnx"
+ config.Model.NumThreads = 1
+ config.Model.Provider = "cpu"
+
+ punct := sherpa.NewOfflinePunctuation(&config)
+ defer sherpa.DeleteOfflinePunc(punct)
+
+ textArray := []string{
+ "这是一个测试你好吗How are you我很好thank you are you ok谢谢你",
+ "我们都是木头人不会说话不会动",
+ "The African blogosphere is rapidly expanding bringing more voices online in the form of commentaries opinions analyses rants and poetry",
+ }
+ log.Println("----------")
+ for _, text := range textArray {
+ newText := punct.AddPunct(text)
+ log.Printf("Input text: %v", text)
+ log.Printf("Output text: %v", newText)
+ log.Println("----------")
+ }
+}
diff --git a/go-api-examples/add-punctuation/run.sh b/go-api-examples/add-punctuation/run.sh
new file mode 100755
index 0000000000..6d43b84f09
--- /dev/null
+++ b/go-api-examples/add-punctuation/run.sh
@@ -0,0 +1,14 @@
+#!/usr/bin/env bash
+
+set -ex
+
+if [ ! -d ./sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12 ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
+ tar xvf sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
+ rm sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
+fi
+
+go mod tidy
+go build
+
+./add-punctuation
diff --git a/go-api-examples/keyword-spotting-from-file/go.mod b/go-api-examples/keyword-spotting-from-file/go.mod
new file mode 100644
index 0000000000..dbd349a5ea
--- /dev/null
+++ b/go-api-examples/keyword-spotting-from-file/go.mod
@@ -0,0 +1,4 @@
+module keyword-spotting-from-file
+
+go 1.12
+
diff --git a/go-api-examples/keyword-spotting-from-file/main.go b/go-api-examples/keyword-spotting-from-file/main.go
new file mode 100644
index 0000000000..697f9f4d77
--- /dev/null
+++ b/go-api-examples/keyword-spotting-from-file/main.go
@@ -0,0 +1,81 @@
+package main
+
+import (
+ sherpa "github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx"
+ "log"
+)
+
+func main() {
+ log.SetFlags(log.LstdFlags | log.Lmicroseconds)
+
+ config := sherpa.KeywordSpotterConfig{}
+
+ // Please download the models from
+ // https://github.com/k2-fsa/sherpa-onnx/releases/tag/kws-models
+
+ config.ModelConfig.Transducer.Encoder = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/encoder-epoch-12-avg-2-chunk-16-left-64.onnx"
+ config.ModelConfig.Transducer.Decoder = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/decoder-epoch-12-avg-2-chunk-16-left-64.onnx"
+ config.ModelConfig.Transducer.Joiner = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/joiner-epoch-12-avg-2-chunk-16-left-64.onnx"
+ config.ModelConfig.Tokens = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/tokens.txt"
+ config.KeywordsFile = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/test_keywords.txt"
+ config.ModelConfig.NumThreads = 1
+ config.ModelConfig.Debug = 1
+
+ spotter := sherpa.NewKeywordSpotter(&config)
+ defer sherpa.DeleteKeywordSpotter(spotter)
+
+ wave_filename := "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/3.wav"
+
+ wave := sherpa.ReadWave(wave_filename)
+ if wave == nil {
+ log.Printf("Failed to read %v\n", wave_filename)
+ return
+ }
+
+ log.Println("----------Use pre-defined keywords----------")
+
+ stream := sherpa.NewKeywordStream(spotter)
+ defer sherpa.DeleteOnlineStream(stream)
+
+ stream.AcceptWaveform(wave.SampleRate, wave.Samples)
+
+ for spotter.IsReady(stream) {
+ spotter.Decode(stream)
+ result := spotter.GetResult(stream)
+ if result.Keyword != "" {
+ // You have to reset the stream right after detecting a keyword
+ spotter.Reset(stream)
+ log.Printf("Detected %v\n", result.Keyword)
+ }
+ }
+
+ log.Println("----------Use pre-defined keywords + add a new keyword----------")
+
+ stream2 := sherpa.NewKeywordStreamWithKeywords(spotter, "y ǎn y uán @演员")
+ defer sherpa.DeleteOnlineStream(stream2)
+
+ stream2.AcceptWaveform(wave.SampleRate, wave.Samples)
+
+ for spotter.IsReady(stream2) {
+ spotter.Decode(stream2)
+ result := spotter.GetResult(stream2)
+ if result.Keyword != "" {
+ log.Printf("Detected %v\n", result.Keyword)
+ }
+ }
+
+ log.Println("----------Use pre-defined keywords + add 2 new keywords----------")
+
+ stream3 := sherpa.NewKeywordStreamWithKeywords(spotter, "y ǎn y uán @演员/zh ī m íng @知名")
+ defer sherpa.DeleteOnlineStream(stream3)
+
+ stream3.AcceptWaveform(wave.SampleRate, wave.Samples)
+
+ for spotter.IsReady(stream3) {
+ spotter.Decode(stream3)
+ result := spotter.GetResult(stream3)
+ if result.Keyword != "" {
+ log.Printf("Detected %v\n", result.Keyword)
+ }
+ }
+}
diff --git a/go-api-examples/keyword-spotting-from-file/run.sh b/go-api-examples/keyword-spotting-from-file/run.sh
new file mode 100755
index 0000000000..89411f47a4
--- /dev/null
+++ b/go-api-examples/keyword-spotting-from-file/run.sh
@@ -0,0 +1,13 @@
+#!/usr/bin/env bash
+
+set -ex
+
+if [ ! -f ./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/tokens.txt ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
+ tar xvf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
+ rm sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
+fi
+
+go mod tidy
+go build
+./keyword-spotting-from-file
diff --git a/go-api-examples/non-streaming-decode-files/main.go b/go-api-examples/non-streaming-decode-files/main.go
index 5373dcf29d..92b23dc19b 100644
--- a/go-api-examples/non-streaming-decode-files/main.go
+++ b/go-api-examples/non-streaming-decode-files/main.go
@@ -34,6 +34,11 @@ func main() {
flag.StringVar(&config.ModelConfig.Whisper.Task, "whisper-task", "transcribe", "transcribe or translate")
flag.IntVar(&config.ModelConfig.Whisper.TailPaddings, "whisper-tail-paddings", -1, "tail paddings for whisper")
+ flag.StringVar(&config.ModelConfig.Moonshine.Preprocessor, "moonshine-preprocessor", "", "Path to the moonshine preprocessor model")
+ flag.StringVar(&config.ModelConfig.Moonshine.Encoder, "moonshine-encoder", "", "Path to the moonshine encoder model")
+ flag.StringVar(&config.ModelConfig.Moonshine.UncachedDecoder, "moonshine-uncached-decoder", "", "Path to the moonshine uncached decoder model")
+ flag.StringVar(&config.ModelConfig.Moonshine.CachedDecoder, "moonshine-cached-decoder", "", "Path to the moonshine cached decoder model")
+
flag.StringVar(&config.ModelConfig.Tdnn.Model, "tdnn-model", "", "Path to the tdnn model")
flag.StringVar(&config.ModelConfig.SenseVoice.Model, "sense-voice-model", "", "Path to the SenseVoice model")
@@ -85,12 +90,8 @@ func main() {
log.Println("Emotion: " + result.Emotion)
log.Println("Lang: " + result.Lang)
log.Println("Event: " + result.Event)
- for _, v := range result.Timestamps {
- log.Printf("Timestamp: %+v\n", v)
- }
- for _, v := range result.Tokens {
- log.Println("Token: " + v)
- }
+ log.Printf("Timestamp: %v\n", result.Timestamps)
+ log.Printf("Tokens: %v\n", result.Tokens)
log.Printf("Wave duration: %v seconds", float32(len(samples))/float32(sampleRate))
}
diff --git a/go-api-examples/non-streaming-decode-files/run-moonshine.sh b/go-api-examples/non-streaming-decode-files/run-moonshine.sh
new file mode 100755
index 0000000000..409101e4e3
--- /dev/null
+++ b/go-api-examples/non-streaming-decode-files/run-moonshine.sh
@@ -0,0 +1,21 @@
+#!/usr/bin/env bash
+
+set -ex
+
+if [ ! -f ./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+ tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+ rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+fi
+
+go mod tidy
+go build
+
+./non-streaming-decode-files \
+ --moonshine-preprocessor=./sherpa-onnx-moonshine-tiny-en-int8/preprocess.onnx \
+ --moonshine-encoder=./sherpa-onnx-moonshine-tiny-en-int8/encode.int8.onnx \
+ --moonshine-uncached-decoder=./sherpa-onnx-moonshine-tiny-en-int8/uncached_decode.int8.onnx \
+ --moonshine-cached-decoder=./sherpa-onnx-moonshine-tiny-en-int8/cached_decode.int8.onnx \
+ --tokens=./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt \
+ ./sherpa-onnx-moonshine-tiny-en-int8/test_wavs/0.wav
+
diff --git a/go-api-examples/non-streaming-speaker-diarization/go.mod b/go-api-examples/non-streaming-speaker-diarization/go.mod
new file mode 100644
index 0000000000..39edcecf56
--- /dev/null
+++ b/go-api-examples/non-streaming-speaker-diarization/go.mod
@@ -0,0 +1,3 @@
+module non-streaming-speaker-diarization
+
+go 1.12
diff --git a/go-api-examples/non-streaming-speaker-diarization/main.go b/go-api-examples/non-streaming-speaker-diarization/main.go
new file mode 100644
index 0000000000..7b975bf614
--- /dev/null
+++ b/go-api-examples/non-streaming-speaker-diarization/main.go
@@ -0,0 +1,87 @@
+package main
+
+import (
+ sherpa "github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx"
+ "log"
+)
+
+/*
+Usage:
+
+Step 1: Download a speaker segmentation model
+
+Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-segmentation-models
+for a list of available models. The following is an example
+
+ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+ tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+ rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+
+Step 2: Download a speaker embedding extractor model
+
+Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
+for a list of available models. The following is an example
+
+ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
+
+Step 3. Download test wave files
+
+Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-segmentation-models
+for a list of available test wave files. The following is an example
+
+ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav
+
+Step 4. Run it
+*/
+
+func initSpeakerDiarization() *sherpa.OfflineSpeakerDiarization {
+ config := sherpa.OfflineSpeakerDiarizationConfig{}
+
+ config.Segmentation.Pyannote.Model = "./sherpa-onnx-pyannote-segmentation-3-0/model.onnx"
+ config.Embedding.Model = "./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx"
+
+ // The test wave file contains 4 speakers, so we use 4 here
+ config.Clustering.NumClusters = 4
+
+	// if you don't know the actual number of speakers in the wave file,
+ // then please don't set NumClusters; you need to use
+ //
+ // config.Clustering.Threshold = 0.5
+ //
+
+ // A larger Threshold leads to fewer clusters
+ // A smaller Threshold leads to more clusters
+
+ sd := sherpa.NewOfflineSpeakerDiarization(&config)
+ return sd
+}
+
+func main() {
+ wave_filename := "./0-four-speakers-zh.wav"
+ wave := sherpa.ReadWave(wave_filename)
+ if wave == nil {
+ log.Printf("Failed to read %v", wave_filename)
+ return
+ }
+
+ sd := initSpeakerDiarization()
+ if sd == nil {
+ log.Printf("Please check your config")
+ return
+ }
+
+ defer sherpa.DeleteOfflineSpeakerDiarization(sd)
+
+ if wave.SampleRate != sd.SampleRate() {
+ log.Printf("Expected sample rate: %v, given: %d\n", sd.SampleRate(), wave.SampleRate)
+ return
+ }
+
+ log.Println("Started")
+ segments := sd.Process(wave.Samples)
+ n := len(segments)
+
+ for i := 0; i < n; i++ {
+ log.Printf("%.3f -- %.3f speaker_%02d\n", segments[i].Start, segments[i].End, segments[i].Speaker)
+ }
+}
diff --git a/go-api-examples/non-streaming-speaker-diarization/run.sh b/go-api-examples/non-streaming-speaker-diarization/run.sh
new file mode 100755
index 0000000000..1ebfd4aa1c
--- /dev/null
+++ b/go-api-examples/non-streaming-speaker-diarization/run.sh
@@ -0,0 +1,20 @@
+#!/usr/bin/env bash
+
+
+if [ ! -f ./sherpa-onnx-pyannote-segmentation-3-0/model.onnx ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+ tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+ rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+fi
+
+if [ ! -f ./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
+fi
+
+if [ ! -f ./0-four-speakers-zh.wav ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav
+fi
+
+go mod tidy
+go build
+./non-streaming-speaker-diarization
diff --git a/go-api-examples/non-streaming-tts/main.go b/go-api-examples/non-streaming-tts/main.go
index 0ddeb8fe44..8a5d03a306 100644
--- a/go-api-examples/non-streaming-tts/main.go
+++ b/go-api-examples/non-streaming-tts/main.go
@@ -17,11 +17,30 @@ func main() {
flag.StringVar(&config.Model.Vits.Lexicon, "vits-lexicon", "", "Path to lexicon.txt")
flag.StringVar(&config.Model.Vits.Tokens, "vits-tokens", "", "Path to tokens.txt")
flag.StringVar(&config.Model.Vits.DataDir, "vits-data-dir", "", "Path to espeak-ng-data")
+	flag.StringVar(&config.Model.Vits.DictDir, "vits-dict-dir", "", "Path to dict for jieba")
flag.Float32Var(&config.Model.Vits.NoiseScale, "vits-noise-scale", 0.667, "noise_scale for VITS")
flag.Float32Var(&config.Model.Vits.NoiseScaleW, "vits-noise-scale-w", 0.8, "noise_scale_w for VITS")
flag.Float32Var(&config.Model.Vits.LengthScale, "vits-length-scale", 1.0, "length_scale for VITS. small -> faster in speech speed; large -> slower")
+ flag.StringVar(&config.Model.Matcha.AcousticModel, "matcha-acoustic-model", "", "Path to the matcha acoustic model")
+ flag.StringVar(&config.Model.Matcha.Vocoder, "matcha-vocoder", "", "Path to the matcha vocoder model")
+ flag.StringVar(&config.Model.Matcha.Lexicon, "matcha-lexicon", "", "Path to lexicon.txt")
+ flag.StringVar(&config.Model.Matcha.Tokens, "matcha-tokens", "", "Path to tokens.txt")
+ flag.StringVar(&config.Model.Matcha.DataDir, "matcha-data-dir", "", "Path to espeak-ng-data")
+ flag.StringVar(&config.Model.Matcha.DictDir, "matcha-dict-dir", "", "Path to dict for jieba")
+
+ flag.Float32Var(&config.Model.Matcha.NoiseScale, "matcha-noise-scale", 0.667, "noise_scale for Matcha")
+ flag.Float32Var(&config.Model.Matcha.LengthScale, "matcha-length-scale", 1.0, "length_scale for Matcha. small -> faster in speech speed; large -> slower")
+
+ flag.StringVar(&config.Model.Kokoro.Model, "kokoro-model", "", "Path to the Kokoro ONNX model")
+ flag.StringVar(&config.Model.Kokoro.Voices, "kokoro-voices", "", "Path to voices.bin for Kokoro")
+ flag.StringVar(&config.Model.Kokoro.Tokens, "kokoro-tokens", "", "Path to tokens.txt for Kokoro")
+ flag.StringVar(&config.Model.Kokoro.DataDir, "kokoro-data-dir", "", "Path to espeak-ng-data for Kokoro")
+ flag.StringVar(&config.Model.Kokoro.DictDir, "kokoro-dict-dir", "", "Path to dict for Kokoro")
+ flag.StringVar(&config.Model.Kokoro.Lexicon, "kokoro-lexicon", "", "Path to lexicon files for Kokoro")
+ flag.Float32Var(&config.Model.Kokoro.LengthScale, "kokoro-length-scale", 1.0, "length_scale for Kokoro. small -> faster in speech speed; large -> slower")
+
flag.IntVar(&config.Model.NumThreads, "num-threads", 1, "Number of threads for computing")
flag.IntVar(&config.Model.Debug, "debug", 0, "Whether to show debug message")
flag.StringVar(&config.Model.Provider, "provider", "cpu", "Provider to use")
diff --git a/go-api-examples/non-streaming-tts/run-kokoro-en.sh b/go-api-examples/non-streaming-tts/run-kokoro-en.sh
new file mode 100755
index 0000000000..a7d356d1c2
--- /dev/null
+++ b/go-api-examples/non-streaming-tts/run-kokoro-en.sh
@@ -0,0 +1,21 @@
+#!/usr/bin/env bash
+
+set -ex
+
+if [ ! -f ./kokoro-en-v0_19/model.onnx ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2
+ tar xf kokoro-en-v0_19.tar.bz2
+ rm kokoro-en-v0_19.tar.bz2
+fi
+
+go mod tidy
+go build
+
+./non-streaming-tts \
+ --kokoro-model=./kokoro-en-v0_19/model.onnx \
+ --kokoro-voices=./kokoro-en-v0_19/voices.bin \
+ --kokoro-tokens=./kokoro-en-v0_19/tokens.txt \
+ --kokoro-data-dir=./kokoro-en-v0_19/espeak-ng-data \
+ --debug=1 \
+ --output-filename=./test-kokoro-en.wav \
+ "Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone."
diff --git a/go-api-examples/non-streaming-tts/run-kokoro-zh-en.sh b/go-api-examples/non-streaming-tts/run-kokoro-zh-en.sh
new file mode 100755
index 0000000000..4ed74f90d5
--- /dev/null
+++ b/go-api-examples/non-streaming-tts/run-kokoro-zh-en.sh
@@ -0,0 +1,23 @@
+#!/usr/bin/env bash
+
+set -ex
+
+if [ ! -f ./kokoro-multi-lang-v1_0/model.onnx ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2
+ tar xf kokoro-multi-lang-v1_0.tar.bz2
+ rm kokoro-multi-lang-v1_0.tar.bz2
+fi
+
+go mod tidy
+go build
+
+./non-streaming-tts \
+ --kokoro-model=./kokoro-multi-lang-v1_0/model.onnx \
+ --kokoro-voices=./kokoro-multi-lang-v1_0/voices.bin \
+ --kokoro-tokens=./kokoro-multi-lang-v1_0/tokens.txt \
+ --kokoro-data-dir=./kokoro-multi-lang-v1_0/espeak-ng-data \
+ --kokoro-dict-dir=./kokoro-multi-lang-v1_0/dict \
+ --kokoro-lexicon=./kokoro-multi-lang-v1_0/lexicon-us-en.txt,./kokoro-multi-lang-v1_0/lexicon-zh.txt \
+ --debug=1 \
+ --output-filename=./test-kokoro-zh-en.wav \
+ "中英文语音合成测试。This is generated by next generation Kaldi using Kokoro without Misaki. 你觉得中英文说的如何呢?"
diff --git a/go-api-examples/non-streaming-tts/run-matcha-en.sh b/go-api-examples/non-streaming-tts/run-matcha-en.sh
new file mode 100755
index 0000000000..f0932da56a
--- /dev/null
+++ b/go-api-examples/non-streaming-tts/run-matcha-en.sh
@@ -0,0 +1,31 @@
+#!/usr/bin/env bash
+
+set -ex
+
+# please visit
+# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
+# matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
+# to download more models
+if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
+ tar xf matcha-icefall-en_US-ljspeech.tar.bz2
+ rm matcha-icefall-en_US-ljspeech.tar.bz2
+fi
+
+if [ ! -f ./hifigan_v2.onnx ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
+fi
+
+go mod tidy
+go build
+
+./non-streaming-tts \
+ --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \
+ --matcha-vocoder=./hifigan_v2.onnx \
+ --matcha-tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \
+ --matcha-data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \
+ --debug=1 \
+ --output-filename=./test-matcha-en.wav \
+ "Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone."
+
+
diff --git a/go-api-examples/non-streaming-tts/run-matcha-zh.sh b/go-api-examples/non-streaming-tts/run-matcha-zh.sh
new file mode 100755
index 0000000000..ef4165d042
--- /dev/null
+++ b/go-api-examples/non-streaming-tts/run-matcha-zh.sh
@@ -0,0 +1,31 @@
+#!/usr/bin/env bash
+
+set -ex
+
+# please visit
+# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker
+# to download more models
+if [ ! -f ./matcha-icefall-zh-baker/model-steps-3.onnx ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
+ tar xvf matcha-icefall-zh-baker.tar.bz2
+ rm matcha-icefall-zh-baker.tar.bz2
+fi
+
+if [ ! -f ./hifigan_v2.onnx ]; then
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
+fi
+
+go mod tidy
+go build
+
+./non-streaming-tts \
+ --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \
+ --matcha-vocoder=./hifigan_v2.onnx \
+ --matcha-lexicon=./matcha-icefall-zh-baker/lexicon.txt \
+ --matcha-tokens=./matcha-icefall-zh-baker/tokens.txt \
+ --matcha-dict-dir=./matcha-icefall-zh-baker/dict \
+ --debug=1 \
+ --tts-rule-fsts=./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst \
+ --output-filename=./test-matcha-zh.wav \
+ "某某银行的副行长和一些行政领导表示,他们去过长江和长白山; 经济不断增长。2024年12月31号,拨打110或者18920240511。123456块钱。"
+
diff --git a/go-api-examples/non-streaming-tts/run-vits-piper-en_US-lessac-medium.sh b/go-api-examples/non-streaming-tts/run-vits-piper-en_US-lessac-medium.sh
index 15e4f1dbd8..6f8c98e80a 100755
--- a/go-api-examples/non-streaming-tts/run-vits-piper-en_US-lessac-medium.sh
+++ b/go-api-examples/non-streaming-tts/run-vits-piper-en_US-lessac-medium.sh
@@ -4,7 +4,7 @@ set -ex
if [ ! -d vits-piper-en_US-lessac-medium ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-lessac-medium.tar.bz2
- tar xvf vits-piper-en_US-lessac-medium.tar.bz2
+ tar xf vits-piper-en_US-lessac-medium.tar.bz2
rm vits-piper-en_US-lessac-medium.tar.bz2
fi
diff --git a/go-api-examples/real-time-speech-recognition-from-microphone/go.mod b/go-api-examples/real-time-speech-recognition-from-microphone/go.mod
index 5d6a5b784b..636d6f797d 100644
--- a/go-api-examples/real-time-speech-recognition-from-microphone/go.mod
+++ b/go-api-examples/real-time-speech-recognition-from-microphone/go.mod
@@ -1,3 +1,7 @@
module real-time-speech-recognition-from-microphone
go 1.12
+
+require (
+ github.com/csukuangfj/portaudio-go v1.0.3
+)
diff --git a/harmony-os/.gitignore b/harmony-os/.gitignore
new file mode 100644
index 0000000000..dd2f4066e6
--- /dev/null
+++ b/harmony-os/.gitignore
@@ -0,0 +1,2 @@
+!build-profile.json5
+*.har
diff --git a/harmony-os/README.md b/harmony-os/README.md
new file mode 100644
index 0000000000..63a530cc0a
--- /dev/null
+++ b/harmony-os/README.md
@@ -0,0 +1,23 @@
+# Introduction
+
+- [./SherpaOnnxHar](./SherpaOnnxHar) This directory is for building `sherpa_onnx.har`.
+ If you don't need to change the C++ or TypeScript code of sherpa-onnx,
+ you can download the pre-built `sherpa_onnx.har` from us by running `ohpm install sherpa_onnx`.
+ Please refer to our [doc](https://k2-fsa.github.io/sherpa/onnx/harmony-os/how-to-build-har.html)
+ if you want to build `sherpa-onnx` from source.
+
+- [./SherpaOnnxSpeakerDiarization](./SherpaOnnxSpeakerDiarization) It shows how
+ to run on-device speaker diarization.
+
+- [./SherpaOnnxSpeakerIdentification](./SherpaOnnxSpeakerIdentification) It shows how to use
+ speaker embedding models for on-device speaker identification.
+
+- [./SherpaOnnxStreamingAsr](./SherpaOnnxStreamingAsr) It shows how to use
+ streaming ASR models for real-time on-device speech recognition.
+
+- [./SherpaOnnxTts](./SherpaOnnxTts) It shows how to run on-device text-to-speech.
+ Please see the doc at
+
+- [./SherpaOnnxVadAsr](./SherpaOnnxVadAsr) It shows how to use
+ VAD + Non-streaming ASR for speech recognition.
+ Please see the doc at
diff --git a/harmony-os/SherpaOnnxHar/.gitignore b/harmony-os/SherpaOnnxHar/.gitignore
new file mode 100644
index 0000000000..d2ff20141c
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/.gitignore
@@ -0,0 +1,12 @@
+/node_modules
+/oh_modules
+/local.properties
+/.idea
+**/build
+/.hvigor
+.cxx
+/.clangd
+/.clang-format
+/.clang-tidy
+**/.test
+/.appanalyzer
\ No newline at end of file
diff --git a/harmony-os/SherpaOnnxHar/AppScope/app.json5 b/harmony-os/SherpaOnnxHar/AppScope/app.json5
new file mode 100644
index 0000000000..8f5c08b90b
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/AppScope/app.json5
@@ -0,0 +1,10 @@
+{
+ "app": {
+ "bundleName": "com.k2fsa.sherpa.onnx",
+ "vendor": "example",
+ "versionCode": 1000000,
+ "versionName": "1.0.0",
+ "icon": "$media:app_icon",
+ "label": "$string:app_name"
+ }
+}
diff --git a/harmony-os/SherpaOnnxHar/AppScope/resources/base/element/string.json b/harmony-os/SherpaOnnxHar/AppScope/resources/base/element/string.json
new file mode 100644
index 0000000000..a0fa21ba74
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/AppScope/resources/base/element/string.json
@@ -0,0 +1,8 @@
+{
+ "string": [
+ {
+ "name": "app_name",
+ "value": "SherpaOnnxHar"
+ }
+ ]
+}
diff --git a/harmony-os/SherpaOnnxHar/AppScope/resources/base/media/app_icon.png b/harmony-os/SherpaOnnxHar/AppScope/resources/base/media/app_icon.png
new file mode 100644
index 0000000000..a39445dc87
Binary files /dev/null and b/harmony-os/SherpaOnnxHar/AppScope/resources/base/media/app_icon.png differ
diff --git a/harmony-os/SherpaOnnxHar/README.md b/harmony-os/SherpaOnnxHar/README.md
new file mode 100644
index 0000000000..a378f73ccd
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/README.md
@@ -0,0 +1,6 @@
+# Introduction
+
+How to build `sherpa_onnx.har` from the command line
+----------------------------------------------------
+
+Please see https://k2-fsa.github.io/sherpa/onnx/harmony-os/how-to-build-har.html
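+
+For reference, a minimal sketch of the commands used by `release.sh` in this
+directory (it assumes the HarmonyOS command-line tools, which provide `hvigorw`
+and `ohpm`, are on your `PATH`):
+
+```
+# clean and build the har for the default product/target
+hvigorw clean --no-daemon
+hvigorw --mode module -p product=default -p module=sherpa_onnx@default assembleHar --no-daemon
+
+# the resulting package is written to
+#   ./sherpa_onnx/build/default/outputs/default/sherpa_onnx.har
+```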
diff --git a/harmony-os/SherpaOnnxHar/build-profile.json5 b/harmony-os/SherpaOnnxHar/build-profile.json5
new file mode 100644
index 0000000000..2b12adad05
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/build-profile.json5
@@ -0,0 +1,44 @@
+{
+ "app": {
+ "signingConfigs": [],
+ "products": [
+ {
+ "name": "default",
+ "signingConfig": "default",
+ "compatibleSdkVersion": "4.0.0(10)",
+ "runtimeOS": "HarmonyOS",
+ "buildOption": {
+ "strictMode": {
+ "caseSensitiveCheck": true,
+ }
+ }
+ }
+ ],
+ "buildModeSet": [
+ {
+ "name": "debug",
+ },
+ {
+ "name": "release"
+ }
+ ]
+ },
+ "modules": [
+ {
+ "name": "entry",
+ "srcPath": "./entry",
+ "targets": [
+ {
+ "name": "default",
+ "applyToProducts": [
+ "default"
+ ]
+ }
+ ]
+ },
+ {
+ "name": "sherpa_onnx",
+ "srcPath": "./sherpa_onnx",
+ }
+ ]
+}
\ No newline at end of file
diff --git a/harmony-os/SherpaOnnxHar/code-linter.json5 b/harmony-os/SherpaOnnxHar/code-linter.json5
new file mode 100644
index 0000000000..77b31b517a
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/code-linter.json5
@@ -0,0 +1,20 @@
+{
+ "files": [
+ "**/*.ets"
+ ],
+ "ignore": [
+ "**/src/ohosTest/**/*",
+ "**/src/test/**/*",
+ "**/src/mock/**/*",
+ "**/node_modules/**/*",
+ "**/oh_modules/**/*",
+ "**/build/**/*",
+ "**/.preview/**/*"
+ ],
+ "ruleSet": [
+ "plugin:@performance/recommended",
+ "plugin:@typescript-eslint/recommended"
+ ],
+ "rules": {
+ }
+}
\ No newline at end of file
diff --git a/harmony-os/SherpaOnnxHar/entry/.gitignore b/harmony-os/SherpaOnnxHar/entry/.gitignore
new file mode 100644
index 0000000000..e2713a2779
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/entry/.gitignore
@@ -0,0 +1,6 @@
+/node_modules
+/oh_modules
+/.preview
+/build
+/.cxx
+/.test
\ No newline at end of file
diff --git a/harmony-os/SherpaOnnxHar/entry/build-profile.json5 b/harmony-os/SherpaOnnxHar/entry/build-profile.json5
new file mode 100644
index 0000000000..4d611879c7
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/entry/build-profile.json5
@@ -0,0 +1,28 @@
+{
+ "apiType": "stageMode",
+ "buildOption": {
+ },
+ "buildOptionSet": [
+ {
+ "name": "release",
+ "arkOptions": {
+ "obfuscation": {
+ "ruleOptions": {
+ "enable": false,
+ "files": [
+ "./obfuscation-rules.txt"
+ ]
+ }
+ }
+ }
+ },
+ ],
+ "targets": [
+ {
+ "name": "default"
+ },
+ {
+ "name": "ohosTest",
+ }
+ ]
+}
\ No newline at end of file
diff --git a/harmony-os/SherpaOnnxHar/entry/hvigorfile.ts b/harmony-os/SherpaOnnxHar/entry/hvigorfile.ts
new file mode 100644
index 0000000000..c6edcd9048
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/entry/hvigorfile.ts
@@ -0,0 +1,6 @@
+import { hapTasks } from '@ohos/hvigor-ohos-plugin';
+
+export default {
+ system: hapTasks, /* Built-in plugin of Hvigor. It cannot be modified. */
+ plugins:[] /* Custom plugin to extend the functionality of Hvigor. */
+}
diff --git a/harmony-os/SherpaOnnxHar/entry/obfuscation-rules.txt b/harmony-os/SherpaOnnxHar/entry/obfuscation-rules.txt
new file mode 100644
index 0000000000..272efb6ca3
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/entry/obfuscation-rules.txt
@@ -0,0 +1,23 @@
+# Define project specific obfuscation rules here.
+# You can include the obfuscation configuration files in the current module's build-profile.json5.
+#
+# For more details, see
+# https://developer.huawei.com/consumer/cn/doc/harmonyos-guides-V5/source-obfuscation-V5
+
+# Obfuscation options:
+# -disable-obfuscation: disable all obfuscations
+# -enable-property-obfuscation: obfuscate the property names
+# -enable-toplevel-obfuscation: obfuscate the names in the global scope
+# -compact: remove unnecessary blank spaces and all line feeds
+# -remove-log: remove all console.* statements
+# -print-namecache: print the name cache that contains the mapping from the old names to new names
+# -apply-namecache: reuse the given cache file
+
+# Keep options:
+# -keep-property-name: specifies property names that you want to keep
+# -keep-global-name: specifies names that you want to keep in the global scope
+
+-enable-property-obfuscation
+-enable-toplevel-obfuscation
+-enable-filename-obfuscation
+-enable-export-obfuscation
\ No newline at end of file
diff --git a/harmony-os/SherpaOnnxHar/entry/oh-package.json5 b/harmony-os/SherpaOnnxHar/entry/oh-package.json5
new file mode 100644
index 0000000000..248c3b7541
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/entry/oh-package.json5
@@ -0,0 +1,10 @@
+{
+ "name": "entry",
+ "version": "1.0.0",
+ "description": "Please describe the basic information.",
+ "main": "",
+ "author": "",
+ "license": "",
+ "dependencies": {}
+}
+
diff --git a/harmony-os/SherpaOnnxHar/entry/src/main/ets/entryability/EntryAbility.ets b/harmony-os/SherpaOnnxHar/entry/src/main/ets/entryability/EntryAbility.ets
new file mode 100644
index 0000000000..679d914538
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/entry/src/main/ets/entryability/EntryAbility.ets
@@ -0,0 +1,43 @@
+import AbilityConstant from '@ohos.app.ability.AbilityConstant';
+import hilog from '@ohos.hilog';
+import UIAbility from '@ohos.app.ability.UIAbility';
+import Want from '@ohos.app.ability.Want';
+import window from '@ohos.window';
+
+export default class EntryAbility extends UIAbility {
+ onCreate(want: Want, launchParam: AbilityConstant.LaunchParam): void {
+ hilog.info(0x0000, 'testTag', '%{public}s', 'Ability onCreate');
+ }
+
+ onDestroy(): void {
+ hilog.info(0x0000, 'testTag', '%{public}s', 'Ability onDestroy');
+ }
+
+ onWindowStageCreate(windowStage: window.WindowStage): void {
+ // Main window is created, set main page for this ability
+ hilog.info(0x0000, 'testTag', '%{public}s', 'Ability onWindowStageCreate');
+
+ windowStage.loadContent('pages/Index', (err) => {
+ if (err.code) {
+ hilog.error(0x0000, 'testTag', 'Failed to load the content. Cause: %{public}s', JSON.stringify(err) ?? '');
+ return;
+ }
+ hilog.info(0x0000, 'testTag', 'Succeeded in loading the content.');
+ });
+ }
+
+ onWindowStageDestroy(): void {
+ // Main window is destroyed, release UI related resources
+ hilog.info(0x0000, 'testTag', '%{public}s', 'Ability onWindowStageDestroy');
+ }
+
+ onForeground(): void {
+ // Ability has brought to foreground
+ hilog.info(0x0000, 'testTag', '%{public}s', 'Ability onForeground');
+ }
+
+ onBackground(): void {
+ // Ability has back to background
+ hilog.info(0x0000, 'testTag', '%{public}s', 'Ability onBackground');
+ }
+}
diff --git a/harmony-os/SherpaOnnxHar/entry/src/main/ets/entrybackupability/EntryBackupAbility.ets b/harmony-os/SherpaOnnxHar/entry/src/main/ets/entrybackupability/EntryBackupAbility.ets
new file mode 100644
index 0000000000..d2c48b4212
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/entry/src/main/ets/entrybackupability/EntryBackupAbility.ets
@@ -0,0 +1,12 @@
+import hilog from '@ohos.hilog';
+import BackupExtensionAbility, { BundleVersion } from '@ohos.application.BackupExtensionAbility';
+
+export default class EntryBackupAbility extends BackupExtensionAbility {
+ async onBackup() {
+ hilog.info(0x0000, 'testTag', 'onBackup ok');
+ }
+
+ async onRestore(bundleVersion: BundleVersion) {
+ hilog.info(0x0000, 'testTag', 'onRestore ok %{public}s', JSON.stringify(bundleVersion));
+ }
+}
\ No newline at end of file
diff --git a/harmony-os/SherpaOnnxHar/entry/src/main/ets/pages/Index.ets b/harmony-os/SherpaOnnxHar/entry/src/main/ets/pages/Index.ets
new file mode 100644
index 0000000000..423b4276ec
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/entry/src/main/ets/pages/Index.ets
@@ -0,0 +1,17 @@
+@Entry
+@Component
+struct Index {
+ @State message: string = 'Hello World';
+
+ build() {
+ Row() {
+ Column() {
+ Text(this.message)
+ .fontSize(50)
+ .fontWeight(FontWeight.Bold)
+ }
+ .width('100%')
+ }
+ .height('100%')
+ }
+}
\ No newline at end of file
diff --git a/harmony-os/SherpaOnnxHar/entry/src/main/module.json5 b/harmony-os/SherpaOnnxHar/entry/src/main/module.json5
new file mode 100644
index 0000000000..a1cea8b6a4
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/entry/src/main/module.json5
@@ -0,0 +1,52 @@
+{
+ "module": {
+ "name": "entry",
+ "type": "entry",
+ "description": "$string:module_desc",
+ "mainElement": "EntryAbility",
+ "deviceTypes": [
+ "phone",
+ "tablet",
+ "2in1"
+ ],
+ "deliveryWithInstall": true,
+ "installationFree": false,
+ "pages": "$profile:main_pages",
+ "abilities": [
+ {
+ "name": "EntryAbility",
+ "srcEntry": "./ets/entryability/EntryAbility.ets",
+ "description": "$string:EntryAbility_desc",
+ "icon": "$media:layered_image",
+ "label": "$string:EntryAbility_label",
+ "startWindowIcon": "$media:startIcon",
+ "startWindowBackground": "$color:start_window_background",
+ "exported": true,
+ "skills": [
+ {
+ "entities": [
+ "entity.system.home"
+ ],
+ "actions": [
+ "action.system.home"
+ ]
+ }
+ ]
+ }
+ ],
+ "extensionAbilities": [
+ {
+ "name": "EntryBackupAbility",
+ "srcEntry": "./ets/entrybackupability/EntryBackupAbility.ets",
+ "type": "backup",
+ "exported": false,
+ "metadata": [
+ {
+ "name": "ohos.extension.backup",
+ "resource": "$profile:backup_config"
+ }
+ ],
+ }
+ ]
+ }
+}
\ No newline at end of file
diff --git a/harmony-os/SherpaOnnxHar/entry/src/main/resources/base/element/color.json b/harmony-os/SherpaOnnxHar/entry/src/main/resources/base/element/color.json
new file mode 100644
index 0000000000..3c712962da
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/entry/src/main/resources/base/element/color.json
@@ -0,0 +1,8 @@
+{
+ "color": [
+ {
+ "name": "start_window_background",
+ "value": "#FFFFFF"
+ }
+ ]
+}
\ No newline at end of file
diff --git a/harmony-os/SherpaOnnxHar/entry/src/main/resources/base/element/string.json b/harmony-os/SherpaOnnxHar/entry/src/main/resources/base/element/string.json
new file mode 100644
index 0000000000..f94595515a
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/entry/src/main/resources/base/element/string.json
@@ -0,0 +1,16 @@
+{
+ "string": [
+ {
+ "name": "module_desc",
+ "value": "module description"
+ },
+ {
+ "name": "EntryAbility_desc",
+ "value": "description"
+ },
+ {
+ "name": "EntryAbility_label",
+ "value": "label"
+ }
+ ]
+}
\ No newline at end of file
diff --git a/harmony-os/SherpaOnnxHar/entry/src/main/resources/base/media/background.png b/harmony-os/SherpaOnnxHar/entry/src/main/resources/base/media/background.png
new file mode 100644
index 0000000000..f939c9fa8c
Binary files /dev/null and b/harmony-os/SherpaOnnxHar/entry/src/main/resources/base/media/background.png differ
diff --git a/harmony-os/SherpaOnnxHar/entry/src/main/resources/base/media/foreground.png b/harmony-os/SherpaOnnxHar/entry/src/main/resources/base/media/foreground.png
new file mode 100644
index 0000000000..4483ddad1f
Binary files /dev/null and b/harmony-os/SherpaOnnxHar/entry/src/main/resources/base/media/foreground.png differ
diff --git a/harmony-os/SherpaOnnxHar/entry/src/main/resources/base/media/layered_image.json b/harmony-os/SherpaOnnxHar/entry/src/main/resources/base/media/layered_image.json
new file mode 100644
index 0000000000..fb49920440
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/entry/src/main/resources/base/media/layered_image.json
@@ -0,0 +1,7 @@
+{
+ "layered-image":
+ {
+ "background" : "$media:background",
+ "foreground" : "$media:foreground"
+ }
+}
\ No newline at end of file
diff --git a/harmony-os/SherpaOnnxHar/entry/src/main/resources/base/media/startIcon.png b/harmony-os/SherpaOnnxHar/entry/src/main/resources/base/media/startIcon.png
new file mode 100644
index 0000000000..205ad8b5a8
Binary files /dev/null and b/harmony-os/SherpaOnnxHar/entry/src/main/resources/base/media/startIcon.png differ
diff --git a/harmony-os/SherpaOnnxHar/entry/src/main/resources/base/profile/backup_config.json b/harmony-os/SherpaOnnxHar/entry/src/main/resources/base/profile/backup_config.json
new file mode 100644
index 0000000000..78f40ae7c4
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/entry/src/main/resources/base/profile/backup_config.json
@@ -0,0 +1,3 @@
+{
+ "allowToBackupRestore": true
+}
\ No newline at end of file
diff --git a/harmony-os/SherpaOnnxHar/entry/src/main/resources/base/profile/main_pages.json b/harmony-os/SherpaOnnxHar/entry/src/main/resources/base/profile/main_pages.json
new file mode 100644
index 0000000000..1898d94f58
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/entry/src/main/resources/base/profile/main_pages.json
@@ -0,0 +1,5 @@
+{
+ "src": [
+ "pages/Index"
+ ]
+}
diff --git a/harmony-os/SherpaOnnxHar/entry/src/main/resources/en_US/element/string.json b/harmony-os/SherpaOnnxHar/entry/src/main/resources/en_US/element/string.json
new file mode 100644
index 0000000000..f94595515a
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/entry/src/main/resources/en_US/element/string.json
@@ -0,0 +1,16 @@
+{
+ "string": [
+ {
+ "name": "module_desc",
+ "value": "module description"
+ },
+ {
+ "name": "EntryAbility_desc",
+ "value": "description"
+ },
+ {
+ "name": "EntryAbility_label",
+ "value": "label"
+ }
+ ]
+}
\ No newline at end of file
diff --git a/harmony-os/SherpaOnnxHar/entry/src/main/resources/zh_CN/element/string.json b/harmony-os/SherpaOnnxHar/entry/src/main/resources/zh_CN/element/string.json
new file mode 100644
index 0000000000..597ecf95e6
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/entry/src/main/resources/zh_CN/element/string.json
@@ -0,0 +1,16 @@
+{
+ "string": [
+ {
+ "name": "module_desc",
+ "value": "模块描述"
+ },
+ {
+ "name": "EntryAbility_desc",
+ "value": "description"
+ },
+ {
+ "name": "EntryAbility_label",
+ "value": "label"
+ }
+ ]
+}
\ No newline at end of file
diff --git a/harmony-os/SherpaOnnxHar/entry/src/ohosTest/ets/test/Ability.test.ets b/harmony-os/SherpaOnnxHar/entry/src/ohosTest/ets/test/Ability.test.ets
new file mode 100644
index 0000000000..8aa3749775
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/entry/src/ohosTest/ets/test/Ability.test.ets
@@ -0,0 +1,35 @@
+import hilog from '@ohos.hilog';
+import { describe, beforeAll, beforeEach, afterEach, afterAll, it, expect } from '@ohos/hypium';
+
+export default function abilityTest() {
+ describe('ActsAbilityTest', () => {
+ // Defines a test suite. Two parameters are supported: test suite name and test suite function.
+ beforeAll(() => {
+ // Presets an action, which is performed only once before all test cases of the test suite start.
+ // This API supports only one parameter: preset action function.
+ })
+ beforeEach(() => {
+ // Presets an action, which is performed before each unit test case starts.
+ // The number of execution times is the same as the number of test cases defined by **it**.
+ // This API supports only one parameter: preset action function.
+ })
+ afterEach(() => {
+ // Presets a clear action, which is performed after each unit test case ends.
+ // The number of execution times is the same as the number of test cases defined by **it**.
+ // This API supports only one parameter: clear action function.
+ })
+ afterAll(() => {
+ // Presets a clear action, which is performed after all test cases of the test suite end.
+ // This API supports only one parameter: clear action function.
+ })
+ it('assertContain', 0, () => {
+ // Defines a test case. This API supports three parameters: test case name, filter parameter, and test case function.
+ hilog.info(0x0000, 'testTag', '%{public}s', 'it begin');
+ let a = 'abc';
+ let b = 'b';
+ // Defines a variety of assertion methods, which are used to declare expected boolean conditions.
+ expect(a).assertContain(b);
+ expect(a).assertEqual(a);
+ })
+ })
+}
\ No newline at end of file
diff --git a/harmony-os/SherpaOnnxHar/entry/src/ohosTest/ets/test/List.test.ets b/harmony-os/SherpaOnnxHar/entry/src/ohosTest/ets/test/List.test.ets
new file mode 100644
index 0000000000..794c7dc4ed
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/entry/src/ohosTest/ets/test/List.test.ets
@@ -0,0 +1,5 @@
+import abilityTest from './Ability.test';
+
+export default function testsuite() {
+ abilityTest();
+}
\ No newline at end of file
diff --git a/harmony-os/SherpaOnnxHar/entry/src/ohosTest/module.json5 b/harmony-os/SherpaOnnxHar/entry/src/ohosTest/module.json5
new file mode 100644
index 0000000000..55725a9299
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/entry/src/ohosTest/module.json5
@@ -0,0 +1,13 @@
+{
+ "module": {
+ "name": "entry_test",
+ "type": "feature",
+ "deviceTypes": [
+ "phone",
+ "tablet",
+ "2in1"
+ ],
+ "deliveryWithInstall": true,
+ "installationFree": false
+ }
+}
diff --git a/harmony-os/SherpaOnnxHar/entry/src/test/List.test.ets b/harmony-os/SherpaOnnxHar/entry/src/test/List.test.ets
new file mode 100644
index 0000000000..bb5b5c3731
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/entry/src/test/List.test.ets
@@ -0,0 +1,5 @@
+import localUnitTest from './LocalUnit.test';
+
+export default function testsuite() {
+ localUnitTest();
+}
\ No newline at end of file
diff --git a/harmony-os/SherpaOnnxHar/entry/src/test/LocalUnit.test.ets b/harmony-os/SherpaOnnxHar/entry/src/test/LocalUnit.test.ets
new file mode 100644
index 0000000000..165fc1615e
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/entry/src/test/LocalUnit.test.ets
@@ -0,0 +1,33 @@
+import { describe, beforeAll, beforeEach, afterEach, afterAll, it, expect } from '@ohos/hypium';
+
+export default function localUnitTest() {
+ describe('localUnitTest', () => {
+ // Defines a test suite. Two parameters are supported: test suite name and test suite function.
+ beforeAll(() => {
+ // Presets an action, which is performed only once before all test cases of the test suite start.
+ // This API supports only one parameter: preset action function.
+ });
+ beforeEach(() => {
+ // Presets an action, which is performed before each unit test case starts.
+ // The number of execution times is the same as the number of test cases defined by **it**.
+ // This API supports only one parameter: preset action function.
+ });
+ afterEach(() => {
+ // Presets a clear action, which is performed after each unit test case ends.
+ // The number of execution times is the same as the number of test cases defined by **it**.
+ // This API supports only one parameter: clear action function.
+ });
+ afterAll(() => {
+ // Presets a clear action, which is performed after all test cases of the test suite end.
+ // This API supports only one parameter: clear action function.
+ });
+ it('assertContain', 0, () => {
+ // Defines a test case. This API supports three parameters: test case name, filter parameter, and test case function.
+ let a = 'abc';
+ let b = 'b';
+ // Defines a variety of assertion methods, which are used to declare expected boolean conditions.
+ expect(a).assertContain(b);
+ expect(a).assertEqual(a);
+ });
+ });
+}
\ No newline at end of file
diff --git a/harmony-os/SherpaOnnxHar/hvigor/hvigor-config.json5 b/harmony-os/SherpaOnnxHar/hvigor/hvigor-config.json5
new file mode 100644
index 0000000000..06b2783670
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/hvigor/hvigor-config.json5
@@ -0,0 +1,22 @@
+{
+ "modelVersion": "5.0.0",
+ "dependencies": {
+ },
+ "execution": {
+ // "analyze": "normal", /* Define the build analyze mode. Value: [ "normal" | "advanced" | false ]. Default: "normal" */
+ // "daemon": true, /* Enable daemon compilation. Value: [ true | false ]. Default: true */
+ // "incremental": true, /* Enable incremental compilation. Value: [ true | false ]. Default: true */
+ // "parallel": true, /* Enable parallel compilation. Value: [ true | false ]. Default: true */
+ // "typeCheck": false, /* Enable typeCheck. Value: [ true | false ]. Default: false */
+ },
+ "logging": {
+ // "level": "info" /* Define the log level. Value: [ "debug" | "info" | "warn" | "error" ]. Default: "info" */
+ },
+ "debugging": {
+ // "stacktrace": false /* Disable stacktrace compilation. Value: [ true | false ]. Default: false */
+ },
+ "nodeOptions": {
+ // "maxOldSpaceSize": 8192 /* Enable nodeOptions maxOldSpaceSize compilation. Unit M. Used for the daemon process. Default: 8192*/
+ // "exposeGC": true /* Enable to trigger garbage collection explicitly. Default: true*/
+ }
+}
diff --git a/harmony-os/SherpaOnnxHar/hvigorfile.ts b/harmony-os/SherpaOnnxHar/hvigorfile.ts
new file mode 100644
index 0000000000..f3cb9f1a87
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/hvigorfile.ts
@@ -0,0 +1,6 @@
+import { appTasks } from '@ohos/hvigor-ohos-plugin';
+
+export default {
+ system: appTasks, /* Built-in plugin of Hvigor. It cannot be modified. */
+ plugins:[] /* Custom plugin to extend the functionality of Hvigor. */
+}
diff --git a/harmony-os/SherpaOnnxHar/notes.md b/harmony-os/SherpaOnnxHar/notes.md
new file mode 100644
index 0000000000..6926a7bb6d
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/notes.md
@@ -0,0 +1,13 @@
+# Notes
+
+## How to publish a package
+
+Please see
+ -
+ -
+ -
+
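+For the publish step itself, `release.sh` in this directory boils down to the
+following (a minimal sketch; it assumes the har has already been built with
+`hvigorw ... assembleHar` and that you are logged in to the ohpm registry):
+
+```
+ohpm publish ./sherpa_onnx/build/default/outputs/default/sherpa_onnx.har
+```
+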
+## How to sign the HAP file from commandline
+
+Please see
+
diff --git a/harmony-os/SherpaOnnxHar/oh-package-lock.json5 b/harmony-os/SherpaOnnxHar/oh-package-lock.json5
new file mode 100644
index 0000000000..f538ae290f
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/oh-package-lock.json5
@@ -0,0 +1,19 @@
+{
+ "meta": {
+ "stableOrder": true
+ },
+ "lockfileVersion": 3,
+ "ATTENTION": "THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY.",
+ "specifiers": {
+ "@ohos/hypium@1.0.19": "@ohos/hypium@1.0.19"
+ },
+ "packages": {
+ "@ohos/hypium@1.0.19": {
+ "name": "@ohos/hypium",
+ "version": "1.0.19",
+ "integrity": "sha512-cEjDgLFCm3cWZDeRXk7agBUkPqjWxUo6AQeiu0gEkb3J8ESqlduQLSIXeo3cCsm8U/asL7iKjF85ZyOuufAGSQ==",
+ "resolved": "https://ohpm.openharmony.cn/ohpm/@ohos/hypium/-/hypium-1.0.19.har",
+ "registryType": "ohpm"
+ }
+ }
+}
\ No newline at end of file
diff --git a/harmony-os/SherpaOnnxHar/oh-package.json5 b/harmony-os/SherpaOnnxHar/oh-package.json5
new file mode 100644
index 0000000000..a79d5300e5
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/oh-package.json5
@@ -0,0 +1,9 @@
+{
+ "modelVersion": "5.0.0",
+ "description": "Please describe the basic information.",
+ "dependencies": {
+ },
+ "devDependencies": {
+ "@ohos/hypium": "1.0.19"
+ }
+}
diff --git a/harmony-os/SherpaOnnxHar/release.sh b/harmony-os/SherpaOnnxHar/release.sh
new file mode 100755
index 0000000000..cc33364fb1
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/release.sh
@@ -0,0 +1,9 @@
+#!/usr/bin/env bash
+set -ex
+
+export PATH=/Users/fangjun/software/command-line-tools/bin:$PATH
+
+hvigorw clean --no-daemon
+hvigorw --mode module -p product=default -p module=sherpa_onnx@default assembleHar --analyze=normal --parallel --incremental --no-daemon
+
+ohpm publish ./sherpa_onnx/build/default/outputs/default/sherpa_onnx.har
diff --git a/harmony-os/SherpaOnnxHar/sherpa_onnx/.gitignore b/harmony-os/SherpaOnnxHar/sherpa_onnx/.gitignore
new file mode 100644
index 0000000000..e2713a2779
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/sherpa_onnx/.gitignore
@@ -0,0 +1,6 @@
+/node_modules
+/oh_modules
+/.preview
+/build
+/.cxx
+/.test
\ No newline at end of file
diff --git a/harmony-os/SherpaOnnxHar/sherpa_onnx/BuildProfile.ets b/harmony-os/SherpaOnnxHar/sherpa_onnx/BuildProfile.ets
new file mode 100644
index 0000000000..ea97166bc9
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/sherpa_onnx/BuildProfile.ets
@@ -0,0 +1,17 @@
+/**
+ * Use these variables when you tailor your ArkTS code. They must be of the const type.
+ */
+export const HAR_VERSION = '1.10.41';
+export const BUILD_MODE_NAME = 'debug';
+export const DEBUG = true;
+export const TARGET_NAME = 'default';
+
+/**
+ * BuildProfile Class is used only for compatibility purposes.
+ */
+export default class BuildProfile {
+ static readonly HAR_VERSION = HAR_VERSION;
+ static readonly BUILD_MODE_NAME = BUILD_MODE_NAME;
+ static readonly DEBUG = DEBUG;
+ static readonly TARGET_NAME = TARGET_NAME;
+}
\ No newline at end of file
diff --git a/harmony-os/SherpaOnnxHar/sherpa_onnx/Index.ets b/harmony-os/SherpaOnnxHar/sherpa_onnx/Index.ets
new file mode 100644
index 0000000000..84286294a6
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/sherpa_onnx/Index.ets
@@ -0,0 +1,60 @@
+export { listRawfileDir, readWave, readWaveFromBinary, } from "libsherpa_onnx.so";
+
+export { CircularBuffer, SileroVadConfig, SpeechSegment, Vad, VadConfig, } from './src/main/ets/components/Vad';
+
+
+export { Samples,
+ OfflineStream,
+ FeatureConfig,
+ OfflineTransducerModelConfig,
+ OfflineParaformerModelConfig,
+ OfflineNemoEncDecCtcModelConfig,
+ OfflineWhisperModelConfig,
+ OfflineTdnnModelConfig,
+ OfflineSenseVoiceModelConfig,
+ OfflineMoonshineModelConfig,
+ OfflineModelConfig,
+ OfflineLMConfig,
+ OfflineRecognizerConfig,
+ OfflineRecognizerResult,
+ OfflineRecognizer,
+} from './src/main/ets/components/NonStreamingAsr';
+
+export { OnlineStream,
+ OnlineTransducerModelConfig,
+ OnlineParaformerModelConfig,
+ OnlineZipformer2CtcModelConfig,
+ OnlineModelConfig,
+ OnlineCtcFstDecoderConfig,
+ OnlineRecognizerConfig,
+ OnlineRecognizerResult,
+ OnlineRecognizer,
+} from './src/main/ets/components/StreamingAsr';
+
+export { OfflineTtsKokoroModelConfig,
+ OfflineTtsMatchaModelConfig,
+ OfflineTtsVitsModelConfig,
+ OfflineTtsModelConfig,
+ OfflineTtsConfig,
+ OfflineTts,
+ TtsOutput,
+ TtsInput,
+} from './src/main/ets/components/NonStreamingTts';
+
+export { SpeakerEmbeddingExtractorConfig,
+ SpeakerEmbeddingExtractor,
+ SpeakerEmbeddingManager,
+} from './src/main/ets/components/SpeakerIdentification';
+
+export { OfflineSpeakerSegmentationPyannoteModelConfig,
+ OfflineSpeakerSegmentationModelConfig,
+ OfflineSpeakerDiarizationConfig,
+ OfflineSpeakerDiarizationSegment,
+ OfflineSpeakerDiarization,
+ FastClusteringConfig,
+} from './src/main/ets/components/NonStreamingSpeakerDiarization';
+
+export { KeywordSpotterConfig,
+ KeywordSpotterResult,
+ KeywordSpotter,
+} from './src/main/ets/components/KeywordSpotting';
diff --git a/harmony-os/SherpaOnnxHar/sherpa_onnx/README.md b/harmony-os/SherpaOnnxHar/sherpa_onnx/README.md
new file mode 100644
index 0000000000..95fc7bdd75
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/sherpa_onnx/README.md
@@ -0,0 +1,46 @@
+# Introduction
+
+[sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx) is one of the deployment
+frameworks of [Next-gen Kaldi](https://github.com/k2-fsa).
+
+It supports speech-to-text, text-to-speech, speaker diarization, and VAD using
+onnxruntime without Internet connection.
+
+It also supports embedded systems, Android, iOS, HarmonyOS,
+Raspberry Pi, RISC-V, x86_64 servers, websocket server/client,
+C/C++, Python, Kotlin, C#, Go, NodeJS, Java, Swift, Dart, JavaScript,
+Flutter, Object Pascal, Lazarus, Rust, etc.
+
+
+# Installation
+
+To use `sherpa-onnx` in your project, please either use
+
+```
+ohpm install sherpa_onnx
+```
+or update your `oh-package.json5` to include the following:
+
+```
+ "dependencies": {
+ "sherpa_onnx": "1.10.42",
+ },
+```
+
+Note that we recommend always using the latest version.
+
+# Examples
+
+| Demo | URL | Description|
+|------|-----|------------|
+|SherpaOnnxStreamingAsr|[Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/harmony-os/SherpaOnnxStreamingAsr)|On-device real-time/streaming speech recognition with Next-gen Kaldi|
+|SherpaOnnxVadAsr|[Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/harmony-os/SherpaOnnxVadAsr)|It shows how to use VAD with a non-streaming ASR model for on-device speech recognition without accessing the network |
+|SherpaOnnxTts|[Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/harmony-os/SherpaOnnxTts)|It shows how to use Next-gen Kaldi for on-device text-to-speech (TTS, i.e., speech synthesis)|
+|SherpaOnnxSpeakerDiarization|[Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/harmony-os/SherpaOnnxSpeakerDiarization)|On-device speaker diarization with Next-gen Kaldi|
+|SherpaOnnxSpeakerIdentification|[Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/harmony-os/SherpaOnnxSpeakerIdentification)|On-device speaker identification with Next-gen Kaldi|
+
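+Once the dependency is added, the APIs exported in `Index.ets` of this package
+can be imported by the package name. A minimal sketch of the import (the class
+names are taken from `Index.ets`; see the demo projects above for how to fill
+in the configs):
+
+```
+// in an .ets file of your project, after adding the dependency
+import { OnlineRecognizer, OnlineRecognizerConfig } from 'sherpa_onnx';
+import { OfflineTts, OfflineTtsConfig } from 'sherpa_onnx';
+import { Vad, VadConfig, CircularBuffer } from 'sherpa_onnx';
+```
+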
+# Documentation
+
+If you have any issues, please either look at our doc at
+https://k2-fsa.github.io/sherpa/onnx/ or create an issue at https://github.com/k2-fsa/sherpa-onnx/issues
+
diff --git a/harmony-os/SherpaOnnxHar/sherpa_onnx/build-profile.json5 b/harmony-os/SherpaOnnxHar/sherpa_onnx/build-profile.json5
new file mode 100644
index 0000000000..905c571273
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/sherpa_onnx/build-profile.json5
@@ -0,0 +1,46 @@
+{
+ "apiType": "stageMode",
+ "buildOption": {
+ "externalNativeOptions": {
+ "path": "./src/main/cpp/CMakeLists.txt",
+ "arguments": "",
+ "cppFlags": "-std=c++17",
+ "abiFilters": [
+ "arm64-v8a",
+ "x86_64",
+ ],
+ },
+ },
+ "buildOptionSet": [
+ {
+ "name": "release",
+ "arkOptions": {
+ "obfuscation": {
+ "ruleOptions": {
+ "enable": false,
+ "files": [
+ "./obfuscation-rules.txt"
+ ]
+ },
+ "consumerFiles": [
+ "./consumer-rules.txt"
+ ]
+ }
+ },
+ "nativeLib": {
+ "debugSymbol": {
+ "strip": true,
+ "exclude": []
+ }
+ }
+ },
+ ],
+ "targets": [
+ {
+ "name": "default"
+ },
+ {
+ "name": "ohosTest"
+ }
+ ]
+}
diff --git a/harmony-os/SherpaOnnxHar/sherpa_onnx/consumer-rules.txt b/harmony-os/SherpaOnnxHar/sherpa_onnx/consumer-rules.txt
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/harmony-os/SherpaOnnxHar/sherpa_onnx/hvigorfile.ts b/harmony-os/SherpaOnnxHar/sherpa_onnx/hvigorfile.ts
new file mode 100644
index 0000000000..4218707148
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/sherpa_onnx/hvigorfile.ts
@@ -0,0 +1,6 @@
+import { harTasks } from '@ohos/hvigor-ohos-plugin';
+
+export default {
+ system: harTasks, /* Built-in plugin of Hvigor. It cannot be modified. */
+ plugins:[] /* Custom plugin to extend the functionality of Hvigor. */
+}
diff --git a/harmony-os/SherpaOnnxHar/sherpa_onnx/obfuscation-rules.txt b/harmony-os/SherpaOnnxHar/sherpa_onnx/obfuscation-rules.txt
new file mode 100644
index 0000000000..272efb6ca3
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/sherpa_onnx/obfuscation-rules.txt
@@ -0,0 +1,23 @@
+# Define project specific obfuscation rules here.
+# You can include the obfuscation configuration files in the current module's build-profile.json5.
+#
+# For more details, see
+# https://developer.huawei.com/consumer/cn/doc/harmonyos-guides-V5/source-obfuscation-V5
+
+# Obfuscation options:
+# -disable-obfuscation: disable all obfuscations
+# -enable-property-obfuscation: obfuscate the property names
+# -enable-toplevel-obfuscation: obfuscate the names in the global scope
+# -compact: remove unnecessary blank spaces and all line feeds
+# -remove-log: remove all console.* statements
+# -print-namecache: print the name cache that contains the mapping from the old names to new names
+# -apply-namecache: reuse the given cache file
+
+# Keep options:
+# -keep-property-name: specifies property names that you want to keep
+# -keep-global-name: specifies names that you want to keep in the global scope
+
+-enable-property-obfuscation
+-enable-toplevel-obfuscation
+-enable-filename-obfuscation
+-enable-export-obfuscation
\ No newline at end of file
diff --git a/harmony-os/SherpaOnnxHar/sherpa_onnx/oh-package-lock.json5 b/harmony-os/SherpaOnnxHar/sherpa_onnx/oh-package-lock.json5
new file mode 100644
index 0000000000..2585b2e83d
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/sherpa_onnx/oh-package-lock.json5
@@ -0,0 +1,18 @@
+{
+ "meta": {
+ "stableOrder": true
+ },
+ "lockfileVersion": 3,
+ "ATTENTION": "THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY.",
+ "specifiers": {
+ "libsherpa_onnx.so@src/main/cpp/types/libsherpa_onnx": "libsherpa_onnx.so@src/main/cpp/types/libsherpa_onnx"
+ },
+ "packages": {
+ "libsherpa_onnx.so@src/main/cpp/types/libsherpa_onnx": {
+ "name": "libsherpa_onnx.so",
+ "version": "1.0.0",
+ "resolved": "src/main/cpp/types/libsherpa_onnx",
+ "registryType": "local"
+ }
+ }
+}
\ No newline at end of file
diff --git a/harmony-os/SherpaOnnxHar/sherpa_onnx/oh-package.json5 b/harmony-os/SherpaOnnxHar/sherpa_onnx/oh-package.json5
new file mode 100644
index 0000000000..52b9733345
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/sherpa_onnx/oh-package.json5
@@ -0,0 +1,28 @@
+{
+ "name": "sherpa_onnx",
+ "version": "1.10.42",
+ "description": "On-device speech-to-text, text-to-speech, and speaker diarization using Next-gen Kaldi without Internet connection",
+ "main": "Index.ets",
+ "author": "The next-gen Kaldi team",
+ "license": "Apache-2.0",
+ "homepage": "https://github.com/k2-fsa/sherpa-onnx",
+ "repository": "https://github.com/k2-fsa/sherpa-onnx/tree/master/harmony-os/SherpaOnnxHar",
+ "dependencies": {
+ "libsherpa_onnx.so": "file:./src/main/cpp/types/libsherpa_onnx"
+ },
+ "keywords": [
+ "语音识别",
+ "语音合成",
+ "说话人日志",
+ "新一代Kaldi",
+ "不联网",
+ "本地",
+ "tts",
+ "asr",
+ "privacy",
+ "open-source",
+ ],
+ "bugs": {
+ "url": "https://github.com/k2-fsa/sherpa-onnx/issues"
+ },
+}
diff --git a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/CMakeLists.txt b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/CMakeLists.txt
new file mode 100644
index 0000000000..26dda1789e
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/CMakeLists.txt
@@ -0,0 +1,74 @@
+# the minimum version of CMake.
+cmake_minimum_required(VERSION 3.13.0)
+project(myNpmLib)
+
+if (NOT CMAKE_CXX_STANDARD)
+ set(CMAKE_CXX_STANDARD 17 CACHE STRING "The C++ version to use")
+endif()
+
+# Disable warning about
+#
+# "The DOWNLOAD_EXTRACT_TIMESTAMP option was not given and policy CMP0135 is
+# not set.
+if (CMAKE_VERSION VERSION_GREATER_EQUAL "3.24.0")
+ cmake_policy(SET CMP0135 NEW)
+endif()
+
+set(NATIVERENDER_ROOT_PATH ${CMAKE_CURRENT_SOURCE_DIR})
+
+if(DEFINED PACKAGE_FIND_FILE)
+ include(${PACKAGE_FIND_FILE})
+endif()
+
+include_directories(${NATIVERENDER_ROOT_PATH}
+ ${NATIVERENDER_ROOT_PATH}/include)
+
+include(FetchContent)
+FetchContent_Declare(node_addon_api
+ GIT_REPOSITORY "https://github.com/nodejs/node-addon-api.git"
+ GIT_TAG c679f6f4c9dc6bf9fc0d99cbe5982bd24a5e2c7b
+ PATCH_COMMAND git checkout . && git apply --ignore-whitespace "${CMAKE_CURRENT_LIST_DIR}/my-patch.diff"
+)
+FetchContent_MakeAvailable(node_addon_api)
+FetchContent_GetProperties(node_addon_api)
+if(NOT node_addon_api_POPULATED)
+ message(STATUS "Downloading node-addon-api from")
+ FetchContent_Populate(node_addon_api)
+endif()
+
+message(STATUS "node-addon-api is downloaded to ${node_addon_api_SOURCE_DIR}")
+include_directories(${node_addon_api_SOURCE_DIR})
+
+add_library(sherpa_onnx SHARED
+ audio-tagging.cc
+ keyword-spotting.cc
+ non-streaming-asr.cc
+ non-streaming-speaker-diarization.cc
+ non-streaming-tts.cc
+ punctuation.cc
+ sherpa-onnx-node-addon-api.cc
+ speaker-identification.cc
+ spoken-language-identification.cc
+ streaming-asr.cc
+ utils.cc
+ vad.cc
+ wave-reader.cc
+ wave-writer.cc
+)
+
+add_library(sherpa_onnx_c_api SHARED IMPORTED)
+set_target_properties(sherpa_onnx_c_api
+ PROPERTIES
+ IMPORTED_LOCATION ${CMAKE_CURRENT_SOURCE_DIR}/libs/${OHOS_ARCH}/libsherpa-onnx-c-api.so)
+
+add_library(onnxruntime SHARED IMPORTED)
+set_target_properties(onnxruntime
+ PROPERTIES
+ IMPORTED_LOCATION ${CMAKE_CURRENT_SOURCE_DIR}/libs/${OHOS_ARCH}/libonnxruntime.so)
+
+
+target_link_libraries(sherpa_onnx PUBLIC libace_napi.z.so
+ libhilog_ndk.z.so # for hilog
+ librawfile.z.so
+ sherpa_onnx_c_api onnxruntime
+)
diff --git a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/audio-tagging.cc b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/audio-tagging.cc
new file mode 100644
index 0000000000..f4d6ac5391
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/audio-tagging.cc
@@ -0,0 +1,216 @@
+// scripts/node-addon-api/src/audio-tagging.cc
+//
+// Copyright (c) 2024 Xiaomi Corporation
+#include <sstream>
+
+#include "macros.h" // NOLINT
+#include "napi.h" // NOLINT
+#include "sherpa-onnx/c-api/c-api.h"
+
+static SherpaOnnxOfflineZipformerAudioTaggingModelConfig
+GetAudioTaggingZipformerModelConfig(Napi::Object obj) {
+ SherpaOnnxOfflineZipformerAudioTaggingModelConfig c;
+ memset(&c, 0, sizeof(c));
+
+ if (!obj.Has("zipformer") || !obj.Get("zipformer").IsObject()) {
+ return c;
+ }
+
+ Napi::Object o = obj.Get("zipformer").As();
+
+ SHERPA_ONNX_ASSIGN_ATTR_STR(model, model);
+
+ return c;
+}
+
+static SherpaOnnxAudioTaggingModelConfig GetAudioTaggingModelConfig(
+ Napi::Object obj) {
+ SherpaOnnxAudioTaggingModelConfig c;
+ memset(&c, 0, sizeof(c));
+
+ if (!obj.Has("model") || !obj.Get("model").IsObject()) {
+ return c;
+ }
+
+ Napi::Object o = obj.Get("model").As();
+ c.zipformer = GetAudioTaggingZipformerModelConfig(o);
+
+ SHERPA_ONNX_ASSIGN_ATTR_STR(ced, ced);
+
+ SHERPA_ONNX_ASSIGN_ATTR_INT32(num_threads, numThreads);
+
+ if (o.Has("debug") &&
+ (o.Get("debug").IsNumber() || o.Get("debug").IsBoolean())) {
+ if (o.Get("debug").IsBoolean()) {
+ c.debug = o.Get("debug").As().Value();
+ } else {
+ c.debug = o.Get("debug").As().Int32Value();
+ }
+ }
+ SHERPA_ONNX_ASSIGN_ATTR_STR(provider, provider);
+
+ return c;
+}
+
+static Napi::External<SherpaOnnxAudioTagging> CreateAudioTaggingWrapper(
+ const Napi::CallbackInfo &info) {
+ Napi::Env env = info.Env();
+ if (info.Length() != 1) {
+ std::ostringstream os;
+ os << "Expect only 1 argument. Given: " << info.Length();
+
+ Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();
+
+ return {};
+ }
+
+ if (!info[0].IsObject()) {
+ Napi::TypeError::New(env, "You should pass an object as the only argument.")
+ .ThrowAsJavaScriptException();
+
+ return {};
+ }
+
+ Napi::Object o = info[0].As<Napi::Object>();
+
+ SherpaOnnxAudioTaggingConfig c;
+ memset(&c, 0, sizeof(c));
+ c.model = GetAudioTaggingModelConfig(o);
+
+ SHERPA_ONNX_ASSIGN_ATTR_STR(labels, labels);
+ SHERPA_ONNX_ASSIGN_ATTR_INT32(top_k, topK);
+
+ const SherpaOnnxAudioTagging *at = SherpaOnnxCreateAudioTagging(&c);
+
+ SHERPA_ONNX_DELETE_C_STR(c.model.zipformer.model);
+ SHERPA_ONNX_DELETE_C_STR(c.model.ced);
+ SHERPA_ONNX_DELETE_C_STR(c.model.provider);
+ SHERPA_ONNX_DELETE_C_STR(c.labels);
+
+ if (!at) {
+ Napi::TypeError::New(env, "Please check your config!")
+ .ThrowAsJavaScriptException();
+
+ return {};
+ }
+
+ return Napi::External<SherpaOnnxAudioTagging>::New(
+ env, const_cast<SherpaOnnxAudioTagging *>(at),
+ [](Napi::Env env, SherpaOnnxAudioTagging *at) {
+ SherpaOnnxDestroyAudioTagging(at);
+ });
+}
+
+static Napi::External<SherpaOnnxOfflineStream>
+AudioTaggingCreateOfflineStreamWrapper(const Napi::CallbackInfo &info) {
+ Napi::Env env = info.Env();
+ if (info.Length() != 1) {
+ std::ostringstream os;
+ os << "Expect only 1 argument. Given: " << info.Length();
+
+ Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();
+
+ return {};
+ }
+
+ if (!info[0].IsExternal()) {
+ Napi::TypeError::New(
+ env, "You should pass an audio tagging pointer as the only argument")
+ .ThrowAsJavaScriptException();
+
+ return {};
+ }
+
+ SherpaOnnxAudioTagging *at =
+ info[0].As<Napi::External<SherpaOnnxAudioTagging>>().Data();
+
+ const SherpaOnnxOfflineStream *stream =
+ SherpaOnnxAudioTaggingCreateOfflineStream(at);
+
+ return Napi::External<SherpaOnnxOfflineStream>::New(
+ env, const_cast<SherpaOnnxOfflineStream *>(stream),
+ [](Napi::Env env, SherpaOnnxOfflineStream *stream) {
+ SherpaOnnxDestroyOfflineStream(stream);
+ });
+}
+
+static Napi::Object AudioTaggingComputeWrapper(const Napi::CallbackInfo &info) {
+ Napi::Env env = info.Env();
+ if (info.Length() != 3) {
+ std::ostringstream os;
+ os << "Expect only 3 arguments. Given: " << info.Length();
+
+ Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();
+
+ return {};
+ }
+
+ if (!info[0].IsExternal()) {
+ Napi::TypeError::New(
+ env, "You should pass an audio tagging pointer as the first argument")
+ .ThrowAsJavaScriptException();
+
+ return {};
+ }
+
+ if (!info[1].IsExternal()) {
+ Napi::TypeError::New(
+ env, "You should pass an offline stream pointer as the second argument")
+ .ThrowAsJavaScriptException();
+
+ return {};
+ }
+
+ if (!info[2].IsNumber()) {
+ Napi::TypeError::New(env,
+ "You should pass an integer as the third argument")
+ .ThrowAsJavaScriptException();
+
+ return {};
+ }
+
+ SherpaOnnxAudioTagging *at =
+ info[0].As<Napi::External<SherpaOnnxAudioTagging>>().Data();
+
+ SherpaOnnxOfflineStream *stream =
+ info[1].As<Napi::External<SherpaOnnxOfflineStream>>().Data();
+
+ int32_t top_k = info[2].As<Napi::Number>().Int32Value();
+
+ const SherpaOnnxAudioEvent *const *events =
+ SherpaOnnxAudioTaggingCompute(at, stream, top_k);
+
+ auto p = events;
+ int32_t k = 0;
+ while (p && *p) {
+ ++k;
+ ++p;
+ }
+
+ Napi::Array ans = Napi::Array::New(env, k);
+ for (uint32_t i = 0; i != k; ++i) {
+ Napi::Object obj = Napi::Object::New(env);
+ obj.Set(Napi::String::New(env, "name"),
+ Napi::String::New(env, events[i]->name));
+ obj.Set(Napi::String::New(env, "index"),
+ Napi::Number::New(env, events[i]->index));
+ obj.Set(Napi::String::New(env, "prob"),
+ Napi::Number::New(env, events[i]->prob));
+ ans[i] = obj;
+ }
+
+ SherpaOnnxAudioTaggingFreeResults(events);
+
+ return ans;
+}
+
+void InitAudioTagging(Napi::Env env, Napi::Object exports) {
+ exports.Set(Napi::String::New(env, "createAudioTagging"),
+ Napi::Function::New(env, CreateAudioTaggingWrapper));
+
+ exports.Set(Napi::String::New(env, "audioTaggingCreateOfflineStream"),
+ Napi::Function::New(env, AudioTaggingCreateOfflineStreamWrapper));
+
+ exports.Set(Napi::String::New(env, "audioTaggingCompute"),
+ Napi::Function::New(env, AudioTaggingComputeWrapper));
+}
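+
+// JavaScript/ArkTS side (a sketch only, not used by the build): the three
+// functions registered above can be called roughly as follows. The property
+// names match exactly what the wrappers above read; the file paths are
+// placeholders.
+//
+//   const at = createAudioTagging({
+//     model: {
+//       zipformer: {model: 'model.onnx'},  // placeholder path
+//       ced: '',
+//       numThreads: 1,
+//       debug: true,
+//       provider: 'cpu',
+//     },
+//     labels: 'labels.csv',                // placeholder path
+//     topK: 5,
+//   });
+//   const stream = audioTaggingCreateOfflineStream(at);
+//   // ... feed audio samples into `stream`, then:
+//   const events = audioTaggingCompute(at, stream, 5);
+//   // => [{name, index, prob}, ...]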
diff --git a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/include/sherpa-onnx/c-api/README.md b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/include/sherpa-onnx/c-api/README.md
new file mode 100644
index 0000000000..95744c221f
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/include/sherpa-onnx/c-api/README.md
@@ -0,0 +1,8 @@
+# Node
+
+[./c-api.h](./c-api.h) is a symbolic link to
+https://github.com/k2-fsa/sherpa-onnx/blob/master/sherpa-onnx/c-api/c-api.h
+
+If you are using Windows, then you need to manually replace this file with
+https://github.com/k2-fsa/sherpa-onnx/blob/master/sherpa-onnx/c-api/c-api.h
+since the symbolic link may not be available on Windows.
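+
+For example (an assumption for illustration, not part of the build scripts:
+fetch the raw file from GitHub while in this directory):
+
+```
+curl -SL -o c-api.h https://raw.githubusercontent.com/k2-fsa/sherpa-onnx/master/sherpa-onnx/c-api/c-api.h
+```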
diff --git a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/include/sherpa-onnx/c-api/c-api.h b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/include/sherpa-onnx/c-api/c-api.h
new file mode 120000
index 0000000000..d9c1b82e10
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/include/sherpa-onnx/c-api/c-api.h
@@ -0,0 +1 @@
+../../../../../../../../../sherpa-onnx/c-api/c-api.h
\ No newline at end of file
diff --git a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/keyword-spotting.cc b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/keyword-spotting.cc
new file mode 100644
index 0000000000..6562ef5a1a
--- /dev/null
+++ b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/keyword-spotting.cc
@@ -0,0 +1,310 @@
+// scripts/node-addon-api/src/keyword-spotting.cc
+//
+// Copyright (c) 2024 Xiaomi Corporation
+#include <sstream>