diff --git a/.github/scripts/test-audio-tagging.sh b/.github/scripts/test-audio-tagging.sh new file mode 100755 index 000000000..57e6663fe --- /dev/null +++ b/.github/scripts/test-audio-tagging.sh @@ -0,0 +1,32 @@ +#!/usr/bin/env bash + +set -ex + +log() { + # This function is from espnet + local fname=${BASH_SOURCE[1]##*/} + echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*" +} + +echo "EXE is $EXE" +echo "PATH: $PATH" + +which $EXE + +log "------------------------------------------------------------" +log "Run zipformer for audio tagging " +log "------------------------------------------------------------" + +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/audio-tagging-models/sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2 +tar xvf sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2 +rm sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2 +repo=sherpa-onnx-zipformer-audio-tagging-2024-04-09 +ls -lh $repo + +for w in 1.wav 2.wav 3.wav 4.wav; do + $EXE \ + --zipformer-model=$repo/model.onnx \ + --labels=$repo/class_labels_indices.csv \ + $repo/test_wavs/$w +done +rm -rf $repo diff --git a/.github/scripts/test-c-api.sh b/.github/scripts/test-c-api.sh index afc66c106..ce2f6350d 100755 --- a/.github/scripts/test-c-api.sh +++ b/.github/scripts/test-c-api.sh @@ -10,8 +10,34 @@ log() { echo "SLID_EXE is $SLID_EXE" echo "SID_EXE is $SID_EXE" +echo "AT_EXE is $AT_EXE" +echo "PUNCT_EXE is $PUNCT_EXE" echo "PATH: $PATH" +log "------------------------------------------------------------" +log "Test adding punctuations " +log "------------------------------------------------------------" + +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2 +ls -lh +tar xf sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2 +ls -lh sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12 +rm 
sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2 +$PUNCT_EXE +rm -rf sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12 + +log "------------------------------------------------------------" +log "Test audio tagging " +log "------------------------------------------------------------" + +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/audio-tagging-models/sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2 +tar xvf sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2 +rm sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2 + +$AT_EXE + +rm -rf sherpa-onnx-zipformer-audio-tagging-2024-04-09 + log "------------------------------------------------------------" log "Download whisper tiny for spoken language identification " diff --git a/.github/scripts/test-dot-net.sh b/.github/scripts/test-dot-net.sh index c5c6d5a40..b757781c3 100755 --- a/.github/scripts/test-dot-net.sh +++ b/.github/scripts/test-dot-net.sh @@ -2,7 +2,10 @@ cd dotnet-examples/ -cd spoken-language-identification +cd streaming-hlg-decoding/ +./run.sh + +cd ../spoken-language-identification ./run.sh cd ../online-decode-files diff --git a/.github/scripts/test-nodejs-npm.sh b/.github/scripts/test-nodejs-npm.sh index c205d3880..a27214383 100755 --- a/.github/scripts/test-nodejs-npm.sh +++ b/.github/scripts/test-nodejs-npm.sh @@ -58,14 +58,20 @@ rm sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2 node ./test-online-zipformer2-ctc.js rm -rf sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13 +curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2 +tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2 +rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2 +node ./test-online-zipformer2-ctc-hlg.js +rm -rf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18 + # offline tts curl -LS -O 
https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2 tar xf vits-piper-en_US-amy-low.tar.bz2 node ./test-offline-tts-en.js -rm vits-piper-en_US-amy-low.tar.bz2 +rm -rf vits-piper-en_US-amy-low* -curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-zh-aishell3.tar.bz2 -tar xvf vits-zh-aishell3.tar.bz2 +curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2 +tar xvf vits-icefall-zh-aishell3.tar.bz2 node ./test-offline-tts-zh.js -rm vits-zh-aishell3.tar.bz2 +rm -rf vits-icefall-zh-aishell3* diff --git a/.github/scripts/test-offline-punctuation.sh b/.github/scripts/test-offline-punctuation.sh new file mode 100755 index 000000000..bca0ede08 --- /dev/null +++ b/.github/scripts/test-offline-punctuation.sh @@ -0,0 +1,41 @@ +#!/usr/bin/env bash + +set -ex + +log() { + # This function is from espnet + local fname=${BASH_SOURCE[1]##*/} + echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*" +} + +echo "EXE is $EXE" +echo "PATH: $PATH" + +which $EXE + +log "------------------------------------------------------------" +log "Download the punctuation model " +log "------------------------------------------------------------" + +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2 +tar xvf sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2 +rm sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2 +repo=sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12 +ls -lh $repo + +$EXE \ + --debug=1 \ + --ct-transformer=$repo/model.onnx \ + "这是一个测试你好吗How are you我很好thank you are you ok谢谢你" + +$EXE \ + --debug=1 \ + --ct-transformer=$repo/model.onnx \ + "我们都是木头人不会说话不会动" + +$EXE \ + --debug=1 \ + --ct-transformer=$repo/model.onnx \ + "The African blogosphere is rapidly expanding 
bringing more voices online in the form of commentaries opinions analyses rants and poetry" + +rm -rf $repo diff --git a/.github/scripts/test-online-ctc.sh b/.github/scripts/test-online-ctc.sh index fa331be6f..7c631dd05 100755 --- a/.github/scripts/test-online-ctc.sh +++ b/.github/scripts/test-online-ctc.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash -set -e +set -ex log() { # This function is from espnet @@ -13,6 +13,26 @@ echo "PATH: $PATH" which $EXE +log "------------------------------------------------------------" +log "Run streaming Zipformer2 CTC HLG decoding " +log "------------------------------------------------------------" +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2 +tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2 +rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2 +repo=$PWD/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18 +ls -lh $repo +echo "pwd: $PWD" + +$EXE \ + --zipformer2-ctc-model=$repo/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx \ + --ctc-graph=$repo/HLG.fst \ + --tokens=$repo/tokens.txt \ + $repo/test_wavs/0.wav \ + $repo/test_wavs/1.wav \ + $repo/test_wavs/8k.wav + +rm -rf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18 + log "------------------------------------------------------------" log "Run streaming Zipformer2 CTC " log "------------------------------------------------------------" diff --git a/.github/scripts/test-python.sh b/.github/scripts/test-python.sh index b454d5310..fe0f568f0 100755 --- a/.github/scripts/test-python.sh +++ b/.github/scripts/test-python.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash -set -e +set -ex log() { # This function is from espnet @@ -8,6 +8,44 @@ log() { echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*" } +log "test offline punctuation" + +curl -SL -O 
https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2 +tar xvf sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2 +rm sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2 +repo=sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12 +ls -lh $repo + +python3 ./python-api-examples/add-punctuation.py + +rm -rf $repo + +log "test audio tagging" + +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/audio-tagging-models/sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2 +tar xvf sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2 +rm sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2 + python3 ./python-api-examples/audio-tagging-from-a-file.py +rm -rf sherpa-onnx-zipformer-audio-tagging-2024-04-09 + + +log "test streaming zipformer2 ctc HLG decoding" + +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2 +tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2 +rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2 +repo=sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18 + +python3 ./python-api-examples/online-zipformer-ctc-hlg-decode-file.py \ + --debug 1 \ + --tokens ./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt \ + --graph ./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst \ + --model ./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx \ + ./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/test_wavs/0.wav + +rm -rf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18 + + mkdir -p /tmp/icefall-models dir=/tmp/icefall-models diff --git a/.github/scripts/test-swift.sh b/.github/scripts/test-swift.sh index ec276c416..536c04c47 100755 --- a/.github/scripts/test-swift.sh +++ b/.github/scripts/test-swift.sh @@ -7,6 +7,10 
@@ echo "pwd: $PWD" cd swift-api-examples ls -lh +./run-streaming-hlg-decode-file.sh +rm ./streaming-hlg-decode-file +rm -rf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18 + ./run-spoken-language-identification.sh rm -rf sherpa-onnx-whisper* @@ -31,4 +35,5 @@ sed -i.bak '20d' ./decode-file.swift ./run-decode-file-non-streaming.sh + ls -lh diff --git a/.github/workflows/apk-asr.yaml b/.github/workflows/apk-asr.yaml new file mode 100644 index 000000000..3fdb2baac --- /dev/null +++ b/.github/workflows/apk-asr.yaml @@ -0,0 +1,174 @@ +name: apk-asr + +on: + push: + tags: + - '*' + + workflow_dispatch: + +concurrency: + group: apk-asr-${{ github.ref }} + cancel-in-progress: true + +permissions: + contents: write + +jobs: + apk_asr: + if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa' + runs-on: ${{ matrix.os }} + name: apk for asr ${{ matrix.index }}/${{ matrix.total }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest] + total: ["1"] + index: ["0"] + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + # https://github.com/actions/setup-java + - uses: actions/setup-java@v4 + with: + distribution: 'temurin' # See 'Supported distributions' for available options + java-version: '21' + + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2 + with: + key: ${{ matrix.os }}-android + + - name: Display NDK HOME + shell: bash + run: | + echo "ANDROID_NDK_LATEST_HOME: ${ANDROID_NDK_LATEST_HOME}" + ls -lh ${ANDROID_NDK_LATEST_HOME} + + - name: Install Python dependencies + shell: bash + run: | + python3 -m pip install --upgrade pip jinja2 + + - name: Setup build tool version variable + shell: bash + run: | + echo "---" + ls -lh /usr/local/lib/android/ + echo "---" + + ls -lh /usr/local/lib/android/sdk + echo "---" + + ls -lh /usr/local/lib/android/sdk/build-tools + echo "---" + + BUILD_TOOL_VERSION=$(ls /usr/local/lib/android/sdk/build-tools/ | tail -n 1) + echo "BUILD_TOOL_VERSION=$BUILD_TOOL_VERSION" >> 
$GITHUB_ENV + echo "Last build tool version is: $BUILD_TOOL_VERSION" + + - name: Generate build script + shell: bash + run: | + cd scripts/apk + + total=${{ matrix.total }} + index=${{ matrix.index }} + + ./generate-asr-apk-script.py --total $total --index $index + + chmod +x build-apk-asr.sh + mv -v ./build-apk-asr.sh ../.. + + - name: build APK + shell: bash + run: | + export CMAKE_CXX_COMPILER_LAUNCHER=ccache + export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH" + cmake --version + + export ANDROID_NDK=$ANDROID_NDK_LATEST_HOME + ./build-apk-asr.sh + + - name: Display APK + shell: bash + run: | + ls -lh ./apks/ + du -h -d1 . + + # https://github.com/marketplace/actions/sign-android-release + - uses: r0adkll/sign-android-release@v1 + name: Sign app APK + with: + releaseDirectory: ./apks + signingKeyBase64: ${{ secrets.ANDROID_SIGNING_KEY }} + alias: ${{ secrets.ANDROID_SIGNING_KEY_ALIAS }} + keyStorePassword: ${{ secrets.ANDROID_SIGNING_KEY_STORE_PASSWORD }} + env: + BUILD_TOOLS_VERSION: ${{ env.BUILD_TOOL_VERSION }} + + - name: Display APK after signing + shell: bash + run: | + ls -lh ./apks/ + du -h -d1 . + + - name: Rename APK after signing + shell: bash + run: | + cd apks + rm -fv signingKey.jks + rm -fv *.apk.idsig + rm -fv *-aligned.apk + + all_apks=$(ls -1 *-signed.apk) + echo "----" + echo $all_apks + echo "----" + for apk in ${all_apks[@]}; do + n=$(echo $apk | sed -e s/-signed//) + mv -v $apk $n + done + + cd .. + + ls -lh ./apks/ + du -h -d1 . + + - name: Display APK after rename + shell: bash + run: | + ls -lh ./apks/ + du -h -d1 . 
+ + - name: Publish to huggingface + env: + HF_TOKEN: ${{ secrets.HF_TOKEN }} + uses: nick-fields/retry@v3 + with: + max_attempts: 20 + timeout_seconds: 200 + shell: bash + command: | + git config --global user.email "csukuangfj@gmail.com" + git config --global user.name "Fangjun Kuang" + + rm -rf huggingface + export GIT_LFS_SKIP_SMUDGE=1 + + git clone https://huggingface.co/csukuangfj/sherpa-onnx-apk huggingface + cd huggingface + git fetch + git pull + git merge -m "merge remote" --ff origin main + + mkdir -p asr + cp -v ../apks/*.apk ./asr/ + git status + git lfs track "*.apk" + git add . + git commit -m "add more apks" + git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-apk main diff --git a/.github/workflows/apk-audio-tagging-wearos.yaml b/.github/workflows/apk-audio-tagging-wearos.yaml new file mode 100644 index 000000000..67fb1ea38 --- /dev/null +++ b/.github/workflows/apk-audio-tagging-wearos.yaml @@ -0,0 +1,174 @@ +name: apk-audio-tagging-wearos + +on: + push: + tags: + - '*' + + workflow_dispatch: + +concurrency: + group: apk-audio-tagging-wearos-${{ github.ref }} + cancel-in-progress: true + +permissions: + contents: write + +jobs: + apk_audio_tagging_wearos: + if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa' + runs-on: ${{ matrix.os }} + name: apk for WearOS ${{ matrix.index }}/${{ matrix.total }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest] + total: ["1"] + index: ["0"] + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + # https://github.com/actions/setup-java + - uses: actions/setup-java@v4 + with: + distribution: 'temurin' # See 'Supported distributions' for available options + java-version: '21' + + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2 + with: + key: ${{ matrix.os }}-android + + - name: Display NDK HOME + shell: bash + run: | + echo "ANDROID_NDK_LATEST_HOME: ${ANDROID_NDK_LATEST_HOME}" + ls -lh ${ANDROID_NDK_LATEST_HOME} + + - name: 
Install Python dependencies + shell: bash + run: | + python3 -m pip install --upgrade pip jinja2 + + - name: Setup build tool version variable + shell: bash + run: | + echo "---" + ls -lh /usr/local/lib/android/ + echo "---" + + ls -lh /usr/local/lib/android/sdk + echo "---" + + ls -lh /usr/local/lib/android/sdk/build-tools + echo "---" + + BUILD_TOOL_VERSION=$(ls /usr/local/lib/android/sdk/build-tools/ | tail -n 1) + echo "BUILD_TOOL_VERSION=$BUILD_TOOL_VERSION" >> $GITHUB_ENV + echo "Last build tool version is: $BUILD_TOOL_VERSION" + + - name: Generate build script + shell: bash + run: | + cd scripts/apk + + total=${{ matrix.total }} + index=${{ matrix.index }} + + ./generate-audio-tagging-apk-script.py --total $total --index $index + + chmod +x build-apk-audio-tagging-wearos.sh + mv -v ./build-apk-audio-tagging-wearos.sh ../.. + + - name: build APK + shell: bash + run: | + export CMAKE_CXX_COMPILER_LAUNCHER=ccache + export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH" + cmake --version + + export ANDROID_NDK=$ANDROID_NDK_LATEST_HOME + ./build-apk-audio-tagging-wearos.sh + + - name: Display APK + shell: bash + run: | + ls -lh ./apks/ + du -h -d1 . + + # https://github.com/marketplace/actions/sign-android-release + - uses: r0adkll/sign-android-release@v1 + name: Sign app APK + with: + releaseDirectory: ./apks + signingKeyBase64: ${{ secrets.ANDROID_SIGNING_KEY }} + alias: ${{ secrets.ANDROID_SIGNING_KEY_ALIAS }} + keyStorePassword: ${{ secrets.ANDROID_SIGNING_KEY_STORE_PASSWORD }} + env: + BUILD_TOOLS_VERSION: ${{ env.BUILD_TOOL_VERSION }} + + - name: Display APK for audio tagging after signing + shell: bash + run: | + ls -lh ./apks/ + du -h -d1 . 
+ + - name: Rename APK for audio tagging after signing + shell: bash + run: | + cd apks + rm -fv signingKey.jks + rm -fv *.apk.idsig + rm -fv *-aligned.apk + + all_apks=$(ls -1 *-signed.apk) + echo "----" + echo $all_apks + echo "----" + for apk in ${all_apks[@]}; do + n=$(echo $apk | sed -e s/-signed//) + mv -v $apk $n + done + + cd .. + + ls -lh ./apks/ + du -h -d1 . + + - name: Display APK after rename + shell: bash + run: | + ls -lh ./apks/ + du -h -d1 . + + - name: Publish to huggingface + env: + HF_TOKEN: ${{ secrets.HF_TOKEN }} + uses: nick-fields/retry@v3 + with: + max_attempts: 20 + timeout_seconds: 200 + shell: bash + command: | + git config --global user.email "csukuangfj@gmail.com" + git config --global user.name "Fangjun Kuang" + + rm -rf huggingface + export GIT_LFS_SKIP_SMUDGE=1 + + git clone https://huggingface.co/csukuangfj/sherpa-onnx-apk huggingface + cd huggingface + git fetch + git pull + git merge -m "merge remote" --ff origin main + + mkdir -p audio-tagging-wearos + cp -v ../apks/*.apk ./audio-tagging-wearos/ + git status + git lfs track "*.apk" + git add . 
+ git commit -m "add more apks" + git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-apk main diff --git a/.github/workflows/apk-audio-tagging.yaml b/.github/workflows/apk-audio-tagging.yaml new file mode 100644 index 000000000..8d18241fe --- /dev/null +++ b/.github/workflows/apk-audio-tagging.yaml @@ -0,0 +1,174 @@ +name: apk-audio-tagging + +on: + push: + tags: + - '*' + + workflow_dispatch: + +concurrency: + group: apk-audio-tagging-${{ github.ref }} + cancel-in-progress: true + +permissions: + contents: write + +jobs: + apk_audio_tagging: + if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa' + runs-on: ${{ matrix.os }} + name: apk for audio tagging ${{ matrix.index }}/${{ matrix.total }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest] + total: ["1"] + index: ["0"] + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + # https://github.com/actions/setup-java + - uses: actions/setup-java@v4 + with: + distribution: 'temurin' # See 'Supported distributions' for available options + java-version: '21' + + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2 + with: + key: ${{ matrix.os }}-android + + - name: Display NDK HOME + shell: bash + run: | + echo "ANDROID_NDK_LATEST_HOME: ${ANDROID_NDK_LATEST_HOME}" + ls -lh ${ANDROID_NDK_LATEST_HOME} + + - name: Install Python dependencies + shell: bash + run: | + python3 -m pip install --upgrade pip jinja2 + + - name: Setup build tool version variable + shell: bash + run: | + echo "---" + ls -lh /usr/local/lib/android/ + echo "---" + + ls -lh /usr/local/lib/android/sdk + echo "---" + + ls -lh /usr/local/lib/android/sdk/build-tools + echo "---" + + BUILD_TOOL_VERSION=$(ls /usr/local/lib/android/sdk/build-tools/ | tail -n 1) + echo "BUILD_TOOL_VERSION=$BUILD_TOOL_VERSION" >> $GITHUB_ENV + echo "Last build tool version is: $BUILD_TOOL_VERSION" + + - name: Generate build script + shell: bash + run: | + cd scripts/apk + + total=${{ 
matrix.total }} + index=${{ matrix.index }} + + ./generate-audio-tagging-apk-script.py --total $total --index $index + + chmod +x build-apk-audio-tagging.sh + mv -v ./build-apk-audio-tagging.sh ../.. + + - name: build APK + shell: bash + run: | + export CMAKE_CXX_COMPILER_LAUNCHER=ccache + export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH" + cmake --version + + export ANDROID_NDK=$ANDROID_NDK_LATEST_HOME + ./build-apk-audio-tagging.sh + + - name: Display APK + shell: bash + run: | + ls -lh ./apks/ + du -h -d1 . + + # https://github.com/marketplace/actions/sign-android-release + - uses: r0adkll/sign-android-release@v1 + name: Sign app APK + with: + releaseDirectory: ./apks + signingKeyBase64: ${{ secrets.ANDROID_SIGNING_KEY }} + alias: ${{ secrets.ANDROID_SIGNING_KEY_ALIAS }} + keyStorePassword: ${{ secrets.ANDROID_SIGNING_KEY_STORE_PASSWORD }} + env: + BUILD_TOOLS_VERSION: ${{ env.BUILD_TOOL_VERSION }} + + - name: Display APK for audio tagging after signing + shell: bash + run: | + ls -lh ./apks/ + du -h -d1 . + + - name: Rename APK for audio tagging after signing + shell: bash + run: | + cd apks + rm -fv signingKey.jks + rm -fv *.apk.idsig + rm -fv *-aligned.apk + + all_apks=$(ls -1 *-signed.apk) + echo "----" + echo $all_apks + echo "----" + for apk in ${all_apks[@]}; do + n=$(echo $apk | sed -e s/-signed//) + mv -v $apk $n + done + + cd .. + + ls -lh ./apks/ + du -h -d1 . + + - name: Display APK after rename + shell: bash + run: | + ls -lh ./apks/ + du -h -d1 . 
+ + - name: Publish to huggingface + env: + HF_TOKEN: ${{ secrets.HF_TOKEN }} + uses: nick-fields/retry@v3 + with: + max_attempts: 20 + timeout_seconds: 200 + shell: bash + command: | + git config --global user.email "csukuangfj@gmail.com" + git config --global user.name "Fangjun Kuang" + + rm -rf huggingface + export GIT_LFS_SKIP_SMUDGE=1 + + git clone https://huggingface.co/csukuangfj/sherpa-onnx-apk huggingface + cd huggingface + git fetch + git pull + git merge -m "merge remote" --ff origin main + + mkdir -p audio-tagging + cp -v ../apks/*.apk ./audio-tagging/ + git status + git lfs track "*.apk" + git add . + git commit -m "add more apks" + git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-apk main diff --git a/.github/workflows/apk-speaker-identification.yaml b/.github/workflows/apk-speaker-identification.yaml index 7c21ab686..9a4bc2194 100644 --- a/.github/workflows/apk-speaker-identification.yaml +++ b/.github/workflows/apk-speaker-identification.yaml @@ -18,7 +18,7 @@ jobs: apk_speaker_identification: if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa' runs-on: ${{ matrix.os }} - name: apk for tts ${{ matrix.index }}/${{ matrix.total }} + name: apk for speaker identification ${{ matrix.index }}/${{ matrix.total }} strategy: fail-fast: false matrix: diff --git a/.github/workflows/apk-spoken-language-identification.yaml b/.github/workflows/apk-spoken-language-identification.yaml new file mode 100644 index 000000000..39e1e1b7f --- /dev/null +++ b/.github/workflows/apk-spoken-language-identification.yaml @@ -0,0 +1,174 @@ +name: apk-slid + +on: + push: + tags: + - '*' + + workflow_dispatch: + +concurrency: + group: apk-slid-${{ github.ref }} + cancel-in-progress: true + +permissions: + contents: write + +jobs: + apk_slid: + if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa' + runs-on: ${{ matrix.os }} + name: apk for slid ${{ matrix.index }}/${{ matrix.total }} + strategy: 
+ fail-fast: false + matrix: + os: [ubuntu-latest] + total: ["1"] + index: ["0"] + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + # https://github.com/actions/setup-java + - uses: actions/setup-java@v4 + with: + distribution: 'temurin' # See 'Supported distributions' for available options + java-version: '21' + + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2 + with: + key: ${{ matrix.os }}-android + + - name: Display NDK HOME + shell: bash + run: | + echo "ANDROID_NDK_LATEST_HOME: ${ANDROID_NDK_LATEST_HOME}" + ls -lh ${ANDROID_NDK_LATEST_HOME} + + - name: Install Python dependencies + shell: bash + run: | + python3 -m pip install --upgrade pip jinja2 + + - name: Setup build tool version variable + shell: bash + run: | + echo "---" + ls -lh /usr/local/lib/android/ + echo "---" + + ls -lh /usr/local/lib/android/sdk + echo "---" + + ls -lh /usr/local/lib/android/sdk/build-tools + echo "---" + + BUILD_TOOL_VERSION=$(ls /usr/local/lib/android/sdk/build-tools/ | tail -n 1) + echo "BUILD_TOOL_VERSION=$BUILD_TOOL_VERSION" >> $GITHUB_ENV + echo "Last build tool version is: $BUILD_TOOL_VERSION" + + - name: Generate build script + shell: bash + run: | + cd scripts/apk + + total=${{ matrix.total }} + index=${{ matrix.index }} + + ./generate-slid-apk-script.py --total $total --index $index + + chmod +x build-apk-slid.sh + mv -v ./build-apk-slid.sh ../.. + + - name: build APK + shell: bash + run: | + export CMAKE_CXX_COMPILER_LAUNCHER=ccache + export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH" + cmake --version + + export ANDROID_NDK=$ANDROID_NDK_LATEST_HOME + ./build-apk-slid.sh + + - name: Display APK + shell: bash + run: | + ls -lh ./apks/ + du -h -d1 . 
+ + # https://github.com/marketplace/actions/sign-android-release + - uses: r0adkll/sign-android-release@v1 + name: Sign app APK + with: + releaseDirectory: ./apks + signingKeyBase64: ${{ secrets.ANDROID_SIGNING_KEY }} + alias: ${{ secrets.ANDROID_SIGNING_KEY_ALIAS }} + keyStorePassword: ${{ secrets.ANDROID_SIGNING_KEY_STORE_PASSWORD }} + env: + BUILD_TOOLS_VERSION: ${{ env.BUILD_TOOL_VERSION }} + + - name: Display APK for slid after signing + shell: bash + run: | + ls -lh ./apks/ + du -h -d1 . + + - name: Rename APK for slid after signing + shell: bash + run: | + cd apks + rm -fv signingKey.jks + rm -fv *.apk.idsig + rm -fv *-aligned.apk + + all_apks=$(ls -1 *-signed.apk) + echo "----" + echo $all_apks + echo "----" + for apk in ${all_apks[@]}; do + n=$(echo $apk | sed -e s/-signed//) + mv -v $apk $n + done + + cd .. + + ls -lh ./apks/ + du -h -d1 . + + - name: Display APK after rename + shell: bash + run: | + ls -lh ./apks/ + du -h -d1 . + + - name: Publish to huggingface + env: + HF_TOKEN: ${{ secrets.HF_TOKEN }} + uses: nick-fields/retry@v3 + with: + max_attempts: 20 + timeout_seconds: 200 + shell: bash + command: | + git config --global user.email "csukuangfj@gmail.com" + git config --global user.name "Fangjun Kuang" + + rm -rf huggingface + export GIT_LFS_SKIP_SMUDGE=1 + + git clone https://huggingface.co/csukuangfj/sherpa-onnx-apk huggingface + cd huggingface + git fetch + git pull + git merge -m "merge remote" --ff origin main + + mkdir -p slid + cp -v ../apks/*.apk ./slid/ + git status + git lfs track "*.apk" + git add . 
+ git commit -m "add more apks" + git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-apk main diff --git a/.github/workflows/arm-linux-gnueabihf.yaml b/.github/workflows/arm-linux-gnueabihf.yaml index 76ad9fcf8..762239880 100644 --- a/.github/workflows/arm-linux-gnueabihf.yaml +++ b/.github/workflows/arm-linux-gnueabihf.yaml @@ -173,6 +173,7 @@ jobs: rm -v $dst/lib/libasound.so rm -v $dst/lib/libonnxruntime.so rm -v $dst/lib/libsherpa-onnx-fst.so + rm -v $dst/lib/libsherpa-onnx-fstfar.so fi tree $dst diff --git a/.github/workflows/build-wheels-aarch64.yaml b/.github/workflows/build-wheels-aarch64.yaml index 17ff53d7e..4ecc7a415 100644 --- a/.github/workflows/build-wheels-aarch64.yaml +++ b/.github/workflows/build-wheels-aarch64.yaml @@ -59,8 +59,27 @@ jobs: run: | ls -lh ./wheelhouse/ + - name: Install patchelf + if: matrix.os == 'ubuntu-latest' + shell: bash + run: | + sudo apt-get update -q + sudo apt-get install -q -y patchelf + patchelf --help + + - name: Patch wheels + shell: bash + if: matrix.os == 'ubuntu-latest' + run: | + mkdir ./wheels + sudo ./scripts/wheel/patch_wheel.py --in-dir ./wheelhouse --out-dir ./wheels + + ls -lh ./wheels/ + rm -rf ./wheelhouse + mv ./wheels ./wheelhouse + - name: Publish to huggingface - if: matrix.python-version == 'cp38' && matrix.manylinux == 'manylinux2014' + if: (matrix.python-version == 'cp38' || matrix.python-version == 'cp39' ) && matrix.manylinux == 'manylinux2014' env: HF_TOKEN: ${{ secrets.HF_TOKEN }} uses: nick-fields/retry@v3 diff --git a/.github/workflows/export-ced-to-onnx.yaml b/.github/workflows/export-ced-to-onnx.yaml new file mode 100644 index 000000000..506abe513 --- /dev/null +++ b/.github/workflows/export-ced-to-onnx.yaml @@ -0,0 +1,78 @@ +name: export-ced-to-onnx + +on: + workflow_dispatch: + +concurrency: + group: export-ced-to-onnx-${{ github.ref }} + cancel-in-progress: true + +jobs: + export-ced-to-onnx: + if: github.repository_owner == 'k2-fsa' || github.repository_owner == 
'csukuangfj' + name: export ced + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest] + python-version: ["3.8"] + + steps: + - uses: actions/checkout@v4 + + - name: Setup Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Run + shell: bash + run: | + cd scripts/ced + ./run.sh + + - name: Release + uses: svenstaro/upload-release-action@v2 + with: + file_glob: true + file: ./*.tar.bz2 + overwrite: true + repo_name: k2-fsa/sherpa-onnx + repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }} + tag: audio-tagging-models + + - name: Publish to huggingface + env: + HF_TOKEN: ${{ secrets.HF_TOKEN }} + uses: nick-fields/retry@v3 + with: + max_attempts: 20 + timeout_seconds: 200 + shell: bash + command: | + git config --global user.email "csukuangfj@gmail.com" + git config --global user.name "Fangjun Kuang" + + models=( + tiny + mini + small + base + ) + + for m in ${models[@]}; do + rm -rf huggingface + export GIT_LFS_SKIP_SMUDGE=1 + d=sherpa-onnx-ced-$m-audio-tagging-2024-04-19 + git clone https://huggingface.co/k2-fsa/$d huggingface + mv -v $d/* huggingface + cd huggingface + git lfs track "*.onnx" + git status + git add . + git status + git commit -m "first commit" + git push https://csukuangfj:$HF_TOKEN@huggingface.co/k2-fsa/$d main + cd .. 
+ done diff --git a/.github/workflows/linux.yaml b/.github/workflows/linux.yaml index b1f3fa91b..260b99af5 100644 --- a/.github/workflows/linux.yaml +++ b/.github/workflows/linux.yaml @@ -15,6 +15,8 @@ on: - '.github/scripts/test-offline-ctc.sh' - '.github/scripts/test-online-ctc.sh' - '.github/scripts/test-offline-tts.sh' + - '.github/scripts/test-audio-tagging.sh' + - '.github/scripts/test-offline-punctuation.sh' - 'CMakeLists.txt' - 'cmake/**' - 'sherpa-onnx/csrc/*' @@ -32,6 +34,8 @@ on: - '.github/scripts/test-offline-ctc.sh' - '.github/scripts/test-online-ctc.sh' - '.github/scripts/test-offline-tts.sh' + - '.github/scripts/test-audio-tagging.sh' + - '.github/scripts/test-offline-punctuation.sh' - 'CMakeLists.txt' - 'cmake/**' - 'sherpa-onnx/csrc/*' @@ -122,7 +126,15 @@ jobs: - uses: actions/upload-artifact@v4 with: name: release-${{ matrix.build_type }}-with-shared-lib-${{ matrix.shared_lib }}-with-tts-${{ matrix.with_tts }} - path: build/bin/* + path: install/* + + - name: Test offline punctuation + shell: bash + run: | + export PATH=$PWD/build/bin:$PATH + export EXE=sherpa-onnx-offline-punctuation + + .github/scripts/test-offline-punctuation.sh - name: Test C API shell: bash @@ -130,9 +142,28 @@ jobs: export PATH=$PWD/build/bin:$PATH export SLID_EXE=spoken-language-identification-c-api export SID_EXE=speaker-identification-c-api + export AT_EXE=audio-tagging-c-api + export PUNCT_EXE=add-punctuation-c-api .github/scripts/test-c-api.sh + - name: Test Audio tagging + shell: bash + run: | + export PATH=$PWD/build/bin:$PATH + export EXE=sherpa-onnx-offline-audio-tagging + + .github/scripts/test-audio-tagging.sh + + - name: Test online CTC + shell: bash + run: | + export PATH=$PWD/build/bin:$PATH + export EXE=sherpa-onnx + + .github/scripts/test-online-ctc.sh + + - name: Test spoken language identification (C++ API) shell: bash run: | @@ -149,13 +180,6 @@ jobs: .github/scripts/test-kws.sh - - name: Test online CTC - shell: bash - run: | - export 
PATH=$PWD/build/bin:$PATH - export EXE=sherpa-onnx - - .github/scripts/test-online-ctc.sh - name: Test offline Whisper if: matrix.build_type != 'Debug' diff --git a/.github/workflows/macos.yaml b/.github/workflows/macos.yaml index 0d0980619..e70ff11e1 100644 --- a/.github/workflows/macos.yaml +++ b/.github/workflows/macos.yaml @@ -15,6 +15,8 @@ on: - '.github/scripts/test-offline-ctc.sh' - '.github/scripts/test-offline-tts.sh' - '.github/scripts/test-online-ctc.sh' + - '.github/scripts/test-audio-tagging.sh' + - '.github/scripts/test-offline-punctuation.sh' - 'CMakeLists.txt' - 'cmake/**' - 'sherpa-onnx/csrc/*' @@ -31,6 +33,8 @@ on: - '.github/scripts/test-offline-ctc.sh' - '.github/scripts/test-offline-tts.sh' - '.github/scripts/test-online-ctc.sh' + - '.github/scripts/test-audio-tagging.sh' + - '.github/scripts/test-offline-punctuation.sh' - 'CMakeLists.txt' - 'cmake/**' - 'sherpa-onnx/csrc/*' @@ -103,15 +107,33 @@ jobs: otool -L build/bin/sherpa-onnx otool -l build/bin/sherpa-onnx + - name: Test offline punctuation + shell: bash + run: | + export PATH=$PWD/build/bin:$PATH + export EXE=sherpa-onnx-offline-punctuation + + .github/scripts/test-offline-punctuation.sh + - name: Test C API shell: bash run: | export PATH=$PWD/build/bin:$PATH export SLID_EXE=spoken-language-identification-c-api export SID_EXE=speaker-identification-c-api + export AT_EXE=audio-tagging-c-api + export PUNCT_EXE=add-punctuation-c-api .github/scripts/test-c-api.sh + - name: Test Audio tagging + shell: bash + run: | + export PATH=$PWD/build/bin:$PATH + export EXE=sherpa-onnx-offline-audio-tagging + + .github/scripts/test-audio-tagging.sh + - name: Test spoken language identification (C++ API) shell: bash run: | diff --git a/.github/workflows/riscv64-linux.yaml b/.github/workflows/riscv64-linux.yaml index b1008b514..a5869a4b0 100644 --- a/.github/workflows/riscv64-linux.yaml +++ b/.github/workflows/riscv64-linux.yaml @@ -211,6 +211,7 @@ jobs: rm -fv $dst/lib/libasound.so rm -fv 
$dst/lib/libonnxruntime.so rm -fv $dst/lib/libsherpa-onnx-fst.so + rm -fv $dst/lib/libsherpa-onnx-fstfar.so fi tree $dst diff --git a/.github/workflows/run-python-test-macos.yaml b/.github/workflows/run-python-test-macos.yaml new file mode 100644 index 000000000..af973ae31 --- /dev/null +++ b/.github/workflows/run-python-test-macos.yaml @@ -0,0 +1,105 @@ +name: run-python-test-macos + +on: + push: + branches: + - master + paths: + - '.github/workflows/run-python-test-macos.yaml' + - '.github/scripts/test-python.sh' + - 'CMakeLists.txt' + - 'cmake/**' + - 'sherpa-onnx/csrc/*' + - 'python-api-examples/**' + pull_request: + branches: + - master + paths: + - '.github/workflows/run-python-test-macos.yaml' + - '.github/scripts/test-python.sh' + - 'CMakeLists.txt' + - 'cmake/**' + - 'sherpa-onnx/csrc/*' + - 'python-api-examples/**' + workflow_dispatch: + +concurrency: + group: run-python-test-macos-${{ github.ref }} + cancel-in-progress: true + +permissions: + contents: read + +jobs: + run-python-test: + name: ${{ matrix.os }} ${{ matrix.python-version }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + # See https://github.com/actions/runner-images + # macos-14 is for arm64 + # macos-14-large is for x64 + include: + - os: macos-11 + python-version: "3.7" + + - os: macos-12 + python-version: "3.8" + + - os: macos-13 + python-version: "3.9" + - os: macos-13 + python-version: "3.10" + - os: macos-13 + python-version: "3.11" + + - os: macos-14 + python-version: "3.12" + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Display OS version + shell: bash + run: | + uname -a + sw_vers + + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2 + with: + key: ${{ matrix.os }}-python-${{ matrix.python-version }} + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Install Python dependencies + shell: bash + run: | + python3 -m pip install --upgrade pip numpy 
pypinyin sentencepiece>=0.1.96 soundfile + + - name: Install sherpa-onnx + shell: bash + run: | + export CMAKE_CXX_COMPILER_LAUNCHER=ccache + export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH" + cmake --version + + python3 -m pip install --verbose . + + - name: Test sherpa-onnx + shell: bash + run: | + export OS=${{ matrix.os }} + .github/scripts/test-python.sh + .github/scripts/test-speaker-recognition-python.sh + + - uses: actions/upload-artifact@v4 + with: + name: tts-generated-test-files-${{ matrix.os }}-${{ matrix.python-version }} + path: tts diff --git a/.github/workflows/run-python-test.yaml b/.github/workflows/run-python-test.yaml index ec6a10916..c7433e0e9 100644 --- a/.github/workflows/run-python-test.yaml +++ b/.github/workflows/run-python-test.yaml @@ -30,9 +30,6 @@ concurrency: permissions: contents: read -env: - SCCACHE_GHA_ENABLED: "true" - jobs: run-python-test: name: ${{ matrix.os }} ${{ matrix.python-version }} @@ -40,33 +37,37 @@ jobs: strategy: fail-fast: false matrix: - os: [ubuntu-latest, macos-latest, windows-latest] - python-version: ["3.7", "3.8", "3.9", "3.10","3.11", "3.12"] - exclude: - - os: macos-latest + include: + - os: ubuntu-20.04 + python-version: "3.7" + - os: ubuntu-20.04 + python-version: "3.8" + - os: ubuntu-20.04 python-version: "3.9" - - os: macos-latest + + - os: ubuntu-22.04 python-version: "3.10" - # - os: windows-latest - # python-version: "3.12" + - os: ubuntu-22.04 + python-version: "3.11" + - os: ubuntu-22.04 + python-version: "3.12" steps: - uses: actions/checkout@v4 with: fetch-depth: 0 + - name: Display OS version + shell: bash + run: | + uname -a + find "/etc" -maxdepth 1 -type f -name "*version" -exec head -n 100 {} \; + - name: ccache - if: matrix.os != 'windows-latest' uses: hendrikmuhs/ccache-action@v1.2 with: key: ${{ matrix.os }}-python-${{ matrix.python-version }} - - name: Run sccache-cache - if: matrix.os == 'windows-latest' - uses: mozilla-actions/sccache-action@v0.0.4 - with: - version: 
"v0.7.4" - - name: Setup Python uses: actions/setup-python@v5 with: @@ -76,31 +77,108 @@ jobs: shell: bash run: | python3 -m pip install --upgrade pip numpy pypinyin sentencepiece>=0.1.96 soundfile + python3 -m pip install wheel twine setuptools - - name: Install sherpa-onnx - if: matrix.os != 'windows-latest' + - name: Install ninja + shell: bash + run: | + sudo apt-get install ninja-build + + - name: Display ninja version + shell: bash + run: | + ninja --version + ninja --help || true + which ninja + + - name: Display site packages dir + shell: bash + run: | + python3 -c 'import site; print(site.getsitepackages())' + p=$(python3 -c 'import site; print(site.getsitepackages())') + echo "p: $p" + + - name: Install patchelf + shell: bash + run: | + sudo apt-get update -q + sudo apt-get install -q -y patchelf + patchelf --help + + - name: Build sherpa-onnx shell: bash run: | export CMAKE_CXX_COMPILER_LAUNCHER=ccache export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH" cmake --version + export SHERPA_ONNX_CMAKE_ARGS="-G Ninja -DCMAKE_BUILD_TYPE=Release" + export SHERPA_ONNX_MAKE_ARGS="-j 6" - python3 -m pip install --verbose . + python3 setup.py bdist_wheel + + - name: Patch wheels + shell: bash + run: | + mkdir ./dist2 + sudo ./scripts/wheel/patch_wheel.py --in-dir ./dist --out-dir ./dist2 - name: Install sherpa-onnx - if: matrix.os == 'windows-latest' shell: bash run: | - cmake --version - export SHERPA_ONNX_CMAKE_ARGS="-DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache" - echo "SHERPA_ONNX_CMAKE_ARGS: ${SHERPA_ONNX_CMAKE_ARGS}" + ls -lh dist2 + + python3 -m pip install ./dist2/*.whl + + - uses: actions/upload-artifact@v4 + with: + name: ${{ matrix.os }}-${{ matrix.python-version }}-whl + path: ./dist - python3 -m pip install --verbose . 
+ - uses: actions/upload-artifact@v4 + with: + name: ${{ matrix.os }}-${{ matrix.python-version }}-whl-patched + path: ./dist2 + + - name: Show dependencies + shell: bash + run: | + cd dist + mkdir t + cd t + unzip ../*.whl + readelf -d _sherpa_onnx*.so + + echo "----" + + readelf -d sherpa_onnx-*.data/data/bin/sherpa-onnx + + - name: Show dependencies (patched) + shell: bash + run: | + cd dist2 + mkdir t + cd t + unzip ../*.whl + readelf -d _sherpa_onnx*.so + + echo "----" + + readelf -d sherpa_onnx-*.data/data/bin/sherpa-onnx - name: Test sherpa-onnx shell: bash run: | export OS=${{ matrix.os }} + + p=$(python3 -c 'import site; print(site.getsitepackages()[0])') + echo "p: $p" + p=$p/sherpa_onnx/lib + echo "p: $p" + ls -lh $p + + export LD_LIBRARY_PATH=$p:$LD_LIBRARY_PATH + echo "LD_LIBRARY_PATH: $LD_LIBRARY_PATH" + .github/scripts/test-python.sh .github/scripts/test-speaker-recognition-python.sh diff --git a/.github/workflows/test-build-wheel.yaml b/.github/workflows/test-build-wheel.yaml index 54c265bbd..2c6070693 100644 --- a/.github/workflows/test-build-wheel.yaml +++ b/.github/workflows/test-build-wheel.yaml @@ -35,8 +35,50 @@ jobs: strategy: fail-fast: false matrix: - os: [ubuntu-latest, macos-latest, windows-latest] - python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] + # See https://github.com/actions/runner-images + include: + - os: ubuntu-20.04 + python-version: "3.7" + - os: ubuntu-20.04 + python-version: "3.8" + - os: ubuntu-20.04 + python-version: "3.9" + - os: ubuntu-22.04 + python-version: "3.10" + - os: ubuntu-22.04 + python-version: "3.11" + - os: ubuntu-22.04 + python-version: "3.12" + + - os: macos-11 + python-version: "3.7" + + - os: macos-12 + python-version: "3.8" + + - os: macos-13 + python-version: "3.9" + - os: macos-13 + python-version: "3.10" + - os: macos-13 + python-version: "3.11" + + - os: macos-14 + python-version: "3.12" + + - os: windows-2019 + python-version: "3.7" + - os: windows-2019 + python-version: "3.8" + - os: 
windows-2019 + python-version: "3.9" + + - os: windows-2022 + python-version: "3.10" + - os: windows-2022 + python-version: "3.11" + - os: windows-2022 + python-version: "3.12" steps: - uses: actions/checkout@v4 @@ -89,8 +131,8 @@ jobs: export PATH=/c/hostedtoolcache/windows/Python/3.8.10/x64/bin:$PATH export PATH=/c/hostedtoolcache/windows/Python/3.9.13/x64/bin:$PATH export PATH=/c/hostedtoolcache/windows/Python/3.10.11/x64/bin:$PATH - export PATH=/c/hostedtoolcache/windows/Python/3.11.8/x64/bin:$PATH - export PATH=/c/hostedtoolcache/windows/Python/3.12.2/x64/bin:$PATH + export PATH=/c/hostedtoolcache/windows/Python/3.11.9/x64/bin:$PATH + export PATH=/c/hostedtoolcache/windows/Python/3.12.3/x64/bin:$PATH which sherpa-onnx sherpa-onnx --help diff --git a/.github/workflows/test-dot-net.yaml b/.github/workflows/test-dot-net.yaml index aa8e7b1e3..243b4f1a5 100644 --- a/.github/workflows/test-dot-net.yaml +++ b/.github/workflows/test-dot-net.yaml @@ -178,6 +178,7 @@ jobs: cp -v scripts/dotnet/examples/online-decode-files.csproj dotnet-examples/online-decode-files/ cp -v scripts/dotnet/examples/speech-recognition-from-microphone.csproj dotnet-examples/speech-recognition-from-microphone/ cp -v scripts/dotnet/examples/spoken-language-identification.csproj dotnet-examples/spoken-language-identification/ + cp -v scripts/dotnet/examples/streaming-hlg-decoding.csproj dotnet-examples/streaming-hlg-decoding ls -lh /tmp diff --git a/.github/workflows/test-go-package.yaml b/.github/workflows/test-go-package.yaml index d761be4f8..f76157ab2 100644 --- a/.github/workflows/test-go-package.yaml +++ b/.github/workflows/test-go-package.yaml @@ -66,12 +66,77 @@ jobs: run: | gcc --version - - name: Test speaker identification + - name: Test streaming HLG decoding (Linux/macOS) + if: matrix.os != 'windows-latest' + shell: bash + run: | + cd go-api-examples/streaming-hlg-decoding/ + ./run.sh + + - name: Test speaker identification (Linux/macOS) + if: matrix.os != 'windows-latest' shell: 
bash run: | cd go-api-examples/speaker-identification ./run.sh + - name: Test speaker identification (Win64) + if: matrix.os == 'windows-latest' && matrix.arch == 'x64' + shell: bash + run: | + cd go-api-examples/speaker-identification + go mod tidy + cat go.mod + go build + + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_campplus_sv_zh-cn_16k-common.onnx + git clone https://github.com/csukuangfj/sr-data + ls -lh + echo $PWD + ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/ + ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/* + cp -v /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/sherpa-onnx-go-windows*/lib/x86_64-pc-windows-gnu/*.dll . + ls -lh + go mod tidy + go build + go run ./main.go + + - name: Test speaker identification (Win32) + if: matrix.os == 'windows-latest' && matrix.arch == 'x86' + shell: bash + run: | + cd go-api-examples/speaker-identification + go mod tidy + cat go.mod + ls -lh + + go env GOARCH + go env + echo "------------------------------" + go env -w GOARCH=386 + go env -w CGO_ENABLED=1 + go env + + go clean + go build + + echo $PWD + + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_campplus_sv_zh-cn_16k-common.onnx + git clone https://github.com/csukuangfj/sr-data + ls -lh + echo $PWD + ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/ + ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/* + cp -v /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/sherpa-onnx-go-windows*/lib/i686-pc-windows-gnu/*.dll . 
+ ls -lh + go mod tidy + go build + go run ./main.go + + rm -rf sr-data + rm -rf *.onnx + - name: Test non-streaming TTS (Linux/macOS) if: matrix.os != 'windows-latest' shell: bash @@ -96,10 +161,12 @@ jobs: ./run-vits-vctk.sh rm -rf vits-vctk - echo "Test vits-zh-aishell3" - git clone https://huggingface.co/csukuangfj/vits-zh-aishell3 + echo "Test vits-icefall-zh-aishell3" + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2 + tar xvf vits-icefall-zh-aishell3.tar.bz2 + rm vits-icefall-zh-aishell3.tar.bz2 ./run-vits-zh-aishell3.sh - rm -rf vits-zh-aishell3 + rm -rf vits-icefall-zh-aishell3* echo "Test vits-piper-en_US-lessac-medium" git clone https://huggingface.co/csukuangfj/vits-piper-en_US-lessac-medium diff --git a/.github/workflows/test-go.yaml b/.github/workflows/test-go.yaml index 298403ecf..e7bf9cfde 100644 --- a/.github/workflows/test-go.yaml +++ b/.github/workflows/test-go.yaml @@ -74,6 +74,12 @@ jobs: go mod tidy go build + - name: Test streaming HLG decoding + shell: bash + run: | + cd scripts/go/_internal/streaming-hlg-decoding/ + ./run.sh + - name: Test speaker identification shell: bash run: | @@ -105,9 +111,11 @@ jobs: rm -rf vits-vctk echo "Test vits-zh-aishell3" - git clone https://huggingface.co/csukuangfj/vits-zh-aishell3 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2 + tar xvf vits-icefall-zh-aishell3.tar.bz2 + rm vits-icefall-zh-aishell3.tar.bz2 ./run-vits-zh-aishell3.sh - rm -rf vits-zh-aishell3 + rm -rf vits-icefall-zh-aishell3 echo "Test vits-piper-en_US-lessac-medium" git clone https://huggingface.co/csukuangfj/vits-piper-en_US-lessac-medium diff --git a/.github/workflows/test-pip-install.yaml b/.github/workflows/test-pip-install.yaml index c79f3a8b3..998469cef 100644 --- a/.github/workflows/test-pip-install.yaml +++ b/.github/workflows/test-pip-install.yaml @@ -28,11 +28,50 @@ jobs: strategy: fail-fast: false 
matrix: - os: [ubuntu-latest, windows-latest, macos-latest] - python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"] - exclude: - - os: macos-latest + # See https://github.com/actions/runner-images + include: + - os: ubuntu-20.04 python-version: "3.7" + - os: ubuntu-20.04 + python-version: "3.8" + - os: ubuntu-20.04 + python-version: "3.9" + - os: ubuntu-22.04 + python-version: "3.10" + - os: ubuntu-22.04 + python-version: "3.11" + - os: ubuntu-22.04 + python-version: "3.12" + + - os: macos-11 + python-version: "3.7" + + - os: macos-12 + python-version: "3.8" + + - os: macos-13 + python-version: "3.9" + - os: macos-13 + python-version: "3.10" + - os: macos-13 + python-version: "3.11" + + - os: macos-14 + python-version: "3.12" + + - os: windows-2019 + python-version: "3.7" + - os: windows-2019 + python-version: "3.8" + - os: windows-2019 + python-version: "3.9" + + - os: windows-2022 + python-version: "3.10" + - os: windows-2022 + python-version: "3.11" + - os: windows-2022 + python-version: "3.12" steps: - uses: actions/checkout@v4 @@ -67,8 +106,8 @@ jobs: export PATH=/c/hostedtoolcache/windows/Python/3.8.10/x64/bin:$PATH export PATH=/c/hostedtoolcache/windows/Python/3.9.13/x64/bin:$PATH export PATH=/c/hostedtoolcache/windows/Python/3.10.11/x64/bin:$PATH - export PATH=/c/hostedtoolcache/windows/Python/3.11.8/x64/bin:$PATH - export PATH=/c/hostedtoolcache/windows/Python/3.12.2/x64/bin:$PATH + export PATH=/c/hostedtoolcache/windows/Python/3.11.9/x64/bin:$PATH + export PATH=/c/hostedtoolcache/windows/Python/3.12.3/x64/bin:$PATH sherpa-onnx --help sherpa-onnx-keyword-spotter --help diff --git a/.github/workflows/test-python-offline-websocket-server.yaml b/.github/workflows/test-python-offline-websocket-server.yaml index 349e0b61d..18183f482 100644 --- a/.github/workflows/test-python-offline-websocket-server.yaml +++ b/.github/workflows/test-python-offline-websocket-server.yaml @@ -35,8 +35,8 @@ jobs: strategy: fail-fast: false matrix: - os: [ubuntu-latest, 
windows-latest, macos-latest] - python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"] + os: [ubuntu-20.04, ubuntu-22.04, windows-latest, macos-latest, macos-14] + python-version: ["3.10"] model_type: ["transducer", "paraformer", "nemo_ctc", "whisper", "tdnn"] steps: diff --git a/.github/workflows/test-python-online-websocket-server.yaml b/.github/workflows/test-python-online-websocket-server.yaml index 1f4096c0f..888af36ae 100644 --- a/.github/workflows/test-python-online-websocket-server.yaml +++ b/.github/workflows/test-python-online-websocket-server.yaml @@ -35,8 +35,8 @@ jobs: strategy: fail-fast: false matrix: - os: [ubuntu-latest, windows-latest, macos-latest] - python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"] + os: [ubuntu-20.04, ubuntu-22.04, windows-latest, macos-latest, macos-14] + python-version: ["3.10"] model_type: ["transducer", "paraformer", "zipformer2-ctc"] steps: diff --git a/.github/workflows/wasm-simd-hf-space-zh-en-asr-paraformer.yaml b/.github/workflows/wasm-simd-hf-space-zh-en-asr-paraformer.yaml index cb92b7a16..6fdd35cca 100644 --- a/.github/workflows/wasm-simd-hf-space-zh-en-asr-paraformer.yaml +++ b/.github/workflows/wasm-simd-hf-space-zh-en-asr-paraformer.yaml @@ -40,7 +40,7 @@ jobs: ls -lh echo "----------" - wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2 tar xvf sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2 rm sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2 diff --git a/.github/workflows/windows-x64.yaml b/.github/workflows/windows-x64.yaml index ea7cf7458..d160e475e 100644 --- a/.github/workflows/windows-x64.yaml +++ b/.github/workflows/windows-x64.yaml @@ -14,6 +14,8 @@ on: - '.github/scripts/test-offline-ctc.sh' - '.github/scripts/test-online-ctc.sh' - '.github/scripts/test-offline-tts.sh' + 
- '.github/scripts/test-audio-tagging.sh' + - '.github/scripts/test-offline-punctuation.sh' - 'CMakeLists.txt' - 'cmake/**' - 'sherpa-onnx/csrc/*' @@ -28,6 +30,8 @@ on: - '.github/scripts/test-offline-ctc.sh' - '.github/scripts/test-online-ctc.sh' - '.github/scripts/test-offline-tts.sh' + - '.github/scripts/test-audio-tagging.sh' + - '.github/scripts/test-offline-punctuation.sh' - 'CMakeLists.txt' - 'cmake/**' - 'sherpa-onnx/csrc/*' @@ -70,15 +74,33 @@ jobs: ls -lh ./bin/Release/sherpa-onnx.exe + - name: Test offline punctuation + shell: bash + run: | + export PATH=$PWD/build/bin/Release:$PATH + export EXE=sherpa-onnx-offline-punctuation.exe + + .github/scripts/test-offline-punctuation.sh + - name: Test C API shell: bash run: | export PATH=$PWD/build/bin/Release:$PATH export SLID_EXE=spoken-language-identification-c-api.exe export SID_EXE=speaker-identification-c-api.exe + export AT_EXE=audio-tagging-c-api.exe + export PUNCT_EXE=add-punctuation-c-api.exe .github/scripts/test-c-api.sh + - name: Test Audio tagging + shell: bash + run: | + export PATH=$PWD/build/bin/Release:$PATH + export EXE=sherpa-onnx-offline-audio-tagging.exe + + .github/scripts/test-audio-tagging.sh + - name: Test spoken language identification (C++ API) shell: bash run: | diff --git a/.github/workflows/windows-x86.yaml b/.github/workflows/windows-x86.yaml index 69ad7cd97..c476ab107 100644 --- a/.github/workflows/windows-x86.yaml +++ b/.github/workflows/windows-x86.yaml @@ -14,6 +14,8 @@ on: - '.github/scripts/test-offline-ctc.sh' - '.github/scripts/test-offline-tts.sh' - '.github/scripts/test-online-ctc.sh' + - '.github/scripts/test-audio-tagging.sh' + - '.github/scripts/test-offline-punctuation.sh' - 'CMakeLists.txt' - 'cmake/**' - 'sherpa-onnx/csrc/*' @@ -28,6 +30,8 @@ on: - '.github/scripts/test-offline-ctc.sh' - '.github/scripts/test-offline-tts.sh' - '.github/scripts/test-online-ctc.sh' + - '.github/scripts/test-audio-tagging.sh' + - '.github/scripts/test-offline-punctuation.sh' - 
'CMakeLists.txt' - 'cmake/**' - 'sherpa-onnx/csrc/*' @@ -70,11 +74,22 @@ jobs: ls -lh ./bin/Release/sherpa-onnx.exe + - name: Test offline punctuation + shell: bash + run: | + export PATH=$PWD/build/bin/Release:$PATH + export EXE=sherpa-onnx-offline-punctuation.exe + + .github/scripts/test-offline-punctuation.sh + - name: Test spoken language identification (C API) shell: bash run: | export PATH=$PWD/build/bin/Release:$PATH export SLID_EXE=spoken-language-identification-c-api.exe + export SID_EXE=speaker-identification-c-api.exe + export AT_EXE=audio-tagging-c-api.exe + export PUNCT_EXE=add-punctuation-c-api.exe .github/scripts/test-c-api.sh @@ -85,6 +100,13 @@ jobs: # export EXE=sherpa-onnx-offline-language-identification.exe # # .github/scripts/test-spoken-language-identification.sh + - name: Test Audio tagging + shell: bash + run: | + export PATH=$PWD/build/bin/Release:$PATH + export EXE=sherpa-onnx-offline-audio-tagging.exe + + .github/scripts/test-audio-tagging.sh - name: Test online CTC shell: bash diff --git a/.gitignore b/.gitignore index c2c874243..83ca941d2 100644 --- a/.gitignore +++ b/.gitignore @@ -90,3 +90,9 @@ sherpa-onnx-paraformer-trilingual-zh-cantonese-en sr-data *xcworkspace/xcuserdata/* +vits-icefall-* +sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12 +spoken-language-identification-test-wavs +my-release-key* +vits-zh-hf-fanchen-C +sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01 diff --git a/CMakeLists.txt b/CMakeLists.txt index 1784687dc..f2d2c72b0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,7 +1,7 @@ cmake_minimum_required(VERSION 3.13 FATAL_ERROR) project(sherpa-onnx) -set(SHERPA_ONNX_VERSION "1.9.15") +set(SHERPA_ONNX_VERSION "1.9.22") # Disable warning about # @@ -260,6 +260,7 @@ if(SHERPA_ONNX_ENABLE_TTS) set(ESPEAK_NG_DIR ${espeak_ng_SOURCE_DIR}) message(STATUS "ESPEAK_NG_DIR: ${ESPEAK_NG_DIR}") include(piper-phonemize) + include(cppjieba) # For Chinese TTS. 
It is a header-only C++ library endif() add_subdirectory(sherpa-onnx) diff --git a/README.md b/README.md index 7ea1b638e..4f4246ebb 100644 --- a/README.md +++ b/README.md @@ -2,23 +2,48 @@ This repository supports running the following functions **locally** - - Speech-to-text (i.e., ASR) + - Speech-to-text (i.e., ASR); both streaming and non-streaming are supported - Text-to-speech (i.e., TTS) - Speaker identification + - Speaker verification + - Spoken language identification + - Audio tagging + - VAD (e.g., [silero-vad](https://github.com/snakers4/silero-vad)) on the following platforms and operating systems: - - Linux, macOS, Windows - - Android + - x86, ``x86_64``, 32-bit ARM, 64-bit ARM (arm64, aarch64), RISC-V (riscv64) + - Linux, macOS, Windows, openKylin + - Android, WearOS - iOS - - Raspberry Pi + - NodeJS + - WebAssembly + - [Raspberry Pi](https://www.raspberrypi.com/) + - [RV1126](https://www.rock-chips.com/uploads/pdf/2022.8.26/191/RV1126%20Brief%20Datasheet.pdf) + - [LicheePi4A](https://sipeed.com/licheepi4a) + - [VisionFive 2](https://www.starfivetech.com/en/site/boards) + - [旭日X3派](https://developer.horizon.ai/api/v1/fileData/documents_pi/index.html) - etc +with the following APIs + + - C++ + - C + - Python + - Go + - ``C#`` + - Javascript + - Java + - Kotlin + - Swift + # Useful links - Documentation: https://k2-fsa.github.io/sherpa/onnx/ - APK for the text-to-speech engine: https://k2-fsa.github.io/sherpa/onnx/tts/apk-engine.html - APK for speaker identification: https://k2-fsa.github.io/sherpa/onnx/speaker-identification/apk.html +- APK for speech recognition: https://github.com/k2-fsa/sherpa-onnx/releases/ +- Bilibili 演示视频: https://search.bilibili.com/all?keyword=%E6%96%B0%E4%B8%80%E4%BB%A3Kaldi # How to reach us diff --git a/android/README.md b/android/README.md index 053ad66e0..42b29e08f 100644 --- a/android/README.md +++ b/android/README.md @@ -7,14 +7,26 @@ for usage. - [SherpaOnnx](./SherpaOnnx) It uses a streaming ASR model. 
- [SherpaOnnx2Pass](./SherpaOnnx2Pass) It uses a streaming ASR model - for the first pass and use a non-streaming ASR model for the second pass. + for the first pass and use a non-streaming ASR model for the second pass -- [SherpaOnnxVad](./SherpaOnnxVad) It demonstrates how to use a VAD +- [SherpaOnnxKws](./SherpaOnnxKws) It demonstrates how to use keyword spotting -- [SherpaOnnxVadAsr](./SherpaOnnxVadAsr) It uses a VAD with a non-streaming - ASR model. +- [SherpaOnnxSpeakerIdentification](./SherpaOnnxSpeakerIdentification) It demonstrates + how to use speaker identification - [SherpaOnnxTts](./SherpaOnnxTts) It is for standalone text-to-speech. - [SherpaOnnxTtsEngine](./SherpaOnnxTtsEngine) It is for text-to-speech engine; you can use it to replace the system TTS engine. + +- [SherpaOnnxVad](./SherpaOnnxVad) It demonstrates how to use a VAD + +- [SherpaOnnxVadAsr](./SherpaOnnxVadAsr) It uses a VAD with a non-streaming + ASR model. + +- [SherpaOnnxWebSocket](./SherpaOnnxWebSocket) It shows how to write a websocket + client for the Python streaming websocket server. + +- [SherpaOnnxAudioTagging](./SherpaOnnxAudioTagging) It shows how to use audio tagging. + +- [SherpaOnnxAudioTaggingWearOS](./SherpaOnnxAudioTagging) It shows how to use audio tagging on WearOS. 
diff --git a/android/SherpaOnnx/app/src/main/AndroidManifest.xml b/android/SherpaOnnx/app/src/main/AndroidManifest.xml index 935fb0e95..c0c79ddd3 100644 --- a/android/SherpaOnnx/app/src/main/AndroidManifest.xml +++ b/android/SherpaOnnx/app/src/main/AndroidManifest.xml @@ -16,6 +16,7 @@ tools:targetApi="31"> diff --git a/android/SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx/FeatureConfig.kt b/android/SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx/FeatureConfig.kt new file mode 120000 index 000000000..952fae878 --- /dev/null +++ b/android/SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx/FeatureConfig.kt @@ -0,0 +1 @@ +../../../../../../../../../../sherpa-onnx/kotlin-api/FeatureConfig.kt \ No newline at end of file diff --git a/android/SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt b/android/SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt index 1619f3b27..e4eb5e276 100644 --- a/android/SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt +++ b/android/SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt @@ -12,16 +12,19 @@ import android.widget.Button import android.widget.TextView import androidx.appcompat.app.AppCompatActivity import androidx.core.app.ActivityCompat -import com.k2fsa.sherpa.onnx.* import kotlin.concurrent.thread private const val TAG = "sherpa-onnx" private const val REQUEST_RECORD_AUDIO_PERMISSION = 200 +// To enable microphone in android emulator, use +// +// adb emu avd hostmicon + class MainActivity : AppCompatActivity() { private val permissions: Array = arrayOf(Manifest.permission.RECORD_AUDIO) - private lateinit var model: SherpaOnnx + private lateinit var recognizer: OnlineRecognizer private var audioRecord: AudioRecord? 
= null private lateinit var recordButton: Button private lateinit var textView: TextView @@ -87,7 +90,6 @@ class MainActivity : AppCompatActivity() { audioRecord!!.startRecording() recordButton.setText(R.string.stop) isRecording = true - model.reset(true) textView.text = "" lastText = "" idx = 0 @@ -108,6 +110,7 @@ class MainActivity : AppCompatActivity() { private fun processSamples() { Log.i(TAG, "processing samples") + val stream = recognizer.createStream() val interval = 0.1 // i.e., 100 ms val bufferSize = (interval * sampleRateInHz).toInt() // in samples @@ -117,29 +120,41 @@ class MainActivity : AppCompatActivity() { val ret = audioRecord?.read(buffer, 0, buffer.size) if (ret != null && ret > 0) { val samples = FloatArray(ret) { buffer[it] / 32768.0f } - model.acceptWaveform(samples, sampleRate=sampleRateInHz) - while (model.isReady()) { - model.decode() + stream.acceptWaveform(samples, sampleRate = sampleRateInHz) + while (recognizer.isReady(stream)) { + recognizer.decode(stream) } - val isEndpoint = model.isEndpoint() - val text = model.text + val isEndpoint = recognizer.isEndpoint(stream) + var text = recognizer.getResult(stream).text + + // For streaming parformer, we need to manually add some + // paddings so that it has enough right context to + // recognize the last word of this segment + if (isEndpoint && recognizer.config.modelConfig.paraformer.encoder.isNotBlank()) { + val tailPaddings = FloatArray((0.8 * sampleRateInHz).toInt()) + stream.acceptWaveform(tailPaddings, sampleRate = sampleRateInHz) + while (recognizer.isReady(stream)) { + recognizer.decode(stream) + } + text = recognizer.getResult(stream).text + } - var textToDisplay = lastText; + var textToDisplay = lastText - if(text.isNotBlank()) { - if (lastText.isBlank()) { - textToDisplay = "${idx}: ${text}" + if (text.isNotBlank()) { + textToDisplay = if (lastText.isBlank()) { + "${idx}: $text" } else { - textToDisplay = "${lastText}\n${idx}: ${text}" + "${lastText}\n${idx}: $text" } } if 
(isEndpoint) { - model.reset() + recognizer.reset(stream) if (text.isNotBlank()) { - lastText = "${lastText}\n${idx}: ${text}" - textToDisplay = lastText; + lastText = "${lastText}\n${idx}: $text" + textToDisplay = lastText idx += 1 } } @@ -149,6 +164,7 @@ class MainActivity : AppCompatActivity() { } } } + stream.release() } private fun initMicrophone(): Boolean { @@ -180,7 +196,7 @@ class MainActivity : AppCompatActivity() { // See https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html // for a list of available models val type = 0 - println("Select model type ${type}") + Log.i(TAG, "Select model type $type") val config = OnlineRecognizerConfig( featConfig = getFeatureConfig(sampleRate = sampleRateInHz, featureDim = 80), modelConfig = getModelConfig(type = type)!!, @@ -189,7 +205,7 @@ class MainActivity : AppCompatActivity() { enableEndpoint = true, ) - model = SherpaOnnx( + recognizer = OnlineRecognizer( assetManager = application.assets, config = config, ) diff --git a/android/SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx/OnlineRecognizer.kt b/android/SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx/OnlineRecognizer.kt new file mode 120000 index 000000000..5bb19ee10 --- /dev/null +++ b/android/SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx/OnlineRecognizer.kt @@ -0,0 +1 @@ +../../../../../../../../../../sherpa-onnx/kotlin-api/OnlineRecognizer.kt \ No newline at end of file diff --git a/android/SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx/OnlineStream.kt b/android/SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx/OnlineStream.kt new file mode 120000 index 000000000..d4518b89b --- /dev/null +++ b/android/SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx/OnlineStream.kt @@ -0,0 +1 @@ +../../../../../../../../../../sherpa-onnx/kotlin-api/OnlineStream.kt \ No newline at end of file diff --git a/android/SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx/WaveReader.kt 
b/android/SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx/WaveReader.kt deleted file mode 100644 index dca399840..000000000 --- a/android/SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx/WaveReader.kt +++ /dev/null @@ -1,29 +0,0 @@ -// Copyright (c) 2023 Xiaomi Corporation -package com.k2fsa.sherpa.onnx - -import android.content.res.AssetManager - -class WaveReader { - companion object { - // Read a mono wave file asset - // The returned array has two entries: - // - the first entry contains an 1-D float array - // - the second entry is the sample rate - external fun readWaveFromAsset( - assetManager: AssetManager, - filename: String, - ): Array - - // Read a mono wave file from disk - // The returned array has two entries: - // - the first entry contains an 1-D float array - // - the second entry is the sample rate - external fun readWaveFromFile( - filename: String, - ): Array - - init { - System.loadLibrary("sherpa-onnx-jni") - } - } -} diff --git a/android/SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx/WaveReader.kt b/android/SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx/WaveReader.kt new file mode 120000 index 000000000..05c8fb246 --- /dev/null +++ b/android/SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx/WaveReader.kt @@ -0,0 +1 @@ +../../../../../../../../../../sherpa-onnx/kotlin-api/WaveReader.kt \ No newline at end of file diff --git a/android/SherpaOnnx/app/src/main/res/values/strings.xml b/android/SherpaOnnx/app/src/main/res/values/strings.xml index 801b18f46..0c3c70a29 100644 --- a/android/SherpaOnnx/app/src/main/res/values/strings.xml +++ b/android/SherpaOnnx/app/src/main/res/values/strings.xml @@ -1,5 +1,5 @@ - ASR with Next-gen Kaldi + ASR Click the Start button to play speech-to-text with Next-gen Kaldi. 
\n \n\n\n diff --git a/android/SherpaOnnx2Pass/app/src/main/AndroidManifest.xml b/android/SherpaOnnx2Pass/app/src/main/AndroidManifest.xml index 2a440df14..0cbbfafe8 100644 --- a/android/SherpaOnnx2Pass/app/src/main/AndroidManifest.xml +++ b/android/SherpaOnnx2Pass/app/src/main/AndroidManifest.xml @@ -16,6 +16,7 @@ tools:targetApi="31"> @@ -29,4 +30,4 @@ - \ No newline at end of file + diff --git a/android/SherpaOnnx2Pass/app/src/main/java/com/k2fsa/sherpa/onnx/FeatureConfig.kt b/android/SherpaOnnx2Pass/app/src/main/java/com/k2fsa/sherpa/onnx/FeatureConfig.kt new file mode 120000 index 000000000..952fae878 --- /dev/null +++ b/android/SherpaOnnx2Pass/app/src/main/java/com/k2fsa/sherpa/onnx/FeatureConfig.kt @@ -0,0 +1 @@ +../../../../../../../../../../sherpa-onnx/kotlin-api/FeatureConfig.kt \ No newline at end of file diff --git a/android/SherpaOnnx2Pass/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt b/android/SherpaOnnx2Pass/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt index 012c0db5e..596d03e09 100644 --- a/android/SherpaOnnx2Pass/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt +++ b/android/SherpaOnnx2Pass/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt @@ -17,11 +17,13 @@ import kotlin.concurrent.thread private const val TAG = "sherpa-onnx" private const val REQUEST_RECORD_AUDIO_PERMISSION = 200 +// adb emu avd hostmicon +// to enable microphone inside the emulator class MainActivity : AppCompatActivity() { private val permissions: Array = arrayOf(Manifest.permission.RECORD_AUDIO) - private lateinit var onlineRecognizer: SherpaOnnx - private lateinit var offlineRecognizer: SherpaOnnxOffline + private lateinit var onlineRecognizer: OnlineRecognizer + private lateinit var offlineRecognizer: OfflineRecognizer private var audioRecord: AudioRecord? 
= null private lateinit var recordButton: Button private lateinit var textView: TextView @@ -93,7 +95,6 @@ class MainActivity : AppCompatActivity() { audioRecord!!.startRecording() recordButton.setText(R.string.stop) isRecording = true - onlineRecognizer.reset(true) samplesBuffer.clear() textView.text = "" lastText = "" @@ -115,6 +116,7 @@ class MainActivity : AppCompatActivity() { private fun processSamples() { Log.i(TAG, "processing samples") + val stream = onlineRecognizer.createStream() val interval = 0.1 // i.e., 100 ms val bufferSize = (interval * sampleRateInHz).toInt() // in samples @@ -126,29 +128,29 @@ class MainActivity : AppCompatActivity() { val samples = FloatArray(ret) { buffer[it] / 32768.0f } samplesBuffer.add(samples) - onlineRecognizer.acceptWaveform(samples, sampleRate = sampleRateInHz) - while (onlineRecognizer.isReady()) { - onlineRecognizer.decode() + stream.acceptWaveform(samples, sampleRate = sampleRateInHz) + while (onlineRecognizer.isReady(stream)) { + onlineRecognizer.decode(stream) } - val isEndpoint = onlineRecognizer.isEndpoint() + val isEndpoint = onlineRecognizer.isEndpoint(stream) var textToDisplay = lastText - var text = onlineRecognizer.text + var text = onlineRecognizer.getResult(stream).text if (text.isNotBlank()) { - if (lastText.isBlank()) { + textToDisplay = if (lastText.isBlank()) { // textView.text = "${idx}: ${text}" - textToDisplay = "${idx}: ${text}" + "${idx}: $text" } else { - textToDisplay = "${lastText}\n${idx}: ${text}" + "${lastText}\n${idx}: $text" } } if (isEndpoint) { - onlineRecognizer.reset() + onlineRecognizer.reset(stream) if (text.isNotBlank()) { text = runSecondPass() - lastText = "${lastText}\n${idx}: ${text}" + lastText = "${lastText}\n${idx}: $text" idx += 1 } else { samplesBuffer.clear() @@ -160,6 +162,7 @@ class MainActivity : AppCompatActivity() { } } } + stream.release() } private fun initMicrophone(): Boolean { @@ -190,8 +193,8 @@ class MainActivity : AppCompatActivity() { // Please change 
getModelConfig() to add new models // See https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html // for a list of available models - val firstType = 1 - println("Select model type ${firstType} for the first pass") + val firstType = 9 + Log.i(TAG, "Select model type $firstType for the first pass") val config = OnlineRecognizerConfig( featConfig = getFeatureConfig(sampleRate = sampleRateInHz, featureDim = 80), modelConfig = getModelConfig(type = firstType)!!, @@ -199,7 +202,7 @@ class MainActivity : AppCompatActivity() { enableEndpoint = true, ) - onlineRecognizer = SherpaOnnx( + onlineRecognizer = OnlineRecognizer( assetManager = application.assets, config = config, ) @@ -209,15 +212,15 @@ class MainActivity : AppCompatActivity() { // Please change getOfflineModelConfig() to add new models // See https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html // for a list of available models - val secondType = 1 - println("Select model type ${secondType} for the second pass") + val secondType = 0 + Log.i(TAG, "Select model type $secondType for the second pass") val config = OfflineRecognizerConfig( featConfig = getFeatureConfig(sampleRate = sampleRateInHz, featureDim = 80), modelConfig = getOfflineModelConfig(type = secondType)!!, ) - offlineRecognizer = SherpaOnnxOffline( + offlineRecognizer = OfflineRecognizer( assetManager = application.assets, config = config, ) @@ -244,8 +247,15 @@ class MainActivity : AppCompatActivity() { val n = maxOf(0, samples.size - 8000) samplesBuffer.clear() - samplesBuffer.add(samples.sliceArray(n..samples.size-1)) + samplesBuffer.add(samples.sliceArray(n until samples.size)) - return offlineRecognizer.decode(samples.sliceArray(0..n), sampleRateInHz) + val stream = offlineRecognizer.createStream() + stream.acceptWaveform(samples.sliceArray(0..n), sampleRateInHz) + offlineRecognizer.decode(stream) + val result = offlineRecognizer.getResult(stream) + + stream.release() + + return result.text } } diff --git 
a/android/SherpaOnnx2Pass/app/src/main/java/com/k2fsa/sherpa/onnx/OfflineRecognizer.kt b/android/SherpaOnnx2Pass/app/src/main/java/com/k2fsa/sherpa/onnx/OfflineRecognizer.kt new file mode 120000 index 000000000..faa3ab4ac --- /dev/null +++ b/android/SherpaOnnx2Pass/app/src/main/java/com/k2fsa/sherpa/onnx/OfflineRecognizer.kt @@ -0,0 +1 @@ +../../../../../../../../../../sherpa-onnx/kotlin-api/OfflineRecognizer.kt \ No newline at end of file diff --git a/android/SherpaOnnx2Pass/app/src/main/java/com/k2fsa/sherpa/onnx/OfflineStream.kt b/android/SherpaOnnx2Pass/app/src/main/java/com/k2fsa/sherpa/onnx/OfflineStream.kt new file mode 120000 index 000000000..2a3aff864 --- /dev/null +++ b/android/SherpaOnnx2Pass/app/src/main/java/com/k2fsa/sherpa/onnx/OfflineStream.kt @@ -0,0 +1 @@ +../../../../../../../../../../sherpa-onnx/kotlin-api/OfflineStream.kt \ No newline at end of file diff --git a/android/SherpaOnnx2Pass/app/src/main/java/com/k2fsa/sherpa/onnx/OnlineRecognizer.kt b/android/SherpaOnnx2Pass/app/src/main/java/com/k2fsa/sherpa/onnx/OnlineRecognizer.kt new file mode 120000 index 000000000..5bb19ee10 --- /dev/null +++ b/android/SherpaOnnx2Pass/app/src/main/java/com/k2fsa/sherpa/onnx/OnlineRecognizer.kt @@ -0,0 +1 @@ +../../../../../../../../../../sherpa-onnx/kotlin-api/OnlineRecognizer.kt \ No newline at end of file diff --git a/android/SherpaOnnx2Pass/app/src/main/java/com/k2fsa/sherpa/onnx/OnlineStream.kt b/android/SherpaOnnx2Pass/app/src/main/java/com/k2fsa/sherpa/onnx/OnlineStream.kt new file mode 120000 index 000000000..d4518b89b --- /dev/null +++ b/android/SherpaOnnx2Pass/app/src/main/java/com/k2fsa/sherpa/onnx/OnlineStream.kt @@ -0,0 +1 @@ +../../../../../../../../../../sherpa-onnx/kotlin-api/OnlineStream.kt \ No newline at end of file diff --git a/android/SherpaOnnx2Pass/app/src/main/java/com/k2fsa/sherpa/onnx/SherpaOnnx.kt b/android/SherpaOnnx2Pass/app/src/main/java/com/k2fsa/sherpa/onnx/SherpaOnnx.kt deleted file mode 100644 index 601ecf83f..000000000 --- 
a/android/SherpaOnnx2Pass/app/src/main/java/com/k2fsa/sherpa/onnx/SherpaOnnx.kt +++ /dev/null @@ -1,404 +0,0 @@ -package com.k2fsa.sherpa.onnx - -import android.content.res.AssetManager - -data class EndpointRule( - var mustContainNonSilence: Boolean, - var minTrailingSilence: Float, - var minUtteranceLength: Float, -) - -data class EndpointConfig( - var rule1: EndpointRule = EndpointRule(false, 2.0f, 0.0f), - var rule2: EndpointRule = EndpointRule(true, 1.2f, 0.0f), - var rule3: EndpointRule = EndpointRule(false, 0.0f, 20.0f) -) - -data class OnlineTransducerModelConfig( - var encoder: String = "", - var decoder: String = "", - var joiner: String = "", -) - -data class OnlineParaformerModelConfig( - var encoder: String = "", - var decoder: String = "", -) - -data class OnlineZipformer2CtcModelConfig( - var model: String = "", -) - -data class OnlineModelConfig( - var transducer: OnlineTransducerModelConfig = OnlineTransducerModelConfig(), - var paraformer: OnlineParaformerModelConfig = OnlineParaformerModelConfig(), - var zipformer2Ctc: OnlineZipformer2CtcModelConfig = OnlineZipformer2CtcModelConfig(), - var tokens: String, - var numThreads: Int = 1, - var debug: Boolean = false, - var provider: String = "cpu", - var modelType: String = "", -) - -data class OnlineLMConfig( - var model: String = "", - var scale: Float = 0.5f, -) - -data class FeatureConfig( - var sampleRate: Int = 16000, - var featureDim: Int = 80, -) - -data class OnlineRecognizerConfig( - var featConfig: FeatureConfig = FeatureConfig(), - var modelConfig: OnlineModelConfig, - var lmConfig: OnlineLMConfig = OnlineLMConfig(), - var endpointConfig: EndpointConfig = EndpointConfig(), - var enableEndpoint: Boolean = true, - var decodingMethod: String = "greedy_search", - var maxActivePaths: Int = 4, - var hotwordsFile: String = "", - var hotwordsScore: Float = 1.5f, -) - -data class OfflineTransducerModelConfig( - var encoder: String = "", - var decoder: String = "", - var joiner: String = "", -) - 
-data class OfflineParaformerModelConfig( - var model: String = "", -) - -data class OfflineWhisperModelConfig( - var encoder: String = "", - var decoder: String = "", - var language: String = "en", // Used with multilingual model - var task: String = "transcribe", // transcribe or translate - var tailPaddings: Int = 1000, // Padding added at the end of the samples -) - -data class OfflineModelConfig( - var transducer: OfflineTransducerModelConfig = OfflineTransducerModelConfig(), - var paraformer: OfflineParaformerModelConfig = OfflineParaformerModelConfig(), - var whisper: OfflineWhisperModelConfig = OfflineWhisperModelConfig(), - var numThreads: Int = 1, - var debug: Boolean = false, - var provider: String = "cpu", - var modelType: String = "", - var tokens: String, -) - -data class OfflineRecognizerConfig( - var featConfig: FeatureConfig = FeatureConfig(), - var modelConfig: OfflineModelConfig, - // var lmConfig: OfflineLMConfig(), // TODO(fangjun): enable it - var decodingMethod: String = "greedy_search", - var maxActivePaths: Int = 4, - var hotwordsFile: String = "", - var hotwordsScore: Float = 1.5f, -) - -class SherpaOnnx( - assetManager: AssetManager? 
= null, - var config: OnlineRecognizerConfig, -) { - private val ptr: Long - - init { - if (assetManager != null) { - ptr = new(assetManager, config) - } else { - ptr = newFromFile(config) - } - } - - protected fun finalize() { - delete(ptr) - } - - fun acceptWaveform(samples: FloatArray, sampleRate: Int) = - acceptWaveform(ptr, samples, sampleRate) - - fun inputFinished() = inputFinished(ptr) - fun reset(recreate: Boolean = false, hotwords: String = "") = reset(ptr, recreate, hotwords) - fun decode() = decode(ptr) - fun isEndpoint(): Boolean = isEndpoint(ptr) - fun isReady(): Boolean = isReady(ptr) - - val text: String - get() = getText(ptr) - - val tokens: Array - get() = getTokens(ptr) - - private external fun delete(ptr: Long) - - private external fun new( - assetManager: AssetManager, - config: OnlineRecognizerConfig, - ): Long - - private external fun newFromFile( - config: OnlineRecognizerConfig, - ): Long - - private external fun acceptWaveform(ptr: Long, samples: FloatArray, sampleRate: Int) - private external fun inputFinished(ptr: Long) - private external fun getText(ptr: Long): String - private external fun reset(ptr: Long, recreate: Boolean, hotwords: String) - private external fun decode(ptr: Long) - private external fun isEndpoint(ptr: Long): Boolean - private external fun isReady(ptr: Long): Boolean - private external fun getTokens(ptr: Long): Array - - companion object { - init { - System.loadLibrary("sherpa-onnx-jni") - } - } -} - -class SherpaOnnxOffline( - assetManager: AssetManager? 
= null, - var config: OfflineRecognizerConfig, -) { - private val ptr: Long - - init { - if (assetManager != null) { - ptr = new(assetManager, config) - } else { - ptr = newFromFile(config) - } - } - - protected fun finalize() { - delete(ptr) - } - - fun decode(samples: FloatArray, sampleRate: Int) = decode(ptr, samples, sampleRate) - - private external fun delete(ptr: Long) - - private external fun new( - assetManager: AssetManager, - config: OfflineRecognizerConfig, - ): Long - - private external fun newFromFile( - config: OfflineRecognizerConfig, - ): Long - - private external fun decode(ptr: Long, samples: FloatArray, sampleRate: Int): String - - companion object { - init { - System.loadLibrary("sherpa-onnx-jni") - } - } -} - -fun getFeatureConfig(sampleRate: Int, featureDim: Int): FeatureConfig { - return FeatureConfig(sampleRate = sampleRate, featureDim = featureDim) -} - -/* -Please see -https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html -for a list of pre-trained models. - -We only add a few here. Please change the following code -to add your own. (It should be straightforward to add a new model -by following the code) - -@param type -0 - csukuangfj/sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23 (Chinese) - https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#sherpa-onnx-streaming-zipformer-zh-14m-2023-02-23 - encoder/joiner int8, decoder float32 - -1 - csukuangfj/sherpa-onnx-streaming-zipformer-en-20M-2023-02-17 (English) - https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-en-20m-2023-02-17-english - encoder/joiner int8, decoder fp32 - - */ -fun getModelConfig(type: Int): OnlineModelConfig? 
{ - when (type) { - 0 -> { - val modelDir = "sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23" - return OnlineModelConfig( - transducer = OnlineTransducerModelConfig( - encoder = "$modelDir/encoder-epoch-99-avg-1.int8.onnx", - decoder = "$modelDir/decoder-epoch-99-avg-1.onnx", - joiner = "$modelDir/joiner-epoch-99-avg-1.int8.onnx", - ), - tokens = "$modelDir/tokens.txt", - modelType = "zipformer", - ) - } - - 1 -> { - val modelDir = "sherpa-onnx-streaming-zipformer-en-20M-2023-02-17" - return OnlineModelConfig( - transducer = OnlineTransducerModelConfig( - encoder = "$modelDir/encoder-epoch-99-avg-1.int8.onnx", - decoder = "$modelDir/decoder-epoch-99-avg-1.onnx", - joiner = "$modelDir/joiner-epoch-99-avg-1.int8.onnx", - ), - tokens = "$modelDir/tokens.txt", - modelType = "zipformer", - ) - } - } - return null -} - -/* -Please see -https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html -for a list of pre-trained models. - -We only add a few here. Please change the following code -to add your own LM model. 
(It should be straightforward to train a new NN LM model -by following the code, https://github.com/k2-fsa/icefall/blob/master/icefall/rnn_lm/train.py) - -@param type -0 - sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 (Bilingual, Chinese + English) - https://k2-fsa.github.io/sherpa/onnx/pretrained_models/zipformer-transducer-models.html#sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20-bilingual-chinese-english - */ -fun getOnlineLMConfig(type: Int): OnlineLMConfig { - when (type) { - 0 -> { - val modelDir = "sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20" - return OnlineLMConfig( - model = "$modelDir/with-state-epoch-99-avg-1.int8.onnx", - scale = 0.5f, - ) - } - } - return OnlineLMConfig() -} - -// for English models, use a small value for rule2.minTrailingSilence, e.g., 0.8 -fun getEndpointConfig(): EndpointConfig { - return EndpointConfig( - rule1 = EndpointRule(false, 2.4f, 0.0f), - rule2 = EndpointRule(true, 0.8f, 0.0f), - rule3 = EndpointRule(false, 0.0f, 20.0f) - ) -} - -/* -Please see -https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html -for a list of pre-trained models. - -We only add a few here. Please change the following code -to add your own. 
(It should be straightforward to add a new model -by following the code) - -@param type - -0 - csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28 (Chinese) - https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-paraformer/paraformer-models.html#csukuangfj-sherpa-onnx-paraformer-zh-2023-03-28-chinese - int8 - -1 - icefall-asr-multidataset-pruned_transducer_stateless7-2023-05-04 (English) - https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/zipformer-transducer-models.html#icefall-asr-multidataset-pruned-transducer-stateless7-2023-05-04-english - encoder int8, decoder/joiner float32 - -2 - sherpa-onnx-whisper-tiny.en - https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/tiny.en.html#tiny-en - encoder int8, decoder int8 - -3 - sherpa-onnx-whisper-base.en - https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/tiny.en.html#tiny-en - encoder int8, decoder int8 - -4 - pkufool/icefall-asr-zipformer-wenetspeech-20230615 (Chinese) - https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/zipformer-transducer-models.html#pkufool-icefall-asr-zipformer-wenetspeech-20230615-chinese - encoder/joiner int8, decoder fp32 - - */ -fun getOfflineModelConfig(type: Int): OfflineModelConfig? 
{ - when (type) { - 0 -> { - val modelDir = "sherpa-onnx-paraformer-zh-2023-03-28" - return OfflineModelConfig( - paraformer = OfflineParaformerModelConfig( - model = "$modelDir/model.int8.onnx", - ), - tokens = "$modelDir/tokens.txt", - modelType = "paraformer", - ) - } - - 1 -> { - val modelDir = "icefall-asr-multidataset-pruned_transducer_stateless7-2023-05-04" - return OfflineModelConfig( - transducer = OfflineTransducerModelConfig( - encoder = "$modelDir/encoder-epoch-30-avg-4.int8.onnx", - decoder = "$modelDir/decoder-epoch-30-avg-4.onnx", - joiner = "$modelDir/joiner-epoch-30-avg-4.onnx", - ), - tokens = "$modelDir/tokens.txt", - modelType = "zipformer", - ) - } - - 2 -> { - val modelDir = "sherpa-onnx-whisper-tiny.en" - return OfflineModelConfig( - whisper = OfflineWhisperModelConfig( - encoder = "$modelDir/tiny.en-encoder.int8.onnx", - decoder = "$modelDir/tiny.en-decoder.int8.onnx", - ), - tokens = "$modelDir/tiny.en-tokens.txt", - modelType = "whisper", - ) - } - - 3 -> { - val modelDir = "sherpa-onnx-whisper-base.en" - return OfflineModelConfig( - whisper = OfflineWhisperModelConfig( - encoder = "$modelDir/base.en-encoder.int8.onnx", - decoder = "$modelDir/base.en-decoder.int8.onnx", - ), - tokens = "$modelDir/base.en-tokens.txt", - modelType = "whisper", - ) - } - - - 4 -> { - val modelDir = "icefall-asr-zipformer-wenetspeech-20230615" - return OfflineModelConfig( - transducer = OfflineTransducerModelConfig( - encoder = "$modelDir/encoder-epoch-12-avg-4.int8.onnx", - decoder = "$modelDir/decoder-epoch-12-avg-4.onnx", - joiner = "$modelDir/joiner-epoch-12-avg-4.int8.onnx", - ), - tokens = "$modelDir/tokens.txt", - modelType = "zipformer", - ) - } - - 5 -> { - val modelDir = "sherpa-onnx-zipformer-multi-zh-hans-2023-9-2" - return OfflineModelConfig( - transducer = OfflineTransducerModelConfig( - encoder = "$modelDir/encoder-epoch-20-avg-1.int8.onnx", - decoder = "$modelDir/decoder-epoch-20-avg-1.onnx", - joiner = 
"$modelDir/joiner-epoch-20-avg-1.int8.onnx", - ), - tokens = "$modelDir/tokens.txt", - modelType = "zipformer2", - ) - } - - } - return null -} diff --git a/android/SherpaOnnx2Pass/app/src/main/java/com/k2fsa/sherpa/onnx/WaveReader.kt b/android/SherpaOnnx2Pass/app/src/main/java/com/k2fsa/sherpa/onnx/WaveReader.kt deleted file mode 100644 index 3060450d6..000000000 --- a/android/SherpaOnnx2Pass/app/src/main/java/com/k2fsa/sherpa/onnx/WaveReader.kt +++ /dev/null @@ -1,28 +0,0 @@ -package com.k2fsa.sherpa.onnx - -import android.content.res.AssetManager - -class WaveReader { - companion object { - // Read a mono wave file asset - // The returned array has two entries: - // - the first entry contains an 1-D float array - // - the second entry is the sample rate - external fun readWaveFromAsset( - assetManager: AssetManager, - filename: String, - ): Array - - // Read a mono wave file from disk - // The returned array has two entries: - // - the first entry contains an 1-D float array - // - the second entry is the sample rate - external fun readWaveFromFile( - filename: String, - ): Array - - init { - System.loadLibrary("sherpa-onnx-jni") - } - } -} diff --git a/android/SherpaOnnx2Pass/app/src/main/res/values/strings.xml b/android/SherpaOnnx2Pass/app/src/main/res/values/strings.xml index cc2a4050f..942912b07 100644 --- a/android/SherpaOnnx2Pass/app/src/main/res/values/strings.xml +++ b/android/SherpaOnnx2Pass/app/src/main/res/values/strings.xml @@ -1,5 +1,5 @@ - ASR with Next-gen Kaldi + ASR2pass Click the Start button to play speech-to-text with Next-gen Kaldi. 
\n \n\n\n diff --git a/android/SherpaOnnxAudioTagging/.gitignore b/android/SherpaOnnxAudioTagging/.gitignore new file mode 100644 index 000000000..aa724b770 --- /dev/null +++ b/android/SherpaOnnxAudioTagging/.gitignore @@ -0,0 +1,15 @@ +*.iml +.gradle +/local.properties +/.idea/caches +/.idea/libraries +/.idea/modules.xml +/.idea/workspace.xml +/.idea/navEditor.xml +/.idea/assetWizardSettings.xml +.DS_Store +/build +/captures +.externalNativeBuild +.cxx +local.properties diff --git a/android/SherpaOnnxAudioTagging/app/.gitignore b/android/SherpaOnnxAudioTagging/app/.gitignore new file mode 100644 index 000000000..42afabfd2 --- /dev/null +++ b/android/SherpaOnnxAudioTagging/app/.gitignore @@ -0,0 +1 @@ +/build \ No newline at end of file diff --git a/android/SherpaOnnxAudioTagging/app/build.gradle.kts b/android/SherpaOnnxAudioTagging/app/build.gradle.kts new file mode 100644 index 000000000..1709e2efa --- /dev/null +++ b/android/SherpaOnnxAudioTagging/app/build.gradle.kts @@ -0,0 +1,69 @@ +plugins { + id("com.android.application") + id("org.jetbrains.kotlin.android") +} + +android { + namespace = "com.k2fsa.sherpa.onnx.audio.tagging" + compileSdk = 34 + + defaultConfig { + applicationId = "com.k2fsa.sherpa.onnx.audio.tagging" + minSdk = 21 + targetSdk = 34 + versionCode = 1 + versionName = "1.0" + + testInstrumentationRunner = "androidx.test.runner.AndroidJUnitRunner" + vectorDrawables { + useSupportLibrary = true + } + } + + buildTypes { + release { + isMinifyEnabled = false + proguardFiles( + getDefaultProguardFile("proguard-android-optimize.txt"), + "proguard-rules.pro" + ) + } + } + compileOptions { + sourceCompatibility = JavaVersion.VERSION_1_8 + targetCompatibility = JavaVersion.VERSION_1_8 + } + kotlinOptions { + jvmTarget = "1.8" + } + buildFeatures { + compose = true + } + composeOptions { + kotlinCompilerExtensionVersion = "1.5.1" + } + packaging { + resources { + excludes += "/META-INF/{AL2.0,LGPL2.1}" + } + } +} + +dependencies { + + 
implementation("androidx.core:core-ktx:1.12.0") + implementation("androidx.lifecycle:lifecycle-runtime-ktx:2.7.0") + implementation("androidx.activity:activity-compose:1.8.2") + implementation(platform("androidx.compose:compose-bom:2023.08.00")) + implementation("androidx.compose.ui:ui") + implementation("androidx.compose.ui:ui-graphics") + implementation("androidx.compose.ui:ui-tooling-preview") + implementation("androidx.compose.material3:material3") + testImplementation("junit:junit:4.13.2") + androidTestImplementation("androidx.test.ext:junit:1.1.5") + androidTestImplementation("androidx.test.espresso:espresso-core:3.5.1") + androidTestImplementation(platform("androidx.compose:compose-bom:2023.08.00")) + androidTestImplementation("androidx.compose.ui:ui-test-junit4") + debugImplementation("androidx.compose.ui:ui-tooling") + debugImplementation("androidx.compose.ui:ui-test-manifest") +} \ No newline at end of file diff --git a/android/SherpaOnnxAudioTagging/app/proguard-rules.pro b/android/SherpaOnnxAudioTagging/app/proguard-rules.pro new file mode 100644 index 000000000..481bb4348 --- /dev/null +++ b/android/SherpaOnnxAudioTagging/app/proguard-rules.pro @@ -0,0 +1,21 @@ +# Add project specific ProGuard rules here. +# You can control the set of applied configuration files using the +# proguardFiles setting in build.gradle. +# +# For more details, see +# http://developer.android.com/guide/developing/tools/proguard.html + +# If your project uses WebView with JS, uncomment the following +# and specify the fully qualified class name to the JavaScript interface +# class: +#-keepclassmembers class fqcn.of.javascript.interface.for.webview { +# public *; +#} + +# Uncomment this to preserve the line number information for +# debugging stack traces. +#-keepattributes SourceFile,LineNumberTable + +# If you keep the line number information, uncomment this to +# hide the original source file name. 
+#-renamesourcefileattribute SourceFile \ No newline at end of file diff --git a/android/SherpaOnnxAudioTagging/app/src/androidTest/java/com/k2fsa/sherpa/onnx/audio/tagging/ExampleInstrumentedTest.kt b/android/SherpaOnnxAudioTagging/app/src/androidTest/java/com/k2fsa/sherpa/onnx/audio/tagging/ExampleInstrumentedTest.kt new file mode 100644 index 000000000..c17852d6b --- /dev/null +++ b/android/SherpaOnnxAudioTagging/app/src/androidTest/java/com/k2fsa/sherpa/onnx/audio/tagging/ExampleInstrumentedTest.kt @@ -0,0 +1,24 @@ +package com.k2fsa.sherpa.onnx.audio.tagging + +import androidx.test.platform.app.InstrumentationRegistry +import androidx.test.ext.junit.runners.AndroidJUnit4 + +import org.junit.Test +import org.junit.runner.RunWith + +import org.junit.Assert.* + +/** + * Instrumented test, which will execute on an Android device. + * + * See [testing documentation](http://d.android.com/tools/testing). + */ +@RunWith(AndroidJUnit4::class) +class ExampleInstrumentedTest { + @Test + fun useAppContext() { + // Context of the app under test. 
+ val appContext = InstrumentationRegistry.getInstrumentation().targetContext + assertEquals("com.k2fsa.sherpa.onnx.audio.tagging", appContext.packageName) + } +} \ No newline at end of file diff --git a/android/SherpaOnnxAudioTagging/app/src/main/AndroidManifest.xml b/android/SherpaOnnxAudioTagging/app/src/main/AndroidManifest.xml new file mode 100644 index 000000000..3d205d4ea --- /dev/null +++ b/android/SherpaOnnxAudioTagging/app/src/main/AndroidManifest.xml @@ -0,0 +1,30 @@ + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/android/SherpaOnnxAudioTagging/app/src/main/assets/.gitignore b/android/SherpaOnnxAudioTagging/app/src/main/assets/.gitignore new file mode 100644 index 000000000..e69de29bb diff --git a/android/SherpaOnnxAudioTagging/app/src/main/java/com/k2fsa/sherpa/onnx/audio/tagging/AudioTagging.kt b/android/SherpaOnnxAudioTagging/app/src/main/java/com/k2fsa/sherpa/onnx/audio/tagging/AudioTagging.kt new file mode 120000 index 000000000..176a8df8d --- /dev/null +++ b/android/SherpaOnnxAudioTagging/app/src/main/java/com/k2fsa/sherpa/onnx/audio/tagging/AudioTagging.kt @@ -0,0 +1 @@ +../../../../../../../../../../../../sherpa-onnx/kotlin-api/AudioTagging.kt \ No newline at end of file diff --git a/android/SherpaOnnxAudioTagging/app/src/main/java/com/k2fsa/sherpa/onnx/audio/tagging/Home.kt b/android/SherpaOnnxAudioTagging/app/src/main/java/com/k2fsa/sherpa/onnx/audio/tagging/Home.kt new file mode 100644 index 000000000..a1edc2554 --- /dev/null +++ b/android/SherpaOnnxAudioTagging/app/src/main/java/com/k2fsa/sherpa/onnx/audio/tagging/Home.kt @@ -0,0 +1,253 @@ +@file:OptIn(ExperimentalMaterial3Api::class, ExperimentalFoundationApi::class) + +package com.k2fsa.sherpa.onnx.audio.tagging + +import android.Manifest +import android.app.Activity +import android.content.pm.PackageManager +import android.media.AudioFormat +import android.media.AudioRecord +import android.media.MediaRecorder +import android.util.Log +import 
androidx.compose.foundation.ExperimentalFoundationApi +import androidx.compose.foundation.layout.Arrangement +import androidx.compose.foundation.layout.Box +import androidx.compose.foundation.layout.Column +import androidx.compose.foundation.layout.PaddingValues +import androidx.compose.foundation.layout.Row +import androidx.compose.foundation.layout.Spacer +import androidx.compose.foundation.layout.fillMaxSize +import androidx.compose.foundation.layout.fillMaxWidth +import androidx.compose.foundation.layout.height +import androidx.compose.foundation.layout.padding +import androidx.compose.foundation.lazy.LazyColumn +import androidx.compose.foundation.lazy.items +import androidx.compose.material3.Button +import androidx.compose.material3.CenterAlignedTopAppBar +import androidx.compose.material3.ExperimentalMaterial3Api +import androidx.compose.material3.MaterialTheme +import androidx.compose.material3.Scaffold +import androidx.compose.material3.Slider +import androidx.compose.material3.Surface +import androidx.compose.material3.Text +import androidx.compose.material3.TopAppBarDefaults +import androidx.compose.runtime.Composable +import androidx.compose.runtime.getValue +import androidx.compose.runtime.mutableStateListOf +import androidx.compose.runtime.mutableStateOf +import androidx.compose.runtime.remember +import androidx.compose.runtime.setValue +import androidx.compose.ui.Alignment +import androidx.compose.ui.Modifier +import androidx.compose.ui.platform.LocalContext +import androidx.compose.ui.text.font.FontWeight +import androidx.compose.ui.text.style.TextAlign +import androidx.compose.ui.unit.dp +import androidx.compose.ui.unit.sp +import androidx.core.app.ActivityCompat +import com.k2fsa.sherpa.onnx.AudioEvent +import kotlin.concurrent.thread + + +@Composable +fun Home() { + Scaffold( + topBar = { + CenterAlignedTopAppBar( + colors = TopAppBarDefaults.topAppBarColors( + containerColor = MaterialTheme.colorScheme.primaryContainer, + titleContentColor = 
MaterialTheme.colorScheme.primary, + ), + title = { + Text( + "Next-gen Kaldi: Audio tagging", + fontWeight = FontWeight.Bold, + fontSize = 15.sp, + ) + }, + ) + }, + content = { + MyApp(it) + }, + ) +} + +private var audioRecord: AudioRecord? = null +private val sampleRateInHz = 16000 + +/** + * Main screen body: a threshold slider, a Start/Stop button and the list of + * detected audio events. + * + * Pressing Start records from the microphone on a background thread until + * Stop is pressed; the recording is then tagged and every event whose + * probability exceeds the slider threshold is appended to the list. + * + * @param padding padding handed over by the enclosing Scaffold + */ +@Composable +fun MyApp(padding: PaddingValues) { + val activity = LocalContext.current as Activity + var threshold by remember { mutableStateOf(0.6F) } + var isStarted by remember { mutableStateOf(false) } + val result = remember { mutableStateListOf<AudioEvent>() } + + + val onButtonClick: () -> Unit = { + isStarted = !isStarted + if (isStarted) { + result.clear() + if (ActivityCompat.checkSelfPermission( + activity, + Manifest.permission.RECORD_AUDIO + ) != PackageManager.PERMISSION_GRANTED + ) { + Log.i(TAG, "Recording is not allowed") + // Nothing is being recorded, so do not leave the button showing "Stop" + isStarted = false + } else { + val audioSource = MediaRecorder.AudioSource.MIC + val channelConfig = AudioFormat.CHANNEL_IN_MONO + val audioFormat = AudioFormat.ENCODING_PCM_16BIT + val numBytes = + AudioRecord.getMinBufferSize(sampleRateInHz, channelConfig, audioFormat) + + audioRecord = AudioRecord( + audioSource, + sampleRateInHz, + AudioFormat.CHANNEL_IN_MONO, + AudioFormat.ENCODING_PCM_16BIT, + numBytes * 2 // a sample has two bytes as we are using 16-bit PCM + ) + + thread(true) { + Log.i(TAG, "processing samples") + val interval = 0.1 // i.e., 100 ms + val bufferSize = (interval * sampleRateInHz).toInt() // in samples + val buffer = ShortArray(bufferSize) + val sampleList = ArrayList<FloatArray>() + audioRecord?.let { + it.startRecording() + while (isStarted) { + val ret = it.read(buffer, 0, buffer.size) + // read() reports errors as negative codes; only a positive count is usable as an array size + if (ret > 0) { + val samples = FloatArray(ret) { i -> buffer[i] / 32768.0f } + sampleList.add(samples) + } + } + // Give the microphone back once the user presses Stop + it.stop() + it.release() + } + Log.i(TAG, "Stop recording") + Log.i(TAG, "Start recognition") + val samples = Flatten(sampleList) + val stream = Tagger.tagger.createStream() + stream.acceptWaveform(samples, sampleRateInHz) + val events = Tagger.tagger.compute(stream) + stream.release() + for (e in 
events) { + if (e.prob > threshold) { + result.add(e) + } + + } + + } + } + } + } + + Box( + modifier = Modifier.fillMaxSize(), + contentAlignment = Alignment.TopCenter + ) { + Column( + Modifier.padding(padding), + horizontalAlignment = Alignment.CenterHorizontally, + ) { + Spacer(modifier = Modifier.height(16.dp)) + Text("Threshold " + String.format("%.1f", threshold)) + Slider( + value = threshold, + onValueChange = { threshold = it }, + valueRange = 0.1F..1.0F, + modifier = Modifier.fillMaxWidth() + ) + + Button(onClick = onButtonClick) { + if (isStarted) { + Text("Stop") + } else { + Text("Start") + } + } + + Spacer(modifier = Modifier.height(16.dp)) + LazyColumn(modifier = Modifier.fillMaxSize()) { + if (!result.isEmpty()) { + + item { + Row( + modifier = Modifier.fillMaxWidth(), + horizontalArrangement = Arrangement.SpaceEvenly + ) { + Text( + text = "Event name", + ) + Text( + text = "Probability", + ) + } + } + } + + items(result) { event: AudioEvent -> + ViewRow(event = event) + } + } + } + } +} + +@Composable +fun ShowResult(result: String) { + Text( + modifier = Modifier.fillMaxWidth(), + textAlign = TextAlign.Center, + color = MaterialTheme.colorScheme.primary, + text = result, + ) +} + +@Composable +fun ViewRow( + modifier: Modifier = Modifier, + event: AudioEvent +) { + Surface( + modifier = modifier + .fillMaxWidth() + .padding(8.dp), + color = MaterialTheme.colorScheme.inversePrimary, + ) { + Row( + modifier = modifier, + horizontalArrangement = Arrangement.Center, + verticalAlignment = Alignment.CenterVertically, + ) { + Text( + text = event.name, + modifier = modifier.weight(1.0F), + ) + Text( + text = "%.2f".format(event.prob), + modifier = modifier.weight(1.0F), + ) + } + } +} + +fun Flatten(sampleList: ArrayList<FloatArray>): FloatArray { + var totalSamples = 0 + for (a in sampleList) { + totalSamples += a.size + } + var i = 0 + val samples = FloatArray(totalSamples) + for (a in sampleList) { + for (s in a) { + samples[i] = s + i += 1 + } + } + Log.i(TAG, 
"$i, $totalSamples") + + return samples +} \ No newline at end of file diff --git a/android/SherpaOnnxAudioTagging/app/src/main/java/com/k2fsa/sherpa/onnx/audio/tagging/MainActivity.kt b/android/SherpaOnnxAudioTagging/app/src/main/java/com/k2fsa/sherpa/onnx/audio/tagging/MainActivity.kt new file mode 100644 index 000000000..c338a930e --- /dev/null +++ b/android/SherpaOnnxAudioTagging/app/src/main/java/com/k2fsa/sherpa/onnx/audio/tagging/MainActivity.kt @@ -0,0 +1,75 @@ +package com.k2fsa.sherpa.onnx.audio.tagging + +import android.Manifest +import android.content.pm.PackageManager +import android.os.Bundle +import android.util.Log +import android.widget.Toast +import androidx.activity.ComponentActivity +import androidx.activity.compose.setContent +import androidx.compose.foundation.layout.fillMaxSize +import androidx.compose.material3.MaterialTheme +import androidx.compose.material3.Surface +import androidx.compose.runtime.Composable +import androidx.compose.ui.Modifier +import androidx.core.app.ActivityCompat +import com.k2fsa.sherpa.onnx.audio.tagging.ui.theme.SherpaOnnxAudioTaggingTheme + +const val TAG = "sherpa-onnx" + +private const val REQUEST_RECORD_AUDIO_PERMISSION = 200 + +// adb emu avd hostmicon +// to enable mic inside the emulator +class MainActivity : ComponentActivity() { + private val permissions: Array = arrayOf(Manifest.permission.RECORD_AUDIO) + override fun onCreate(savedInstanceState: Bundle?) 
{ + + super.onCreate(savedInstanceState) + setContent { + AudioTaggingApp() + } + ActivityCompat.requestPermissions(this, permissions, REQUEST_RECORD_AUDIO_PERMISSION) + Tagger.initTagger(this.assets) + } + + /** + * Handles the answer to the RECORD_AUDIO request: unless it was granted, + * shows a toast and closes the activity. + */ + @Suppress("DEPRECATION") + @Deprecated("Deprecated in Java") + override fun onRequestPermissionsResult( + requestCode: Int, + permissions: Array<String>, + grantResults: IntArray + ) { + super.onRequestPermissionsResult(requestCode, permissions, grantResults) + val permissionToRecordAccepted = if (requestCode == REQUEST_RECORD_AUDIO_PERMISSION) { + // grantResults is empty when the request was interrupted, so check before indexing + grantResults.isNotEmpty() && grantResults[0] == PackageManager.PERMISSION_GRANTED + } else { + false + } + + if (!permissionToRecordAccepted) { + Log.e(TAG, "Audio record is disallowed") + Toast.makeText( + this, + "This App needs access to the microphone", + Toast.LENGTH_SHORT + ) + .show() + finish() + // Stop here so the "permitted" message below is not logged for a denial + return + } + Log.i(TAG, "Audio record is permitted") + } +} + +@Composable +fun AudioTaggingApp() { + SherpaOnnxAudioTaggingTheme { + // A surface container using the 'background' color from the theme + Surface( + modifier = Modifier.fillMaxSize(), + color = MaterialTheme.colorScheme.background + ) { + Home() + } + } +} \ No newline at end of file diff --git a/android/SherpaOnnxAudioTagging/app/src/main/java/com/k2fsa/sherpa/onnx/audio/tagging/OfflineStream.kt b/android/SherpaOnnxAudioTagging/app/src/main/java/com/k2fsa/sherpa/onnx/audio/tagging/OfflineStream.kt new file mode 120000 index 000000000..f3faa5b76 --- /dev/null +++ b/android/SherpaOnnxAudioTagging/app/src/main/java/com/k2fsa/sherpa/onnx/audio/tagging/OfflineStream.kt @@ -0,0 +1 @@ +../../../../../../../../../../../../sherpa-onnx/kotlin-api/OfflineStream.kt \ No newline at end of file diff --git a/android/SherpaOnnxAudioTagging/app/src/main/java/com/k2fsa/sherpa/onnx/audio/tagging/Tagger.kt b/android/SherpaOnnxAudioTagging/app/src/main/java/com/k2fsa/sherpa/onnx/audio/tagging/Tagger.kt new file mode 100644 index 000000000..811c9e74f --- /dev/null +++ 
b/android/SherpaOnnxAudioTagging/app/src/main/java/com/k2fsa/sherpa/onnx/audio/tagging/Tagger.kt @@ -0,0 +1,27 @@ +package com.k2fsa.sherpa.onnx.audio.tagging + +import android.content.res.AssetManager +import android.util.Log +import com.k2fsa.sherpa.onnx.AudioTagging +import com.k2fsa.sherpa.onnx.getAudioTaggingConfig + + +object Tagger { + private var _tagger: AudioTagging? = null + val tagger: AudioTagging + get() { + return _tagger!! + } + + fun initTagger(assetManager: AssetManager? = null, numThreads: Int = 1) { + synchronized(this) { + if (_tagger != null) { + return + } + + Log.i("sherpa-onnx", "Initializing audio tagger") + val config = getAudioTaggingConfig(type = 0, numThreads = numThreads)!! + _tagger = AudioTagging(assetManager, config) + } + } +} \ No newline at end of file diff --git a/android/SherpaOnnxAudioTagging/app/src/main/java/com/k2fsa/sherpa/onnx/audio/tagging/ui/theme/Color.kt b/android/SherpaOnnxAudioTagging/app/src/main/java/com/k2fsa/sherpa/onnx/audio/tagging/ui/theme/Color.kt new file mode 100644 index 000000000..fe96a8413 --- /dev/null +++ b/android/SherpaOnnxAudioTagging/app/src/main/java/com/k2fsa/sherpa/onnx/audio/tagging/ui/theme/Color.kt @@ -0,0 +1,11 @@ +package com.k2fsa.sherpa.onnx.audio.tagging.ui.theme + +import androidx.compose.ui.graphics.Color + +val Purple80 = Color(0xFFD0BCFF) +val PurpleGrey80 = Color(0xFFCCC2DC) +val Pink80 = Color(0xFFEFB8C8) + +val Purple40 = Color(0xFF6650a4) +val PurpleGrey40 = Color(0xFF625b71) +val Pink40 = Color(0xFF7D5260) \ No newline at end of file diff --git a/android/SherpaOnnxAudioTagging/app/src/main/java/com/k2fsa/sherpa/onnx/audio/tagging/ui/theme/Theme.kt b/android/SherpaOnnxAudioTagging/app/src/main/java/com/k2fsa/sherpa/onnx/audio/tagging/ui/theme/Theme.kt new file mode 100644 index 000000000..236dc8a51 --- /dev/null +++ b/android/SherpaOnnxAudioTagging/app/src/main/java/com/k2fsa/sherpa/onnx/audio/tagging/ui/theme/Theme.kt @@ -0,0 +1,70 @@ +package 
com.k2fsa.sherpa.onnx.audio.tagging.ui.theme + +import android.app.Activity +import android.os.Build +import androidx.compose.foundation.isSystemInDarkTheme +import androidx.compose.material3.MaterialTheme +import androidx.compose.material3.darkColorScheme +import androidx.compose.material3.dynamicDarkColorScheme +import androidx.compose.material3.dynamicLightColorScheme +import androidx.compose.material3.lightColorScheme +import androidx.compose.runtime.Composable +import androidx.compose.runtime.SideEffect +import androidx.compose.ui.graphics.toArgb +import androidx.compose.ui.platform.LocalContext +import androidx.compose.ui.platform.LocalView +import androidx.core.view.WindowCompat + +private val DarkColorScheme = darkColorScheme( + primary = Purple80, + secondary = PurpleGrey80, + tertiary = Pink80 +) + +private val LightColorScheme = lightColorScheme( + primary = Purple40, + secondary = PurpleGrey40, + tertiary = Pink40 + + /* Other default colors to override + background = Color(0xFFFFFBFE), + surface = Color(0xFFFFFBFE), + onPrimary = Color.White, + onSecondary = Color.White, + onTertiary = Color.White, + onBackground = Color(0xFF1C1B1F), + onSurface = Color(0xFF1C1B1F), + */ +) + +@Composable +fun SherpaOnnxAudioTaggingTheme( + darkTheme: Boolean = isSystemInDarkTheme(), + // Dynamic color is available on Android 12+ + dynamicColor: Boolean = true, + content: @Composable () -> Unit +) { + val colorScheme = when { + dynamicColor && Build.VERSION.SDK_INT >= Build.VERSION_CODES.S -> { + val context = LocalContext.current + if (darkTheme) dynamicDarkColorScheme(context) else dynamicLightColorScheme(context) + } + + darkTheme -> DarkColorScheme + else -> LightColorScheme + } + val view = LocalView.current + if (!view.isInEditMode) { + SideEffect { + val window = (view.context as Activity).window + window.statusBarColor = colorScheme.primary.toArgb() + WindowCompat.getInsetsController(window, view).isAppearanceLightStatusBars = darkTheme + } + } + + 
MaterialTheme( + colorScheme = colorScheme, + typography = Typography, + content = content + ) +} \ No newline at end of file diff --git a/android/SherpaOnnxAudioTagging/app/src/main/java/com/k2fsa/sherpa/onnx/audio/tagging/ui/theme/Type.kt b/android/SherpaOnnxAudioTagging/app/src/main/java/com/k2fsa/sherpa/onnx/audio/tagging/ui/theme/Type.kt new file mode 100644 index 000000000..e549c8bdc --- /dev/null +++ b/android/SherpaOnnxAudioTagging/app/src/main/java/com/k2fsa/sherpa/onnx/audio/tagging/ui/theme/Type.kt @@ -0,0 +1,34 @@ +package com.k2fsa.sherpa.onnx.audio.tagging.ui.theme + +import androidx.compose.material3.Typography +import androidx.compose.ui.text.TextStyle +import androidx.compose.ui.text.font.FontFamily +import androidx.compose.ui.text.font.FontWeight +import androidx.compose.ui.unit.sp + +// Set of Material typography styles to start with +val Typography = Typography( + bodyLarge = TextStyle( + fontFamily = FontFamily.Default, + fontWeight = FontWeight.Normal, + fontSize = 16.sp, + lineHeight = 24.sp, + letterSpacing = 0.5.sp + ) + /* Other default text styles to override + titleLarge = TextStyle( + fontFamily = FontFamily.Default, + fontWeight = FontWeight.Normal, + fontSize = 22.sp, + lineHeight = 28.sp, + letterSpacing = 0.sp + ), + labelSmall = TextStyle( + fontFamily = FontFamily.Default, + fontWeight = FontWeight.Medium, + fontSize = 11.sp, + lineHeight = 16.sp, + letterSpacing = 0.5.sp + ) + */ +) \ No newline at end of file diff --git a/android/SherpaOnnxAudioTagging/app/src/main/jniLibs/arm64-v8a/.gitignore b/android/SherpaOnnxAudioTagging/app/src/main/jniLibs/arm64-v8a/.gitignore new file mode 100644 index 000000000..e69de29bb diff --git a/android/SherpaOnnxAudioTagging/app/src/main/jniLibs/armeabi-v7a/.gitignore b/android/SherpaOnnxAudioTagging/app/src/main/jniLibs/armeabi-v7a/.gitignore new file mode 100644 index 000000000..e69de29bb diff --git a/android/SherpaOnnxAudioTagging/app/src/main/jniLibs/x86/.gitignore 
b/android/SherpaOnnxAudioTagging/app/src/main/jniLibs/x86/.gitignore new file mode 100644 index 000000000..e69de29bb diff --git a/android/SherpaOnnxAudioTagging/app/src/main/jniLibs/x86_64/.gitignore b/android/SherpaOnnxAudioTagging/app/src/main/jniLibs/x86_64/.gitignore new file mode 100644 index 000000000..e69de29bb diff --git a/android/SherpaOnnxAudioTagging/app/src/main/res/drawable-v24/ic_launcher_foreground.xml b/android/SherpaOnnxAudioTagging/app/src/main/res/drawable-v24/ic_launcher_foreground.xml new file mode 100644 index 000000000..2b068d114 --- /dev/null +++ b/android/SherpaOnnxAudioTagging/app/src/main/res/drawable-v24/ic_launcher_foreground.xml @@ -0,0 +1,30 @@ + + + + + + + + + + + \ No newline at end of file diff --git a/android/SherpaOnnxAudioTagging/app/src/main/res/drawable/ic_launcher_background.xml b/android/SherpaOnnxAudioTagging/app/src/main/res/drawable/ic_launcher_background.xml new file mode 100644 index 000000000..07d5da9cb --- /dev/null +++ b/android/SherpaOnnxAudioTagging/app/src/main/res/drawable/ic_launcher_background.xml @@ -0,0 +1,170 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/android/SherpaOnnxAudioTagging/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml b/android/SherpaOnnxAudioTagging/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml new file mode 100644 index 000000000..6f3b755bf --- /dev/null +++ b/android/SherpaOnnxAudioTagging/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/android/SherpaOnnxAudioTagging/app/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml b/android/SherpaOnnxAudioTagging/app/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml new file mode 100644 index 000000000..6f3b755bf --- /dev/null +++ b/android/SherpaOnnxAudioTagging/app/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git 
a/android/SherpaOnnxAudioTagging/app/src/main/res/mipmap-hdpi/ic_launcher.webp b/android/SherpaOnnxAudioTagging/app/src/main/res/mipmap-hdpi/ic_launcher.webp new file mode 100644 index 000000000..c209e78ec Binary files /dev/null and b/android/SherpaOnnxAudioTagging/app/src/main/res/mipmap-hdpi/ic_launcher.webp differ diff --git a/android/SherpaOnnxAudioTagging/app/src/main/res/mipmap-hdpi/ic_launcher_round.webp b/android/SherpaOnnxAudioTagging/app/src/main/res/mipmap-hdpi/ic_launcher_round.webp new file mode 100644 index 000000000..b2dfe3d1b Binary files /dev/null and b/android/SherpaOnnxAudioTagging/app/src/main/res/mipmap-hdpi/ic_launcher_round.webp differ diff --git a/android/SherpaOnnxAudioTagging/app/src/main/res/mipmap-mdpi/ic_launcher.webp b/android/SherpaOnnxAudioTagging/app/src/main/res/mipmap-mdpi/ic_launcher.webp new file mode 100644 index 000000000..4f0f1d64e Binary files /dev/null and b/android/SherpaOnnxAudioTagging/app/src/main/res/mipmap-mdpi/ic_launcher.webp differ diff --git a/android/SherpaOnnxAudioTagging/app/src/main/res/mipmap-mdpi/ic_launcher_round.webp b/android/SherpaOnnxAudioTagging/app/src/main/res/mipmap-mdpi/ic_launcher_round.webp new file mode 100644 index 000000000..62b611da0 Binary files /dev/null and b/android/SherpaOnnxAudioTagging/app/src/main/res/mipmap-mdpi/ic_launcher_round.webp differ diff --git a/android/SherpaOnnxAudioTagging/app/src/main/res/mipmap-xhdpi/ic_launcher.webp b/android/SherpaOnnxAudioTagging/app/src/main/res/mipmap-xhdpi/ic_launcher.webp new file mode 100644 index 000000000..948a3070f Binary files /dev/null and b/android/SherpaOnnxAudioTagging/app/src/main/res/mipmap-xhdpi/ic_launcher.webp differ diff --git a/android/SherpaOnnxAudioTagging/app/src/main/res/mipmap-xhdpi/ic_launcher_round.webp b/android/SherpaOnnxAudioTagging/app/src/main/res/mipmap-xhdpi/ic_launcher_round.webp new file mode 100644 index 000000000..1b9a6956b Binary files /dev/null and 
b/android/SherpaOnnxAudioTagging/app/src/main/res/mipmap-xhdpi/ic_launcher_round.webp differ diff --git a/android/SherpaOnnxAudioTagging/app/src/main/res/mipmap-xxhdpi/ic_launcher.webp b/android/SherpaOnnxAudioTagging/app/src/main/res/mipmap-xxhdpi/ic_launcher.webp new file mode 100644 index 000000000..28d4b77f9 Binary files /dev/null and b/android/SherpaOnnxAudioTagging/app/src/main/res/mipmap-xxhdpi/ic_launcher.webp differ diff --git a/android/SherpaOnnxAudioTagging/app/src/main/res/mipmap-xxhdpi/ic_launcher_round.webp b/android/SherpaOnnxAudioTagging/app/src/main/res/mipmap-xxhdpi/ic_launcher_round.webp new file mode 100644 index 000000000..9287f5083 Binary files /dev/null and b/android/SherpaOnnxAudioTagging/app/src/main/res/mipmap-xxhdpi/ic_launcher_round.webp differ diff --git a/android/SherpaOnnxAudioTagging/app/src/main/res/mipmap-xxxhdpi/ic_launcher.webp b/android/SherpaOnnxAudioTagging/app/src/main/res/mipmap-xxxhdpi/ic_launcher.webp new file mode 100644 index 000000000..aa7d6427e Binary files /dev/null and b/android/SherpaOnnxAudioTagging/app/src/main/res/mipmap-xxxhdpi/ic_launcher.webp differ diff --git a/android/SherpaOnnxAudioTagging/app/src/main/res/mipmap-xxxhdpi/ic_launcher_round.webp b/android/SherpaOnnxAudioTagging/app/src/main/res/mipmap-xxxhdpi/ic_launcher_round.webp new file mode 100644 index 000000000..9126ae37c Binary files /dev/null and b/android/SherpaOnnxAudioTagging/app/src/main/res/mipmap-xxxhdpi/ic_launcher_round.webp differ diff --git a/android/SherpaOnnxAudioTagging/app/src/main/res/values/colors.xml b/android/SherpaOnnxAudioTagging/app/src/main/res/values/colors.xml new file mode 100644 index 000000000..f8c6127d3 --- /dev/null +++ b/android/SherpaOnnxAudioTagging/app/src/main/res/values/colors.xml @@ -0,0 +1,10 @@ + + + #FFBB86FC + #FF6200EE + #FF3700B3 + #FF03DAC5 + #FF018786 + #FF000000 + #FFFFFFFF + \ No newline at end of file diff --git a/android/SherpaOnnxAudioTagging/app/src/main/res/values/strings.xml 
b/android/SherpaOnnxAudioTagging/app/src/main/res/values/strings.xml new file mode 100644 index 000000000..8dd8dcef8 --- /dev/null +++ b/android/SherpaOnnxAudioTagging/app/src/main/res/values/strings.xml @@ -0,0 +1,3 @@ + + Audio Tagging + diff --git a/android/SherpaOnnxAudioTagging/app/src/main/res/values/themes.xml b/android/SherpaOnnxAudioTagging/app/src/main/res/values/themes.xml new file mode 100644 index 000000000..53b9432eb --- /dev/null +++ b/android/SherpaOnnxAudioTagging/app/src/main/res/values/themes.xml @@ -0,0 +1,5 @@ + + + + + \ No newline at end of file diff --git a/android/SherpaOnnxAudioTaggingWearOs/build.gradle.kts b/android/SherpaOnnxAudioTaggingWearOs/build.gradle.kts new file mode 100644 index 000000000..8e8f4ab91 --- /dev/null +++ b/android/SherpaOnnxAudioTaggingWearOs/build.gradle.kts @@ -0,0 +1,5 @@ +// Top-level build file where you can add configuration options common to all sub-projects/modules. +plugins { + id("com.android.application") version "8.2.0" apply false + id("org.jetbrains.kotlin.android") version "1.9.0" apply false +} \ No newline at end of file diff --git a/android/SherpaOnnxAudioTaggingWearOs/gradle.properties b/android/SherpaOnnxAudioTaggingWearOs/gradle.properties new file mode 100644 index 000000000..3c5031eb7 --- /dev/null +++ b/android/SherpaOnnxAudioTaggingWearOs/gradle.properties @@ -0,0 +1,23 @@ +# Project-wide Gradle settings. +# IDE (e.g. Android Studio) users: +# Gradle settings configured through the IDE *will override* +# any settings specified in this file. +# For more details on how to configure your build environment visit +# http://www.gradle.org/docs/current/userguide/build_environment.html +# Specifies the JVM arguments used for the daemon process. +# The setting is particularly useful for tweaking memory settings. +org.gradle.jvmargs=-Xmx2048m -Dfile.encoding=UTF-8 +# When configured, Gradle will run in incubating parallel mode. +# This option should only be used with decoupled projects. 
More details, visit +# http://www.gradle.org/docs/current/userguide/multi_project_builds.html#sec:decoupled_projects +# org.gradle.parallel=true +# AndroidX package structure to make it clearer which packages are bundled with the +# Android operating system, and which are packaged with your app's APK +# https://developer.android.com/topic/libraries/support-library/androidx-rn +android.useAndroidX=true +# Kotlin code style for this project: "official" or "obsolete": +kotlin.code.style=official +# Enables namespacing of each library's R class so that its R class includes only the +# resources declared in the library itself and none from the library's dependencies, +# thereby reducing the size of the R class for that library +android.nonTransitiveRClass=true \ No newline at end of file diff --git a/android/SherpaOnnxAudioTaggingWearOs/gradle/wrapper/gradle-wrapper.jar b/android/SherpaOnnxAudioTaggingWearOs/gradle/wrapper/gradle-wrapper.jar new file mode 100644 index 000000000..e708b1c02 Binary files /dev/null and b/android/SherpaOnnxAudioTaggingWearOs/gradle/wrapper/gradle-wrapper.jar differ diff --git a/android/SherpaOnnxAudioTaggingWearOs/gradle/wrapper/gradle-wrapper.properties b/android/SherpaOnnxAudioTaggingWearOs/gradle/wrapper/gradle-wrapper.properties new file mode 100644 index 000000000..9758ae8e7 --- /dev/null +++ b/android/SherpaOnnxAudioTaggingWearOs/gradle/wrapper/gradle-wrapper.properties @@ -0,0 +1,6 @@ +#Tue Apr 16 20:57:10 CST 2024 +distributionBase=GRADLE_USER_HOME +distributionPath=wrapper/dists +distributionUrl=https\://services.gradle.org/distributions/gradle-8.2-bin.zip +zipStoreBase=GRADLE_USER_HOME +zipStorePath=wrapper/dists diff --git a/android/SherpaOnnxAudioTaggingWearOs/gradlew b/android/SherpaOnnxAudioTaggingWearOs/gradlew new file mode 100755 index 000000000..4f906e0c8 --- /dev/null +++ b/android/SherpaOnnxAudioTaggingWearOs/gradlew @@ -0,0 +1,185 @@ +#!/usr/bin/env sh + +# +# Copyright 2015 the original author or authors. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +############################################################################## +## +## Gradle start up script for UN*X +## +############################################################################## + +# Attempt to set APP_HOME +# Resolve links: $0 may be a link +PRG="$0" +# Need this for relative symlinks. +while [ -h "$PRG" ] ; do + ls=`ls -ld "$PRG"` + link=`expr "$ls" : '.*-> \(.*\)$'` + if expr "$link" : '/.*' > /dev/null; then + PRG="$link" + else + PRG=`dirname "$PRG"`"/$link" + fi +done +SAVED="`pwd`" +cd "`dirname \"$PRG\"`/" >/dev/null +APP_HOME="`pwd -P`" +cd "$SAVED" >/dev/null + +APP_NAME="Gradle" +APP_BASE_NAME=`basename "$0"` + +# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' + +# Use the maximum available, or set MAX_FD != -1 to use that value. +MAX_FD="maximum" + +warn () { + echo "$*" +} + +die () { + echo + echo "$*" + echo + exit 1 +} + +# OS specific support (must be 'true' or 'false'). +cygwin=false +msys=false +darwin=false +nonstop=false +case "`uname`" in + CYGWIN* ) + cygwin=true + ;; + Darwin* ) + darwin=true + ;; + MINGW* ) + msys=true + ;; + NONSTOP* ) + nonstop=true + ;; +esac + +CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar + + +# Determine the Java command to use to start the JVM. 
+if [ -n "$JAVA_HOME" ] ; then + if [ -x "$JAVA_HOME/jre/sh/java" ] ; then + # IBM's JDK on AIX uses strange locations for the executables + JAVACMD="$JAVA_HOME/jre/sh/java" + else + JAVACMD="$JAVA_HOME/bin/java" + fi + if [ ! -x "$JAVACMD" ] ; then + die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." + fi +else + JAVACMD="java" + which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." +fi + +# Increase the maximum file descriptors if we can. +if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then + MAX_FD_LIMIT=`ulimit -H -n` + if [ $? -eq 0 ] ; then + if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then + MAX_FD="$MAX_FD_LIMIT" + fi + ulimit -n $MAX_FD + if [ $? -ne 0 ] ; then + warn "Could not set maximum file descriptor limit: $MAX_FD" + fi + else + warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT" + fi +fi + +# For Darwin, add options to specify how the application appears in the dock +if $darwin; then + GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\"" +fi + +# For Cygwin or MSYS, switch paths to Windows format before running java +if [ "$cygwin" = "true" -o "$msys" = "true" ] ; then + APP_HOME=`cygpath --path --mixed "$APP_HOME"` + CLASSPATH=`cygpath --path --mixed "$CLASSPATH"` + + JAVACMD=`cygpath --unix "$JAVACMD"` + + # We build the pattern for arguments to be converted via cygpath + ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null` + SEP="" + for dir in $ROOTDIRSRAW ; do + ROOTDIRS="$ROOTDIRS$SEP$dir" + SEP="|" + done + OURCYGPATTERN="(^($ROOTDIRS))" + # Add a user-defined pattern to the cygpath arguments + if [ "$GRADLE_CYGPATTERN" != "" ] ; then + 
OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)" + fi + # Now convert the arguments - kludge to limit ourselves to /bin/sh + i=0 + for arg in "$@" ; do + CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -` + CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option + + if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition + eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"` + else + eval `echo args$i`="\"$arg\"" + fi + i=`expr $i + 1` + done + case $i in + 0) set -- ;; + 1) set -- "$args0" ;; + 2) set -- "$args0" "$args1" ;; + 3) set -- "$args0" "$args1" "$args2" ;; + 4) set -- "$args0" "$args1" "$args2" "$args3" ;; + 5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;; + 6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;; + 7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;; + 8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;; + 9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;; + esac +fi + +# Escape application args +save () { + for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done + echo " " +} +APP_ARGS=`save "$@"` + +# Collect all arguments for the java command, following the shell quoting and substitution rules +eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS" + +exec "$JAVACMD" "$@" diff --git a/android/SherpaOnnxAudioTaggingWearOs/gradlew.bat b/android/SherpaOnnxAudioTaggingWearOs/gradlew.bat new file mode 100644 index 000000000..ac1b06f93 --- /dev/null +++ b/android/SherpaOnnxAudioTaggingWearOs/gradlew.bat @@ -0,0 +1,89 @@ +@rem +@rem Copyright 2015 the original author or authors. +@rem +@rem Licensed under the Apache License, Version 2.0 (the "License"); +@rem you may not use this file except in compliance with the License. 
+@rem You may obtain a copy of the License at +@rem +@rem https://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, software +@rem distributed under the License is distributed on an "AS IS" BASIS, +@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@rem See the License for the specific language governing permissions and +@rem limitations under the License. +@rem + +@if "%DEBUG%" == "" @echo off +@rem ########################################################################## +@rem +@rem Gradle startup script for Windows +@rem +@rem ########################################################################## + +@rem Set local scope for the variables with windows NT shell +if "%OS%"=="Windows_NT" setlocal + +set DIRNAME=%~dp0 +if "%DIRNAME%" == "" set DIRNAME=. +set APP_BASE_NAME=%~n0 +set APP_HOME=%DIRNAME% + +@rem Resolve any "." and ".." in APP_HOME to make it shorter. +for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi + +@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" + +@rem Find java.exe +if defined JAVA_HOME goto findJavaFromJavaHome + +set JAVA_EXE=java.exe +%JAVA_EXE% -version >NUL 2>&1 +if "%ERRORLEVEL%" == "0" goto execute + +echo. +echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. +echo. +echo Please set the JAVA_HOME variable in your environment to match the +echo location of your Java installation. + +goto fail + +:findJavaFromJavaHome +set JAVA_HOME=%JAVA_HOME:"=% +set JAVA_EXE=%JAVA_HOME%/bin/java.exe + +if exist "%JAVA_EXE%" goto execute + +echo. +echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% +echo. +echo Please set the JAVA_HOME variable in your environment to match the +echo location of your Java installation. 
+ +goto fail + +:execute +@rem Setup the command line + +set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar + + +@rem Execute Gradle +"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %* + +:end +@rem End local scope for the variables with windows NT shell +if "%ERRORLEVEL%"=="0" goto mainEnd + +:fail +rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of +rem the _cmd.exe /c_ return code! +if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 +exit /b 1 + +:mainEnd +if "%OS%"=="Windows_NT" endlocal + +:omega diff --git a/android/SherpaOnnxAudioTaggingWearOs/settings.gradle.kts b/android/SherpaOnnxAudioTaggingWearOs/settings.gradle.kts new file mode 100644 index 000000000..68476cb56 --- /dev/null +++ b/android/SherpaOnnxAudioTaggingWearOs/settings.gradle.kts @@ -0,0 +1,18 @@ +pluginManagement { + repositories { + google() + mavenCentral() + gradlePluginPortal() + } +} +dependencyResolutionManagement { + repositoriesMode.set(RepositoriesMode.FAIL_ON_PROJECT_REPOS) + repositories { + google() + mavenCentral() + } +} + +rootProject.name = "SherpaOnnxAudioTaggingWearOs" +include(":app") + \ No newline at end of file diff --git a/android/SherpaOnnxKws/app/src/main/AndroidManifest.xml b/android/SherpaOnnxKws/app/src/main/AndroidManifest.xml index 935fb0e95..d575b6b90 100644 --- a/android/SherpaOnnxKws/app/src/main/AndroidManifest.xml +++ b/android/SherpaOnnxKws/app/src/main/AndroidManifest.xml @@ -15,7 +15,8 @@ android:theme="@style/Theme.SherpaOnnx" tools:targetApi="31"> diff --git a/android/SherpaOnnxKws/app/src/main/java/com/k2fsa/sherpa/onnx/FeatureConfig.kt b/android/SherpaOnnxKws/app/src/main/java/com/k2fsa/sherpa/onnx/FeatureConfig.kt new file mode 120000 index 000000000..952fae878 --- /dev/null +++ b/android/SherpaOnnxKws/app/src/main/java/com/k2fsa/sherpa/onnx/FeatureConfig.kt @@ -0,0 +1 @@ 
+../../../../../../../../../../sherpa-onnx/kotlin-api/FeatureConfig.kt \ No newline at end of file diff --git a/android/SherpaOnnxKws/app/src/main/java/com/k2fsa/sherpa/onnx/KeywordSpotter.kt b/android/SherpaOnnxKws/app/src/main/java/com/k2fsa/sherpa/onnx/KeywordSpotter.kt new file mode 120000 index 000000000..4392376a1 --- /dev/null +++ b/android/SherpaOnnxKws/app/src/main/java/com/k2fsa/sherpa/onnx/KeywordSpotter.kt @@ -0,0 +1 @@ +../../../../../../../../../../sherpa-onnx/kotlin-api/KeywordSpotter.kt \ No newline at end of file diff --git a/android/SherpaOnnxKws/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt b/android/SherpaOnnxKws/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt index 83c8abe31..b17a6ea6c 100644 --- a/android/SherpaOnnxKws/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt +++ b/android/SherpaOnnxKws/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt @@ -1,4 +1,4 @@ -package com.k2fsa.sherpa.onnx +package com.k2fsa.sherpa.onnx.kws import android.Manifest import android.content.pm.PackageManager @@ -14,7 +14,13 @@ import android.widget.TextView import android.widget.Toast import androidx.appcompat.app.AppCompatActivity import androidx.core.app.ActivityCompat -import com.k2fsa.sherpa.onnx.* +import com.k2fsa.sherpa.onnx.KeywordSpotter +import com.k2fsa.sherpa.onnx.KeywordSpotterConfig +import com.k2fsa.sherpa.onnx.OnlineStream +import com.k2fsa.sherpa.onnx.R +import com.k2fsa.sherpa.onnx.getFeatureConfig +import com.k2fsa.sherpa.onnx.getKeywordsFile +import com.k2fsa.sherpa.onnx.getKwsModelConfig import kotlin.concurrent.thread private const val TAG = "sherpa-onnx" @@ -23,7 +29,8 @@ private const val REQUEST_RECORD_AUDIO_PERMISSION = 200 class MainActivity : AppCompatActivity() { private val permissions: Array = arrayOf(Manifest.permission.RECORD_AUDIO) - private lateinit var model: SherpaOnnxKws + private lateinit var kws: KeywordSpotter + private lateinit var stream: OnlineStream private var audioRecord: AudioRecord? 
= null private lateinit var recordButton: Button private lateinit var textView: TextView @@ -87,15 +94,18 @@ class MainActivity : AppCompatActivity() { Log.i(TAG, keywords) keywords = keywords.replace("\n", "/") + keywords = keywords.trim() // If keywords is an empty string, it just resets the decoding stream // always returns true in this case. // If keywords is not empty, it will create a new decoding stream with // the given keywords appended to the default keywords. - // Return false if errors occured when adding keywords, true otherwise. - val status = model.reset(keywords) - if (!status) { - Log.i(TAG, "Failed to reset with keywords.") - Toast.makeText(this, "Failed to set keywords.", Toast.LENGTH_LONG).show(); + // Return false if errors occurred when adding keywords, true otherwise. + stream.release() + stream = kws.createStream(keywords) + if (stream.ptr == 0L) { + Log.i(TAG, "Failed to create stream with keywords: $keywords") + Toast.makeText(this, "Failed to set keywords to $keywords.", Toast.LENGTH_LONG) + .show() return } @@ -122,6 +132,7 @@ class MainActivity : AppCompatActivity() { audioRecord!!.release() audioRecord = null recordButton.setText(R.string.start) + stream.release() Log.i(TAG, "Stopped recording") } } @@ -137,22 +148,22 @@ class MainActivity : AppCompatActivity() { val ret = audioRecord?.read(buffer, 0, buffer.size) if (ret != null && ret > 0) { val samples = FloatArray(ret) { buffer[it] / 32768.0f } - model.acceptWaveform(samples, sampleRate=sampleRateInHz) - while (model.isReady()) { - model.decode() + stream.acceptWaveform(samples, sampleRate = sampleRateInHz) + while (kws.isReady(stream)) { + kws.decode(stream) } - val text = model.keyword + val text = kws.getResult(stream).keyword - var textToDisplay = lastText; + var textToDisplay = lastText - if(text.isNotBlank()) { + if (text.isNotBlank()) { if (lastText.isBlank()) { - textToDisplay = "${idx}: ${text}" + textToDisplay = "$idx: $text" } else { - textToDisplay = "${idx}: 
${text}\n${lastText}" + textToDisplay = "$idx: $text\n$lastText" } - lastText = "${idx}: ${text}\n${lastText}" + lastText = "$idx: $text\n$lastText" idx += 1 } @@ -188,20 +199,21 @@ class MainActivity : AppCompatActivity() { } private fun initModel() { - // Please change getModelConfig() to add new models + // Please change getKwsModelConfig() to add new models // See https://k2-fsa.github.io/sherpa/onnx/kws/pretrained_models/index.html // for a list of available models val type = 0 - Log.i(TAG, "Select model type ${type}") + Log.i(TAG, "Select model type $type") val config = KeywordSpotterConfig( featConfig = getFeatureConfig(sampleRate = sampleRateInHz, featureDim = 80), - modelConfig = getModelConfig(type = type)!!, - keywordsFile = getKeywordsFile(type = type)!!, + modelConfig = getKwsModelConfig(type = type)!!, + keywordsFile = getKeywordsFile(type = type), ) - model = SherpaOnnxKws( + kws = KeywordSpotter( assetManager = application.assets, config = config, ) + stream = kws.createStream() } -} +} \ No newline at end of file diff --git a/android/SherpaOnnxKws/app/src/main/java/com/k2fsa/sherpa/onnx/OnlineRecognizer.kt b/android/SherpaOnnxKws/app/src/main/java/com/k2fsa/sherpa/onnx/OnlineRecognizer.kt new file mode 120000 index 000000000..5bb19ee10 --- /dev/null +++ b/android/SherpaOnnxKws/app/src/main/java/com/k2fsa/sherpa/onnx/OnlineRecognizer.kt @@ -0,0 +1 @@ +../../../../../../../../../../sherpa-onnx/kotlin-api/OnlineRecognizer.kt \ No newline at end of file diff --git a/android/SherpaOnnxKws/app/src/main/java/com/k2fsa/sherpa/onnx/OnlineStream.kt b/android/SherpaOnnxKws/app/src/main/java/com/k2fsa/sherpa/onnx/OnlineStream.kt new file mode 120000 index 000000000..d4518b89b --- /dev/null +++ b/android/SherpaOnnxKws/app/src/main/java/com/k2fsa/sherpa/onnx/OnlineStream.kt @@ -0,0 +1 @@ +../../../../../../../../../../sherpa-onnx/kotlin-api/OnlineStream.kt \ No newline at end of file diff --git a/android/SherpaOnnxKws/app/src/main/res/values/strings.xml 
b/android/SherpaOnnxKws/app/src/main/res/values/strings.xml index 1fba032f9..484977db0 100644 --- a/android/SherpaOnnxKws/app/src/main/res/values/strings.xml +++ b/android/SherpaOnnxKws/app/src/main/res/values/strings.xml @@ -1,12 +1,12 @@ - KWS with Next-gen Kaldi + Keyword spotting Click the Start button to play keyword spotting with Next-gen Kaldi. \n \n\n\n The source code and pre-trained models are publicly available. Please see https://github.com/k2-fsa/sherpa-onnx for details. - Input your keywords here, one keyword perline. + Input your keywords here, one keyword per line.\nTwo example keywords are given below:\n\nn ǐ h ǎo @你好\nd àn g ē d àn g ē @蛋哥蛋哥 Start Stop diff --git a/android/SherpaOnnxSpeakerIdentification/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/identification/BarItem.kt b/android/SherpaOnnxSpeakerIdentification/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/identification/BarItem.kt index 7c3a56dda..620f4f0c5 100644 --- a/android/SherpaOnnxSpeakerIdentification/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/identification/BarItem.kt +++ b/android/SherpaOnnxSpeakerIdentification/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/identification/BarItem.kt @@ -2,7 +2,7 @@ package com.k2fsa.sherpa.onnx.speaker.identification import androidx.compose.ui.graphics.vector.ImageVector -data class BarItem ( +data class BarItem( val title: String, // see https://www.composables.com/icons diff --git a/android/SherpaOnnxSpeakerIdentification/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/identification/NavRoutes.kt b/android/SherpaOnnxSpeakerIdentification/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/identification/NavRoutes.kt index 118396645..e00abc95a 100644 --- a/android/SherpaOnnxSpeakerIdentification/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/identification/NavRoutes.kt +++ b/android/SherpaOnnxSpeakerIdentification/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/identification/NavRoutes.kt @@ -1,8 +1,8 @@ package 
com.k2fsa.sherpa.onnx.speaker.identification sealed class NavRoutes(val route: String) { - object Home: NavRoutes("home") - object Register: NavRoutes("register") - object View: NavRoutes("view") - object Help: NavRoutes("help") + object Home : NavRoutes("home") + object Register : NavRoutes("register") + object View : NavRoutes("view") + object Help : NavRoutes("help") } \ No newline at end of file diff --git a/android/SherpaOnnxSpeakerIdentification/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/identification/OnlineStream.kt b/android/SherpaOnnxSpeakerIdentification/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/identification/OnlineStream.kt new file mode 120000 index 000000000..3211155f6 --- /dev/null +++ b/android/SherpaOnnxSpeakerIdentification/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/identification/OnlineStream.kt @@ -0,0 +1 @@ +../../../../../../../../../../../../sherpa-onnx/kotlin-api/OnlineStream.kt \ No newline at end of file diff --git a/android/SherpaOnnxSpeakerIdentification/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/identification/Speaker.kt b/android/SherpaOnnxSpeakerIdentification/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/identification/Speaker.kt deleted file mode 100644 index 4c9bd06fa..000000000 --- a/android/SherpaOnnxSpeakerIdentification/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/identification/Speaker.kt +++ /dev/null @@ -1,188 +0,0 @@ -package com.k2fsa.sherpa.onnx - -import android.content.res.AssetManager -import android.util.Log - -private val TAG = "sherpa-onnx" -data class SpeakerEmbeddingExtractorConfig( - val model: String, - var numThreads: Int = 1, - var debug: Boolean = false, - var provider: String = "cpu", -) - -class SpeakerEmbeddingExtractorStream(var ptr: Long) { - fun acceptWaveform(samples: FloatArray, sampleRate: Int) = - acceptWaveform(ptr, samples, sampleRate) - - fun inputFinished() = inputFinished(ptr) - - protected fun finalize() { - delete(ptr) - ptr = 0 - } - - private external fun 
myTest(ptr: Long, v: Array) - - fun release() = finalize() - private external fun acceptWaveform(ptr: Long, samples: FloatArray, sampleRate: Int) - - private external fun inputFinished(ptr: Long) - - private external fun delete(ptr: Long) - - companion object { - init { - System.loadLibrary("sherpa-onnx-jni") - } - } -} - -class SpeakerEmbeddingExtractor( - assetManager: AssetManager? = null, - config: SpeakerEmbeddingExtractorConfig, -) { - private var ptr: Long - - init { - ptr = if (assetManager != null) { - new(assetManager, config) - } else { - newFromFile(config) - } - } - - protected fun finalize() { - delete(ptr) - ptr = 0 - } - - fun release() = finalize() - - fun createStream(): SpeakerEmbeddingExtractorStream { - val p = createStream(ptr) - return SpeakerEmbeddingExtractorStream(p) - } - - fun isReady(stream: SpeakerEmbeddingExtractorStream) = isReady(ptr, stream.ptr) - fun compute(stream: SpeakerEmbeddingExtractorStream) = compute(ptr, stream.ptr) - fun dim() = dim(ptr) - - private external fun new( - assetManager: AssetManager, - config: SpeakerEmbeddingExtractorConfig, - ): Long - - private external fun newFromFile( - config: SpeakerEmbeddingExtractorConfig, - ): Long - - private external fun delete(ptr: Long) - - private external fun createStream(ptr: Long): Long - - private external fun isReady(ptr: Long, streamPtr: Long): Boolean - - private external fun compute(ptr: Long, streamPtr: Long): FloatArray - - private external fun dim(ptr: Long): Int - - companion object { - init { - System.loadLibrary("sherpa-onnx-jni") - } - } -} - -class SpeakerEmbeddingManager(val dim: Int) { - private var ptr: Long - - init { - ptr = new(dim) - } - - protected fun finalize() { - delete(ptr) - ptr = 0 - } - - fun release() = finalize() - fun add(name: String, embedding: FloatArray) = add(ptr, name, embedding) - fun add(name: String, embedding: Array) = addList(ptr, name, embedding) - fun remove(name: String) = remove(ptr, name) - fun search(embedding: FloatArray, 
threshold: Float) = search(ptr, embedding, threshold) - fun verify(name: String, embedding: FloatArray, threshold: Float) = - verify(ptr, name, embedding, threshold) - - fun contains(name: String) = contains(ptr, name) - fun numSpeakers() = numSpeakers(ptr) - - fun allSpeakerNames() = allSpeakerNames(ptr) - - private external fun new(dim: Int): Long - private external fun delete(ptr: Long): Unit - private external fun add(ptr: Long, name: String, embedding: FloatArray): Boolean - private external fun addList(ptr: Long, name: String, embedding: Array): Boolean - private external fun remove(ptr: Long, name: String): Boolean - private external fun search(ptr: Long, embedding: FloatArray, threshold: Float): String - private external fun verify( - ptr: Long, - name: String, - embedding: FloatArray, - threshold: Float - ): Boolean - - private external fun contains(ptr: Long, name: String): Boolean - private external fun numSpeakers(ptr: Long): Int - - private external fun allSpeakerNames(ptr: Long): Array - - companion object { - init { - System.loadLibrary("sherpa-onnx-jni") - } - } -} - -// Please download the model file from -// https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models -// and put it inside the assets directory. -// -// Please don't put it in a subdirectory of assets -private val modelName = "3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx" - -object SpeakerRecognition { - var _extractor: SpeakerEmbeddingExtractor? = null - var _manager: SpeakerEmbeddingManager? = null - - val extractor: SpeakerEmbeddingExtractor - get() { - return _extractor!! - } - - val manager: SpeakerEmbeddingManager - get() { - return _manager!! - } - - fun initExtractor(assetManager: AssetManager? 
= null) { - synchronized(this) { - if (_extractor != null) { - return - } - Log.i(TAG, "Initializing speaker embedding extractor") - - _extractor = SpeakerEmbeddingExtractor( - assetManager = assetManager, - config = SpeakerEmbeddingExtractorConfig( - model = modelName, - numThreads = 2, - debug = false, - provider = "cpu", - ) - ) - - _manager = SpeakerEmbeddingManager(dim = _extractor!!.dim()) - } - } -} diff --git a/android/SherpaOnnxSpeakerIdentification/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/identification/Speaker.kt b/android/SherpaOnnxSpeakerIdentification/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/identification/Speaker.kt new file mode 120000 index 000000000..b7307bc21 --- /dev/null +++ b/android/SherpaOnnxSpeakerIdentification/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/identification/Speaker.kt @@ -0,0 +1 @@ +../../../../../../../../../../../../sherpa-onnx/kotlin-api/Speaker.kt \ No newline at end of file diff --git a/android/SherpaOnnxSpeakerIdentification/app/src/main/res/values/strings.xml b/android/SherpaOnnxSpeakerIdentification/app/src/main/res/values/strings.xml index 0766efd7d..16db4894c 100644 --- a/android/SherpaOnnxSpeakerIdentification/app/src/main/res/values/strings.xml +++ b/android/SherpaOnnxSpeakerIdentification/app/src/main/res/values/strings.xml @@ -1,5 +1,5 @@ - Speaker Identification + Speaker ID Start recording Stop recording Add speaker diff --git a/android/SherpaOnnxSpokenLanguageIdentification/.gitignore b/android/SherpaOnnxSpokenLanguageIdentification/.gitignore new file mode 100644 index 000000000..aa724b770 --- /dev/null +++ b/android/SherpaOnnxSpokenLanguageIdentification/.gitignore @@ -0,0 +1,15 @@ +*.iml +.gradle +/local.properties +/.idea/caches +/.idea/libraries +/.idea/modules.xml +/.idea/workspace.xml +/.idea/navEditor.xml +/.idea/assetWizardSettings.xml +.DS_Store +/build +/captures +.externalNativeBuild +.cxx +local.properties diff --git a/android/SherpaOnnxSpokenLanguageIdentification/app/.gitignore 
b/android/SherpaOnnxSpokenLanguageIdentification/app/.gitignore new file mode 100644 index 000000000..42afabfd2 --- /dev/null +++ b/android/SherpaOnnxSpokenLanguageIdentification/app/.gitignore @@ -0,0 +1 @@ +/build \ No newline at end of file diff --git a/android/SherpaOnnxSpokenLanguageIdentification/app/build.gradle.kts b/android/SherpaOnnxSpokenLanguageIdentification/app/build.gradle.kts new file mode 100644 index 000000000..638582676 --- /dev/null +++ b/android/SherpaOnnxSpokenLanguageIdentification/app/build.gradle.kts @@ -0,0 +1,69 @@ +plugins { + id("com.android.application") + id("org.jetbrains.kotlin.android") +} + +android { + namespace = "com.k2fsa.sherpa.onnx.slid" + compileSdk = 34 + + defaultConfig { + applicationId = "com.k2fsa.sherpa.onnx.slid" + minSdk = 21 + targetSdk = 34 + versionCode = 1 + versionName = "1.0" + + testInstrumentationRunner = "androidx.test.runner.AndroidJUnitRunner" + vectorDrawables { + useSupportLibrary = true + } + } + + buildTypes { + release { + isMinifyEnabled = false + proguardFiles( + getDefaultProguardFile("proguard-android-optimize.txt"), + "proguard-rules.pro" + ) + } + } + compileOptions { + sourceCompatibility = JavaVersion.VERSION_1_8 + targetCompatibility = JavaVersion.VERSION_1_8 + } + kotlinOptions { + jvmTarget = "1.8" + } + buildFeatures { + compose = true + } + composeOptions { + kotlinCompilerExtensionVersion = "1.5.1" + } + packaging { + resources { + excludes += "/META-INF/{AL2.0,LGPL2.1}" + } + } +} + +dependencies { + + implementation("androidx.core:core-ktx:1.12.0") + implementation("androidx.lifecycle:lifecycle-runtime-ktx:2.7.0") + implementation("androidx.activity:activity-compose:1.8.2") + implementation(platform("androidx.compose:compose-bom:2023.08.00")) + implementation("androidx.compose.ui:ui") + implementation("androidx.compose.ui:ui-graphics") + implementation("androidx.compose.ui:ui-tooling-preview") + implementation("androidx.compose.material3:material3") + 
testImplementation("junit:junit:4.13.2") + androidTestImplementation("androidx.test.ext:junit:1.1.5") + androidTestImplementation("androidx.test.espresso:espresso-core:3.5.1") + androidTestImplementation(platform("androidx.compose:compose-bom:2023.08.00")) + androidTestImplementation("androidx.compose.ui:ui-test-junit4") + debugImplementation("androidx.compose.ui:ui-tooling") + debugImplementation("androidx.compose.ui:ui-test-manifest") +} \ No newline at end of file diff --git a/android/SherpaOnnxSpokenLanguageIdentification/app/proguard-rules.pro b/android/SherpaOnnxSpokenLanguageIdentification/app/proguard-rules.pro new file mode 100644 index 000000000..481bb4348 --- /dev/null +++ b/android/SherpaOnnxSpokenLanguageIdentification/app/proguard-rules.pro @@ -0,0 +1,21 @@ +# Add project specific ProGuard rules here. +# You can control the set of applied configuration files using the +# proguardFiles setting in build.gradle. +# +# For more details, see +# http://developer.android.com/guide/developing/tools/proguard.html + +# If your project uses WebView with JS, uncomment the following +# and specify the fully qualified class name to the JavaScript interface +# class: +#-keepclassmembers class fqcn.of.javascript.interface.for.webview { +# public *; +#} + +# Uncomment this to preserve the line number information for +# debugging stack traces. +#-keepattributes SourceFile,LineNumberTable + +# If you keep the line number information, uncomment this to +# hide the original source file name. 
+#-renamesourcefileattribute SourceFile \ No newline at end of file diff --git a/android/SherpaOnnxSpokenLanguageIdentification/app/src/androidTest/java/com/k2fsa/sherpa/onnx/slid/ExampleInstrumentedTest.kt b/android/SherpaOnnxSpokenLanguageIdentification/app/src/androidTest/java/com/k2fsa/sherpa/onnx/slid/ExampleInstrumentedTest.kt new file mode 100644 index 000000000..5cb3e238d --- /dev/null +++ b/android/SherpaOnnxSpokenLanguageIdentification/app/src/androidTest/java/com/k2fsa/sherpa/onnx/slid/ExampleInstrumentedTest.kt @@ -0,0 +1,24 @@ +package com.k2fsa.sherpa.onnx.slid + +import androidx.test.platform.app.InstrumentationRegistry +import androidx.test.ext.junit.runners.AndroidJUnit4 + +import org.junit.Test +import org.junit.runner.RunWith + +import org.junit.Assert.* + +/** + * Instrumented test, which will execute on an Android device. + * + * See [testing documentation](http://d.android.com/tools/testing). + */ +@RunWith(AndroidJUnit4::class) +class ExampleInstrumentedTest { + @Test + fun useAppContext() { + // Context of the app under test. 
+        val appContext = InstrumentationRegistry.getInstrumentation().targetContext
+        assertEquals("com.k2fsa.sherpa.onnx.slid", appContext.packageName)
+    }
+}
\ No newline at end of file
diff --git a/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/AndroidManifest.xml b/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/AndroidManifest.xml
new file mode 100644
index 000000000..df44766e2
--- /dev/null
+++ b/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/AndroidManifest.xml
@@ -0,0 +1,30 @@
+ + + + + + + + + + + + + + + +
\ No newline at end of file
diff --git a/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/assets/.gitignore b/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/assets/.gitignore
new file mode 100644
index 000000000..e69de29bb
diff --git a/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/java/com/k2fsa/sherpa/onnx/slid/Home.kt b/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/java/com/k2fsa/sherpa/onnx/slid/Home.kt
new file mode 100644
index 000000000..5a994e9a3
--- /dev/null
+++ b/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/java/com/k2fsa/sherpa/onnx/slid/Home.kt
@@ -0,0 +1,199 @@
+@file:OptIn(ExperimentalMaterial3Api::class)
+
+package com.k2fsa.sherpa.onnx.slid
+
+import android.Manifest
+import android.app.Activity
+import android.content.pm.PackageManager
+import android.media.AudioFormat
+import android.media.AudioRecord
+import android.media.MediaRecorder
+import android.util.Log
+import androidx.compose.foundation.layout.Box
+import androidx.compose.foundation.layout.Column
+import androidx.compose.foundation.layout.PaddingValues
+import androidx.compose.foundation.layout.Spacer
+import androidx.compose.foundation.layout.fillMaxSize
+import androidx.compose.foundation.layout.height
+import androidx.compose.foundation.layout.padding
+import androidx.compose.material3.Button
+import androidx.compose.material3.CenterAlignedTopAppBar
+import androidx.compose.material3.ExperimentalMaterial3Api
+import androidx.compose.material3.MaterialTheme
+import androidx.compose.material3.Scaffold
+import androidx.compose.material3.Text
+import androidx.compose.material3.TopAppBarDefaults
+import androidx.compose.runtime.Composable
+import androidx.compose.runtime.getValue
+import androidx.compose.runtime.mutableStateOf
+import androidx.compose.runtime.remember
+import androidx.compose.runtime.setValue
+import androidx.compose.ui.Alignment
+import androidx.compose.ui.Modifier
+import androidx.compose.ui.platform.LocalContext
+import androidx.compose.ui.text.font.FontWeight
+import androidx.compose.ui.unit.dp
+import androidx.compose.ui.unit.sp
+import androidx.core.app.ActivityCompat
+import kotlin.concurrent.thread
+
+/** Top-level screen: a titled app bar with [MyApp] as the content. */
+@Composable
+fun Home() {
+    Scaffold(
+        topBar = {
+            CenterAlignedTopAppBar(
+                colors = TopAppBarDefaults.topAppBarColors(
+                    containerColor = MaterialTheme.colorScheme.primaryContainer,
+                    titleContentColor = MaterialTheme.colorScheme.primary,
+                ),
+                title = {
+                    Text(
+                        "Next-gen Kaldi: Spoken language identification",
+                        fontWeight = FontWeight.Bold,
+                        fontSize = 13.sp,
+                    )
+                },
+            )
+        },
+        content = {
+            MyApp(it)
+        },
+    )
+}
+
+// Recorder of the capture in progress; cleared once that capture is released.
+private var audioRecord: AudioRecord? = null
+private const val sampleRateInHz = 16000
+
+/**
+ * Start/Stop button plus the detected language.
+ *
+ * Start records 16-bit mono PCM at [sampleRateInHz] on a background thread.
+ * Stop ends the capture; the whole recording is then passed to [Slid.slid]
+ * and the language it reports is shown (as a display name when
+ * [Slid.localeMap] has one, otherwise as returned).
+ *
+ * @param padding insets handed down by the enclosing Scaffold.
+ */
+@Composable
+fun MyApp(padding: PaddingValues) {
+    val activity = LocalContext.current as Activity
+    var isStarted by remember { mutableStateOf(false) }
+    var result by remember { mutableStateOf("") }
+
+    val onButtonClick: () -> Unit = {
+        isStarted = !isStarted
+        if (isStarted) {
+            result = ""
+            if (ActivityCompat.checkSelfPermission(
+                    activity,
+                    Manifest.permission.RECORD_AUDIO
+                ) != PackageManager.PERMISSION_GRANTED
+            ) {
+                Log.i(TAG, "Recording is not allowed")
+                // Nothing is being captured, so don't leave the button on "Stop".
+                isStarted = false
+            } else {
+                val audioSource = MediaRecorder.AudioSource.MIC
+                val channelConfig = AudioFormat.CHANNEL_IN_MONO
+                val audioFormat = AudioFormat.ENCODING_PCM_16BIT
+                val numBytes =
+                    AudioRecord.getMinBufferSize(sampleRateInHz, channelConfig, audioFormat)
+
+                val recorder = AudioRecord(
+                    audioSource,
+                    sampleRateInHz,
+                    channelConfig,
+                    audioFormat,
+                    numBytes * 2 // a sample has two bytes as we are using 16-bit PCM
+                )
+                audioRecord = recorder
+
+                thread(true) {
+                    Log.i(TAG, "processing samples")
+                    val interval = 0.1 // i.e., 100 ms
+                    val bufferSize = (interval * sampleRateInHz).toInt() // in samples
+                    val buffer = ShortArray(bufferSize)
+                    val sampleList = ArrayList<FloatArray>()
+                    recorder.startRecording()
+                    while (isStarted) {
+                        val n = recorder.read(buffer, 0, buffer.size)
+                        if (n < 0) {
+                            // read() reports failures as negative codes, which must
+                            // not be used as an array size. Stop capturing rather
+                            // than spin on the error.
+                            Log.e(TAG, "AudioRecord.read() failed: $n")
+                            isStarted = false
+                            break
+                        }
+                        if (n > 0) {
+                            // Scale 16-bit PCM to floats in [-1, 1).
+                            sampleList.add(FloatArray(n) { buffer[it] / 32768.0f })
+                        }
+                    }
+                    // Give the microphone back before running recognition.
+                    recorder.stop()
+                    recorder.release()
+                    if (audioRecord === recorder) {
+                        audioRecord = null
+                    }
+                    Log.i(TAG, "Stop recording")
+                    Log.i(TAG, "Start recognition")
+                    val samples = flatten(sampleList)
+                    val stream = Slid.slid.createStream()
+                    stream.acceptWaveform(samples, sampleRateInHz)
+                    val lang = Slid.slid.compute(stream)
+
+                    result = Slid.localeMap[lang] ?: lang
+
+                    stream.release()
+                }
+            }
+        }
+    }
+
+    Box(
+        modifier = Modifier.fillMaxSize(),
+        contentAlignment = Alignment.TopCenter
+    ) {
+        Column(
+            Modifier.padding(padding),
+            horizontalAlignment = Alignment.CenterHorizontally,
+        ) {
+            Spacer(modifier = Modifier.height(16.dp))
+            Button(onClick = onButtonClick) {
+                if (isStarted) {
+                    Text("Stop")
+                } else {
+                    Text("Start")
+                }
+            }
+
+            Spacer(modifier = Modifier.height(16.dp))
+            // isNotBlank() already implies isNotEmpty().
+            if (result.isNotBlank()) {
+                Text("Detected language: $result")
+            }
+        }
+    }
+}
+
+/**
+ * Concatenates the recorded chunks, in order, into one array.
+ *
+ * @param sampleList chunks in capture order; may be empty.
+ * @return a new array holding every sample of every chunk.
+ */
+fun flatten(sampleList: ArrayList<FloatArray>): FloatArray {
+    val totalSamples = sampleList.sumOf { it.size }
+    val samples = FloatArray(totalSamples)
+    var i = 0
+    for (a in sampleList) {
+        a.copyInto(samples, destinationOffset = i)
+        i += a.size
+    }
+    Log.i(TAG, "$i, $totalSamples")
+
+    return samples
+}
\ No newline at end of file
diff --git a/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/java/com/k2fsa/sherpa/onnx/slid/MainActivity.kt b/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/java/com/k2fsa/sherpa/onnx/slid/MainActivity.kt
new file mode 100644
index 000000000..705f431ee
--- /dev/null
+++ b/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/java/com/k2fsa/sherpa/onnx/slid/MainActivity.kt
@@ -0,0 +1,72 @@
+package com.k2fsa.sherpa.onnx.slid
+
+import android.Manifest
+import android.content.pm.PackageManager
+import android.os.Bundle
+import android.util.Log
+import android.widget.Toast
+import androidx.activity.ComponentActivity
+import androidx.activity.compose.setContent
+import androidx.compose.foundation.layout.fillMaxSize
+import androidx.compose.material3.MaterialTheme
+import androidx.compose.material3.Surface
+import androidx.compose.runtime.Composable
+import androidx.compose.ui.Modifier
+import androidx.core.app.ActivityCompat
+import com.k2fsa.sherpa.onnx.slid.ui.theme.SherpaOnnxSpokenLanguageIdentificationTheme
+
+const val TAG = "sherpa-onnx"
+private const val REQUEST_RECORD_AUDIO_PERMISSION = 200
+
+class MainActivity : ComponentActivity() {
+    private val permissions: Array<String> = arrayOf(Manifest.permission.RECORD_AUDIO)
+
+    override fun onCreate(savedInstanceState: Bundle?)
{
+        super.onCreate(savedInstanceState)
+        setContent {
+            SpokenLanguageIdentificationApp()
+        }
+        // Ask for the microphone up front; the answer arrives in onRequestPermissionsResult().
+        ActivityCompat.requestPermissions(this, permissions, REQUEST_RECORD_AUDIO_PERMISSION)
+        Slid.initSlid(this.assets)
+    }
+
+    /**
+     * Finishes the activity unless the microphone permission was granted.
+     * [grantResults] is empty when the request is cancelled; that counts as a denial.
+     */
+    @Suppress("DEPRECATION")
+    @Deprecated("Deprecated in Java")
+    override fun onRequestPermissionsResult(
+        requestCode: Int,
+        permissions: Array<String>,
+        grantResults: IntArray
+    ) {
+        super.onRequestPermissionsResult(requestCode, permissions, grantResults)
+        val permissionToRecordAccepted = requestCode == REQUEST_RECORD_AUDIO_PERMISSION &&
+            grantResults.firstOrNull() == PackageManager.PERMISSION_GRANTED
+
+        if (!permissionToRecordAccepted) {
+            Log.e(TAG, "Audio record is disallowed")
+            Toast.makeText(this, "This App needs access to the microphone", Toast.LENGTH_SHORT)
+                .show()
+            finish()
+            return // do not fall through to the "permitted" log below
+        }
+        Log.i(TAG, "Audio record is permitted")
+    }
+}
+
+/** Root composable: applies the app theme and shows [Home] on a full-size surface. */
+@Composable
+fun SpokenLanguageIdentificationApp() {
+    SherpaOnnxSpokenLanguageIdentificationTheme {
+        // A surface container using the 'background' color from the theme
+        Surface(
+            modifier = Modifier.fillMaxSize(),
+            color = MaterialTheme.colorScheme.background
+        ) {
+            Home()
+        }
+    }
+}
\ No newline at end of file
diff --git a/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/java/com/k2fsa/sherpa/onnx/slid/OfflineStream.kt b/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/java/com/k2fsa/sherpa/onnx/slid/OfflineStream.kt
new file mode 120000
index 000000000..c8c06085c
--- /dev/null
+++ b/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/java/com/k2fsa/sherpa/onnx/slid/OfflineStream.kt
@@ -0,0 +1 @@
+../../../../../../../../../../../sherpa-onnx/kotlin-api/OfflineStream.kt
\ No newline at end of file
diff --git a/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/java/com/k2fsa/sherpa/onnx/slid/SpokenLanguageIdentification.kt
b/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/java/com/k2fsa/sherpa/onnx/slid/SpokenLanguageIdentification.kt
new file mode 120000
index 000000000..b5cd3eb98
--- /dev/null
+++ b/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/java/com/k2fsa/sherpa/onnx/slid/SpokenLanguageIdentification.kt
@@ -0,0 +1 @@
+../../../../../../../../../../../sherpa-onnx/kotlin-api/SpokenLanguageIdentification.kt
\ No newline at end of file
diff --git a/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/java/com/k2fsa/sherpa/onnx/slid/slid.kt b/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/java/com/k2fsa/sherpa/onnx/slid/slid.kt
new file mode 100644
index 000000000..ed9439db9
--- /dev/null
+++ b/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/java/com/k2fsa/sherpa/onnx/slid/slid.kt
@@ -0,0 +1,58 @@
+package com.k2fsa.sherpa.onnx.slid
+
+import android.content.res.AssetManager
+import android.util.Log
+import com.k2fsa.sherpa.onnx.SpokenLanguageIdentification
+import com.k2fsa.sherpa.onnx.getSpokenLanguageIdentificationConfig
+import java.util.Locale
+
+
+/**
+ * Singleton holder for the language identifier and for the table that maps
+ * the language codes from [Locale.getISOLanguages] to display names.
+ *
+ * Call [initSlid] once before reading [slid].
+ */
+object Slid {
+    private var _slid: SpokenLanguageIdentification? = null
+
+    // Language code -> display name; filled by initSlid().
+    private val _localeMap = mutableMapOf<String, String>()
+
+    /** The identifier created by [initSlid]; throws if [initSlid] has not run yet. */
+    val slid: SpokenLanguageIdentification
+        get() {
+            return _slid!!
+        }
+
+    /** Read-only view of the code-to-name table; empty until [initSlid] has run. */
+    val localeMap: Map<String, String>
+        get() {
+            return _localeMap
+        }
+
+    /**
+     * Creates the identifier and fills [localeMap]. Work already done by an
+     * earlier call is not repeated.
+     *
+     * @param assetManager forwarded to [SpokenLanguageIdentification].
+     * @param numThreads forwarded to the model configuration.
+     */
+    fun initSlid(assetManager: AssetManager? = null, numThreads: Int = 1) {
+        synchronized(this) {
+            if (_slid == null) {
+
+                Log.i(TAG, "Initializing slid")
+                val config =
+                    getSpokenLanguageIdentificationConfig(type = 0, numThreads = numThreads)!!
+                _slid = SpokenLanguageIdentification(assetManager, config)
+            }
+
+            if (_localeMap.isEmpty()) {
+                for (lang in Locale.getISOLanguages()) {
+                    _localeMap[lang] = Locale(lang).displayName
+                }
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/java/com/k2fsa/sherpa/onnx/slid/ui/theme/Color.kt b/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/java/com/k2fsa/sherpa/onnx/slid/ui/theme/Color.kt
new file mode 100644
index 000000000..cbfdfd17c
--- /dev/null
+++ b/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/java/com/k2fsa/sherpa/onnx/slid/ui/theme/Color.kt
@@ -0,0 +1,11 @@
+package com.k2fsa.sherpa.onnx.slid.ui.theme
+
+import androidx.compose.ui.graphics.Color
+
+val Purple80 = Color(0xFFD0BCFF)
+val PurpleGrey80 = Color(0xFFCCC2DC)
+val Pink80 = Color(0xFFEFB8C8)
+
+val Purple40 = Color(0xFF6650a4)
+val PurpleGrey40 = Color(0xFF625b71)
+val Pink40 = Color(0xFF7D5260)
\ No newline at end of file
diff --git a/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/java/com/k2fsa/sherpa/onnx/slid/ui/theme/Theme.kt b/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/java/com/k2fsa/sherpa/onnx/slid/ui/theme/Theme.kt
new file mode 100644
index 000000000..02f83371a
--- /dev/null
+++ b/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/java/com/k2fsa/sherpa/onnx/slid/ui/theme/Theme.kt
@@ -0,0 +1,70 @@
+package com.k2fsa.sherpa.onnx.slid.ui.theme
+
+import android.app.Activity
+import android.os.Build
+import androidx.compose.foundation.isSystemInDarkTheme
+import androidx.compose.material3.MaterialTheme
+import androidx.compose.material3.darkColorScheme
+import androidx.compose.material3.dynamicDarkColorScheme
+import androidx.compose.material3.dynamicLightColorScheme
+import androidx.compose.material3.lightColorScheme
+import androidx.compose.runtime.Composable
+import androidx.compose.runtime.SideEffect
+import
androidx.compose.ui.graphics.toArgb +import androidx.compose.ui.platform.LocalContext +import androidx.compose.ui.platform.LocalView +import androidx.core.view.WindowCompat + +private val DarkColorScheme = darkColorScheme( + primary = Purple80, + secondary = PurpleGrey80, + tertiary = Pink80 +) + +private val LightColorScheme = lightColorScheme( + primary = Purple40, + secondary = PurpleGrey40, + tertiary = Pink40 + + /* Other default colors to override + background = Color(0xFFFFFBFE), + surface = Color(0xFFFFFBFE), + onPrimary = Color.White, + onSecondary = Color.White, + onTertiary = Color.White, + onBackground = Color(0xFF1C1B1F), + onSurface = Color(0xFF1C1B1F), + */ +) + +@Composable +fun SherpaOnnxSpokenLanguageIdentificationTheme( + darkTheme: Boolean = isSystemInDarkTheme(), + // Dynamic color is available on Android 12+ + dynamicColor: Boolean = true, + content: @Composable () -> Unit +) { + val colorScheme = when { + dynamicColor && Build.VERSION.SDK_INT >= Build.VERSION_CODES.S -> { + val context = LocalContext.current + if (darkTheme) dynamicDarkColorScheme(context) else dynamicLightColorScheme(context) + } + + darkTheme -> DarkColorScheme + else -> LightColorScheme + } + val view = LocalView.current + if (!view.isInEditMode) { + SideEffect { + val window = (view.context as Activity).window + window.statusBarColor = colorScheme.primary.toArgb() + WindowCompat.getInsetsController(window, view).isAppearanceLightStatusBars = darkTheme + } + } + + MaterialTheme( + colorScheme = colorScheme, + typography = Typography, + content = content + ) +} \ No newline at end of file diff --git a/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/java/com/k2fsa/sherpa/onnx/slid/ui/theme/Type.kt b/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/java/com/k2fsa/sherpa/onnx/slid/ui/theme/Type.kt new file mode 100644 index 000000000..48bb5ae96 --- /dev/null +++ 
b/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/java/com/k2fsa/sherpa/onnx/slid/ui/theme/Type.kt @@ -0,0 +1,34 @@ +package com.k2fsa.sherpa.onnx.slid.ui.theme + +import androidx.compose.material3.Typography +import androidx.compose.ui.text.TextStyle +import androidx.compose.ui.text.font.FontFamily +import androidx.compose.ui.text.font.FontWeight +import androidx.compose.ui.unit.sp + +// Set of Material typography styles to start with +val Typography = Typography( + bodyLarge = TextStyle( + fontFamily = FontFamily.Default, + fontWeight = FontWeight.Normal, + fontSize = 16.sp, + lineHeight = 24.sp, + letterSpacing = 0.5.sp + ) + /* Other default text styles to override + titleLarge = TextStyle( + fontFamily = FontFamily.Default, + fontWeight = FontWeight.Normal, + fontSize = 22.sp, + lineHeight = 28.sp, + letterSpacing = 0.sp + ), + labelSmall = TextStyle( + fontFamily = FontFamily.Default, + fontWeight = FontWeight.Medium, + fontSize = 11.sp, + lineHeight = 16.sp, + letterSpacing = 0.5.sp + ) + */ +) \ No newline at end of file diff --git a/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/jniLibs/arm64-v8a/.gitignore b/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/jniLibs/arm64-v8a/.gitignore new file mode 100644 index 000000000..e69de29bb diff --git a/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/jniLibs/armeabi-v7a/.gitignore b/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/jniLibs/armeabi-v7a/.gitignore new file mode 100644 index 000000000..e69de29bb diff --git a/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/jniLibs/x86/.gitignore b/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/jniLibs/x86/.gitignore new file mode 100644 index 000000000..e69de29bb diff --git a/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/jniLibs/x86_64/.gitignore b/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/jniLibs/x86_64/.gitignore new file mode 100644 index 
000000000..e69de29bb diff --git a/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/res/drawable-v24/ic_launcher_foreground.xml b/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/res/drawable-v24/ic_launcher_foreground.xml new file mode 100644 index 000000000..2b068d114 --- /dev/null +++ b/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/res/drawable-v24/ic_launcher_foreground.xml @@ -0,0 +1,30 @@ + + + + + + + + + + + \ No newline at end of file diff --git a/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/res/drawable/ic_launcher_background.xml b/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/res/drawable/ic_launcher_background.xml new file mode 100644 index 000000000..07d5da9cb --- /dev/null +++ b/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/res/drawable/ic_launcher_background.xml @@ -0,0 +1,170 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml b/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml new file mode 100644 index 000000000..6f3b755bf --- /dev/null +++ b/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml b/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml new file mode 100644 index 000000000..6f3b755bf --- /dev/null +++ b/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/res/mipmap-hdpi/ic_launcher.webp 
b/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/res/mipmap-hdpi/ic_launcher.webp new file mode 100644 index 000000000..c209e78ec Binary files /dev/null and b/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/res/mipmap-hdpi/ic_launcher.webp differ diff --git a/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/res/mipmap-hdpi/ic_launcher_round.webp b/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/res/mipmap-hdpi/ic_launcher_round.webp new file mode 100644 index 000000000..b2dfe3d1b Binary files /dev/null and b/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/res/mipmap-hdpi/ic_launcher_round.webp differ diff --git a/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/res/mipmap-mdpi/ic_launcher.webp b/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/res/mipmap-mdpi/ic_launcher.webp new file mode 100644 index 000000000..4f0f1d64e Binary files /dev/null and b/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/res/mipmap-mdpi/ic_launcher.webp differ diff --git a/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/res/mipmap-mdpi/ic_launcher_round.webp b/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/res/mipmap-mdpi/ic_launcher_round.webp new file mode 100644 index 000000000..62b611da0 Binary files /dev/null and b/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/res/mipmap-mdpi/ic_launcher_round.webp differ diff --git a/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/res/mipmap-xhdpi/ic_launcher.webp b/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/res/mipmap-xhdpi/ic_launcher.webp new file mode 100644 index 000000000..948a3070f Binary files /dev/null and b/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/res/mipmap-xhdpi/ic_launcher.webp differ diff --git a/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/res/mipmap-xhdpi/ic_launcher_round.webp 
b/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/res/mipmap-xhdpi/ic_launcher_round.webp new file mode 100644 index 000000000..1b9a6956b Binary files /dev/null and b/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/res/mipmap-xhdpi/ic_launcher_round.webp differ diff --git a/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/res/mipmap-xxhdpi/ic_launcher.webp b/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/res/mipmap-xxhdpi/ic_launcher.webp new file mode 100644 index 000000000..28d4b77f9 Binary files /dev/null and b/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/res/mipmap-xxhdpi/ic_launcher.webp differ diff --git a/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/res/mipmap-xxhdpi/ic_launcher_round.webp b/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/res/mipmap-xxhdpi/ic_launcher_round.webp new file mode 100644 index 000000000..9287f5083 Binary files /dev/null and b/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/res/mipmap-xxhdpi/ic_launcher_round.webp differ diff --git a/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/res/mipmap-xxxhdpi/ic_launcher.webp b/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/res/mipmap-xxxhdpi/ic_launcher.webp new file mode 100644 index 000000000..aa7d6427e Binary files /dev/null and b/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/res/mipmap-xxxhdpi/ic_launcher.webp differ diff --git a/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/res/mipmap-xxxhdpi/ic_launcher_round.webp b/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/res/mipmap-xxxhdpi/ic_launcher_round.webp new file mode 100644 index 000000000..9126ae37c Binary files /dev/null and b/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/res/mipmap-xxxhdpi/ic_launcher_round.webp differ diff --git a/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/res/values/colors.xml 
b/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/res/values/colors.xml new file mode 100644 index 000000000..f8c6127d3 --- /dev/null +++ b/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/res/values/colors.xml @@ -0,0 +1,10 @@ + + + #FFBB86FC + #FF6200EE + #FF3700B3 + #FF03DAC5 + #FF018786 + #FF000000 + #FFFFFFFF + \ No newline at end of file diff --git a/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/res/values/strings.xml b/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/res/values/strings.xml new file mode 100644 index 000000000..3ac26174d --- /dev/null +++ b/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/res/values/strings.xml @@ -0,0 +1,3 @@ + + Language ID + \ No newline at end of file diff --git a/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/res/values/themes.xml b/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/res/values/themes.xml new file mode 100644 index 000000000..07b6588b3 --- /dev/null +++ b/android/SherpaOnnxSpokenLanguageIdentification/app/src/main/res/values/themes.xml @@ -0,0 +1,5 @@ + + + +