Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ models/*
!models/yitu_api_zh
!models/aliyun_ftasr_api_zh
!models/ximalaya_api_zh
!models/dianya_quality_api_zh

# en models
!models/aliyun_api_en
Expand Down Expand Up @@ -108,3 +109,6 @@ models/yitu_api_zh/DEV_KEY
# ximalaya api credential
models/ximalaya_api_zh/APP_KEY
models/ximalaya_api_zh/SECRET_KEY

# dianya api credential
models/dianya_quality_api_zh/API_KEY
11 changes: 11 additions & 0 deletions models/dianya_quality_api_zh/SBI
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/usr/bin/env bash
#
# Run the Dianya ASR client on a list of audio files.
#
# Usage: SBI <wav.scp> <working_dir>
#   wav.scp      list of audio entries, passed through to asr_api.py
#   working_dir  directory that receives the transcript as raw_rec.txt
#
# The exit status is that of asr_api.py (the last command executed).

if [ $# -ne 2 ]; then
  # Report usage on stderr and under the name the script was actually
  # invoked with, so the message stays correct whatever the file is called.
  echo "Usage: $0 <wav.scp> <working_dir>" >&2
  exit 1
fi

scp=$1
dir=$2

# asr_api.py is resolved relative to the current working directory.
./asr_api.py "$scp" "$dir/raw_rec.txt"
129 changes: 129 additions & 0 deletions models/dianya_quality_api_zh/asr_api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
#!/usr/bin/env python3
# coding: utf-8

import sys
import os
import time
import json
import codecs
import requests

# Path of the file holding the API key; it is relative, so it is resolved
# against the current working directory when opened.
API_KEY_FILE = 'API_KEY'
# Endpoint every audio file is POSTed to (options are fixed in the query string).
DIANYA_URL = 'https://api.dianyaai.com/api/transcribe/upload?transcribe_only=true&short_asr=true&model_name=quality'
# Maximum number of attempts recognize() makes per audio file.
MAX_RETRY = 5
# Seconds passed to time.sleep() after a failed attempt.
RETRY_INTERVAL = 0.1
# Seconds passed to time.sleep() before each recognize() call in the main loop.
QPS_INTERVAL = 0.2
# Value handed to requests.post(timeout=...), in seconds.
TIMEOUT = 30


def load_api_key(path=None):
    """Read the API key from the first line of a key file.

    Args:
        path: Key file to read. When omitted, the module-level
            ``API_KEY_FILE`` is used.

    Returns:
        The first line of the file with surrounding whitespace removed.

    Raises:
        SystemExit: With status 1, after a message on stderr, when the file
            cannot be read or its first line is blank.
    """
    key_path = API_KEY_FILE if path is None else path
    try:
        with open(key_path, 'r', encoding='utf8') as f:
            api_key = f.readline().strip()
    except (OSError, ValueError) as e:
        # ValueError also covers UnicodeDecodeError from a non-UTF-8 file.
        sys.stderr.write(f'Failed to load API key from {key_path}: {e}\n')
        sys.exit(1)

    # An empty key would make every request fail; stop before any is sent.
    if not api_key:
        sys.stderr.write(f'Failed to load API key from {key_path}: first line is empty\n')
        sys.exit(1)
    return api_key


def recognize(api_key, audio_path):
    """Upload one audio file to the Dianya endpoint and return its transcript.

    The request is attempted up to ``MAX_RETRY`` times. An attempt fails when
    the HTTP status is not 200, the body is not valid JSON, the JSON
    ``status`` field is not ``'ok'``, or any exception is raised while
    sending the request or reading the response.

    Args:
        api_key: Token sent as ``Authorization: Bearer <api_key>``.
        audio_path: Path of the audio file to upload.

    Returns:
        The response's ``data`` field converted to ``str`` and stripped, or
        ``''`` when the file cannot be read or every attempt fails. Failures
        are reported on stderr and never raised to the caller.
    """
    # An unreadable local file will not recover on retry, so give up at once
    # instead of burning MAX_RETRY attempts on it.
    try:
        with open(audio_path, 'rb') as audio_file:
            payload = audio_file.read()
    except (OSError, ValueError) as e:
        sys.stderr.write(f'Failed to read audio file {audio_path}: {e}\n')
        return ''

    # The payload is held as bytes, so the same multipart description can be
    # re-sent on every attempt without reopening the file.
    headers = {'Authorization': f'Bearer {api_key}'}
    files = {'payload': (os.path.basename(audio_path), payload, 'audio/wav')}

    for attempt in range(1, MAX_RETRY + 1):
        tag = f'attempt {attempt}/{MAX_RETRY}'
        try:
            response = requests.post(
                DIANYA_URL,
                headers=headers,
                files=files,
                timeout=TIMEOUT,
            )
            if response.status_code != 200:
                failure = f'HTTP {response.status_code} from Dianya, {tag}.'
            else:
                try:
                    data = response.json()
                except ValueError:
                    failure = f'Invalid JSON response from Dianya on {tag}.'
                else:
                    status = data.get('status')
                    if status == 'ok':
                        text = data.get('data', '')
                        return '' if text is None else str(text).strip()
                    failure = f'Dianya returned non-ok status "{status}" on {tag}.'
        except Exception as e:
            # Retry boundary: network errors and unexpected response shapes
            # (e.g. a JSON body that is not an object) are all retried.
            failure = f'Exception when calling Dianya on {tag}: {e}'

        sys.stderr.write(failure + '\n')
        # No point in waiting once the last attempt has failed.
        if attempt < MAX_RETRY:
            time.sleep(RETRY_INTERVAL)

    sys.stderr.write(f'Failed to recognize {audio_path} after {MAX_RETRY} attempts.\n')
    return ''


def parse_scp_line(line):
    """Split one stripped scp line into ``(key, audio_path)``.

    A line containing a tab is split at the first tab; otherwise it is split
    at the first run of whitespace.

    Returns:
        A ``(key, audio_path)`` tuple, or ``None`` when the line has no tab
        and does not contain two whitespace-separated fields.
    """
    if '\t' in line:
        key, audio = line.split('\t', 1)
        return key, audio
    parts = line.split(maxsplit=1)
    if len(parts) != 2:
        return None
    return parts[0], parts[1]


def main(argv):
    """Transcribe every entry of an scp file and write ``key<TAB>text`` lines.

    Args:
        argv: Full argument vector: ``[prog, in_scp, out_trans]``.

    Returns:
        Process exit status: 0 on completion, 1 on a usage error or when the
        input or output file cannot be opened.
    """
    if len(argv) != 3:
        sys.stderr.write('asr_api.py <in_scp> <out_trans>\n')
        return 1

    in_scp, out_trans = argv[1], argv[2]

    api_key = load_api_key()

    # Pre-read all non-empty lines so that the total can be counted.
    try:
        with open(in_scp, 'r', encoding='utf8') as scp_file:
            lines = [line.strip() for line in scp_file if line.strip()]
    except (OSError, UnicodeError) as e:
        sys.stderr.write(f'Failed to open input scp file {in_scp}: {e}\n')
        return 1

    # Reject malformed lines up front: the progress total then counts only
    # entries that will really be processed, so it always ends at total/total.
    entries = []
    for line in lines:
        entry = parse_scp_line(line)
        if entry is None:
            sys.stderr.write(f'Invalid line in scp file (skip): {line}\n')
            continue
        entries.append(entry)
    total = len(entries)

    try:
        # newline='\n' keeps the output bytes independent of the platform.
        trans_file = open(out_trans, 'w', encoding='utf8', newline='\n')
    except OSError as e:
        sys.stderr.write(f'Failed to open output transcription file {out_trans}: {e}\n')
        return 1

    # The context manager closes the transcript even if an entry raises.
    with trans_file:
        for n, (key, audio) in enumerate(entries):
            sys.stderr.write(f'{n}\tkey:{key}\taudio:{audio}\n')
            sys.stderr.flush()

            # Throttle requests before each call.
            time.sleep(QPS_INTERVAL)
            rec_text = recognize(api_key, audio)

            # Flush per entry so finished results survive an interruption.
            trans_file.write(f'{key}\t{rec_text}\n')
            trans_file.flush()

            # Progress line, parsed by external scripts.
            sys.stderr.write(f'[DY_PROGRESS] {n + 1}/{total} {audio}\n')
            sys.stderr.flush()

    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))
15 changes: 15 additions & 0 deletions models/dianya_quality_api_zh/docker/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Runtime image for asr_api.py: Python 3 plus the `requests` library it imports.
FROM ubuntu:20.04
LABEL maintainer="leaderboard@example.com"

# Install the interpreter and pip, then remove the apt package lists in the
# same layer so they are not stored in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
    python3 \
    python3-pip && \
    rm -rf /var/lib/apt/lists/*

# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip3 install --no-cache-dir requests

# Use C.UTF-8 locale to avoid issues with ASCII encoding
ENV LC_ALL=C.UTF-8
ENV LANG=C.UTF-8

WORKDIR /app/speechio/leaderboard
7 changes: 7 additions & 0 deletions models/dianya_quality_api_zh/model.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
task: ASR
language: zh
sample_rate: 16000
date: 2025-12-09
author: gaowenrong
entity: 改变世界(深圳)人工智能科技有限公司
email: gaowenrongmuxingziben1@dianyaai.com