Skip to content

Commit

Permalink
tts(sherpa): update support for matcha tts
Browse files Browse the repository at this point in the history
  • Loading branch information
xinhecuican committed Jan 25, 2025
1 parent 5caa8d8 commit 4a9f5a0
Show file tree
Hide file tree
Showing 12 changed files with 55 additions and 1,498 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ubuntu.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ jobs:
- qt_version: 5.15.2
qt_arch: gcc_64
modules: "qtwebengine qtnetworkauth"
- qt_version: 6.8.1
- qt_version: 6.2.4
qt_arch: gcc_64
modules: "qtwebengine qtnetworkauth qtmultimedia"

Expand Down
4 changes: 3 additions & 1 deletion Conversation/NLU/nlumodel.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,14 @@
// Abstract base class for natural-language-understanding (NLU) back ends.
// Subclasses must implement parseIntent(); stop() and isStart() have defaults.
// NOTE(review): this text is a diff view without +/- markers. The hunk reports
// "3 additions & 1 deletion", so the two constructor lines below appear to be
// the removed and the added version of the same constructor — confirm against
// the real header before compiling.
class NLUModel : public QObject
{
public:
// Presumably the pre-commit constructor: only marks the model as valid.
NLUModel(QObject* parent=nullptr):QObject(parent){valid=true;}
// Presumably the post-commit constructor: marks the model as valid and
// initialises the `start` flag to false.
NLUModel(QObject* parent=nullptr):QObject(parent){valid=true; start=false;}
virtual ~NLUModel(){}
// Parses `text` and returns the resulting ParsedIntent. Pure virtual.
virtual ParsedIntent parseIntent(const QString& text)=0;
// Stop hook; the default implementation does nothing.
virtual void stop(){}
// Returns the current value of the `start` flag.
virtual bool isStart() {return start;}
protected:
// Set to true by the constructor.
bool valid;
// Initialised to false by the newer constructor. In the rasanlu.cpp hunk,
// RasaNLU::parseIntent sets it to true on the success branch and to false on
// the "rasa request error" branch.
bool start;
};

#endif // NLUMODEL_H
2 changes: 2 additions & 0 deletions Conversation/NLU/rasanlu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -142,8 +142,10 @@ ParsedIntent RasaNLU::parseIntent(const QString &text) {
qaFile.close();
}
}
start = true;
} else {
qWarning() << "rasa request error";
start = false;
}
reply->deleteLater();
return parsedIntent;
Expand Down
41 changes: 30 additions & 11 deletions Conversation/TTS/sherpatts.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,24 +9,43 @@ SherpaTTS::SherpaTTS(QObject* parent) : TTSModel(parent) {
QJsonObject ttsConfig = Config::instance()->getConfig("sherpa_tts");
speakerid = ttsConfig.value("speakerid").toInt();
extraVol = ttsConfig.value("extra_volume").toInt();
std::string rules = Config::getDataPath(ttsConfig.value("rules").toString()).toStdString();
std::string model = Config::getDataPath(ttsConfig.value("model").toString()).toStdString();
QString rules = ttsConfig.value("rules").toString();
QString model = Config::getDataPath(ttsConfig.value("model").toString());
std::string modelS = model.toStdString();
std::string lexicon = Config::getDataPath(ttsConfig.value("lexicon").toString()).toStdString();
std::string tokens = Config::getDataPath(ttsConfig.value("tokens").toString()).toStdString();
std::string dataDir = Config::getDataPath(ttsConfig.value("data_dir").toString()).toStdString();
std::string vocoder = Config::getDataPath(ttsConfig.value("vocoder").toString()).toStdString();
std::string dict = Config::getDataPath(ttsConfig.value("dict").toString()).toStdString();
std::string dataPath = Config::getDataPath("").toStdString();
speed = ttsConfig.value("speed").toDouble(1);
config.max_num_sentences = 50;
if(rules != dataPath) config.rule_fsts = rules.c_str();
QList<QString> rules_split = rules.split(',');
for (int i = 0; i < rules_split.size(); i++) {
rules_split[i] = Config::getDataPath(rules_split[i]);
}
rules = rules_split.join(',');
std::string rulesS = rules.toStdString();
config.rule_fsts = rulesS.c_str();
config.model.debug = 0;
config.model.num_threads = 2;
config.model.provider = "cpu";
config.model.vits.model = model.c_str();
config.model.vits.length_scale = ttsConfig.value("length").toDouble();
config.model.vits.noise_scale = ttsConfig.value("noise").toDouble();
config.model.vits.noise_scale_w = ttsConfig.value("noise-w").toDouble();
if(lexicon != dataPath) config.model.vits.lexicon = lexicon.c_str();
if(dataDir != dataPath) config.model.vits.data_dir = dataDir.c_str();
config.model.vits.tokens = tokens.c_str();
if (model.contains("vits")) {
config.model.vits.model = modelS.c_str();
config.model.vits.length_scale = ttsConfig.value("length").toDouble();
config.model.vits.noise_scale = ttsConfig.value("noise").toDouble();
config.model.vits.noise_scale_w = ttsConfig.value("noise-w").toDouble();
if(lexicon != dataPath) config.model.vits.lexicon = lexicon.c_str();
if(dataDir != dataPath) config.model.vits.data_dir = dataDir.c_str();
if(dict != dataPath) config.model.vits.dict_dir = dict.c_str();
config.model.vits.tokens = tokens.c_str();
} else if (model.contains("matcha")) {
config.model.matcha.acoustic_model = modelS.c_str();
config.model.matcha.vocoder = vocoder.c_str();
config.model.matcha.lexicon = lexicon.c_str();
config.model.matcha.tokens = tokens.c_str();
config.model.matcha.dict_dir = dict.c_str();
}
tts = SherpaOnnxCreateOfflineTts(&config);
}

Expand All @@ -38,7 +57,7 @@ SherpaTTS::~SherpaTTS(){
void SherpaTTS::detect(const QString& text, const QString& type, int id){
std::string textS = text.toStdString();
const SherpaOnnxGeneratedAudio* audio =
SherpaOnnxOfflineTtsGenerate(tts, textS.c_str(), speakerid, 1);
SherpaOnnxOfflineTtsGenerate(tts, textS.c_str(), speakerid, speed);
QByteArray data;
data.resize(audio->n * 2);
for(int i=0; i<audio->n; i++){
Expand Down
3 changes: 2 additions & 1 deletion Conversation/TTS/sherpatts.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,10 @@ class SherpaTTS : public TTSModel
void detect(const QString& text, const QString& type, int id) override;
void stop() override;
private:
SherpaOnnxOfflineTts* tts;
const SherpaOnnxOfflineTts* tts;
int speakerid;
int extraVol;
float speed;
};

#endif // SHERPATTS_H
6 changes: 5 additions & 1 deletion Conversation/conversation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,11 @@ void Conversation::stopSay(const QString &type,
}

// Forwards `text` to the NLU model and returns the parsed intent.
// NOTE(review): this text is a diff view without +/- markers. The hunk reports
// "5 additions & 1 deletion", so the bare `return nlu->parseIntent(text);`
// line directly below appears to be the removed pre-commit body and the
// remaining lines its replacement — confirm against the real source file.
ParsedIntent Conversation::parse(const QString &text) {
return nlu->parseIntent(text);
ParsedIntent intent = nlu->parseIntent(text);
// When the NLU model reports that it is not started, tell the user via say().
if (!nlu->isStart()) {
// The spoken message is Chinese for "starting up".
say("正在启动");
}
return intent;
}

void Conversation::onASRRequest(const QByteArray &data, int id) {
Expand Down
4 changes: 4 additions & 0 deletions Data/default_config.json
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,14 @@
"lexicon": "vits-zh-aishell3/lexicon.txt",
"tokens": "vits-zh-aishell3/tokens.txt",
"rules": "vits-zh-aishell3/rule.fst",
"data_dir": "",
"vocoder": "hifigan_v2.onnx",
"dict": "matcha-icefall-zh-baker/dict",
"noise": 0.3,
"noise-w": 0.2,
"length": 1,
"speakerid": 66,
"speed": 1.0,
"extra_volume": 0
},
"sherpa_wakeup": {
Expand Down
8 changes: 6 additions & 2 deletions Wakeup/Wakeup/duilitewakeup.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,12 @@ void DuiliteWakeup::detect(const QByteArray& data){
// for(int i=0; i<chunkSize; i++){
// buf[i] = data[i];
// }
if((ret = feedFunc(wakeup, (char*)data.data(), chunkSize)) != 0){
qWarning() << "duilite wakeup feed error" << ret;
try {
if((ret = feedFunc(wakeup, (char*)data.data(), chunkSize)) != 0){
qWarning() << "duilite wakeup feed error" << ret;
}
} catch (std::exception const &e) {
qCritical() << "duilite wakeup exception" << e.what();
}
}

Expand Down
2 changes: 1 addition & 1 deletion Wakeup/wakeup.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ public slots:

signals:
void dataArrive(QByteArray data);
void detected(bool stop);
void detected(bool stop, bool command, QString commandStr);
void finishResponse();
void wakeup();
private:
Expand Down
162 changes: 0 additions & 162 deletions lib/sherpa_onnx/include/cargs.h

This file was deleted.

Loading

0 comments on commit 4a9f5a0

Please sign in to comment.