From d6abc085a7d788838103b519ee61310973f2142c Mon Sep 17 00:00:00 2001
From: Quan HL <quan.luuhoang8@gmail.com>
Date: Wed, 16 Oct 2024 12:55:52 +0700
Subject: [PATCH 1/2] support playht3.0 languages

---
 lib/routes/api/speech-credentials.js          |   4 +-
 lib/utils/speech-data/tts-languages-playht.js | 152 ++++++++++++++++++
 lib/utils/speech-utils.js                     |  72 ++++++---
 3 files changed, 201 insertions(+), 27 deletions(-)
 create mode 100644 lib/utils/speech-data/tts-languages-playht.js

diff --git a/lib/routes/api/speech-credentials.js b/lib/routes/api/speech-credentials.js
index 79326a0f..f8ede015 100644
--- a/lib/routes/api/speech-credentials.js
+++ b/lib/routes/api/speech-credentials.js
@@ -875,7 +875,7 @@ router.get('/:sid/test', async(req, res) => {
 router.get('/speech/supportedLanguagesAndVoices', async(req, res) => {
   const {logger, getTtsVoices} = req.app.locals;
   try {
-    const {vendor, label} = req.query;
+    const {vendor, label, create_new} = req.query;
     if (!vendor) {
       throw new DbErrorBadRequest('vendor is required');
     }
@@ -883,7 +883,7 @@ router.get('/speech/supportedLanguagesAndVoices', async(req, res) => {
     const service_provider_sid = req.user.service_provider_sid ||
       req.body.service_provider_sid || parseServiceProviderSid(req);
 
-    const credentials = await SpeechCredential.getSpeechCredentialsByVendorAndLabel(
+    const credentials = create_new ? null : await SpeechCredential.getSpeechCredentialsByVendorAndLabel(
       service_provider_sid, account_sid, vendor, label);
     const tmp = credentials && credentials.length > 0 ? credentials[0] : null;
     const cred = tmp ? JSON.parse(decrypt(tmp.credential)) : null;
diff --git a/lib/utils/speech-data/tts-languages-playht.js b/lib/utils/speech-data/tts-languages-playht.js
new file mode 100644
index 00000000..c2ec7588
--- /dev/null
+++ b/lib/utils/speech-data/tts-languages-playht.js
@@ -0,0 +1,152 @@
+// tts-languages-playht.js: languages offered for the PlayHT Play3.0 voice engine
+
+module.exports = [
+  {
+    name: 'English',
+    value: 'english'
+  },
+  {
+    name: 'Mandarin',
+    value: 'mandarin'
+  },
+  {
+    name: 'Hindi',
+    value: 'hindi'
+  },
+  {
+    name: 'Japanese',
+    value: 'japanese'
+  },
+  {
+    name: 'Korean',
+    value: 'korean'
+  },
+  {
+    name: 'Arabic',
+    value: 'arabic'
+  },
+  {
+    name: 'Spanish',
+    value: 'spanish'
+  },
+  {
+    name: 'French',
+    value: 'french'
+  },
+  {
+    name: 'Italian',
+    value: 'italian'
+  },
+  {
+    name: 'Portuguese',
+    value: 'portuguese'
+  },
+  {
+    name: 'German',
+    value: 'german'
+  },
+  {
+    name: 'Dutch',
+    value: 'dutch'
+  },
+  {
+    name: 'Swedish',
+    value: 'swedish'
+  },
+  {
+    name: 'Czech',
+    value: 'czech'
+  },
+  {
+    name: 'Polish',
+    value: 'polish'
+  },
+  {
+    name: 'Russian',
+    value: 'russian'
+  },
+  {
+    name: 'Bulgarian',
+    value: 'bulgarian'
+  },
+  {
+    name: 'Hebrew',
+    value: 'hebrew'
+  },
+  {
+    name: 'Greek',
+    value: 'greek'
+  },
+  {
+    name: 'Turkish',
+    value: 'turkish'
+  },
+  {
+    name: 'Afrikaans',
+    value: 'afrikaans'
+  },
+  {
+    name: 'Xhosa',
+    value: 'xhosa'
+  },
+  {
+    name: 'Tagalog',
+    value: 'tagalog'
+  },
+  {
+    name: 'Malay',
+    value: 'malay'
+  },
+  {
+    name: 'Indonesian',
+    value: 'indonesian'
+  },
+  {
+    name: 'Bengali',
+    value: 'bengali'
+  },
+  {
+    name: 'Serbian',
+    value: 'serbian'
+  },
+  {
+    name: 'Thai',
+    value: 'thai'
+  },
+  {
+    name: 'Urdu',
+    value: 'urdu'
+  },
+  {
+    name: 'Croatian',
+    value: 'croatian'
+  },
+  {
+    name: 'Hungarian',
+    value: 'hungarian'
+  },
+  {
+    name: 'Danish',
+    value: 'danish'
+  },
+  {
+    name: 'Amharic',
+    value: 'amharic'
+  },
+  {
+    name: 'Albanian',
+    value: 'albanian'
+  },
+  {
+    name: 'Catalan',
+    value: 'catalan'
+  },
+  {
+    name: 'Ukrainian',
+    value: 'ukrainian'
+  },
+  {
+    name: 'Galician',
+    value: 'galician'
+  }
+];
diff --git a/lib/utils/speech-utils.js b/lib/utils/speech-utils.js
index 99993cd6..ad72b588 100644
--- a/lib/utils/speech-utils.js
+++ b/lib/utils/speech-utils.js
@@ -25,6 +25,7 @@ const TtsModelDeepgram = require('./speech-data/tts-model-deepgram');
 const TtsModelElevenLabs = require('./speech-data/tts-model-elevenlabs');
 const TtsModelWhisper = require('./speech-data/tts-model-whisper');
 const TtsModelPlayHT = require('./speech-data/tts-model-playht');
+const ttsLanguagesPlayHt = require('./speech-data/tts-languages-playht');
 const TtsModelRimelabs = require('./speech-data/tts-model-rimelabs');
 
 const SttGoogleLanguagesVoices = require('./speech-data/stt-google');
@@ -40,6 +41,7 @@ const SttSpeechmaticsLanguagesVoices = require('./speech-data/stt-speechmatics')
 const SttAssemblyaiLanguagesVoices = require('./speech-data/stt-assemblyai');
 const SttVerbioLanguagesVoices = require('./speech-data/stt-verbio');
 
+
 const testSonioxStt = async(logger, credentials) => {
   const api_key = credentials;
   const soniox = new SpeechClient(api_key);
@@ -869,6 +871,7 @@ const fetchLayHTVoices = async(credential) => {
 
 async function getLanguagesVoicesForPlayHT(credential) {
   if (credential) {
+    const {voice_engine} = credential;
     const [cloned_voice, voices] = await fetchLayHTVoices(credential);
     const list_voices = [...cloned_voice, ...voices];
 
@@ -876,38 +879,57 @@ async function getLanguagesVoicesForPlayHT(credential) {
       let name = `${d.name} -${concat(d.accent)}${concat(d.age)}${concat(d.gender)}${concat(d.loudness)}` +
       `${concat(d.style)}${concat(d.tempo)}${concat(d.texture)}` ;
       name = name.endsWith(',') ? name.trim().slice(0, -1) : name;
+      name += !d.language_code ? ' - Custom Voice' : '';
+
       return {
         value: `${d.id}`,
         name
       };
     };
 
-    const ttsVoices = list_voices.reduce((acc, voice) => {
-      // Play3.0 support all voice for PlayHT2.0*
-      const filteredVoiceEngine = credential.voice_engine === 'Play3.0' ?
-        `${credential.voice_engine}_PlayHT2.0_PlayHT2.0-turbo` : credential.voice_engine;
-      if (!filteredVoiceEngine.includes(voice.voice_engine)) {
+    const buildPlay30Payload = () => {
+      // PlayHT3.0 can play different languages with different voices.
+      // All voices are added to the English language by default; other languages get their voices from English.
+      const ttsVoices = ttsLanguagesPlayHt.map((l) => ({
+        ...l,
+        voices: l.value === 'english' ? list_voices.map((v) => buildVoice(v)) : []
+      }));
+      return tranform(ttsVoices, undefined, TtsModelPlayHT);
+    };
+
+    // Groups the voices matching voice_engine by language code (non-Play3.0 engines).
+    const buildPayload = () => {
+      const ttsVoices = list_voices.reduce((acc, voice) => {
+        if (!voice_engine.includes(voice.voice_engine)) {
+          return acc;
+        }
+        // Custom voices have no language code: file them under 'en' / 'Custom-English'.
+        // Work on a copy so the fetched voice is not mutated, and look the group up by
+        // the normalized code so all custom voices share one entry instead of one each.
+        const entry = voice.language_code ?
+          voice : {...voice, language_code: 'en', language: 'Custom-English'};
+        const existingLanguage = acc.find((lang) => lang.value === entry.language_code);
+        if (existingLanguage) {
+          existingLanguage.voices.push(buildVoice(entry));
+        } else {
+          acc.push({
+            value: entry.language_code,
+            name: entry.language,
+            voices: [buildVoice(entry)]
+          });
+        }
         return acc;
-      }
-      const languageCode = voice.language_code;
-      // custom voice does not have language code
-      if (!languageCode) {
-        voice.language_code = 'en';
-        voice.language = 'Custom-English';
-      }
-      const existingLanguage = acc.find((lang) => lang.value === languageCode);
-      if (existingLanguage) {
-        existingLanguage.voices.push(buildVoice(voice));
-      } else {
-        acc.push({
-          value: voice.language_code,
-          name: voice.language,
-          voices: [buildVoice(voice)]
-        });
-      }
-      return acc;
-    }, []);
-    return tranform(ttsVoices, undefined, TtsModelPlayHT);
+      }, []);
+      return tranform(ttsVoices, undefined, TtsModelPlayHT);
+    };
+
+    switch (voice_engine) {
+      case 'Play3.0':
+        return buildPlay30Payload();
+
+      default:
+        return buildPayload();
+    }
   }
   return tranform(TtsPlayHtLanguagesVoices, undefined, TtsModelPlayHT);
 }

From 32d50b5649883b1f6fb51d959c45782fa9b31715 Mon Sep 17 00:00:00 2001
From: Quan HL <quan.luuhoang8@gmail.com>
Date: Wed, 16 Oct 2024 18:26:33 +0700
Subject: [PATCH 2/2] update speech utils version

---
 package-lock.json | 8 ++++----
 package.json      | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/package-lock.json b/package-lock.json
index 3665b2be..441aacdb 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -19,7 +19,7 @@
         "@jambonz/lamejs": "^1.2.2",
         "@jambonz/mw-registrar": "^0.2.7",
         "@jambonz/realtimedb-helpers": "^0.8.10",
-        "@jambonz/speech-utils": "^0.1.18",
+        "@jambonz/speech-utils": "^0.1.19",
         "@jambonz/time-series": "^0.2.8",
         "@jambonz/verb-specifications": "^0.0.72",
         "@soniox/soniox-node": "^1.2.2",
@@ -2224,9 +2224,9 @@
       }
     },
     "node_modules/@jambonz/speech-utils": {
-      "version": "0.1.18",
-      "resolved": "https://registry.npmjs.org/@jambonz/speech-utils/-/speech-utils-0.1.18.tgz",
-      "integrity": "sha512-GlcPvUIKcyiiH4cfUPXyYZtP1HIIdFbrqYUmeTmeBaOuZUrJ0xW+TAp/pbysh54vgPnAfcS43Y3ciULx0S3IjQ==",
+      "version": "0.1.19",
+      "resolved": "https://registry.npmjs.org/@jambonz/speech-utils/-/speech-utils-0.1.19.tgz",
+      "integrity": "sha512-3R4o1zJYnUIqMsT2GmYAItcAnhEz6NVTNqk5aHSAvBLFSMpqdeEPrTPaKVmQAPS8aIY7crhbJmJx8L3i2xDzDg==",
       "license": "MIT",
       "dependencies": {
         "@aws-sdk/client-polly": "^3.496.0",
diff --git a/package.json b/package.json
index 39e48afb..87f965d8 100644
--- a/package.json
+++ b/package.json
@@ -29,7 +29,7 @@
     "@jambonz/lamejs": "^1.2.2",
     "@jambonz/mw-registrar": "^0.2.7",
     "@jambonz/realtimedb-helpers": "^0.8.10",
-    "@jambonz/speech-utils": "^0.1.18",
+    "@jambonz/speech-utils": "^0.1.19",
     "@jambonz/time-series": "^0.2.8",
     "@jambonz/verb-specifications": "^0.0.72",
     "@soniox/soniox-node": "^1.2.2",