From bfa78b123a3bc8c5b0e3a84ecec34838affe9f4d Mon Sep 17 00:00:00 2001 From: arvinxx Date: Fri, 17 Nov 2023 21:39:34 +0800 Subject: [PATCH 1/8] :sparkles: feat: update api --- .dumirc.ts | 5 +- .../edge-speech-tts.zh-CN.md} | 90 ++++++++----------- docs/api/index.zh-CN.md | 5 ++ src/core/EdgeSpeechTTS/index.ts | 14 ++- 4 files changed, 56 insertions(+), 58 deletions(-) rename docs/{edge-speech-tts.md => api/edge-speech-tts.zh-CN.md} (67%) create mode 100644 docs/api/index.zh-CN.md diff --git a/.dumirc.ts b/.dumirc.ts index c73af35..3638a45 100644 --- a/.dumirc.ts +++ b/.dumirc.ts @@ -52,7 +52,10 @@ export default defineConfig({ 'process.env': process.env, }, favicons: ['https://npm.elemecdn.com/@lobehub/assets-favicons/assets/favicon.ico'], - locales: [{ id: 'en-US', name: 'English' }], + locales: [ + { id: 'en-US', name: 'English' }, + { id: 'zh-CN', name: '简体中文' }, + ], // mfsu: isWin ? undefined : {}, mfsu: false, npmClient: 'pnpm', diff --git a/docs/edge-speech-tts.md b/docs/api/edge-speech-tts.zh-CN.md similarity index 67% rename from docs/edge-speech-tts.md rename to docs/api/edge-speech-tts.zh-CN.md index e933205..ba2b943 100644 --- a/docs/edge-speech-tts.md +++ b/docs/api/edge-speech-tts.zh-CN.md @@ -1,16 +1,47 @@ +--- +group: TTS +title: EdgeSpeechTTS +--- + # EdgeSpeechTTS -## 简介 +## `constructor(options: EdgeSpeechAPI & { locale?: string }): EdgeSpeechTTS` `EdgeSpeechTTS` 类是一个用于将文本转换为语音的工具,它可以在边缘运行时环境中使用。该类提供了一系列方法来获取语音选项,创建语音合成请求,并处理返回的音频数据。 -## 构造函数 +### 示例 -### `constructor(options: EdgeSpeechAPI & { locale?: string }): EdgeSpeechTTS` +以下是使用 `EdgeSpeechTTS` 类的示例代码: -创建一个 `EdgeSpeechTTS` 实例。 +Node 环境: -#### 参数 +```js +import { EdgeSpeechTTS } from '@lobehub/tts'; +import fs from 'fs'; +import path from 'path'; + +// 实例化 EdgeSpeechTTS +const tts = new EdgeSpeechTTS({ locale: 'zh-CN' }); + +// 创建语音合成请求负载 +const payload = { + text: '这是一段语音演示', + voice: 'zh-CN-XiaoxiaoNeural', +}; +const speechFile = path.resolve('./speech.mp3'); + +// 调用 create 方法来合成语音 
+async function main() { + const mp3Buffer = await tts.create(payload); + await fs.writeFileSync(speechFile, mp3Buffer); +} + +main(); +``` + +在此示例中,首先实例化了 `EdgeSpeechTTS` 类,并指定了后端服务的 URL 和语音区域设置。然后创建了一个包含文本和语音选项的请求负载。最后,通过调用 `create` 方法并传入负载来合成语音。如果合成成功,将返回一个包含音频数据的 `AudioBuffer` 对象。如果出现错误,将捕获并处理。 + +## 参数 - `options`: 对象,可选。 - `backendUrl`: 字符串,指定后端服务的 URL。如果提供,将使用此 URL 发送请求。 @@ -38,22 +69,6 @@ 返回一个包含当前可用语音选项的对象。 -### `fetch(payload: EdgeSpeechPayload): Promise` - -内部方法,用于发送语音合成请求。 - -#### 参数 - -- `payload`: `EdgeSpeechPayload` 类型,包含语音合成请求的必要信息。 - -#### 返回值 - -返回一个 `Promise`,该 `Promise` 解析为包含音频数据的 `Response` 对象。 - -#### 异常 - -如果网络响应不成功,将抛出一个错误。 - ### `create(payload: EdgeSpeechPayload): Promise` 使用给定的请求负载创建语音合成。 @@ -65,36 +80,3 @@ #### 返回值 返回一个 `Promise`,该 `Promise` 解析为 `AudioBuffer` 对象,包含合成的音频数据。 - -## 示例 - -以下是使用 `EdgeSpeechTTS` 类的示例代码: - -```javascript -import { EdgeSpeechTTS } from 'path-to-EdgeSpeechTTS'; - -// 实例化 EdgeSpeechTTS -const tts = new EdgeSpeechTTS({ - backendUrl: 'https://your-backend-service.com/api/speech', - locale: 'en-US', -}); - -// 创建语音合成请求负载 -const payload = { - text: 'Hello, world!', - voice: 'en-US-Standard-B', - // 其他选项... 
-}; - -// 调用 create 方法来合成语音 -tts - .create(payload) - .then((audioBuffer) => { - // 使用 audioBuffer - }) - .catch((error) => { - console.error('语音合成失败:', error); - }); -``` - -在此示例中,首先实例化了 `EdgeSpeechTTS` 类,并指定了后端服务的 URL 和语音区域设置。然后创建了一个包含文本和语音选项的请求负载。最后,通过调用 `create` 方法并传入负载来合成语音。如果合成成功,将返回一个包含音频数据的 `AudioBuffer` 对象。如果出现错误,将捕获并处理。 diff --git a/docs/api/index.zh-CN.md b/docs/api/index.zh-CN.md new file mode 100644 index 0000000..ef9dd62 --- /dev/null +++ b/docs/api/index.zh-CN.md @@ -0,0 +1,5 @@ +# API Reference + +## TTS + +- [EdgeSpeechTTS](./edge-speech-tts.zh-CN.md) diff --git a/src/core/EdgeSpeechTTS/index.ts b/src/core/EdgeSpeechTTS/index.ts index eb65474..b05f17f 100644 --- a/src/core/EdgeSpeechTTS/index.ts +++ b/src/core/EdgeSpeechTTS/index.ts @@ -41,10 +41,18 @@ export class EdgeSpeechTTS { return response; }; - create = async (payload: EdgeSpeechPayload): Promise => { - const response = await this.fetch(payload); + create = async (payload: EdgeSpeechPayload): Promise => { + return this.fetch(payload); + }; + + /** + * Browser only + * @param payload + */ + createAudio = async (payload: EdgeSpeechPayload): Promise => { + const res = await this.create(payload); - const arrayBuffer = await response.arrayBuffer(); + const arrayBuffer = await res.arrayBuffer(); return arrayBufferConvert(arrayBuffer); }; From 884f8445ddfc784be98ee01ad3e7369d7705c666 Mon Sep 17 00:00:00 2001 From: arvinxx Date: Fri, 17 Nov 2023 21:41:34 +0800 Subject: [PATCH 2/8] :sparkles: feat: update api --- src/react/useEdgeSpeech/index.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/react/useEdgeSpeech/index.ts b/src/react/useEdgeSpeech/index.ts index add72e0..acb06ee 100644 --- a/src/react/useEdgeSpeech/index.ts +++ b/src/react/useEdgeSpeech/index.ts @@ -17,7 +17,7 @@ export const useEdgeSpeech = (defaultText: string, config: EdgeSpeechOptions) => (segmentText: string) => { const instance = new EdgeSpeechTTS({ ...api, locale }); - return instance.create({ input: 
segmentText, options }); + return instance.createAudio({ input: segmentText, options }); }, swrConfig, ); From d735a60075b9bfafaf33a88f0c8d16a677d57e3b Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Fri, 17 Nov 2023 13:42:32 +0000 Subject: [PATCH 3/8] :bookmark: chore(release): v1.20.0-beta.1 [skip ci] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## [Version 1.20.0-beta.1](https://github.com/lobehub/lobe-tts/compare/v1.19.1...v1.20.0-beta.1) Released on **2023-11-17** #### ✨ Features - **misc**: Update api, update api.
Improvements and Fixes #### What's improved * **misc**: Update api ([884f844](https://github.com/lobehub/lobe-tts/commit/884f844)) * **misc**: Update api ([bfa78b1](https://github.com/lobehub/lobe-tts/commit/bfa78b1))
[![](https://img.shields.io/badge/-BACK_TO_TOP-151515?style=flat-square)](#readme-top)
--- CHANGELOG.md | 26 ++++++++++++++++++++++++++ package.json | 2 +- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bbf031b..69388f7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,32 @@ # Changelog +## [Version 1.20.0-beta.1](https://github.com/lobehub/lobe-tts/compare/v1.19.1...v1.20.0-beta.1) + +Released on **2023-11-17** + +#### ✨ Features + +- **misc**: Update api, update api. + +
+ +
+Improvements and Fixes + +#### What's improved + +- **misc**: Update api ([884f844](https://github.com/lobehub/lobe-tts/commit/884f844)) +- **misc**: Update api ([bfa78b1](https://github.com/lobehub/lobe-tts/commit/bfa78b1)) + +
+ +
+ +[![](https://img.shields.io/badge/-BACK_TO_TOP-151515?style=flat-square)](#readme-top) + +
+ ### [Version 1.19.1](https://github.com/lobehub/lobe-tts/compare/v1.19.0...v1.19.1) Released on **2023-11-16** diff --git a/package.json b/package.json index 9817c20..a7e91b9 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@lobehub/tts", - "version": "1.19.1", + "version": "1.20.0-beta.1", "description": "A high-quality & reliable TTS React Hooks library", "homepage": "https://github.com/lobehub/lobe-tts", "bugs": { From 105e2d059ccbff5ed4c8ef144c6b8b2e353096ba Mon Sep 17 00:00:00 2001 From: arvinxx Date: Fri, 17 Nov 2023 22:03:48 +0800 Subject: [PATCH 4/8] :sparkles: feat: update MicrosoftSpeech api --- docs/api/edge-speech-tts.zh-CN.md | 40 +++++++++++++++++---------- src/core/MicrosoftSpeechTTS/index.ts | 8 ++++-- src/react/useMicrosoftSpeech/index.ts | 2 +- 3 files changed, 33 insertions(+), 17 deletions(-) diff --git a/docs/api/edge-speech-tts.zh-CN.md b/docs/api/edge-speech-tts.zh-CN.md index ba2b943..a05c210 100644 --- a/docs/api/edge-speech-tts.zh-CN.md +++ b/docs/api/edge-speech-tts.zh-CN.md @@ -9,14 +9,20 @@ title: EdgeSpeechTTS `EdgeSpeechTTS` 类是一个用于将文本转换为语音的工具,它可以在边缘运行时环境中使用。该类提供了一系列方法来获取语音选项,创建语音合成请求,并处理返回的音频数据。 -### 示例 +### 参数 -以下是使用 `EdgeSpeechTTS` 类的示例代码: +- `options`: 对象,可选。 + - `backendUrl`: 字符串,指定后端服务的 URL。如果提供,将使用此 URL 发送请求。 + - `locale`: 字符串,指定要使用的语音区域设置。如果提供,将用于过滤可用语音列表。 -Node 环境: +### 示例 + +使用 Bun 直接运行 `EdgeSpeechTTS`: ```js +// bun index.js import { EdgeSpeechTTS } from '@lobehub/tts'; +import { Buffer } from 'buffer'; import fs from 'fs'; import path from 'path'; @@ -25,27 +31,33 @@ const tts = new EdgeSpeechTTS({ locale: 'zh-CN' }); // 创建语音合成请求负载 const payload = { - text: '这是一段语音演示', - voice: 'zh-CN-XiaoxiaoNeural', + input: '这是一段语音演示', + options: { + voice: 'zh-CN-XiaoxiaoNeural', + }, }; + const speechFile = path.resolve('./speech.mp3'); // 调用 create 方法来合成语音 -async function main() { - const mp3Buffer = await tts.create(payload); - await fs.writeFileSync(speechFile, mp3Buffer); -} +const response = await 
tts.create(payload); +const mp3Buffer = Buffer.from(await response.arrayBuffer()); -main(); +fs.writeFileSync(speechFile, mp3Buffer); ``` 在此示例中,首先实例化了 `EdgeSpeechTTS` 类,并指定了后端服务的 URL 和语音区域设置。然后创建了一个包含文本和语音选项的请求负载。最后,通过调用 `create` 方法并传入负载来合成语音。如果合成成功,将返回一个包含音频数据的 `AudioBuffer` 对象。如果出现错误,将捕获并处理。 -## 参数 +在 Node.js 中运行 -- `options`: 对象,可选。 - - `backendUrl`: 字符串,指定后端服务的 URL。如果提供,将使用此 URL 发送请求。 - - `locale`: 字符串,指定要使用的语音区域设置。如果提供,将用于过滤可用语音列表。 +由于 Nodejs 环境缺少 `WebSocket` 实例,所以我们需要 polyfill WebSocket。通过引入 ws 包即可。 + +```js +// 在文件顶部引入 +import WebSocket from 'ws'; + +global.WebSocket = WebSocket; +``` ## 属性 diff --git a/src/core/MicrosoftSpeechTTS/index.ts b/src/core/MicrosoftSpeechTTS/index.ts index a49c3d0..0a624ef 100644 --- a/src/core/MicrosoftSpeechTTS/index.ts +++ b/src/core/MicrosoftSpeechTTS/index.ts @@ -42,8 +42,12 @@ export class MicrosoftSpeechTTS { return response; }; - create = async (payload: MicrosoftSpeechPayload): Promise => { - const response = await this.fetch(payload); + create = async (payload: MicrosoftSpeechPayload): Promise => { + return await this.fetch(payload); + }; + + createAudio = async (payload: MicrosoftSpeechPayload): Promise => { + const response = await this.create(payload); const arrayBuffer = await response.arrayBuffer(); diff --git a/src/react/useMicrosoftSpeech/index.ts b/src/react/useMicrosoftSpeech/index.ts index 060df7d..e0624f7 100644 --- a/src/react/useMicrosoftSpeech/index.ts +++ b/src/react/useMicrosoftSpeech/index.ts @@ -20,7 +20,7 @@ export const useMicrosoftSpeech = (defaultText: string, config: MicrosoftSpeechO text, (segmentText: string) => { const instance = new MicrosoftSpeechTTS({ ...api, locale }); - return instance.create({ input: segmentText, options }); + return instance.createAudio({ input: segmentText, options }); }, swrConfig, ); From 0796dcb68f0d11888b39a1b66bb112b49147d164 Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Fri, 17 Nov 2023 14:04:52 +0000 Subject: [PATCH 5/8] :bookmark: chore(release): 
v1.20.0-beta.2 [skip ci] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## [Version 1.20.0-beta.2](https://github.com/lobehub/lobe-tts/compare/v1.20.0-beta.1...v1.20.0-beta.2) Released on **2023-11-17** #### ✨ Features - **misc**: Update MicrosoftSpeech api.
Improvements and Fixes #### What's improved * **misc**: Update MicrosoftSpeech api ([105e2d0](https://github.com/lobehub/lobe-tts/commit/105e2d0))
[![](https://img.shields.io/badge/-BACK_TO_TOP-151515?style=flat-square)](#readme-top)
--- CHANGELOG.md | 25 +++++++++++++++++++++++++ package.json | 2 +- 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 69388f7..271ebd7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,31 @@ # Changelog +## [Version 1.20.0-beta.2](https://github.com/lobehub/lobe-tts/compare/v1.20.0-beta.1...v1.20.0-beta.2) + +Released on **2023-11-17** + +#### ✨ Features + +- **misc**: Update MicrosoftSpeech api. + +
+ +
+Improvements and Fixes + +#### What's improved + +- **misc**: Update MicrosoftSpeech api ([105e2d0](https://github.com/lobehub/lobe-tts/commit/105e2d0)) + +
+ +
+ +[![](https://img.shields.io/badge/-BACK_TO_TOP-151515?style=flat-square)](#readme-top) + +
+ ## [Version 1.20.0-beta.1](https://github.com/lobehub/lobe-tts/compare/v1.19.1...v1.20.0-beta.1) Released on **2023-11-17** diff --git a/package.json b/package.json index a7e91b9..1888c9e 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@lobehub/tts", - "version": "1.20.0-beta.1", + "version": "1.20.0-beta.2", "description": "A high-quality & reliable TTS React Hooks library", "homepage": "https://github.com/lobehub/lobe-tts", "bugs": { From 1a14f4b49a8276e5d0579bdc0ce84302248aa647 Mon Sep 17 00:00:00 2001 From: arvinxx Date: Fri, 17 Nov 2023 22:45:11 +0800 Subject: [PATCH 6/8] :sparkles: feat: update OpenAITTS api --- docs/api/edge-speech-tts.zh-CN.md | 41 +++++----- docs/api/microsoft-speech-tts.zh-CN.md | 102 +++++++++++++++++++++++++ docs/api/openai-tts.zh-CN.md | 88 +++++++++++++++++++++ src/core/OpenAITTS/index.ts | 22 ++++-- src/react/useOpenAITTS/index.ts | 2 +- 5 files changed, 224 insertions(+), 31 deletions(-) create mode 100644 docs/api/microsoft-speech-tts.zh-CN.md create mode 100644 docs/api/openai-tts.zh-CN.md diff --git a/docs/api/edge-speech-tts.zh-CN.md b/docs/api/edge-speech-tts.zh-CN.md index a05c210..e7dfb4b 100644 --- a/docs/api/edge-speech-tts.zh-CN.md +++ b/docs/api/edge-speech-tts.zh-CN.md @@ -1,26 +1,28 @@ --- group: TTS title: EdgeSpeechTTS +apiHeader: + pkg: '@lobehub/tts' --- -# EdgeSpeechTTS +`EdgeSpeechTTS` 是一个基于 Edge 语音服务的文本转语音方法类。 -## `constructor(options: EdgeSpeechAPI & { locale?: string }): EdgeSpeechTTS` +该类支持将文本转换为语音,并提供了一系列方法来获取语音选项,创建语音合成请求。 -`EdgeSpeechTTS` 类是一个用于将文本转换为语音的工具,它可以在边缘运行时环境中使用。该类提供了一系列方法来获取语音选项,创建语音合成请求,并处理返回的音频数据。 +```ts +constructor(options: EdgeSpeechAPI & { locale?: string }): EdgeSpeechTTS +``` -### 参数 +## 参数 - `options`: 对象,可选。 - - `backendUrl`: 字符串,指定后端服务的 URL。如果提供,将使用此 URL 发送请求。 + - `serviceUrl`: 字符串,指定 Edge 语音服务的 URL。如果提供,将使用此 URL 发送请求。 - `locale`: 字符串,指定要使用的语音区域设置。如果提供,将用于过滤可用语音列表。 -### 示例 - -使用 Bun 直接运行 `EdgeSpeechTTS`: +## 示例 ```js -// bun index.js +// index.js import { 
EdgeSpeechTTS } from '@lobehub/tts'; import { Buffer } from 'buffer'; import fs from 'fs'; @@ -46,9 +48,13 @@ const mp3Buffer = Buffer.from(await response.arrayBuffer()); fs.writeFileSync(speechFile, mp3Buffer); ``` -在此示例中,首先实例化了 `EdgeSpeechTTS` 类,并指定了后端服务的 URL 和语音区域设置。然后创建了一个包含文本和语音选项的请求负载。最后,通过调用 `create` 方法并传入负载来合成语音。如果合成成功,将返回一个包含音频数据的 `AudioBuffer` 对象。如果出现错误,将捕获并处理。 +使用 Bun 运行: -在 Node.js 中运行 +```shell +$ bun index.js +``` + +在 Node.js 中运行: 由于 Nodejs 环境缺少 `WebSocket` 实例,所以我们需要 polyfill WebSocket。通过引入 ws 包即可。 @@ -59,11 +65,6 @@ import WebSocket from 'ws'; global.WebSocket = WebSocket; ``` -## 属性 - -- `locale`: 字符串,表示实例化时指定的语音区域设置。 -- `BACKEND_URL`: 字符串,表示后端服务的 URL。 - ## 静态属性 - `localeOptions`: 获取所有支持的语音区域选项。 @@ -75,13 +76,9 @@ global.WebSocket = WebSocket; ### `voiceOptions` -获取当前实例的语音选项,这些选项基于实例化时指定的 `locale`。 - -#### 返回值 - -返回一个包含当前可用语音选项的对象。 +获取当前实例的语音选项,这些选项基于实例化时指定的 `locale`。 返回一个包含当前可用语音选项的对象。 -### `create(payload: EdgeSpeechPayload): Promise` +### `createAudio(payload: EdgeSpeechPayload): Promise` 使用给定的请求负载创建语音合成。 diff --git a/docs/api/microsoft-speech-tts.zh-CN.md b/docs/api/microsoft-speech-tts.zh-CN.md new file mode 100644 index 0000000..f494f90 --- /dev/null +++ b/docs/api/microsoft-speech-tts.zh-CN.md @@ -0,0 +1,102 @@ +--- +group: TTS +title: MicrosoftSpeechTTS +apiHeader: + pkg: '@lobehub/tts' +--- + +`MicrosoftSpeechTTS` 是一个基于 Microsoft 语音服务的文本转语音方法类。 + +该类支持将文本转换为语音,并提供了一系列方法来获取语音选项,创建语音合成请求。 + +```ts +constructor(options: MicrosoftSpeechAPI & { locale?: string }): MicrosoftSpeechTTS +``` + +## 参数 + +- `options`: 对象,可选。 + - `serviceUrl`: 字符串,指定 Microsoft 语音服务的 URL。如果提供,将使用此 URL 发送请求。 + - `locale`: 字符串,指定要使用的语音区域设置。如果提供,将用于过滤可用语音列表。 + +## 示例 + +```js +// index.js +import { MicrosoftSpeechTTS } from '@lobehub/tts'; + +// 实例化 MicrosoftSpeechTTS +const tts = new MicrosoftSpeechTTS({ locale: 'zh-CN' }); + +// 创建语音合成请求负载 +const payload: MicrosoftSpeechPayload = { + input: '这是一段语音演示', + options: { + voice: 'yue-CN-XiaoMinNeural', + style: 
'embarrassed', + }, +}; + +const speechFile = path.resolve('./speech.mp3'); + +// 调用 create 方法来合成语音 +const response = await tts.create(payload); +const mp3Buffer = Buffer.from(await response.arrayBuffer()); + +fs.writeFileSync(speechFile, mp3Buffer); +``` + +使用 Bun 运行: + +```shell +$ bun index.js +``` + +在 Node.js 中运行: + +由于 Nodejs 环境缺少 `WebSocket` 实例,所以我们需要 polyfill WebSocket。通过引入 ws 包即可。 + +```js +// 在文件顶部引入 +import WebSocket from 'ws'; + +global.WebSocket = WebSocket; +``` + +## 静态属性 + +- `localeOptions`: 获取所有支持的语音区域选项。 +- `voiceList`: 包含所有可用语音的列表。 +- `voiceName`: 包含所有语音名称的对象。 +- `styleList`: 包含所有可用语音风格的列表。 +- `createRequest`: 用于创建语音合成请求的静态方法。 + +## 方法 + +### `voiceOptions` + +获取当前实例的语音选项,这些选项基于实例化时指定的 `locale`。 返回一个包含当前可用语音选项的对象。 + +### `create(payload: MicrosoftSpeechPayload): Promise` + +使用给定的请求负载创建语音合成。 + +#### 参数 + +- `payload`: `MicrosoftSpeechPayload` 类型,包含语音合成请求的必要信息。 + +#### 返回值 + +返回一个 `Promise`,该 `Promise` 解析为 `Response` 对象,包含合成的语音数据。 + +### `createAudio(payload: MicrosoftSpeechPayload): Promise` + +使用给定的请求负载创建语音合成,并将其转换为 `AudioBuffer` 对象。 + +#### 参数 + +- `payload`: `MicrosoftSpeechPayload` 类型,包含语音合成请求的必要信息。 + +#### 返回值 + +返回一个 `Promise`,该 `Promise` 解析为 `AudioBuffer` 对象,包含合成的音频数据。 diff --git a/docs/api/openai-tts.zh-CN.md b/docs/api/openai-tts.zh-CN.md new file mode 100644 index 0000000..50acd02 --- /dev/null +++ b/docs/api/openai-tts.zh-CN.md @@ -0,0 +1,88 @@ +--- +group: TTS +title: OpenAITTS +apiHeader: + pkg: '@lobehub/tts' +--- + +`OpenAITTS` 是一个基于 OpenAI 语音服务的文本转语音方法类。 + +该类支持将文本转换为语音,并提供了一系列方法来获取语音选项,创建语音合成请求。 + +```ts +constructor(options: OpenAITTSAPI): OpenAITTS +``` + +## 参数 + +- `options`: 对象,可选。 + - `OPENAI_PROXY_URL`: 字符串,指定 OpenAI 代理 URL。如果提供,将使用此 URL 发送请求。 + - `OPENAI_API_KEY`: 字符串,指定 OpenAI API 密钥。如果提供,将用于身份验证。 + - `serviceUrl`: 字符串,指定要使用的 OpenAI 语音服务的 URL。如果提供,将用于发送请求。 + +## 示例 + +```js +// index.js +import { OpenAITTS } from '@lobehub/tts'; +import { Buffer } from 'buffer'; +import fs from 'fs'; +import path from 'path'; + +// 实例化 
OpenAITTS +const tts = new OpenAITTS({ OPENAI_API_KEY: 'your-api-key' }); + +// 创建语音合成请求负载 +const payload = { + input: 'This is a voice synthesis demo', + options: { + model: 'tts-1', + voice: 'alloy', + }, +}; + +const speechFile = path.resolve('./speech.mp3'); + +// 调用 create 方法来合成语音 +const response = await tts.create(payload); +const mp3Buffer = Buffer.from(await response.arrayBuffer()); + +fs.writeFileSync(speechFile, mp3Buffer); +``` + +使用 Bun 运行: + +```shell +$ bun index.js +``` + +在 Node.js 中运行: + +```js +// 在文件顶部引入 +import WebSocket from 'ws'; + +global.WebSocket = WebSocket; +``` + +## 静态属性 + +- `voiceList`: 包含所有可用语音的列表。 + +## 方法 + +### `voiceOptions` + +获取当前实例的语音选项,这些选项基于实例化时指定的 `serviceUrl`。 返回一个包含当前可用语音选项的对象。 + +### `createAudio(payload: OpenAITTSPayload): Promise` + +使用给定的请求负载创建语音合成。 + +#### 参数 + +- `payload`: `OpenAITTSPayload` 类型,包含语音合成请求的必要信息。 + +#### 返回值 + +返回一个 `Promise`,该 `Promise` 解析为 `AudioBuffer` 对象,包含合成的音频数据。 diff --git a/src/core/OpenAITTS/index.ts b/src/core/OpenAITTS/index.ts index b62ef0c..b81cff7 100644 --- a/src/core/OpenAITTS/index.ts +++ b/src/core/OpenAITTS/index.ts @@ -25,9 +25,9 @@ export interface OpenAITTSPayload { } export interface OpenAITTSAPI { - apiKey?: string; - backendUrl?: string; - baseUrl?: string; + OPENAI_API_KEY?: string; + OPENAI_PROXY_URL?: string; + serviceUrl?: string; } export class OpenAITTS { @@ -35,10 +35,10 @@ export class OpenAITTS { private OPENAI_API_KEY: string | undefined; private BACKEND_URL: string | undefined; - constructor({ baseUrl, apiKey, backendUrl }: OpenAITTSAPI = {}) { - this.OPENAI_BASE_URL = baseUrl || OPENAI_BASE_URL; - this.OPENAI_API_KEY = apiKey; - this.BACKEND_URL = backendUrl; + constructor({ OPENAI_PROXY_URL, OPENAI_API_KEY, serviceUrl }: OpenAITTSAPI = {}) { + this.OPENAI_BASE_URL = OPENAI_PROXY_URL || OPENAI_BASE_URL; + this.OPENAI_API_KEY = OPENAI_API_KEY; + this.BACKEND_URL = serviceUrl; } get voiceOptions() { @@ -65,13 +65,19 @@ export class OpenAITTS { }); }; - create = async 
(payload: OpenAITTSPayload): Promise => { + create = async (payload: OpenAITTSPayload): Promise => { const response = await this.fetch(payload); if (!response.ok) { throw new Error('Network response was not ok'); } + return response; + }; + + createAudio = async (payload: OpenAITTSPayload): Promise => { + const response = await this.create(payload); + const arrayBuffer = await response.arrayBuffer(); return await arrayBufferConvert(arrayBuffer); }; diff --git a/src/react/useOpenAITTS/index.ts b/src/react/useOpenAITTS/index.ts index 0b10273..eb0fd3c 100644 --- a/src/react/useOpenAITTS/index.ts +++ b/src/react/useOpenAITTS/index.ts @@ -16,7 +16,7 @@ export const useOpenAITTS = (defaultText: string, config: OpenAITTSOptions) => { (segmentText: string) => { const instance = new OpenAITTS(api); - return instance.create({ input: segmentText, options }); + return instance.createAudio({ input: segmentText, options }); }, swrConfig, ); From 47b7aeb3e906122e945736a8cb71c84ed7d911d0 Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Fri, 17 Nov 2023 14:46:23 +0000 Subject: [PATCH 7/8] :bookmark: chore(release): v1.20.0-beta.3 [skip ci] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## [Version 1.20.0-beta.3](https://github.com/lobehub/lobe-tts/compare/v1.20.0-beta.2...v1.20.0-beta.3) Released on **2023-11-17** #### ✨ Features - **misc**: Update OpenAITTS api.
Improvements and Fixes #### What's improved * **misc**: Update OpenAITTS api ([1a14f4b](https://github.com/lobehub/lobe-tts/commit/1a14f4b))
[![](https://img.shields.io/badge/-BACK_TO_TOP-151515?style=flat-square)](#readme-top)
--- CHANGELOG.md | 25 +++++++++++++++++++++++++ package.json | 2 +- 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 271ebd7..f644d27 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,31 @@ # Changelog +## [Version 1.20.0-beta.3](https://github.com/lobehub/lobe-tts/compare/v1.20.0-beta.2...v1.20.0-beta.3) + +Released on **2023-11-17** + +#### ✨ Features + +- **misc**: Update OpenAITTS api. + +
+ +
+Improvements and Fixes + +#### What's improved + +- **misc**: Update OpenAITTS api ([1a14f4b](https://github.com/lobehub/lobe-tts/commit/1a14f4b)) + +
+ +
+ +[![](https://img.shields.io/badge/-BACK_TO_TOP-151515?style=flat-square)](#readme-top) + +
+ ## [Version 1.20.0-beta.2](https://github.com/lobehub/lobe-tts/compare/v1.20.0-beta.1...v1.20.0-beta.2) Released on **2023-11-17** diff --git a/package.json b/package.json index 1888c9e..6a8ffc6 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@lobehub/tts", - "version": "1.20.0-beta.2", + "version": "1.20.0-beta.3", "description": "A high-quality & reliable TTS React Hooks library", "homepage": "https://github.com/lobehub/lobe-tts", "bugs": { From 0679c9bf567df92dc0d25521fea167c577df464d Mon Sep 17 00:00:00 2001 From: arvinxx Date: Fri, 17 Nov 2023 23:33:25 +0800 Subject: [PATCH 8/8] :memo: docs: update docs --- docs/api/edge-speech-tts.md | 91 ++++++++++++++++++++ docs/api/index.zh-CN.md | 11 ++- docs/api/microsoft-speech-tts.md | 103 +++++++++++++++++++++++ docs/api/openai-tts.md | 88 +++++++++++++++++++ src/react/AudioPlayer/index.zh-CN.md | 11 +++ src/react/AudioVisualizer/index.zh-CN.md | 11 +++ 6 files changed, 314 insertions(+), 1 deletion(-) create mode 100644 docs/api/edge-speech-tts.md create mode 100644 docs/api/microsoft-speech-tts.md create mode 100644 docs/api/openai-tts.md create mode 100644 src/react/AudioPlayer/index.zh-CN.md create mode 100644 src/react/AudioVisualizer/index.zh-CN.md diff --git a/docs/api/edge-speech-tts.md b/docs/api/edge-speech-tts.md new file mode 100644 index 0000000..4f8b751 --- /dev/null +++ b/docs/api/edge-speech-tts.md @@ -0,0 +1,91 @@ +--- +group: TTS +title: EdgeSpeechTTS +apiHeader: + pkg: '@lobehub/tts' +--- + +`EdgeSpeechTTS` is a class for text-to-speech conversion based on Edge Speech Service. + +This class supports converting text to speech and provides a set of methods to retrieve voice options and create speech synthesis requests. + +```ts +constructor(options: EdgeSpeechAPI & { locale?: string }): EdgeSpeechTTS +``` + +## Parameters + +- `options`: Object, optional. + - `serviceUrl`: String, specifies the URL of the Edge Speech Service. If provided, requests will be sent to this URL. 
+ - `locale`: String, specifies the voice locale to use. If provided, it will be used to filter the available voice list. + +## Examples + +```js +// index.js +import { EdgeSpeechTTS } from '@lobehub/tts'; +import { Buffer } from 'buffer'; +import fs from 'fs'; +import path from 'path'; + +// Instantiate EdgeSpeechTTS +const tts = new EdgeSpeechTTS({ locale: 'en-US' }); + +// Create speech synthesis request payload +const payload = { + input: 'This is a speech demonstration', + options: { + voice: 'en-US-GuyNeural', + }, +}; + +const speechFile = path.resolve('./speech.mp3'); + +// Call create method to synthesize speech +const response = await tts.create(payload); +const mp3Buffer = Buffer.from(await response.arrayBuffer()); + +fs.writeFileSync(speechFile, mp3Buffer); +``` + +Run with Bun: + +```shell +$ bun index.js +``` + +Run in Node.js: + +As the Node.js environment lacks the `WebSocket` instance, we need to polyfill WebSocket. This can be done by importing the ws package. + +```js +// Import at the top of the file +import WebSocket from 'ws'; + +global.WebSocket = WebSocket; +``` + +## Static Properties + +- `localeOptions`: Get all supported voice locale options. +- `voiceList`: List of all available voices. +- `voiceName`: Object containing all voice names. +- `createRequest`: Static method used to create speech synthesis requests. + +## Methods + +### `voiceOptions` + +Get the voice options for the current instance, based on the `locale` specified during instantiation. Returns an object containing the currently available voice options. + +### `createAudio(payload: EdgeSpeechPayload): Promise` + +Create speech synthesis using the given request payload. + +#### Parameters + +- `payload`: `EdgeSpeechPayload` type, containing the necessary information for the speech synthesis request. + +#### Return Value + +Returns a `Promise` that resolves to an `AudioBuffer` object containing the synthesized audio data. 
diff --git a/docs/api/index.zh-CN.md b/docs/api/index.zh-CN.md index ef9dd62..60baeb4 100644 --- a/docs/api/index.zh-CN.md +++ b/docs/api/index.zh-CN.md @@ -1,5 +1,14 @@ -# API Reference +--- +title: API Reference +nav: + title: API + order: 10 +--- + +# API 参考指南 ## TTS - [EdgeSpeechTTS](./edge-speech-tts.zh-CN.md) +- [MicrosoftSpeechTTS](microsoft-speech-tts.zh-CN.md) +- [OpenaiTTS](openai-tts.zh-CN.md) diff --git a/docs/api/microsoft-speech-tts.md b/docs/api/microsoft-speech-tts.md new file mode 100644 index 0000000..06f0907 --- /dev/null +++ b/docs/api/microsoft-speech-tts.md @@ -0,0 +1,103 @@ +--- +group: TTS +title: MicrosoftSpeechTTS +apiHeader: + pkg: '@lobehub/tts' +--- + +`MicrosoftSpeechTTS` is a class for text-to-speech using Microsoft Speech Services. + +This class supports converting text to speech and provides a series of methods to retrieve speech options and create speech synthesis requests. + +```ts +constructor(options: MicrosoftSpeechAPI & { locale?: string }): MicrosoftSpeechTTS +``` + +## Parameters + +- `options`: Object, optional. + - `backendUrl`: String, specifies the URL of Microsoft Speech Services. If provided, requests will be sent to this URL. + - `locale`: String, specifies the language region to use. If provided, it will be used to filter the available voices. 
+ +## Examples + +```js +// index.js +// index.js +import { MicrosoftSpeechTTS } from '@lobehub/tts'; + +// get MicrosoftSpeechTTS instance +const tts = new MicrosoftSpeechTTS({ locale: 'zh-CN' }); + +// create payload +const payload: MicrosoftSpeechPayload = { + input: 'this is a message', + options: { + voice: 'en-US-JacobNeural', + style: 'embarrassed', + }, +}; + +const speechFile = path.resolve('./speech.mp3'); + +// create speech +const response = await tts.create(payload); +const mp3Buffer = Buffer.from(await response.arrayBuffer()); + +fs.writeFileSync(speechFile, mp3Buffer); +``` + +Run with Bun: + +```shell +$ bun index.js +``` + +Run in Node.js: + +Because the Node.js environment lacks a `WebSocket` implementation, we need to polyfill WebSocket by importing the `ws` package. + +```js +// import at the top of the file +import WebSocket from 'ws'; + +global.WebSocket = WebSocket; +``` + +## Static Properties + +- `localeOptions`: Get all supported language region options. +- `voiceList`: List of all available voices. +- `voiceName`: Object containing all voice names. +- `styleList`: List of all available voice styles. +- `createRequest`: Static method for creating speech synthesis requests. + +## Methods + +### `voiceOptions` + +Get the voice options for the current instance, based on the `locale` specified during instantiation. Returns an object containing the currently available voice options. + +### `create(payload: MicrosoftSpeechPayload): Promise<Response>` + +Create speech synthesis using the given request payload. + +#### Parameters + +- `payload`: `MicrosoftSpeechPayload` type, containing the necessary information for the speech synthesis request. + +#### Return Value + +Returns a `Promise` that resolves to a `Response` object containing the synthesized speech data. + +### `createAudio(payload: MicrosoftSpeechPayload): Promise<AudioBuffer>` + +Create speech synthesis using the given request payload and convert it to an `AudioBuffer` object. 
+ +#### Parameters + +- `payload`: `MicrosoftSpeechPayload` type, containing the necessary information for the speech synthesis request. + +#### Return Value + +Returns a `Promise` that resolves to an `AudioBuffer` object containing the synthesized audio data. diff --git a/docs/api/openai-tts.md b/docs/api/openai-tts.md new file mode 100644 index 0000000..95c24bc --- /dev/null +++ b/docs/api/openai-tts.md @@ -0,0 +1,88 @@ +--- +group: TTS +title: OpenAITTS +apiHeader: + pkg: '@lobehub/tts' +--- + +`OpenAITTS` is a class for text-to-speech using the OpenAI voice service. + +This class supports converting text into speech and provides a set of methods for getting voice options and creating speech synthesis requests. + +```ts +constructor(options: OpenAITTSAPI): OpenAITTS +``` + +## Parameters + +- `options`: Object, optional. + - `OPENAI_PROXY_URL`: String, specifies the OpenAI proxy URL. If provided, requests will be sent to this URL. + - `OPENAI_API_KEY`: String, specifies the OpenAI API key. If provided, it will be used for authentication. + - `serviceUrl`: String, specifies the URL of the OpenAI voice service to use. If provided, it will be used for sending requests. 
+ +## Examples + +```js +// index.js +import { OpenAITTS } from '@lobehub/tts'; +import { Buffer } from 'buffer'; +import fs from 'fs'; +import path from 'path'; + +// Instantiate OpenAITTS +const tts = new OpenAITTS({ OPENAI_API_KEY: 'your-api-key' }); + +// Create speech synthesis request payload +const payload = { + input: 'This is a voice synthesis demo', + options: { + model: 'tts-1', + voice: 'alloy', + }, +}; + +const speechFile = path.resolve('./speech.mp3'); + +// Call create method to synthesize speech +const response = await tts.create(payload); +const mp3Buffer = Buffer.from(await response.arrayBuffer()); + +fs.writeFileSync(speechFile, mp3Buffer); +``` + +Run with Bun: + +```shell +$ bun index.js +``` + +Run in Node.js: + +```js +// Import at the top of the file +import WebSocket from 'ws'; + +global.WebSocket = WebSocket; +``` + +## Static Properties + +- `voiceList`: A list of all available voices. + +## Methods + +### `voiceOptions` + +Get the voice options for the current instance based on the `serviceUrl` specified during instantiation. Returns an object containing the currently available voice options. + +### `createAudio(payload: OpenAITTSPayload): Promise<AudioBuffer>` + +Create speech synthesis using the given request payload and convert it to an `AudioBuffer` object. + +#### Parameters + +- `payload`: `OpenAITTSPayload` type, contains the necessary information for the speech synthesis request. + +#### Returns + +Returns a `Promise` that resolves to an `AudioBuffer` object containing the synthesized audio data. 
diff --git a/src/react/AudioPlayer/index.zh-CN.md b/src/react/AudioPlayer/index.zh-CN.md new file mode 100644 index 0000000..9a8e2f6 --- /dev/null +++ b/src/react/AudioPlayer/index.zh-CN.md @@ -0,0 +1,11 @@ +--- +title: AudioPlayer +group: UI +nav: 组件 +apiHeader: + pkg: '@lobehub/tts/react' +--- + +## default + + diff --git a/src/react/AudioVisualizer/index.zh-CN.md b/src/react/AudioVisualizer/index.zh-CN.md new file mode 100644 index 0000000..5e47986 --- /dev/null +++ b/src/react/AudioVisualizer/index.zh-CN.md @@ -0,0 +1,11 @@ +--- +title: AudioVisualizer +group: UI +nav: 组件 +apiHeader: + pkg: '@lobehub/tts/react' +--- + +## default + +