Commit 3dfcf6b

✨ feat: Add openai tts
1 parent a115c3b commit 3dfcf6b

30 files changed: +301 −36 lines changed

.eslintignore

Lines changed: 0 additions & 1 deletion
@@ -24,7 +24,6 @@ __test__
 # production
 dist
 es
-lib
 logs

 # misc

.prettierignore

Lines changed: 0 additions & 1 deletion
@@ -35,7 +35,6 @@ __snapshots__
 # production
 dist
 es
-lib
 logs

 # umi

api/index.ts

Lines changed: 2 additions & 2 deletions
@@ -1,8 +1,8 @@
 import qs from 'query-string';

 import cors from '../lib/cors';
+import { fetchMicrosoftSpeech } from '../lib/fetchMicrosoftSpeech';
 import { SsmlOptions } from '../lib/genSSML';
-import { postMicrosoftSpeech } from '../lib/postMicrosoftSpeech';

 export const config = {
   runtime: 'edge',
@@ -11,7 +11,7 @@ export const config = {
 export default async (req: Request) => {
   const { text, ...options }: SsmlOptions & { text: string } = qs.parseUrl(req.url).query as any;

-  const res = await fetch(...postMicrosoftSpeech(text, options));
+  const res = await fetchMicrosoftSpeech(text, options);

   const newResponse = new Response(res.body, res);


lib/postMicrosoftSpeech.ts renamed to lib/fetchMicrosoftSpeech.ts

Lines changed: 10 additions & 12 deletions
@@ -5,7 +5,7 @@ import { type SsmlOptions, genSSML } from './genSSML';
 const API =
   'https://southeastasia.api.speech.microsoft.com/accfreetrial/texttospeech/acc/v3.0-beta1/vcg/speak';

-export const postMicrosoftSpeech = (text: string, options: SsmlOptions): [any, any] => {
+export const fetchMicrosoftSpeech = async (text: string, options: SsmlOptions) => {
   const data = JSON.stringify({
     offsetInPlainText: 0,
     properties: {
@@ -15,7 +15,7 @@ export const postMicrosoftSpeech = (text: string, options: SsmlOptions): [any, any] => {
     ttsAudioFormat: 'audio-24khz-160kbitrate-mono-mp3',
   });

-  const DEFAULT_HEADERS = {
+  const DEFAULT_HEADERS = new Headers({
     'accept': '*/*',
     'accept-language': 'zh-CN,zh;q=0.9',
     'authority': 'southeastasia.api.speech.microsoft.com',
@@ -30,15 +30,13 @@ export const postMicrosoftSpeech = (text: string, options: SsmlOptions): [any, any] => {
     'sec-fetch-site': 'same-site',
     'user-agent':
       'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36',
-  };
+  });

-  return [
-    API,
-    {
-      body: data,
-      headers: DEFAULT_HEADERS,
-      method: 'POST',
-      responseType: 'arraybuffer',
-    },
-  ];
+  return await fetch(API, {
+    body: data,
+    headers: DEFAULT_HEADERS,
+    method: 'POST',
+    // @ts-ignore
+    responseType: 'arraybuffer',
+  });
 };
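
The rename here reflects a behavioral change: the old `postMicrosoftSpeech` returned a `[url, init]` tuple that callers spread into `fetch(...)`, while the new `fetchMicrosoftSpeech` performs the request itself and returns a standard `Response`, which is what lets `api/index.ts` above pipe `res.body` straight into a new `Response`. A minimal consumption sketch, not part of the commit (function name and error handling are illustrative):

```ts
// Illustrative sketch: consuming the refactored helper outside the edge route.
// Assumes a Fetch-capable runtime; `text` and `options` are placeholders.
import { fetchMicrosoftSpeech } from '../lib/fetchMicrosoftSpeech';
import type { SsmlOptions } from '../lib/genSSML';

const synthesize = async (text: string, options: SsmlOptions): Promise<ArrayBuffer> => {
  const res = await fetchMicrosoftSpeech(text, options);
  if (!res.ok) throw new Error(`Microsoft Speech request failed: ${res.status}`);
  // The payload is audio-24khz-160kbitrate-mono-mp3, per the request body above.
  return res.arrayBuffer();
};
```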

package.json

Lines changed: 2 additions & 1 deletion
@@ -27,7 +27,7 @@
     "docs:dev": "dumi dev",
     "doctor": "father doctor",
     "postinstall": "npm run setup",
-    "lint": "eslint \"{src,tests}/**/*.{js,jsx,ts,tsx}\" --fix",
+    "lint": "eslint \"{src,api,lib}/**/*.{js,jsx,ts,tsx}\" --fix",
     "lint:md": "remark . --quiet --frail --output",
     "prepare": "husky install",
     "prepublishOnly": "npm run build",
@@ -65,6 +65,7 @@
     "query-string": "^8",
     "ssml-document": "^1",
     "swr": "^2",
+    "url-join": "^5.0.0",
     "uuid": "^9"
   },
   "devDependencies": {

src/const/api.ts

Lines changed: 10 additions & 0 deletions
@@ -0,0 +1,10 @@
+import urlJoin from 'url-join';
+
+export const MICROSOFT_SPEECH_PROXY_URL = process.env.MICROSOFT_SPEECH_PROXY_URL || '';
+export const AZURE_SPEECH_KEY = process.env.AZURE_SPEECH_KEY || '';
+export const AZURE_SPEECH_REGION = process.env.AZURE_SPEECH_REGION || '';
+export const OPENAI_API_KEY = process.env.OPENAI_API_KEY || '';
+export const OPENAI_PROXY_URL = process.env.OPENAI_PROXY_URL || 'https://api.openai.com/v1';
+export const OPENAI_TTS_URL = (api?: string) => urlJoin(api || OPENAI_PROXY_URL, 'audio/speech');
+export const OPENAI_STT_URL = (api: string) =>
+  urlJoin(api || OPENAI_PROXY_URL, 'audio/transcriptions');
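
These constants centralize the environment-based configuration consumed by the services below, and `url-join` (added to `package.json` above) handles slash differences when a custom proxy is supplied. One small asymmetry: `OPENAI_STT_URL` declares `api` as required even though it still falls back to `OPENAI_PROXY_URL`, while `OPENAI_TTS_URL` marks it optional. A quick sketch of the composition (the proxy URL is a made-up example):

```ts
// Illustrative only: how the URL helpers compose endpoints with url-join.
import urlJoin from 'url-join';

const OPENAI_PROXY_URL = process.env.OPENAI_PROXY_URL || 'https://api.openai.com/v1';
const OPENAI_TTS_URL = (api?: string) => urlJoin(api || OPENAI_PROXY_URL, 'audio/speech');

OPENAI_TTS_URL();                               // 'https://api.openai.com/v1/audio/speech'
OPENAI_TTS_URL('https://my-proxy.example/v1/'); // 'https://my-proxy.example/v1/audio/speech' (trailing slash handled)
```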

src/data/openaiVoiceList.json

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+["alloy", "echo", "fable", "onyx", "nova", "shimmer"]

src/index.ts

Lines changed: 7 additions & 0 deletions
@@ -1,11 +1,18 @@
+export { fetchAzureSpeech } from './services/fetchAzureSpeech';
+export { fetchEdgeSpeech } from './services/fetchEdgeSpeech';
+export { fetchMicrosoftSpeech } from './services/fetchMicrosoftSpeech';
+export { fetchOpenaiSTT } from './services/fetchOpenaiSTT';
+export { fetchOpenaiTTS } from './services/fetchOpenaiTTS';
 export { useAzureSpeech } from './useAzureSpeech';
 export { useEdgeSpeech } from './useEdgeSpeech';
 export { useMicrosoftSpeech } from './useMicrosoftSpeech';
+export { useOpenaiTTS } from './useOpenaiTTS';
 export { usePersistedSpeechRecognition } from './useSpeechRecognition/usePersistedSpeechRecognition';
 export { useSpeechRecognition } from './useSpeechRecognition/useSpeechRecognition';
 export { useSpeechSynthes } from './useSpeechSynthes';
 export {
   getAzureVoiceList,
   getEdgeVoiceList,
+  getOpenaiVoiceList,
   getSpeechSynthesVoiceList,
 } from './utils/getVoiceList';
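
With these exports the low-level `fetch*` services become part of the public surface alongside the hooks. A hypothetical consumer-side import (the package name is an assumption, and `getOpenaiVoiceList` is presumably backed by the `openaiVoiceList.json` data above):

```ts
// Hypothetical usage; the '@lobehub/tts' package name is assumed, not confirmed by this diff.
import { fetchOpenaiTTS, getOpenaiVoiceList, useOpenaiTTS } from '@lobehub/tts';
```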

src/services/postAzureSpeech.ts renamed to src/services/fetchAzureSpeech.ts

Lines changed: 5 additions & 3 deletions
@@ -8,6 +8,8 @@ import {
   SpeechSynthesizer,
 } from 'microsoft-cognitiveservices-speech-sdk';

+import { AZURE_SPEECH_KEY, AZURE_SPEECH_REGION } from '@/const/api';
+
 import { type SsmlOptions, genSSML } from '../utils/genSSML';

 export interface AzureSpeechOptions extends SsmlOptions {
@@ -18,12 +20,12 @@ export interface AzureSpeechOptions extends SsmlOptions {
 }

 // 纯文本生成语音
-export const postAzureSpeech = async (
+export const fetchAzureSpeech = async (
   text: string,
   { api, ...options }: AzureSpeechOptions,
 ): Promise<AudioBufferSourceNode> => {
-  const key = api.key || process.env.AZURE_SPEECH_KEY || '';
-  const region = api.key || process.env.AZURE_SPEECH_REGION || '';
+  const key = api.key || AZURE_SPEECH_KEY;
+  const region = api.key || AZURE_SPEECH_REGION;
   const speechConfig = SpeechConfig.fromSubscription(key, region);
   speechConfig.setProperty(PropertyId.SpeechServiceResponse_RequestSentenceBoundary, 'true');
   speechConfig.speechSynthesisOutputFormat = SpeechSynthesisOutputFormat.Webm24Khz16BitMonoOpus;

src/services/postEdgeSpeech.ts renamed to src/services/fetchEdgeSpeech.ts

Lines changed: 1 addition & 1 deletion
@@ -8,7 +8,7 @@ import { getHeadersAndData } from '../utils/getHeadersAndData';
 const API = 'wss://speech.platform.bing.com/consumer/speech/synthesize/readaloud/edge/v1';
 const TOKEN = '6A5AA1D4EAFF4E9FB37E23D68491D6F4';

-export const postEdgeSpeech = async (text: string, options: SsmlOptions) => {
+export const fetchEdgeSpeech = async (text: string, options: SsmlOptions) => {
   const connectId = uuidv4().replaceAll('-', '');
   const date = new Date().toString();
   const audioContext = new AudioContext();

src/services/postMicrosoftSpeech.ts renamed to src/services/fetchMicrosoftSpeech.ts

Lines changed: 8 additions & 2 deletions
@@ -1,22 +1,28 @@
 import qs from 'query-string';

+import { MICROSOFT_SPEECH_PROXY_URL } from '@/const/api';
+
 import { type SsmlOptions } from '../utils/genSSML';

 export interface MicrosoftSpeechOptions extends SsmlOptions {
   api?: string;
 }

-export const postMicrosoftSpeech = async (
+export const fetchMicrosoftSpeech = async (
   text: string,
   { api, ...options }: MicrosoftSpeechOptions,
 ): Promise<AudioBufferSourceNode> => {
   const response: Response = await fetch(
     qs.stringifyUrl({
       query: { text, ...options },
-      url: api || process.env.MICROSOFT_SPEECH_PROXY_URL || '',
+      url: api || MICROSOFT_SPEECH_PROXY_URL,
     }),
   );

+  if (!response.ok) {
+    throw new Error('Network response was not ok');
+  }
+
   const audioData = await response.arrayBuffer();
   const audioContext = new AudioContext();
   const audioBufferSource = audioContext.createBufferSource();
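
Besides the rename, the service now reads its default endpoint from `@/const/api` and fails fast on non-2xx responses. A usage sketch, not part of the commit; the proxy URL comes from the docs below, and the voice name and option shape are assumptions about `SsmlOptions`:

```ts
// Illustrative usage of the client-side Microsoft Speech service.
import { fetchMicrosoftSpeech } from './fetchMicrosoftSpeech';

const play = async () => {
  const source = await fetchMicrosoftSpeech('这是一段使用 Microsoft Speech 的语音演示', {
    api: 'https://lobe-tts-preview.vercel.app/api/index', // deployed edge proxy (see index.md below)
    name: 'zh-CN-XiaoxiaoNeural', // assumed voice id from the Edge voice list
  });
  // The service already connected the node to the AudioContext destination.
  source.start();
};
```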

src/services/fetchOpenaiSTT.ts

Lines changed: 39 additions & 0 deletions
@@ -0,0 +1,39 @@
+import { v4 as uuidv4 } from 'uuid';
+
+import { OPENAI_API_KEY, OPENAI_STT_URL } from '@/const/api';
+
+export interface OpenaiTtsOptions {
+  api: {
+    key: string;
+    proxy: string;
+  };
+  model?: 'whisper-1';
+}
+
+// 纯文本生成语音
+export const fetchOpenaiSTT = async (
+  speech: Blob,
+  { api, model = 'whisper-1' }: OpenaiTtsOptions,
+): Promise<string> => {
+  const key = api.key || OPENAI_API_KEY;
+  const url = OPENAI_STT_URL(api.proxy);
+
+  const headers = new Headers({
+    'Authorization': `Bearer ${key}`,
+    'Content-Type': 'multipart/form-data',
+  });
+
+  const body = new FormData();
+  body.append('file', speech, `${uuidv4()}.webm`);
+  body.append('model', model);
+
+  const response: Response = await fetch(url, { body, headers, method: 'POST' });
+
+  if (!response.ok) {
+    throw new Error('Network response was not ok');
+  }
+
+  const json = await response.json();
+
+  return json?.text;
+};
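
Despite the reused `OpenaiTtsOptions` name and the copied Chinese comment ("generate speech from plain text"), this service does the reverse: it posts a recorded `Blob` to Whisper and returns the transcription. One caveat for anyone adapting it: when a `FormData` body is used, browsers normally generate the `multipart/form-data` boundary themselves, so hard-coding the `Content-Type` header can drop that boundary. A usage sketch with a hypothetical recording blob:

```ts
// Illustrative usage: transcribe a recorded webm Blob with Whisper.
import { fetchOpenaiSTT } from './fetchOpenaiSTT';

const transcribe = async (recording: Blob) => {
  const text = await fetchOpenaiSTT(recording, {
    // Placeholder key; an empty proxy falls back to OPENAI_PROXY_URL via OPENAI_STT_URL.
    api: { key: 'sk-***', proxy: '' },
    model: 'whisper-1',
  });
  console.log(text);
};
```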

src/services/fetchOpenaiTTS.ts

Lines changed: 47 additions & 0 deletions
@@ -0,0 +1,47 @@
+import { OPENAI_API_KEY, OPENAI_TTS_URL } from '@/const/api';
+
+import { type SsmlOptions } from '../utils/genSSML';
+
+export type OpenaiVoice = 'alloy' | 'echo' | 'fable' | 'onyx' | 'nova' | 'shimmer';
+
+export interface OpenaiTtsOptions extends SsmlOptions {
+  api: {
+    key: string;
+    proxy: string;
+  };
+  model?: 'tts-1' | 'tts-1-hd';
+  name: OpenaiVoice;
+}
+
+// 纯文本生成语音
+export const fetchOpenaiTTS = async (
+  text: string,
+  { api, model = 'tts-1', ...options }: OpenaiTtsOptions,
+): Promise<AudioBufferSourceNode> => {
+  const key = api.key || OPENAI_API_KEY;
+  const url = OPENAI_TTS_URL(api.proxy);
+
+  const headers = new Headers({
+    'Authorization': `Bearer ${key}`,
+    'Content-Type': 'application/json',
+  });
+
+  const body = JSON.stringify({
+    input: text,
+    model,
+    voice: options.name,
+  });
+
+  const response: Response = await fetch(url, { body, headers, method: 'POST' });
+
+  if (!response.ok) {
+    throw new Error('Network response was not ok');
+  }
+
+  const audioData = await response.arrayBuffer();
+  const audioContext = new AudioContext();
+  const audioBufferSource = audioContext.createBufferSource();
+  audioBufferSource.buffer = await audioContext.decodeAudioData(audioData);
+  audioBufferSource.connect(audioContext.destination);
+  return audioBufferSource;
+};
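
This is the core of the commit: plain text goes to OpenAI's `audio/speech` endpoint, and the audio response is decoded into an `AudioBufferSourceNode` that is already wired to the default `AudioContext` destination, mirroring the other services. A usage sketch, assuming the remaining `SsmlOptions` fields are optional (key and text are placeholders):

```ts
// Illustrative usage: synthesize speech with OpenAI TTS and start playback.
import { fetchOpenaiTTS } from './fetchOpenaiTTS';

const speak = async () => {
  const source = await fetchOpenaiTTS('Hello from lobe-tts', {
    api: { key: 'sk-***', proxy: '' }, // empty proxy falls back to OPENAI_PROXY_URL
    model: 'tts-1',
    name: 'alloy',
  });
  source.start(); // the node is already connected to the AudioContext destination
};
```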

src/useAzureSpeech/index.md

Lines changed: 1 addition & 1 deletion
@@ -8,4 +8,4 @@ title: useAzureSpeech

 - ENV: `AZURE_SPEECH_KEY` `AZURE_SPEECH_REGION`

-<code src="./demos/AzureSpeech.tsx" nopadding></code>
+<code src="./demos/index.tsx" nopadding></code>

src/useAzureSpeech/index.ts

Lines changed: 2 additions & 2 deletions
@@ -1,7 +1,7 @@
 import { useState } from 'react';
 import useSWR from 'swr';

-import { AzureSpeechOptions, postAzureSpeech } from '../services/postAzureSpeech';
+import { AzureSpeechOptions, fetchAzureSpeech } from '../services/fetchAzureSpeech';

 export const useAzureSpeech = (defaultText: string, options: AzureSpeechOptions) => {
   const [data, setDate] = useState<AudioBufferSourceNode>();
@@ -11,7 +11,7 @@ export const useAzureSpeech = (defaultText: string, options: AzureSpeechOptions) => {

   const { isLoading } = useSWR(
     shouldFetch ? [options.name, text].join('-') : null,
-    () => postAzureSpeech(text, options),
+    () => fetchAzureSpeech(text, options),
     {
       onError: () => setShouldFetch(false),
       onSuccess: (audioBufferSource) => {

src/useEdgeSpeech/index.md

Lines changed: 1 addition & 1 deletion
@@ -6,4 +6,4 @@ title: useEdgeSpeech

 ## hooks

-<code src="./demos/EdgeSpeech.tsx" nopadding></code>
+<code src="./demos/index.tsx" nopadding></code>

src/useEdgeSpeech/index.ts

Lines changed: 2 additions & 2 deletions
@@ -1,7 +1,7 @@
 import { useState } from 'react';
 import useSWR from 'swr';

-import { postEdgeSpeech } from '../services/postEdgeSpeech';
+import { fetchEdgeSpeech } from '../services/fetchEdgeSpeech';
 import { SsmlOptions } from '../utils/genSSML';

 export const useEdgeSpeech = (defaultText: string, options: SsmlOptions) => {
@@ -12,7 +12,7 @@ export const useEdgeSpeech = (defaultText: string, options: SsmlOptions) => {

   const { isLoading } = useSWR(
     shouldFetch ? [options.name, text].join('-') : null,
-    () => postEdgeSpeech(text, options),
+    () => fetchEdgeSpeech(text, options),
     {
       onError: () => setShouldFetch(false),
       onSuccess: (audioBufferSource) => {

src/useMicrosoftSpeech/demos/MicrosoftSpeech.tsx renamed to src/useMicrosoftSpeech/demos/index.tsx

Lines changed: 1 addition & 2 deletions
@@ -6,14 +6,13 @@ import { Flexbox } from 'react-layout-kit';

 const defaultText = '这是一段使用 Microsoft Speech 的语音演示';

-const API = 'https://lobe-tts-preview.vercel.app/api/index';
 export default () => {
   const store = useCreateStore();
   const options: any = useControls(
     {
       api: {
         label: 'MICROSOFT_SPEECH_PROXY_URL',
-        value: API,
+        value: '',
       },
       name: {
         options: getEdgeVoiceList(),

src/useMicrosoftSpeech/index.md

Lines changed: 1 addition & 1 deletion
@@ -11,4 +11,4 @@ title: useMicrosoftSpeech

 > ex: <https://lobe-tts-preview.vercel.app/api/index>

-<code src="./demos/MicrosoftSpeech.tsx" nopadding></code>
+<code src="./demos/index.tsx" nopadding></code>

src/useMicrosoftSpeech/index.ts

Lines changed: 5 additions & 2 deletions
@@ -1,7 +1,10 @@
 import { useState } from 'react';
 import useSWR from 'swr';

-import { type MicrosoftSpeechOptions, postMicrosoftSpeech } from '../services/postMicrosoftSpeech';
+import {
+  type MicrosoftSpeechOptions,
+  fetchMicrosoftSpeech,
+} from '../services/fetchMicrosoftSpeech';

 export const useMicrosoftSpeech = (defaultText: string, options: MicrosoftSpeechOptions) => {
   const [data, setDate] = useState<AudioBufferSourceNode>();
@@ -11,7 +14,7 @@ export const useMicrosoftSpeech = (defaultText: string, options: MicrosoftSpeechOptions) => {

   const { isLoading } = useSWR(
     shouldFetch ? [options.name, text].join('-') : null,
-    () => postMicrosoftSpeech(text, options),
+    () => fetchMicrosoftSpeech(text, options),
     {
       onError: () => setShouldFetch(false),
       onSuccess: (audioBufferSource) => {
