Skip to content

Commit 458d971

Browse files
Fedir Zadniprovskyi (fedirz)
authored and committed
docs: add js example
Addresses #26
1 parent 08cf0b0 commit 458d971

File tree

1 file changed

+159
-0
lines changed

1 file changed

+159
-0
lines changed

examples/javascript/index.js

Lines changed: 159 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,159 @@
1+
/**
2+
* Example provided by https://github.com/Gan-Xing in https://github.com/fedirz/faster-whisper-server/issues/26
3+
*/
4+
import fs from 'fs';
5+
import WebSocket from 'ws';
6+
import fetch from 'node-fetch';
7+
import FormData from 'form-data';
8+
import path from 'path';
9+
import ffmpeg from 'fluent-ffmpeg';
10+
import dotenv from 'dotenv';
11+
12+
dotenv.config();
13+
14+
const ffmpegPath = process.env.FFMPEG_PATH || '/usr/bin/ffmpeg';
15+
ffmpeg.setFfmpegPath(ffmpegPath);
16+
17+
/**
18+
* Transcribe an audio file using the HTTP endpoint.
19+
* Supported file types include wav, mp3, webm, and other types supported by the OpenAI API.
20+
* I have tested with these three types.
21+
*
22+
* @param {string} filePath - Path to the audio file
23+
* @param {string} model - Model name
24+
* @param {string} language - Language code
25+
* @param {string} responseFormat - Response format
26+
* @param {string} temperature - Temperature setting
27+
*/
28+
async function transcribeFile(filePath, model, language, responseFormat, temperature) {
29+
const formData = new FormData();
30+
formData.append('file', fs.createReadStream(filePath));
31+
formData.append('model', model);
32+
formData.append('language', language);
33+
formData.append('response_format', responseFormat);
34+
formData.append('temperature', temperature);
35+
36+
const response = await fetch(`${process.env.TRANSCRIPTION_API_BASE_URL}/v1/audio/transcriptions`, {
37+
method: 'POST',
38+
body: formData,
39+
});
40+
41+
const transcription = await response.json();
42+
console.log('Transcription Response:', transcription);
43+
}
44+
45+
/**
46+
* Translate an audio file using the HTTP endpoint.
47+
* Only English is supported for translation.
48+
* Currently, I am using GLM-4-9b-int8 to translate various voices.
49+
* I am not sure if the author can add an endpoint for custom API+Key translation.
50+
* I plan to package my frontend, fast-whisper-server, and vllm+glm-4-9b-int8 into one Docker container for unified deployment.
51+
*
52+
* @param {string} filePath - Path to the audio file
53+
* @param {string} model - Model name
54+
* @param {string} responseFormat - Response format
55+
* @param {string} temperature - Temperature setting
56+
*/
57+
async function translateFile(filePath, model, responseFormat, temperature) {
58+
const formData = new FormData();
59+
formData.append('file', fs.createReadStream(filePath));
60+
formData.append('model', model);
61+
formData.append('response_format', responseFormat);
62+
formData.append('temperature', temperature);
63+
64+
const response = await fetch(`${process.env.TRANSLATION_API_BASE_URL}/v1/audio/translations`, {
65+
method: 'POST',
66+
body: formData,
67+
});
68+
69+
const translation = await response.json();
70+
console.log('Translation Response:', translation);
71+
}
72+
73+
/**
74+
* Send audio data over WebSocket for transcription.
75+
* Currently, the supported file type for transcription is PCM.
76+
* I am not sure if other types are supported.
77+
*
78+
* @param {string} filePath - Path to the audio file
79+
* @param {string} model - Model name
80+
* @param {string} language - Language code
81+
* @param {string} responseFormat - Response format
82+
* @param {string} temperature - Temperature setting
83+
*/
84+
async function sendAudioOverWebSocket(filePath, model, language, responseFormat, temperature) {
85+
const wsUrl = `ws://100.105.162.69:8000/v1/audio/transcriptions?model=${encodeURIComponent(model)}&language=${encodeURIComponent(language)}&response_format=${encodeURIComponent(responseFormat)}&temperature=${encodeURIComponent(temperature)}`;
86+
const ws = new WebSocket(wsUrl);
87+
88+
ws.on('open', async () => {
89+
const audioBuffer = fs.readFileSync(filePath);
90+
ws.send(audioBuffer);
91+
});
92+
93+
ws.on('message', (message) => {
94+
const response = JSON.parse(message);
95+
console.log('WebSocket Response:', response);
96+
});
97+
98+
ws.on('close', () => {
99+
console.log('WebSocket connection closed');
100+
});
101+
102+
ws.on('error', (error) => {
103+
console.error('WebSocket error:', error);
104+
});
105+
}
106+
107+
/**
108+
* Convert audio file to PCM format.
109+
*
110+
* @param {string} filePath - Path to the audio file
111+
* @returns {string} - Path to the converted PCM file
112+
*/
113+
async function convertToPcm(filePath) {
114+
const pcmFilePath = filePath.replace(path.extname(filePath), '.pcm');
115+
116+
await new Promise((resolve, reject) => {
117+
ffmpeg(filePath)
118+
.audioChannels(1)
119+
.audioFrequency(16000)
120+
.audioCodec('pcm_s16le')
121+
.toFormat('s16le')
122+
.on('end', () => {
123+
console.log(`Audio file successfully converted to PCM: ${pcmFilePath}`);
124+
resolve(pcmFilePath);
125+
})
126+
.on('error', (error) => {
127+
console.error(`Error converting audio to PCM: ${error.message}`);
128+
reject(error);
129+
})
130+
.save(pcmFilePath);
131+
});
132+
133+
return pcmFilePath;
134+
}
135+
136+
async function main() {
137+
const model = 'Systran/faster-whisper-large-v3';
138+
const language = 'en';
139+
const responseFormat = 'json';
140+
const temperature = '0';
141+
const filePath = './path/to/your/audio.webm'; // Replace with the actual file path
142+
143+
// Convert the audio file to PCM format
144+
const pcmFilePath = await convertToPcm(filePath);
145+
146+
// Transcribe the audio file using the HTTP endpoint
147+
await transcribeFile(pcmFilePath, model, language, responseFormat, temperature);
148+
149+
// Translate the audio file using the HTTP endpoint
150+
await translateFile(pcmFilePath, model, responseFormat, temperature);
151+
152+
// Transcribe the audio file using the WebSocket endpoint
153+
await sendAudioOverWebSocket(pcmFilePath, model, language, responseFormat, temperature);
154+
}
155+
156+
// Make sure to use ffmpeg version 7 or above. The default apt-get install only installs version 4.x. Also, Ubuntu 22.04 or above is required to support version 7.x.
157+
main().catch(console.error);
158+
159+
// Project URL: https://github.com/Gan-Xing/whisper

0 commit comments

Comments
 (0)