Skip to content

Commit

Permalink
Add tests, rework toFile piping, properly remove ids from queue
Browse files Browse the repository at this point in the history
  • Loading branch information
Migushthe2nd committed Feb 13, 2024
1 parent 6504dea commit 1a77502
Show file tree
Hide file tree
Showing 5 changed files with 179 additions and 60 deletions.
41 changes: 36 additions & 5 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,17 +9,29 @@
"module": "./dist/index",
"main": "./dist/index",
"scripts": {
"dev": "yarn run build && ts-node test/test.ts",
"preinstall": "npx only-allow pnpm",
"dev": "yarn run build && ts-node src/test/test.ts",
"build": "tsc",
"prepublishOnly": "yarn run build",
"publish": "yarn publish --access=public"
"publish": "yarn publish --access=public",
"test": "jest",
"test:watch": "jest --watch",
"test:cov": "jest --coverage",
"test:debug": "node --inspect-brk -r tsconfig-paths/register -r ts-node/register node_modules/.bin/jest --runInBand",
"test:e2e": "jest --config src/test/jest-e2e.json"
},
"engines": {
"node": ">=16.0.0"
},
"devDependencies": {
"@types/node": "^15.12.1",
"@types/jest": "^29.5.12",
"@types/node": "^20.11.17",
"@types/ws": "^8.5.10",
"jest": "^29.7.0",
"ts-jest": "^29.1.2",
"ts-node": "^10.9.1",
"typedoc": "^0.22.2",
"typescript": "^4.3.2"
"typedoc": "^0.25.8",
"typescript": "^5.3.3"
},
"dependencies": {
"axios": "^1.5.0",
Expand All @@ -30,6 +42,25 @@
"stream-browserify": "^3.0.0",
"ws": "^8.14.1"
},
"jest": {
"moduleFileExtensions": [
"js",
"json",
"ts"
],
"rootDir": "src",
"testRegex": ".*\\.spec\\.ts$",
"transform": {
"^.+\\.(t|j)s$": "ts-jest"
},
"collectCoverageFrom": [
"**/*.(t|j)s"
],
"coverageDirectory": "../coverage",
"testEnvironment": "node",
"setupFilesAfterEnv": [],
"testTimeout": 15000
},
"files": [
"dist/"
]
Expand Down
100 changes: 100 additions & 0 deletions src/MsEdgeTTS.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
import "jest"
import {MsEdgeTTS} from "./MsEdgeTTS"
import {OUTPUT_FORMAT} from "./OUTPUT_FORMAT"
import {readFileSync, mkdirSync} from "fs"
import {existsSync, unlinkSync} from "node:fs"
import {tmpdir} from "os"
import {join} from "path"
import {randomBytes} from "crypto"

describe("MsEdgeTTS", () => {
let tts: MsEdgeTTS
let tmpPath: string

beforeAll(() => {
tmpPath = join(tmpdir(), randomBytes(16).toString("hex"))
mkdirSync(tmpPath)
console.log(tmpPath)
})

beforeEach(async () => {
tts = new MsEdgeTTS(null, true)
await tts.setMetadata("en-US-AriaNeural", OUTPUT_FORMAT.WEBM_24KHZ_16BIT_MONO_OPUS)
})

it("should write audio to file", async () => {
const filePath = await tts.toFile(join(tmpPath, "./example_audio.webm"), "Hi, how are you?")
console.log("Done!", filePath)

expect(filePath).toBeDefined()
expect(filePath).toMatch(/example_audio.webm/)
expect(Object.keys(tts["_streams"]).length).toBe(0)
// have content
expect(readFileSync(filePath).length).toBeGreaterThan(0)
})

it("should handle multiple streams simultaneously", async () => {
const promises = []
for (let i = 0; i < 100; i++) {
const p = new Promise((resolve, reject) => {
const s = tts.toStream("Hi, how are you?")
s.on("close", () => {
console.log(`Stream ${i} ended`)
resolve("done")
})
s.on("data", (data) => {
console.log(`Stream ${i} data`, data.length)
})
s.on("error", (err) => {
console.log(`Stream ${i} error`, err)
reject(err)
})
})
promises.push(p)
}

await expect(Promise.all(promises)).resolves.toBeDefined()
}, 60000)

it("should not write to file if no data", async () => {
// mock _pushAudioData to do nothing, asif no data was received
tts["_pushAudioData"] = jest.fn()
await expect(() => tts.toFile(join(tmpPath, `./example_audio2.webm`), ""))
.rejects.toMatch("No audio")

expect(Object.keys(tts["_streams"]).length).toBe(0)
expect(existsSync("./example_audio2.webm")).toBe(false)
})

// it("should require setMetadata", async () => {
// tts["_ws"].close()
// tts["_ws"] = null
// tts.toFile(join(tmpPath, `./example_audio3.webm`), "Hi, how are you?")
// await expect(() => tts.toFile(join(tmpPath, `./example_audio3.webm`), "Hi, how are you?"))
// .rejects.toThrow("Speech synthesis not configured yet.")
// })

it("should return different audio when a pitch is applied", async () => {
const filePath = await tts.toFile(join(tmpPath, `./example_audio4.webm`), "Hi, how are you?", {pitch: "+10Hz"})
console.log("Done!", filePath)

expect(filePath).toBeDefined()
expect(filePath).toMatch(/example_audio4.webm/)
expect(Object.keys(tts["_streams"]).length).toBe(0)
// have content
expect(readFileSync(filePath).length).toBeGreaterThan(0)
})

afterEach(() => {
tts.close()
expect(Object.keys(tts["_streams"]).length).toBe(0)
})

afterAll(() => {
tts.close()
// if (existsSync(tmpPath)) {
// unlinkSync(tmpPath)
// }
});

})
81 changes: 43 additions & 38 deletions src/MsEdgeTTS.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import axios from "axios";
import * as stream from "stream";
import WebSocket from "isomorphic-ws";
import {Buffer} from "buffer";
import {randomBytes} from "crypto";
import {OUTPUT_FORMAT} from "./OUTPUT_FORMAT";
import {Readable} from "stream";
import * as fs from "fs";
import {Agent} from "http";
import {PITCH} from "./PITCH";
Expand Down Expand Up @@ -54,7 +54,7 @@ export class MsEdgeTTS {
private _voice;
private _voiceLocale;
private _outputFormat;
private _queue: { [key: string]: stream.Readable } = {};
private _streams: { [key: string]: Readable } = {};
private _startTime = 0;
private readonly _agent: Agent;

Expand Down Expand Up @@ -122,23 +122,19 @@ export class MsEdgeTTS {
// start of turn, ignore
} else if (message.includes("Path:turn.end")) {
// end of turn, close stream
this._queue[requestId].push(null);
this._streams[requestId].push(null);
} else if (message.includes("Path:response")) {
// context response, ignore
} else if (message.includes("Path:audio")) {
if (m.data instanceof ArrayBuffer) {
this.cacheAudioData(buffer, requestId)
} else {
this._log("UNKNOWN MESSAGE", message);
}
} else if (message.includes("Path:audio") && m.data instanceof ArrayBuffer) {
this._pushAudioData(buffer, requestId)
} else {
this._log("UNKNOWN MESSAGE", message);
}
}
this._ws.onclose = () => {
this._log("disconnected after:", (Date.now() - this._startTime) / 1000, "seconds")
for (const requestId in this._queue) {
this._queue[requestId].push(null);
for (const requestId in this._streams) {
this._streams[requestId].push(null);
}
}
this._ws.onerror = function (error) {
Expand All @@ -147,11 +143,11 @@ export class MsEdgeTTS {
});
}

private cacheAudioData(m: Buffer, requestId: string) {
const index = m.indexOf(MsEdgeTTS.BINARY_DELIM) + MsEdgeTTS.BINARY_DELIM.length;
const audioData = m.slice(index, m.length);
this._queue[requestId].push(audioData);
this._log("receive audio chunk size: ", audioData?.length)
private _pushAudioData(audioBuffer: Buffer, requestId: string) {
const audioStartIndex = audioBuffer.indexOf(MsEdgeTTS.BINARY_DELIM) + MsEdgeTTS.BINARY_DELIM.length;
const audioData = audioBuffer.subarray(audioStartIndex);
this._streams[requestId].push(audioData);
this._log("received audio chunk, size: ", audioData?.length)
}

private _SSMLTemplate(input: string, options: ProsodyOptions = {}): string {
Expand Down Expand Up @@ -237,14 +233,15 @@ export class MsEdgeTTS {
}

/**
* Writes raw audio synthesised from text in real-time to a {@link stream.Readable}. Uses a basic {@link _SSMLTemplate SML template}.
* Writes raw audio synthesised from text in real-time to a {@link Readable}. Uses a basic {@link _SSMLTemplate SML template}.
*
* @param input the text to synthesise. Can include SSML elements.
* @param options (optional) {@link ProsodyOptions}
* @returns {stream.Readable} - a `stream.Readable` with the audio data
* @returns {Readable} - a `stream.Readable` with the audio data
*/
toStream(input, options?: ProsodyOptions): stream.Readable {
return this._rawSSMLRequest(this._SSMLTemplate(input, options));
toStream(input: string, options?: ProsodyOptions): Readable {
const {stream} = this._rawSSMLRequest(this._SSMLTemplate(input, options));
return stream;
}

/**
Expand All @@ -259,49 +256,57 @@ export class MsEdgeTTS {
}

/**
* Writes raw audio synthesised from a request in real-time to a {@link stream.Readable}. Has no SSML template. Basic SSML should be provided in the request.
* Writes raw audio synthesised from a request in real-time to a {@link Readable}. Has no SSML template. Basic SSML should be provided in the request.
*
* @param requestSSML the SSML to send. SSML elements required in order to work.
* @returns {stream.Readable} - a `stream.Readable` with the audio data
* @returns {Readable} - a `stream.Readable` with the audio data
*/
rawToStream(requestSSML): stream.Readable {
return this._rawSSMLRequest(requestSSML);
rawToStream(requestSSML: string): Readable {
const {stream} = this._rawSSMLRequest(requestSSML);
return stream;
}

private _rawSSMLRequestToFile(path: string, requestSSML: string): Promise<string> {
return new Promise(async (resolve, reject) => {
const stream = this._rawSSMLRequest(requestSSML);
const chunks = [];
const {stream, requestId} = this._rawSSMLRequest(requestSSML);

stream.on("data", (data) => chunks.push(data));
const writableFile = stream.pipe(fs.createWriteStream(path));

stream.once("close", async () => {
if (Object.keys(this._queue).length > 0 && chunks.length === 0) {
writableFile.once("close", async () => {
if (writableFile.bytesWritten > 0) {
resolve(path);
} else {
fs.unlinkSync(path);
reject("No audio data received");
}
const output = fs.createWriteStream(path);
while (chunks.length > 0) {
await new Promise((resolve) => output.write(chunks.shift(), resolve));
}
resolve(path);
});

stream.on("error", (e) => {
stream.destroy();
reject(e);
});
});
}

private _rawSSMLRequest(requestSSML): stream.Readable {
private _rawSSMLRequest(requestSSML: string): {stream: Readable, requestId: string} {
this._metadataCheck();

const requestId = randomBytes(16).toString("hex");
const request = `X-RequestId:${requestId}\r\nContent-Type:application/ssml+xml\r\nPath:ssml\r\n\r\n
` + requestSSML.trim();
// https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/speech-synthesis-markup
const readable = new stream.Readable({
const self = this;
const stream = new Readable({
read() {
},
destroy(error: Error | null, callback: (error: (Error | null)) => void) {
delete self._streams[requestId];
callback(error);
},
});
this._queue[requestId] = readable;
this._streams[requestId] = stream;
this._send(request).then();
return readable;
return {stream, requestId};
}

}
8 changes: 0 additions & 8 deletions test/test-pitch.ts

This file was deleted.

9 changes: 0 additions & 9 deletions test/test.ts

This file was deleted.

0 comments on commit 1a77502

Please sign in to comment.