Add tests, rework toFile piping, properly remove ids from queue

Migushthe2nd · Feb 13, 2024 · 1a77502 · 1a77502
1 parent 6504dea
commit 1a77502
Show file tree

Hide file tree

Showing 5 changed files with 179 additions and 60 deletions.
diff --git a/package.json b/package.json
@@ -9,17 +9,29 @@
     "module": "./dist/index",
     "main": "./dist/index",
     "scripts": {
-        "dev": "yarn run build && ts-node test/test.ts",
+        "preinstall": "npx only-allow pnpm",
+        "dev": "yarn run build && ts-node src/test/test.ts",
         "build": "tsc",
         "prepublishOnly": "yarn run build",
-        "publish": "yarn publish --access=public"
+        "publish": "yarn publish --access=public",
+        "test": "jest",
+        "test:watch": "jest --watch",
+        "test:cov": "jest --coverage",
+        "test:debug": "node --inspect-brk -r tsconfig-paths/register -r ts-node/register node_modules/.bin/jest --runInBand",
+        "test:e2e": "jest --config src/test/jest-e2e.json"
+    },
+    "engines": {
+        "node": ">=16.0.0"
     },
     "devDependencies": {
-        "@types/node": "^15.12.1",
+        "@types/jest": "^29.5.12",
+        "@types/node": "^20.11.17",
         "@types/ws": "^8.5.10",
+        "jest": "^29.7.0",
+        "ts-jest": "^29.1.2",
         "ts-node": "^10.9.1",
-        "typedoc": "^0.22.2",
-        "typescript": "^4.3.2"
+        "typedoc": "^0.25.8",
+        "typescript": "^5.3.3"
     },
     "dependencies": {
         "axios": "^1.5.0",
@@ -30,6 +42,25 @@
         "stream-browserify": "^3.0.0",
         "ws": "^8.14.1"
     },
+    "jest": {
+        "moduleFileExtensions": [
+            "js",
+            "json",
+            "ts"
+        ],
+        "rootDir": "src",
+        "testRegex": ".*\\.spec\\.ts$",
+        "transform": {
+            "^.+\\.(t|j)s$": "ts-jest"
+        },
+        "collectCoverageFrom": [
+            "**/*.(t|j)s"
+        ],
+        "coverageDirectory": "../coverage",
+        "testEnvironment": "node",
+        "setupFilesAfterEnv": [],
+        "testTimeout": 15000
+    },
     "files": [
         "dist/"
     ]

diff --git a/src/MsEdgeTTS.spec.ts b/src/MsEdgeTTS.spec.ts
@@ -0,0 +1,100 @@
+import "jest"
+import {MsEdgeTTS} from "./MsEdgeTTS"
+import {OUTPUT_FORMAT} from "./OUTPUT_FORMAT"
+import {readFileSync, mkdirSync} from "fs"
+import {existsSync, unlinkSync} from "node:fs"
+import {tmpdir} from "os"
+import {join} from "path"
+import {randomBytes} from "crypto"
+
+describe("MsEdgeTTS", () => {
+    let tts: MsEdgeTTS
+    let tmpPath: string
+
+    beforeAll(() => {
+        tmpPath = join(tmpdir(), randomBytes(16).toString("hex"))
+        mkdirSync(tmpPath)
+        console.log(tmpPath)
+    })
+
+    beforeEach(async () => {
+        tts = new MsEdgeTTS(null, true)
+        await tts.setMetadata("en-US-AriaNeural", OUTPUT_FORMAT.WEBM_24KHZ_16BIT_MONO_OPUS)
+    })
+
+    it("should write audio to file", async () => {
+        const filePath = await tts.toFile(join(tmpPath, "./example_audio.webm"), "Hi, how are you?")
+        console.log("Done!", filePath)
+
+        expect(filePath).toBeDefined()
+        expect(filePath).toMatch(/example_audio.webm/)
+        expect(Object.keys(tts["_streams"]).length).toBe(0)
+        // have content
+        expect(readFileSync(filePath).length).toBeGreaterThan(0)
+    })
+
+    it("should handle multiple streams simultaneously", async () => {
+        const promises = []
+        for (let i = 0; i < 100; i++) {
+            const p = new Promise((resolve, reject) => {
+                const s = tts.toStream("Hi, how are you?")
+                s.on("close", () => {
+                    console.log(`Stream ${i} ended`)
+                    resolve("done")
+                })
+                s.on("data", (data) => {
+                    console.log(`Stream ${i} data`, data.length)
+                })
+                s.on("error", (err) => {
+                    console.log(`Stream ${i} error`, err)
+                    reject(err)
+                })
+            })
+            promises.push(p)
+        }
+
+        await expect(Promise.all(promises)).resolves.toBeDefined()
+    }, 60000)
+
+    it("should not write to file if no data", async () => {
+        // mock _pushAudioData to do nothing, asif no data was received
+        tts["_pushAudioData"] = jest.fn()
+        await expect(() => tts.toFile(join(tmpPath, `./example_audio2.webm`), ""))
+            .rejects.toMatch("No audio")
+
+        expect(Object.keys(tts["_streams"]).length).toBe(0)
+        expect(existsSync("./example_audio2.webm")).toBe(false)
+    })
+
+    // it("should require setMetadata", async () => {
+    //     tts["_ws"].close()
+    //     tts["_ws"] = null
+    //     tts.toFile(join(tmpPath, `./example_audio3.webm`), "Hi, how are you?")
+    //     await expect(() => tts.toFile(join(tmpPath, `./example_audio3.webm`), "Hi, how are you?"))
+    //         .rejects.toThrow("Speech synthesis not configured yet.")
+    // })
+
+    it("should return different audio when a pitch is applied", async () => {
+        const filePath = await tts.toFile(join(tmpPath, `./example_audio4.webm`), "Hi, how are you?", {pitch: "+10Hz"})
+        console.log("Done!", filePath)
+
+        expect(filePath).toBeDefined()
+        expect(filePath).toMatch(/example_audio4.webm/)
+        expect(Object.keys(tts["_streams"]).length).toBe(0)
+        // have content
+        expect(readFileSync(filePath).length).toBeGreaterThan(0)
+    })
+
+    afterEach(() => {
+        tts.close()
+        expect(Object.keys(tts["_streams"]).length).toBe(0)
+    })
+
+    afterAll(() => {
+        tts.close()
+        // if (existsSync(tmpPath)) {
+        //     unlinkSync(tmpPath)
+        // }
+    });
+
+})
diff --git a/src/MsEdgeTTS.ts b/src/MsEdgeTTS.ts
@@ -1,9 +1,9 @@
 import axios from "axios";
-import * as stream from "stream";
 import WebSocket from "isomorphic-ws";
 import {Buffer} from "buffer";
 import {randomBytes} from "crypto";
 import {OUTPUT_FORMAT} from "./OUTPUT_FORMAT";
+import {Readable} from "stream";
 import * as fs from "fs";
 import {Agent} from "http";
 import {PITCH} from "./PITCH";
@@ -54,7 +54,7 @@ export class MsEdgeTTS {
     private _voice;
     private _voiceLocale;
     private _outputFormat;
-    private _queue: { [key: string]: stream.Readable } = {};
+    private _streams: { [key: string]: Readable } = {};
     private _startTime = 0;
     private readonly _agent: Agent;
 
@@ -122,23 +122,19 @@ export class MsEdgeTTS {
                     // start of turn, ignore
                 } else if (message.includes("Path:turn.end")) {
                     // end of turn, close stream
-                    this._queue[requestId].push(null);
+                    this._streams[requestId].push(null);
                 } else if (message.includes("Path:response")) {
                     // context response, ignore
-                } else if (message.includes("Path:audio")) {
-                    if (m.data instanceof ArrayBuffer) {
-                        this.cacheAudioData(buffer, requestId)
-                    } else {
-                        this._log("UNKNOWN MESSAGE", message);
-                    }
+                } else if (message.includes("Path:audio") && m.data instanceof ArrayBuffer) {
+                    this._pushAudioData(buffer, requestId)
                 } else {
                     this._log("UNKNOWN MESSAGE", message);
                 }
             }
             this._ws.onclose = () => {
                 this._log("disconnected after:", (Date.now() - this._startTime) / 1000, "seconds")
-                for (const requestId in this._queue) {
-                    this._queue[requestId].push(null);
+                for (const requestId in this._streams) {
+                    this._streams[requestId].push(null);
                 }
             }
             this._ws.onerror = function (error) {
@@ -147,11 +143,11 @@ export class MsEdgeTTS {
         });
     }
 
-    private cacheAudioData(m: Buffer, requestId: string) {
-        const index = m.indexOf(MsEdgeTTS.BINARY_DELIM) + MsEdgeTTS.BINARY_DELIM.length;
-        const audioData = m.slice(index, m.length);
-        this._queue[requestId].push(audioData);
-        this._log("receive audio chunk size: ", audioData?.length)
+    private _pushAudioData(audioBuffer: Buffer, requestId: string) {
+        const audioStartIndex = audioBuffer.indexOf(MsEdgeTTS.BINARY_DELIM) + MsEdgeTTS.BINARY_DELIM.length;
+        const audioData = audioBuffer.subarray(audioStartIndex);
+        this._streams[requestId].push(audioData);
+        this._log("received audio chunk, size: ", audioData?.length)
     }
 
     private _SSMLTemplate(input: string, options: ProsodyOptions = {}): string {
@@ -237,14 +233,15 @@ export class MsEdgeTTS {
     }
 
     /**
-     * Writes raw audio synthesised from text in real-time to a {@link stream.Readable}. Uses a basic {@link _SSMLTemplate SML template}.
+     * Writes raw audio synthesised from text in real-time to a {@link Readable}. Uses a basic {@link _SSMLTemplate SML template}.
      *
      * @param input the text to synthesise. Can include SSML elements.
      * @param options (optional) {@link ProsodyOptions}
-     * @returns {stream.Readable} - a `stream.Readable` with the audio data
+     * @returns {Readable} - a `stream.Readable` with the audio data
      */
-    toStream(input, options?: ProsodyOptions): stream.Readable {
-        return this._rawSSMLRequest(this._SSMLTemplate(input, options));
+    toStream(input: string, options?: ProsodyOptions): Readable {
+        const {stream} = this._rawSSMLRequest(this._SSMLTemplate(input, options));
+        return stream;
     }
 
     /**
@@ -259,49 +256,57 @@ export class MsEdgeTTS {
     }
 
     /**
-     * Writes raw audio synthesised from a request in real-time to a {@link stream.Readable}. Has no SSML template. Basic SSML should be provided in the request.
+     * Writes raw audio synthesised from a request in real-time to a {@link Readable}. Has no SSML template. Basic SSML should be provided in the request.
      *
      * @param requestSSML the SSML to send. SSML elements required in order to work.
-     * @returns {stream.Readable} - a `stream.Readable` with the audio data
+     * @returns {Readable} - a `stream.Readable` with the audio data
      */
-    rawToStream(requestSSML): stream.Readable {
-        return this._rawSSMLRequest(requestSSML);
+    rawToStream(requestSSML: string): Readable {
+        const {stream} = this._rawSSMLRequest(requestSSML);
+        return stream;
     }
 
     private _rawSSMLRequestToFile(path: string, requestSSML: string): Promise<string> {
         return new Promise(async (resolve, reject) => {
-            const stream = this._rawSSMLRequest(requestSSML);
-            const chunks = [];
+            const {stream, requestId} = this._rawSSMLRequest(requestSSML);
 
-            stream.on("data", (data) => chunks.push(data));
+            const writableFile = stream.pipe(fs.createWriteStream(path));
 
-            stream.once("close", async () => {
-                if (Object.keys(this._queue).length > 0 && chunks.length === 0) {
+            writableFile.once("close", async () => {
+                if (writableFile.bytesWritten > 0) {
+                    resolve(path);
+                } else {
+                    fs.unlinkSync(path);
                     reject("No audio data received");
                 }
-                const output = fs.createWriteStream(path);
-                while (chunks.length > 0) {
-                    await new Promise((resolve) => output.write(chunks.shift(), resolve));
-                }
-                resolve(path);
+            });
+
+            stream.on("error", (e) => {
+                stream.destroy();
+                reject(e);
             });
         });
     }
 
-    private _rawSSMLRequest(requestSSML): stream.Readable {
+    private _rawSSMLRequest(requestSSML: string): {stream: Readable, requestId: string} {
         this._metadataCheck();
 
         const requestId = randomBytes(16).toString("hex");
         const request = `X-RequestId:${requestId}\r\nContent-Type:application/ssml+xml\r\nPath:ssml\r\n\r\n
                 ` + requestSSML.trim();
         // https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/speech-synthesis-markup
-        const readable = new stream.Readable({
+        const self = this;
+        const stream = new Readable({
             read() {
             },
+            destroy(error: Error | null, callback: (error: (Error | null)) => void) {
+                delete self._streams[requestId];
+                callback(error);
+            },
         });
-        this._queue[requestId] = readable;
+        this._streams[requestId] = stream;
         this._send(request).then();
-        return readable;
+        return {stream, requestId};
     }
 
 }
diff --git a/test/test-pitch.ts b/test/test-pitch.ts
diff --git a/test/test.ts b/test/test.ts