/**
 * Tests for sentence-level streaming in the voice pipeline:
 *  - `splitSentences`: splitting text into sentence units.
 *  - `synthesizeSentenceStream`: yields one audio chunk per sentence.
 *  - `synthesizeMultiLang`: synthesizes the same text for several voices.
 *
 * `node:child_process` and `node:fs/promises` are mocked, so no external
 * process is started and no file is written while these tests run.
 */
import { describe, it, expect, vi, beforeEach } from "vitest";

// Mock ffmpeg-static BEFORE any imports
vi.mock("ffmpeg-static", () => ({ default: "/mock/ffmpeg" }));

// Mock child_process
vi.mock("node:child_process", () => ({
  execFile: vi.fn(),
  spawn: vi.fn(),
}));

// Mock fs/promises for temp file operations
vi.mock("node:fs/promises", () => ({
  writeFile: vi.fn().mockResolvedValue(undefined),
  unlink: vi.fn().mockResolvedValue(undefined),
}));

import { splitSentences, voicePipelineService } from "../services/voice-pipeline.js";
import { execFile as execFileCb } from "node:child_process";

const execFileMock = vi.mocked(execFileCb);

/** Shape of the chunks these tests expect from `synthesizeSentenceStream`. */
type SentenceChunk = { index: number; total: number; audio: Buffer };

/**
 * Makes the mocked `execFile` succeed immediately with `payload` as stdout.
 *
 * The parameters are typed `any` on purpose: the stub only cares about the
 * `(cmd, args, opts, callback)` call shape used by the code under test, not
 * about matching one specific `execFile` overload.
 *
 * @param payload Text placed in the stdout Buffer handed to the callback.
 */
function stubPiperOutput(payload: string): void {
  execFileMock.mockImplementation((_cmd: any, _args: any, _opts: any, callback: any) => {
    callback(null, Buffer.from(payload), "");
    return {} as any;
  });
}

/**
 * Drains an async stream of sentence chunks into an array, preserving order.
 *
 * @param stream Async iterable to consume.
 * @returns Every chunk the stream yielded, in yield order.
 */
async function collectChunks(stream: AsyncIterable<SentenceChunk>): Promise<SentenceChunk[]> {
  const chunks: SentenceChunk[] = [];
  for await (const chunk of stream) {
    chunks.push(chunk);
  }
  return chunks;
}

describe("sentence streaming", () => {
  beforeEach(() => {
    vi.clearAllMocks();
  });

  describe("splitSentences", () => {
    it("splits basic sentences on period, question mark, exclamation", () => {
      const result = splitSentences("Hello world. How are you? I am fine.");
      expect(result).toEqual(["Hello world.", "How are you?", "I am fine."]);
    });

    it("keeps abbreviation-style dots intact (Dr., D.C.)", () => {
      const result = splitSentences("Dr. Smith went to D.C. He liked it.");
      // Should split only at sentence boundaries, not abbreviations
      // The sentence "Dr. Smith went to D.C." should be one unit
      expect(result).toHaveLength(2);
      expect(result[0]).toContain("Dr. Smith");
      expect(result[1]).toBe("He liked it.");
    });

    it("returns single-sentence array when text has no sentence boundaries", () => {
      const result = splitSentences("Hello world");
      expect(result).toEqual(["Hello world"]);
    });

    it("filters empty strings", () => {
      // Repeated and trailing whitespace are the inputs most likely to leave
      // empty segments behind after splitting.
      const result = splitSentences("Hello.   World.  ");
      // Guard against a vacuous pass: `every` is true for an empty array.
      expect(result.length).toBeGreaterThan(0);
      expect(result.every((s) => s.length > 0)).toBe(true);
    });
  });

  describe("synthesizeSentenceStream", () => {
    it("yields one chunk per sentence", async () => {
      stubPiperOutput("wav-chunk");

      const svc = voicePipelineService();
      const text = "Hello world. How are you? I am fine.";

      const chunks = await collectChunks(svc.synthesizeSentenceStream(text));

      expect(chunks).toHaveLength(3);
      expect(chunks[0]).toMatchObject({ index: 0, total: 3 });
      expect(chunks[1]).toMatchObject({ index: 1, total: 3 });
      expect(chunks[2]).toMatchObject({ index: 2, total: 3 });
      chunks.forEach((c) => expect(Buffer.isBuffer(c.audio)).toBe(true));
    });

    it("yields single chunk for single-sentence text", async () => {
      stubPiperOutput("wav-data");

      const svc = voicePipelineService();
      const chunks = await collectChunks(svc.synthesizeSentenceStream("Hello world."));

      expect(chunks).toHaveLength(1);
      expect(chunks[0]).toMatchObject({ index: 0, total: 1 });
    });
  });

  describe("synthesizeMultiLang", () => {
    it("returns a Map with one entry per voiceId", async () => {
      stubPiperOutput("audio-data");

      const svc = voicePipelineService();
      const text = "Hello.";
      const voiceIds = ["en_US-lessac-medium", "da_DK-talesyntese-medium"];

      const result = await svc.synthesizeMultiLang(text, voiceIds);

      expect(result).toBeInstanceOf(Map);
      expect(result.size).toBe(2);
      expect(result.has("en_US-lessac-medium")).toBe(true);
      expect(result.has("da_DK-talesyntese-medium")).toBe(true);
      voiceIds.forEach((v) => expect(Buffer.isBuffer(result.get(v))).toBe(true));
    });

    it("calls piper in parallel (Promise.all semantics)", async () => {
      const requestedVoices: string[] = [];
      let inFlight = 0;
      let peakInFlight = 0;

      execFileMock.mockImplementation((_cmd: any, args: any, _opts: any, callback: any) => {
        const voiceId = args[1]; // second arg after '--model'
        requestedVoices.push(voiceId);

        inFlight += 1;
        peakInFlight = Math.max(peakInFlight, inFlight);

        // Complete on a later macrotask. A synchronous callback would finish
        // each call before the next one could start, which makes sequential
        // and parallel implementations indistinguishable.
        setImmediate(() => {
          inFlight -= 1;
          callback(null, Buffer.from("audio"), "");
        });
        return {} as any;
      });

      const svc = voicePipelineService();
      await svc.synthesizeMultiLang("Test.", ["voice-a", "voice-b"]);

      // Both voices should have been requested
      expect(requestedVoices).toContain("voice-a");
      expect(requestedVoices).toContain("voice-b");
      // ...and their processes must have overlapped rather than run one by one.
      expect(peakInFlight).toBeGreaterThan(1);
    });
  });
});