// ---------------------------------------------------------------------------
// Patch metadata (preserved from the original format-patch header)
//
// From bf757509ef9314c821b47a6a4ed3a2923e75fcb8 Mon Sep 17 00:00:00 2001
// From: Nexus Dev
// Date: Sat, 4 Apr 2026 01:37:39 +0000
// Subject: [PATCH] feat(36-03): add voice HTTP routes with POST /transcribe
//          and POST /synthesize
// MIME-Version: 1.0
// Content-Type: text/plain; charset=UTF-8
// Content-Transfer-Encoding: 8bit
//
// - Create server/src/routes/voice.ts with voiceRoutes() factory
// - POST /transcribe: multer audio upload → VoicePipelineService.transcribe → JSON response
// - POST /synthesize: text body → VoicePipelineService.synthesize → audio/wav response
// - Both routes protected by assertBoard(req) auth check
// - Create server/src/__tests__/36-voice-routes.test.ts with 5 passing tests
// ---
//  server/src/__tests__/36-voice-routes.test.ts | 103 +++++++++++++++++++
//  server/src/routes/voice.ts                   |  47 +++++++++
//  2 files changed, 150 insertions(+)
//  create mode 100644 server/src/__tests__/36-voice-routes.test.ts
//  create mode 100644 server/src/routes/voice.ts
//
// diff --git a/server/src/__tests__/36-voice-routes.test.ts b/server/src/__tests__/36-voice-routes.test.ts
// new file mode 100644
// index 00000000..a4220516
// --- /dev/null
// +++ b/server/src/__tests__/36-voice-routes.test.ts
// @@ -0,0 +1,103 @@
// ---------------------------------------------------------------------------
import { describe, it, expect, vi, beforeEach } from "vitest";
import express from "express";
import supertest from "supertest";

// Mock voicePipelineService BEFORE importing voice routes, so the router
// factory picks up the stubbed service instead of the real pipeline.
vi.mock("../services/voice-pipeline.js", () => ({
  voicePipelineService: vi.fn(() => ({
    transcribe: vi.fn().mockResolvedValue({ text: "Hello world", language: "en" }),
    synthesize: vi.fn().mockResolvedValue(Buffer.from("fake-wav-audio")),
    formatForVoice: vi.fn((t: string) => t),
    transcodeToWav16k: vi.fn().mockResolvedValue(Buffer.from("wav-data")),
  })),
}));

// Mock assertBoard to be a no-op (board auth passes)
vi.mock("../routes/authz.js", () => ({
  assertBoard: vi.fn(),
}));

import { voiceRoutes } from "../routes/voice.js";

/**
 * Builds a minimal Express app that mounts the voice routes at the root,
 * with JSON body parsing and a stubbed board actor on every request.
 */
function buildApp() {
  const app = express();
  app.use(express.json());

  // Attach a mock actor so assertBoard sees board type. Object.assign avoids
  // an `any` cast: the parameter types come from Express's own typings.
  app.use((req, _res, next) => {
    Object.assign(req, { actor: { type: "board" } });
    next();
  });

  app.use(voiceRoutes());
  return app;
}

describe("POST /transcribe", () => {
  beforeEach(() => {
    vi.clearAllMocks();
  });

  it("returns 200 with { text, language } for valid audio upload", async () => {
    const app = buildApp();
    const fakeAudio = Buffer.from("fake-audio-bytes");

    const res = await supertest(app)
      .post("/transcribe")
      .attach("audio", fakeAudio, { filename: "audio.webm", contentType: "audio/webm" });

    expect(res.status).toBe(200);
    // Assert the whole payload so the test matches its title: both `text`
    // and `language` from the mocked service must be relayed.
    expect(res.body).toEqual({ text: "Hello world", language: "en" });
  });

  it("returns 400 when audio field is missing", async () => {
    const app = buildApp();

    const res = await supertest(app)
      .post("/transcribe")
      .send({});

    expect(res.status).toBe(400);
    expect(res.body).toEqual({ error: "Missing audio field" });
  });
});

describe("POST /synthesize", () => {
  beforeEach(() => {
    vi.clearAllMocks();
  });

  it("returns 200 with audio/wav content-type for valid text", async () => {
    const app = buildApp();

    const res = await supertest(app)
      .post("/synthesize")
      .send({ text: "Hello" });

    expect(res.status).toBe(200);
    expect(res.headers["content-type"]).toMatch(/audio\/wav/);
  });

  it("returns 400 when text is missing", async () => {
    const app = buildApp();

    const res = await supertest(app)
      .post("/synthesize")
      .send({});

    expect(res.status).toBe(400);
    expect(res.body).toEqual({ error: "text is required" });
  });

  it("returns 400 when text is not a string", async () => {
    const app = buildApp();

    const res = await supertest(app)
      .post("/synthesize")
      .send({ text: 42 });

    expect(res.status).toBe(400);
    expect(res.body).toEqual({ error: "text is required" });
  });
});

// ---------------------------------------------------------------------------
// Patch metadata for the next file in the patch:
// diff --git a/server/src/routes/voice.ts b/server/src/routes/voice.ts
// new file mode 100644
// ---------------------------------------------------------------------------
// Patch metadata (preserved from the original hunk header):
// index 00000000..bc53057c
// --- /dev/null
// +++ b/server/src/routes/voice.ts
// @@ -0,0 +1,47 @@
import { Router } from "express";
import multer from "multer";
import { assertBoard } from "./authz.js";
import { voicePipelineService } from "../services/voice-pipeline.js";
import { MAX_ATTACHMENT_BYTES } from "../attachment-types.js";

/** Minimal view of the uploaded file that the transcribe route reads. */
type UploadedAudio = { buffer: Buffer; mimetype: string };

/**
 * Maps an upload's MIME type to the format tag handed to the transcriber.
 * Anything that is neither ogg nor wav is treated as webm.
 */
function audioFormatFromMime(mimetype: string): "ogg" | "wav" | "webm" {
  // MIME types are case-insensitive, so normalise before matching.
  const mime = mimetype.toLowerCase();
  if (mime.includes("ogg")) return "ogg";
  if (mime.includes("wav")) return "wav";
  return "webm";
}

/**
 * Builds the router exposing the voice endpoints:
 *
 * - `POST /transcribe` — multipart upload with a single `audio` file field;
 *   responds with the JSON result of `svc.transcribe`.
 * - `POST /synthesize` — JSON body `{ text, voiceId? }`; responds with the
 *   buffer from `svc.synthesize` as `audio/wav`.
 *
 * Both handlers call `assertBoard(req)` first. Any error raised by that
 * check, by multer (e.g. file-size limit) or by the service is forwarded to
 * Express error handling via `next(err)`.
 *
 * @returns A router with both routes registered at its root.
 */
export function voiceRoutes(): Router {
  const router = Router();
  const svc = voicePipelineService();
  const audioUpload = multer({
    storage: multer.memoryStorage(),
    limits: { fileSize: MAX_ATTACHMENT_BYTES, files: 1 },
  });

  // POST /api/transcribe — transcribe uploaded audio via VoicePipelineService
  router.post("/transcribe", async (req, res, next) => {
    try {
      // Presumably throws when the caller is not a board actor — confirm in authz.
      assertBoard(req);

      // Run multer by hand so the upload only happens after the auth check.
      await new Promise<void>((resolve, reject) => {
        audioUpload.single("audio")(req, res, (err?: unknown) =>
          err ? reject(err) : resolve(),
        );
      });

      const file = (req as typeof req & { file?: UploadedAudio }).file;
      if (!file) {
        res.status(400).json({ error: "Missing audio field" });
        return;
      }

      const result = await svc.transcribe(file.buffer, audioFormatFromMime(file.mimetype));
      res.json(result);
    } catch (err) {
      // On Express 4 a rejected async handler is not routed to error
      // middleware automatically; forwarding explicitly avoids a hung request.
      next(err);
    }
  });

  // POST /api/synthesize — synthesize text to speech via VoicePipelineService
  router.post("/synthesize", async (req, res, next) => {
    try {
      assertBoard(req);

      // req.body can be undefined when no body parser ran; treat as empty.
      const body: unknown = req.body;
      const { text, voiceId } = (body ?? {}) as { text?: unknown; voiceId?: unknown };

      if (typeof text !== "string" || text.length === 0) {
        res.status(400).json({ error: "text is required" });
        return;
      }
      // null/undefined voiceId means "not specified"; anything else must be a string.
      if (voiceId != null && typeof voiceId !== "string") {
        res.status(400).json({ error: "voiceId must be a string" });
        return;
      }
      const voice = typeof voiceId === "string" ? voiceId : undefined;

      const audioBuffer = await svc.synthesize(text, voice);
      res.setHeader("Content-Type", "audio/wav");
      res.send(audioBuffer);
    } catch (err) {
      next(err);
    }
  });

  return router;
}