import { Router } from "express";
import multer from "multer";
import { assertBoard } from "./authz.js";
import { voicePipelineService } from "../services/voice-pipeline.js";
import { MAX_ATTACHMENT_BYTES } from "../attachment-types.js";

/** Audio container names passed to the voice pipeline's `transcribe` call. */
type AudioFormat = "ogg" | "wav" | "webm";

/** The subset of the uploaded file (populated by multer) that this module reads. */
type UploadedAudio = { buffer: Buffer; mimetype: string };

/** JSON body shape shared by the single-voice synthesize endpoints. */
type SynthesizeBody = { text?: string; voiceId?: string };

/** JSON body shape of the multi-voice synthesize endpoint. */
type MultiLangBody = { text?: string; voiceIds?: unknown };

/**
 * Maps an upload's MIME type to the audio format name handed to the pipeline.
 * Anything that is neither ogg nor wav is treated as webm.
 */
function audioFormatFor(mimetype: string): AudioFormat {
  // Media types are case-insensitive, so normalise before matching.
  const normalized = mimetype.toLowerCase();
  if (normalized.includes("ogg")) return "ogg";
  if (normalized.includes("wav")) return "wav";
  return "webm";
}

/** True when `value` is a non-empty string, i.e. usable as synthesis input. */
function isNonEmptyString(value: unknown): value is string {
  return typeof value === "string" && value.length > 0;
}

/**
 * Builds the router for the voice endpoints: audio transcription plus
 * one-shot, streamed (SSE) and multi-voice speech synthesis.
 *
 * Every handler calls `assertBoard(req)` first. Anything thrown by that call,
 * by the upload middleware, or by the voice pipeline service (outside the
 * streaming loop) rejects the async handler and is not handled here.
 *
 * @returns A router with the four POST routes registered.
 */
export function voiceRoutes(): Router {
  const router = Router();
  const svc = voicePipelineService();

  // Uploads are buffered in memory; a single file capped at MAX_ATTACHMENT_BYTES.
  const audioUpload = multer({
    storage: multer.memoryStorage(),
    limits: { fileSize: MAX_ATTACHMENT_BYTES, files: 1 },
  });

  // POST /api/transcribe — transcribe uploaded audio via VoicePipelineService
  router.post("/transcribe", async (req, res) => {
    assertBoard(req);

    // Run the callback-style multer middleware as a promise so upload errors
    // reject this handler instead of being lost.
    await new Promise<void>((resolve, reject) => {
      audioUpload.single("audio")(req, res, (err?: unknown) => {
        if (err) {
          reject(err);
        } else {
          resolve();
        }
      });
    });

    const file = (req as { file?: UploadedAudio }).file;
    if (!file) {
      res.status(400).json({ error: "Missing audio field" });
      return;
    }

    const result = await svc.transcribe(file.buffer, audioFormatFor(file.mimetype));
    res.json(result);
  });

  // POST /api/synthesize — synthesize text to speech via VoicePipelineService
  router.post("/synthesize", async (req, res) => {
    assertBoard(req);

    // `req.body` may be undefined/null when no body was parsed; fall back to
    // an empty object so the request is rejected with 400 rather than throwing.
    const { text, voiceId } = (req.body ?? {}) as SynthesizeBody;
    if (!isNonEmptyString(text)) {
      res.status(400).json({ error: "text is required" });
      return;
    }

    const audioBuffer = await svc.synthesize(text, voiceId);
    res.setHeader("Content-Type", "audio/wav");
    res.send(audioBuffer);
  });

  // POST /api/synthesize/stream — sentence-buffered SSE streaming TTS
  router.post("/synthesize/stream", async (req, res) => {
    assertBoard(req);

    const { text, voiceId } = (req.body ?? {}) as SynthesizeBody;
    if (!isNonEmptyString(text)) {
      res.status(400).json({ error: "text is required" });
      return;
    }

    res.setHeader("Content-Type", "text/event-stream");
    res.setHeader("Cache-Control", "no-cache");
    res.setHeader("Connection", "keep-alive");
    res.flushHeaders();

    // Track client disconnects so we stop pulling chunks from the pipeline
    // (and stop writing) once nobody is listening.
    const connection = { closed: false };
    res.on("close", () => {
      connection.closed = true;
    });

    try {
      for await (const chunk of svc.synthesizeSentenceStream(text, voiceId)) {
        if (connection.closed) break;
        const payload = JSON.stringify({
          index: chunk.index,
          total: chunk.total,
          audio: chunk.audio.toString("base64"),
        });
        res.write(`data: ${payload}\n\n`);
      }
      if (!connection.closed) {
        res.write(`data: ${JSON.stringify({ done: true })}\n\n`);
      }
    } catch (err) {
      // Headers are already flushed, so the failure is reported as an SSE
      // event rather than through the HTTP status code.
      const message = err instanceof Error ? err.message : "Synthesis failed";
      if (!connection.closed) {
        res.write(`data: ${JSON.stringify({ error: message })}\n\n`);
      }
    } finally {
      res.end();
    }
  });

  // POST /api/synthesize/multi-lang — synthesize same text in multiple languages/voices
  router.post("/synthesize/multi-lang", async (req, res) => {
    assertBoard(req);

    const { text, voiceIds } = (req.body ?? {}) as MultiLangBody;
    if (!isNonEmptyString(text)) {
      res.status(400).json({ error: "text is required" });
      return;
    }
    if (!Array.isArray(voiceIds) || voiceIds.length < 1 || voiceIds.length > 5) {
      res.status(400).json({ error: "voiceIds must be an array with 1-5 entries" });
      return;
    }
    if (!voiceIds.every((v) => typeof v === "string")) {
      res.status(400).json({ error: "voiceIds must be an array of strings" });
      return;
    }

    const resultMap = await svc.synthesizeMultiLang(text, voiceIds as string[]);
    // Flatten the voiceId -> audio map into a JSON-friendly array with
    // base64-encoded audio.
    const results = Array.from(resultMap.entries()).map(([voiceId, audio]) => ({
      voiceId,
      audio: audio.toString("base64"),
    }));
    res.json({ results });
  });

  return router;
}