- Export splitSentences() with title-abbreviation protection (Dr., Mr., etc.)
- Add synthesizeSentenceStream() AsyncGenerator yielding per-sentence audio chunks
- Add synthesizeMultiLang() synthesizing the same text in N voices via Promise.all
- Add POST /api/synthesize/stream SSE endpoint with base64 audio per sentence
- Add POST /api/synthesize/multi-lang returning an array of voiceId+audio pairs
- Existing POST /api/synthesize unchanged (backward compatible)
104 lines
3.7 KiB
TypeScript
104 lines
3.7 KiB
TypeScript
import { Router } from "express";
|
|
import multer from "multer";
|
|
import { assertBoard } from "./authz.js";
|
|
import { voicePipelineService } from "../services/voice-pipeline.js";
|
|
import { MAX_ATTACHMENT_BYTES } from "../attachment-types.js";
|
|
|
|
/**
 * Builds the Express router for the voice endpoints: audio transcription plus
 * three text-to-speech variants (single response, per-sentence SSE stream, and
 * multi-voice batch).
 *
 * Every handler calls `assertBoard(req)` before doing any work. Route paths are
 * relative to wherever the caller mounts this router (the inline route comments
 * refer to them as `/api/...`).
 *
 * @returns A router with the four POST routes registered.
 */
export function voiceRoutes(): Router {
  const router = Router();
  const svc = voicePipelineService();

  // Uploads are held in memory; at most one file of MAX_ATTACHMENT_BYTES.
  const audioUpload = multer({
    storage: multer.memoryStorage(),
    limits: { fileSize: MAX_ATTACHMENT_BYTES, files: 1 },
  });

  // POST /api/transcribe — transcribe uploaded audio via VoicePipelineService
  router.post("/transcribe", async (req, res) => {
    assertBoard(req);

    // Run the multer middleware by hand so its callback-style error surfaces
    // as a rejection of this async handler.
    await new Promise<void>((resolve, reject) =>
      audioUpload.single("audio")(req, res, (err) => (err ? reject(err) : resolve()))
    );

    const file = (req as any).file as { buffer: Buffer; mimetype: string } | undefined;
    if (!file) {
      res.status(400).json({ error: "Missing audio field" });
      return;
    }

    // Pick the container format from the MIME type; anything that is neither
    // ogg nor wav is treated as webm.
    const fmt = file.mimetype.includes("ogg") ? "ogg"
      : file.mimetype.includes("wav") ? "wav"
      : "webm";

    const result = await svc.transcribe(file.buffer, fmt);
    res.json(result);
  });

  // POST /api/synthesize — synthesize text to speech via VoicePipelineService
  router.post("/synthesize", async (req, res) => {
    assertBoard(req);

    const { text, voiceId } = req.body as { text?: string; voiceId?: string };
    if (!text || typeof text !== "string") {
      res.status(400).json({ error: "text is required" });
      return;
    }

    const audioBuffer = await svc.synthesize(text, voiceId);
    res.setHeader("Content-Type", "audio/wav");
    res.send(audioBuffer);
  });

  // POST /api/synthesize/stream — sentence-buffered SSE streaming TTS
  router.post("/synthesize/stream", async (req, res) => {
    assertBoard(req);

    const { text, voiceId } = req.body as { text?: string; voiceId?: string };
    if (!text || typeof text !== "string") {
      res.status(400).json({ error: "text is required" });
      return;
    }

    res.setHeader("Content-Type", "text/event-stream");
    res.setHeader("Cache-Control", "no-cache");
    res.setHeader("Connection", "keep-alive");
    // Send the headers immediately so the client sees the stream open before
    // the first sentence has been synthesized.
    res.flushHeaders();

    // Remember when the connection goes away so synthesis stops instead of
    // producing audio nobody will receive. The listener is on `res`, not
    // `req`: the request's own "close" can fire once the POST body has been
    // consumed, which would end the stream prematurely.
    let clientGone = false;
    res.on("close", () => {
      clientGone = true;
    });

    try {
      for await (const chunk of svc.synthesizeSentenceStream(text, voiceId)) {
        // Leaving the loop also lets the generator run its own cleanup.
        if (clientGone) break;

        // One SSE event per chunk; the audio is base64-encoded so it fits in JSON.
        const payload = JSON.stringify({
          index: chunk.index,
          total: chunk.total,
          audio: chunk.audio.toString("base64"),
        });
        res.write(`data: ${payload}\n\n`);
      }
      if (!clientGone) {
        // Terminal event telling the client that no more chunks will follow.
        res.write(`data: ${JSON.stringify({ done: true })}\n\n`);
      }
    } catch (err) {
      // Headers are already sent, so a failure is reported in-band as an event
      // rather than through an HTTP status code.
      const message = err instanceof Error ? err.message : "Synthesis failed";
      if (!clientGone) {
        res.write(`data: ${JSON.stringify({ error: message })}\n\n`);
      }
    } finally {
      res.end();
    }
  });

  // POST /api/synthesize/multi-lang — synthesize same text in multiple languages/voices
  router.post("/synthesize/multi-lang", async (req, res) => {
    assertBoard(req);

    const { text, voiceIds } = req.body as { text?: string; voiceIds?: unknown };
    if (!text || typeof text !== "string") {
      res.status(400).json({ error: "text is required" });
      return;
    }
    // Bound the fan-out: between one and five voices per request.
    if (!Array.isArray(voiceIds) || voiceIds.length < 1 || voiceIds.length > 5) {
      res.status(400).json({ error: "voiceIds must be an array with 1-5 entries" });
      return;
    }
    if (!voiceIds.every((v) => typeof v === "string")) {
      res.status(400).json({ error: "voiceIds must be an array of strings" });
      return;
    }

    const resultMap = await svc.synthesizeMultiLang(text, voiceIds as string[]);
    // Flatten the voiceId -> audio entries into a JSON-friendly array with
    // base64-encoded audio.
    const results = Array.from(resultMap.entries()).map(([voiceId, audio]) => ({
      voiceId,
      audio: audio.toString("base64"),
    }));
    res.json({ results });
  });

  return router;
}
|