// nexus/server/src/routes/voice.ts

import { Router } from "express";
import multer from "multer";
import { assertBoard } from "./authz.js";
import { voicePipelineService } from "../services/voice-pipeline.js";
import { MAX_ATTACHMENT_BYTES } from "../attachment-types.js";

/**
 * Builds the router exposing the voice endpoints backed by the voice pipeline
 * service: `/transcribe`, `/synthesize`, `/synthesize/stream` and
 * `/synthesize/multi-lang` (all POST).
 *
 * Every handler calls `assertBoard(req)` before doing any work.
 * NOTE(review): presumably `assertBoard` throws for callers without board
 * access — confirm in `./authz.js`.
 * NOTE(review): the handlers are async and let errors propagate; this relies on
 * the framework (or a wrapper) forwarding rejected promises to the error
 * middleware — confirm for the Express version in use.
 *
 * @returns A router with the four voice routes registered.
 */
export function voiceRoutes(): Router {
  const router = Router();
  const svc = voicePipelineService();
  // Uploads are buffered in memory, limited to a single file of at most
  // MAX_ATTACHMENT_BYTES bytes.
  const audioUpload = multer({
    storage: multer.memoryStorage(),
    limits: { fileSize: MAX_ATTACHMENT_BYTES, files: 1 },
  });

  // POST /api/transcribe — transcribe uploaded audio via VoicePipelineService
  router.post("/transcribe", async (req, res) => {
    // Authorize before parsing so the upload is not buffered for rejected callers.
    assertBoard(req);
    // Adapt multer's callback-style middleware to a promise; upload errors
    // (e.g. exceeding the size limit) reject and propagate out of the handler.
    await new Promise<void>((resolve, reject) =>
      audioUpload.single("audio")(req, res, (err) => (err ? reject(err) : resolve()))
    );
    const file = (req as any).file as { buffer: Buffer; mimetype: string } | undefined;
    if (!file) {
      res.status(400).json({ error: "Missing audio field" });
      return;
    }
    // Derive the format hint from the MIME type; anything that is neither
    // ogg nor wav is treated as webm.
    const fmt = file.mimetype.includes("ogg") ? "ogg"
      : file.mimetype.includes("wav") ? "wav"
      : "webm";
    const result = await svc.transcribe(file.buffer, fmt);
    res.json(result);
  });

  // POST /api/synthesize — synthesize text to speech via VoicePipelineService
  router.post("/synthesize", async (req, res) => {
    assertBoard(req);
    // `req.body` is undefined when no body parser populated it; fall back to
    // an empty object so the request gets the 400 below instead of a TypeError.
    const { text, voiceId } = (req.body ?? {}) as { text?: string; voiceId?: string };
    if (!text || typeof text !== "string") {
      res.status(400).json({ error: "text is required" });
      return;
    }
    const audioBuffer = await svc.synthesize(text, voiceId);
    res.setHeader("Content-Type", "audio/wav");
    res.send(audioBuffer);
  });

  // POST /api/synthesize/stream — sentence-buffered SSE streaming TTS
  router.post("/synthesize/stream", async (req, res) => {
    assertBoard(req);
    // Same undefined-body guard as /synthesize.
    const { text, voiceId } = (req.body ?? {}) as { text?: string; voiceId?: string };
    if (!text || typeof text !== "string") {
      res.status(400).json({ error: "text is required" });
      return;
    }

    // Track disconnects so synthesis stops once nobody is listening. The
    // response "close" event also fires after a normal `res.end()`, where
    // setting the flag is harmless.
    let clientGone = false;
    res.on("close", () => {
      clientGone = true;
    });

    res.setHeader("Content-Type", "text/event-stream");
    res.setHeader("Cache-Control", "no-cache");
    res.setHeader("Connection", "keep-alive");
    // Headers are sent now, so from here on failures can only be reported
    // in-band as SSE events, not via the HTTP status code.
    res.flushHeaders();

    try {
      for await (const chunk of svc.synthesizeSentenceStream(text, voiceId)) {
        // Leaving the loop lets the async iterator finish early instead of
        // synthesizing the remaining chunks for a closed connection.
        if (clientGone) break;
        const payload = JSON.stringify({
          index: chunk.index,
          total: chunk.total,
          audio: chunk.audio.toString("base64"),
        });
        res.write(`data: ${payload}\n\n`);
      }
      if (!clientGone) {
        res.write(`data: ${JSON.stringify({ done: true })}\n\n`);
      }
    } catch (err) {
      const message = err instanceof Error ? err.message : "Synthesis failed";
      if (!clientGone) {
        res.write(`data: ${JSON.stringify({ error: message })}\n\n`);
      }
    } finally {
      res.end();
    }
  });

  // POST /api/synthesize/multi-lang — synthesize same text in multiple languages/voices
  router.post("/synthesize/multi-lang", async (req, res) => {
    assertBoard(req);
    // Same undefined-body guard as /synthesize.
    const { text, voiceIds } = (req.body ?? {}) as { text?: string; voiceIds?: unknown };
    if (!text || typeof text !== "string") {
      res.status(400).json({ error: "text is required" });
      return;
    }
    if (!Array.isArray(voiceIds) || voiceIds.length < 1 || voiceIds.length > 5) {
      res.status(400).json({ error: "voiceIds must be an array with 1-5 entries" });
      return;
    }
    if (!voiceIds.every((v) => typeof v === "string")) {
      res.status(400).json({ error: "voiceIds must be an array of strings" });
      return;
    }

    const resultMap = await svc.synthesizeMultiLang(text, voiceIds as string[]);
    // Flatten the voiceId -> audio entries into a JSON-friendly array with
    // base64-encoded audio.
    const results = Array.from(resultMap.entries()).map(([voiceId, audio]) => ({
      voiceId,
      audio: audio.toString("base64"),
    }));
    res.json({ results });
  });

  return router;
}