diff --git a/server/src/app.ts b/server/src/app.ts index e851b90f..389a656c 100644 --- a/server/src/app.ts +++ b/server/src/app.ts @@ -32,6 +32,7 @@ import { assistantMemoryRoutes } from "./routes/assistant-memory.js"; import { assistantHandoffRoutes } from "./routes/assistant-handoff.js"; import { chatFileRoutes } from "./routes/chat-files.js"; import { nexusSettingsRoutes } from "./routes/nexus-settings.js"; +import { voiceRoutes } from "./routes/voice.js"; import { pluginRoutes } from "./routes/plugins.js"; import { pluginUiStaticRoutes } from "./routes/plugin-ui-static.js"; import { applyUiBranding } from "./ui-branding.js"; @@ -174,6 +175,7 @@ export async function createApp( api.use(assistantHandoffRoutes(db)); api.use(chatFileRoutes(db, opts.storageService)); api.use(nexusSettingsRoutes()); + api.use(voiceRoutes()); const hostServicesDisposers = new Map void>(); const workerManager = createPluginWorkerManager(); const pluginRegistry = pluginRegistryService(db); diff --git a/server/src/routes/chat-files.ts b/server/src/routes/chat-files.ts index 5c7f993f..25b0c3f5 100644 --- a/server/src/routes/chat-files.ts +++ b/server/src/routes/chat-files.ts @@ -294,96 +294,5 @@ export function chatFileRoutes(db: Db, storage: StorageService) { res.json(updated); }); - // POST /transcribe — Transcribe audio via local Whisper - const audioUpload = multer({ - storage: multer.memoryStorage(), - limits: { fileSize: MAX_ATTACHMENT_BYTES, files: 1 }, - }); - - async function runAudioUpload( - upload: ReturnType, - req: Request, - res: Response, - ) { - await new Promise((resolve, reject) => { - upload.single("audio")(req, res, (err: unknown) => { - if (err) reject(err); - else resolve(); - }); - }); - } - - router.post("/transcribe", async (req, res) => { - assertBoard(req); - - try { - await runAudioUpload(audioUpload, req, res); - } catch (err) { - if (err instanceof multer.MulterError) { - res.status(400).json({ error: err.message }); - return; - } - throw err; - } - - const file = (req as Request & { file?: { buffer: Buffer; mimetype: string } }).file; - if (!file) { - res.status(400).json({ error: "Missing audio field" }); - return; - } - - // Write to temp file - const { writeFile, unlink } = await import("node:fs/promises"); - const { tmpdir } = await import("node:os"); - const tmpPath = path.join(tmpdir(), `nexus-audio-${Date.now()}.webm`); - - try { - await writeFile(tmpPath, file.buffer); - - // Try whisper CLI (whisper.cpp or openai-whisper) - const { promisify } = await import("node:util"); - const { execFile: execFileCb } = await import("node:child_process"); - const execFileAsync = promisify(execFileCb); - - try { - // Try whisper.cpp first (outputs transcription to stdout with --no-timestamps) - const { stdout } = await execFileAsync("whisper-cpp", [ - "--model", "base.en", - "--file", tmpPath, - "--no-timestamps", - "--output-txt", - ], { timeout: 30000 }); - res.json({ text: stdout.trim() }); - } catch { - try { - // Fallback: openai-whisper Python CLI - const { stdout } = await execFileAsync("whisper", [ - tmpPath, - "--model", "base.en", - "--output_format", "txt", - "--output_dir", tmpdir(), - ], { timeout: 60000 }); - // whisper CLI writes to a .txt file alongside the input - const txtPath = tmpPath.replace(/\.webm$/, ".txt"); - try { - const { readFile } = await import("node:fs/promises"); - const text = await readFile(txtPath, "utf-8"); - await unlink(txtPath).catch(() => {}); - res.json({ text: text.trim() }); - } catch { - // Parse stdout as fallback - res.json({ text: stdout.trim() }); - } - } catch { - res.status(503).json({ - error: "Whisper not available. Install whisper-cpp or openai-whisper for voice input.", - }); - } - } - } finally { - await unlink(tmpPath).catch(() => {}); - } - }); - return router; } diff --git a/server/src/routes/chat.ts b/server/src/routes/chat.ts index 73c7ca28..f8b0e85f 100644 --- a/server/src/routes/chat.ts +++ b/server/src/routes/chat.ts @@ -90,7 +90,9 @@ export function chatRoutes(db: Db): Router { // POST /api/conversations/:id/stream -- SSE streaming endpoint (CHAT-01, PERF-02) router.post("/conversations/:id/stream", async (req, res) => { assertBoard(req); - const { content, agentId } = req.body; + const { content, agentId, voiceMode } = req.body as { + content: string; agentId?: string; voiceMode?: "text" | "voice_input" | "full_voice"; + }; if (!content || typeof content !== "string") { res.status(400).json({ error: "content is required" }); return; @@ -139,6 +141,20 @@ export function chatRoutes(db: Db): Router { // Add the new user message messagesWithMemory.push({ role: "user", content }); + // Inject dual-output formatting prompt when voice mode is full_voice (VPIPE-06) + if (voiceMode === "full_voice") { + messagesWithMemory.push({ + role: "system", + content: [ + "Format your response with EXACTLY these two labeled sections:", + "", + "SPOKEN: [Natural speech prose only. No markdown. No bullet points. No code blocks. Max 2-3 sentences for spoken delivery.]", + "", + "DETAILED: [Your full response with all detail, code blocks, and markdown formatting.]", + ].join("\n"), + }); + } + // Set SSE headers and flush BEFORE any generation (PERF-02) res.setHeader("Content-Type", "text/event-stream"); res.setHeader("Cache-Control", "no-cache"); @@ -168,6 +184,9 @@ export function chatRoutes(db: Db): Router { role: "assistant", content: fullContent.trim(), agentId: agentId || undefined, + messageType: voiceMode === "full_voice" ? "voice_full" + : voiceMode === "voice_input" ? "voice_input" + : undefined, }); res.write(`data: ${JSON.stringify({ type: "done", messageId: message.id, content: fullContent.trim() })}\n\n`);