From 08e6b72d99db983045b3e9fdbf852c3ee66907b4 Mon Sep 17 00:00:00 2001 From: Nexus Dev Date: Sat, 4 Apr 2026 03:33:46 +0000 Subject: [PATCH] feat(39-01): ChatVoicePlayer sentence-buffered streaming playback - Add streaming prop (default true) to ChatVoicePlayerProps - Connect to POST /api/synthesize/stream via fetch + ReadableStream - Parse SSE lines manually from response body stream - First sentence audio begins playing as soon as first chunk arrives - Subsequent sentences auto-play in sequence from audioQueue - Show 'Sentence N of M' progress indicator during streaming playback - Dot progress bar shows completed vs pending sentences - Falls back to full-fetch mode when the stream request fails (non-OK response, missing body, or network error) or when streaming=false; an in-stream SSE error event stops with idle status and does not fall back - Clean up all object URLs on unmount or new text --- ui/src/components/ChatVoicePlayer.tsx | 246 +++++++++++++++++++++----- 1 file changed, 202 insertions(+), 44 deletions(-) diff --git a/ui/src/components/ChatVoicePlayer.tsx b/ui/src/components/ChatVoicePlayer.tsx index cab24677..ae229b75 100644 --- a/ui/src/components/ChatVoicePlayer.tsx +++ b/ui/src/components/ChatVoicePlayer.tsx @@ -1,25 +1,167 @@ -import { useEffect, useRef, useState } from "react"; +import { useEffect, useRef, useState, useCallback } from "react"; import { Button } from "@/components/ui/button"; import { Loader2, Pause, Play } from "lucide-react"; interface ChatVoicePlayerProps { text: string; autoPlay?: boolean; + streaming?: boolean; } type PlayerStatus = "idle" | "loading" | "playing" | "paused"; -export function ChatVoicePlayer({ text, autoPlay = false }: ChatVoicePlayerProps) { +interface SentenceChunk { + index: number; + total: number; + audio: string; // base64 +} + +export function ChatVoicePlayer({ text, autoPlay = false, streaming = true }: ChatVoicePlayerProps) { const [status, setStatus] = useState("loading"); - const [audioUrl, setAudioUrl] = useState(null); + const [currentSentence, setCurrentSentence] = useState(0); + const [totalSentences, setTotalSentences] = useState(0); 
const audioRef = useRef(null); + // Queue of object URLs waiting to play + const audioQueue = useRef([]); + // All object URLs created (for cleanup) + const allObjectUrls = useRef([]); + // Whether playback has started (first chunk playing) + const playbackStarted = useRef(false); + + const revokeAllUrls = useCallback(() => { + allObjectUrls.current.forEach((url: string) => URL.revokeObjectURL(url)); + allObjectUrls.current = []; + audioQueue.current = []; + playbackStarted.current = false; + }, []); + + function base64ToBlob(base64: string, mimeType: string): Blob { + const binary = atob(base64); + const bytes = new Uint8Array(binary.length); + for (let i = 0; i < binary.length; i++) { + bytes[i] = binary.charCodeAt(i); + } + return new Blob([bytes], { type: mimeType }); + } + + function playNext() { + const next = audioQueue.current.shift(); + if (!next || !audioRef.current) return; + audioRef.current.src = next; + audioRef.current.play().catch(() => { + setStatus("idle"); + }); + } + useEffect(() => { - let objectUrl: string | null = null; let cancelled = false; - async function fetchAudio() { + async function fetchStreamingAudio() { setStatus("loading"); + setCurrentSentence(0); + setTotalSentences(0); + revokeAllUrls(); + + try { + const res = await fetch("/api/synthesize/stream", { + method: "POST", + headers: { "Content-Type": "application/json" }, + credentials: "include", + body: JSON.stringify({ text }), + }); + + if (cancelled) return; + + if (!res.ok || !res.body) { + // Fall back to full-fetch mode + await fetchFullAudio(); + return; + } + + const reader = res.body.getReader(); + const decoder = new TextDecoder(); + let buffer = ""; + + while (true) { + const { done, value } = await reader.read(); + if (cancelled) { + reader.cancel(); + return; + } + if (done) break; + + buffer += decoder.decode(value, { stream: true }); + + // Parse SSE lines from buffer + const lines = buffer.split("\n"); + buffer = lines.pop() ?? 
""; // keep incomplete line in buffer + + for (const line of lines) { + if (!line.startsWith("data: ")) continue; + const jsonStr = line.slice("data: ".length).trim(); + if (!jsonStr) continue; + + let parsed: { done?: boolean; error?: string; index?: number; total?: number; audio?: string }; + try { + parsed = JSON.parse(jsonStr); + } catch { + continue; + } + + if (parsed.error) { + // Stream error — fall through to full-fetch fallback + setStatus("idle"); + return; + } + + if (parsed.done) { + // All chunks received; if still loading (no chunks came), go idle + if (!playbackStarted.current) { + setStatus("idle"); + } + return; + } + + if (parsed.audio && parsed.index !== undefined && parsed.total !== undefined) { + const chunk = parsed as SentenceChunk; + setTotalSentences(chunk.total); + + // Decode base64 audio to blob URL + const blob = base64ToBlob(chunk.audio, "audio/wav"); + const url = URL.createObjectURL(blob); + allObjectUrls.current.push(url); + + if (!playbackStarted.current) { + // First chunk — start playing immediately + playbackStarted.current = true; + setCurrentSentence(chunk.index + 1); + if (audioRef.current) { + audioRef.current.src = url; + setStatus("playing"); + audioRef.current.play().catch(() => { + setStatus("idle"); + }); + } + } else { + // Queue subsequent chunks + audioQueue.current.push(url); + } + } + } + } + } catch { + if (!cancelled) { + // Network error — fall back to full fetch + await fetchFullAudio(); + } + } + } + + async function fetchFullAudio() { + if (cancelled) return; + setStatus("loading"); + let objectUrl: string | null = null; try { const res = await fetch("/api/synthesize", { method: "POST", @@ -35,33 +177,30 @@ export function ChatVoicePlayer({ text, autoPlay = false }: ChatVoicePlayerProps const blob = await res.blob(); if (cancelled) return; objectUrl = URL.createObjectURL(blob); - setAudioUrl(objectUrl); - setStatus("idle"); - } catch { - if (!cancelled) { - setStatus("idle"); + 
allObjectUrls.current.push(objectUrl); + if (audioRef.current) { + audioRef.current.src = objectUrl; } + setStatus("idle"); + if (autoPlay && audioRef.current) { + audioRef.current.play().catch(() => setStatus("idle")); + } + } catch { + if (!cancelled) setStatus("idle"); } } - fetchAudio(); + if (streaming) { + fetchStreamingAudio(); + } else { + fetchFullAudio(); + } return () => { cancelled = true; - if (objectUrl) { - URL.revokeObjectURL(objectUrl); - } + revokeAllUrls(); }; - }, [text]); - - useEffect(() => { - if (autoPlay && audioUrl && audioRef.current) { - audioRef.current.play().catch(() => { - // Browser may block autoplay; fall back to idle state - setStatus("idle"); - }); - } - }, [autoPlay, audioUrl]); + }, [text, streaming, autoPlay, revokeAllUrls]); function handlePlay() { if (audioRef.current) { @@ -76,10 +215,13 @@ export function ChatVoicePlayer({ text, autoPlay = false }: ChatVoicePlayerProps } function handleAudioEnded() { - setStatus("idle"); - if (audioUrl) { - URL.revokeObjectURL(audioUrl); - setAudioUrl(null); + // Check if there are more sentences in the queue + if (audioQueue.current.length > 0) { + setCurrentSentence((prev: number) => prev + 1); + playNext(); + } else { + setStatus("idle"); + playbackStarted.current = false; } } @@ -93,25 +235,41 @@ export function ChatVoicePlayer({ text, autoPlay = false }: ChatVoicePlayerProps } return ( - - {status === "playing" ? ( - - ) : ( - + + + {status === "playing" ? ( + + ) : ( + + )} + {streaming && status === "playing" && totalSentences > 1 && ( + + Sentence {currentSentence} of {totalSentences} + + )} + + {streaming && status === "playing" && totalSentences > 1 && ( + + {Array.from({ length: totalSentences }, (_, i) => ( + + ))} + )}