feat(39-01): ChatVoicePlayer sentence-buffered streaming playback

- Add streaming prop (default true) to ChatVoicePlayerProps
- Connect to POST /api/synthesize/stream via fetch + ReadableStream
- Parse SSE lines manually from response body stream
- First sentence audio begins playing as soon as first chunk arrives
- Subsequent sentences auto-play in sequence from audioQueue
- Show 'Sentence N of M' progress indicator during streaming playback
- Show dot progress bar of completed vs pending sentences
- Fall back to full-fetch mode on stream error or when streaming=false
- Clean up all object URLs on unmount or new text
This commit is contained in:
Nexus Dev 2026-04-04 03:33:46 +00:00
parent b95634c61a
commit 08e6b72d99

View file

@ -1,25 +1,167 @@
import { useEffect, useRef, useState } from "react";
import { useEffect, useRef, useState, useCallback } from "react";
import { Button } from "@/components/ui/button";
import { Loader2, Pause, Play } from "lucide-react";
/** Props accepted by ChatVoicePlayer. */
interface ChatVoicePlayerProps {
/** Text to synthesize; it is sent as the JSON body of the synthesize request. */
text: string;
/** When true, the full-fetch path starts playback as soon as the audio has loaded. Defaults to false. */
autoPlay?: boolean;
/** When true, audio is requested from the sentence-streaming endpoint; otherwise the whole clip is fetched in one request. Defaults to true. */
streaming?: boolean;
}
/** Playback state of the player; the component starts in "loading" and shows the pause button only while "playing". */
type PlayerStatus = "idle" | "loading" | "playing" | "paused";
export function ChatVoicePlayer({ text, autoPlay = false }: ChatVoicePlayerProps) {
/** One sentence of synthesized audio, as parsed from a `data:` event of the streaming response. */
interface SentenceChunk {
/** Position of this sentence; shown to the user as index + 1, so presumably zero-based (confirm against the endpoint). */
index: number;
/** Total number of sentences in the response; drives the "Sentence N of M" indicator. */
total: number;
audio: string; // base64-encoded audio; the player decodes it into an audio/wav blob
}
export function ChatVoicePlayer({ text, autoPlay = false, streaming = true }: ChatVoicePlayerProps) {
const [status, setStatus] = useState<PlayerStatus>("loading");
const [audioUrl, setAudioUrl] = useState<string | null>(null);
const [currentSentence, setCurrentSentence] = useState<number>(0);
const [totalSentences, setTotalSentences] = useState<number>(0);
const audioRef = useRef<HTMLAudioElement | null>(null);
// Queue of object URLs waiting to play
const audioQueue = useRef<string[]>([]);
// All object URLs created (for cleanup)
const allObjectUrls = useRef<string[]>([]);
// Whether playback has started (first chunk playing)
const playbackStarted = useRef<boolean>(false);
// Releases every blob URL this player has created so far and resets the
// queue and playback-started bookkeeping back to their initial values.
const revokeAllUrls = useCallback(() => {
  for (const createdUrl of allObjectUrls.current) {
    URL.revokeObjectURL(createdUrl);
  }
  playbackStarted.current = false;
  audioQueue.current = [];
  allObjectUrls.current = [];
}, []);
/**
 * Decodes a base64 string into a Blob tagged with the given MIME type.
 *
 * @param base64 - Base64-encoded binary payload.
 * @param mimeType - MIME type to assign to the resulting Blob.
 * @returns A Blob whose bytes are the decoded payload.
 */
function base64ToBlob(base64: string, mimeType: string): Blob {
  // atob yields a "binary string": one character per byte, each code in 0..255.
  const decoded = atob(base64);
  const payload = Uint8Array.from(decoded, (ch) => ch.charCodeAt(0));
  return new Blob([payload], { type: mimeType });
}
/**
 * Dequeues the next sentence URL and starts playing it on the shared audio
 * element. The URL is removed from the queue even when no audio element is
 * mounted; a rejected play() puts the player back into the "idle" state.
 */
function playNext() {
  const upcomingUrl = audioQueue.current.shift();
  const player = audioRef.current;
  if (upcomingUrl && player) {
    player.src = upcomingUrl;
    player.play().catch(() => {
      setStatus("idle");
    });
  }
}
useEffect(() => {
let objectUrl: string | null = null;
let cancelled = false;
async function fetchAudio() {
/**
 * Streams sentence-by-sentence audio from POST /api/synthesize/stream and
 * starts playback as soon as the first sentence arrives.
 *
 * The response body is read incrementally and parsed as SSE `data:` lines,
 * each carrying a JSON payload: `{ index, total, audio }` for one sentence
 * (audio is base64), `{ done: true }` once everything was sent, or
 * `{ error }` on a server-side failure.
 *
 * Falls back to fetchFullAudio() when the request fails, or when the stream
 * reports or throws an error before any sentence has been received. Once
 * audio has been delivered, an error only stops the stream: the sentences
 * already received keep playing instead of being replaced mid-playback.
 */
async function fetchStreamingAudio() {
  type LineOutcome = "continue" | "stop" | "fallback";

  setStatus("loading");
  setCurrentSentence(0);
  setTotalSentences(0);
  revokeAllUrls();

  // True once at least one sentence of this stream has been turned into audio.
  let receivedAudio = false;
  let needsFallback = false;
  let reader: ReadableStreamDefaultReader<Uint8Array> | null = null;

  // Handles a single line of the event stream and tells the read loop what to do next.
  const processLine = (rawLine: string): LineOutcome => {
    // The SSE format makes the space after "data:" optional; trim() removes it and any "\r".
    if (!rawLine.startsWith("data:")) return "continue";
    const jsonStr = rawLine.slice("data:".length).trim();
    if (!jsonStr) return "continue";

    let payload: unknown;
    try {
      payload = JSON.parse(jsonStr);
    } catch {
      // Malformed event: skip it and keep reading.
      return "continue";
    }
    if (typeof payload !== "object" || payload === null) return "continue";
    const { error, done: finished, audio, index, total } = payload as {
      error?: unknown;
      done?: unknown;
      audio?: unknown;
      index?: unknown;
      total?: unknown;
    };

    if (error) {
      // Only replace the stream with a full fetch if nothing is playable yet.
      return receivedAudio ? "stop" : "fallback";
    }

    if (finished) {
      // All chunks received; if nothing is playing, leave the loading state.
      if (!playbackStarted.current) {
        setStatus("idle");
      }
      return "stop";
    }

    if (typeof audio === "string" && audio && typeof index === "number" && typeof total === "number") {
      setTotalSentences(total);

      // Decode base64 audio to a blob URL and remember it for later cleanup.
      const url = URL.createObjectURL(base64ToBlob(audio, "audio/wav"));
      allObjectUrls.current.push(url);
      receivedAudio = true;

      if (!playbackStarted.current) {
        // Nothing is playing right now: start this sentence immediately.
        playbackStarted.current = true;
        setCurrentSentence(index + 1);
        if (audioRef.current) {
          audioRef.current.src = url;
          setStatus("playing");
          audioRef.current.play().catch(() => {
            setStatus("idle");
          });
        }
      } else {
        // Something is already playing: queue this sentence behind it.
        audioQueue.current.push(url);
      }
    }
    return "continue";
  };

  try {
    const res = await fetch("/api/synthesize/stream", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      credentials: "include",
      body: JSON.stringify({ text }),
    });
    if (cancelled) return;

    if (!res.ok || !res.body) {
      needsFallback = true;
    } else {
      reader = res.body.getReader();
      const decoder = new TextDecoder();
      let buffer = "";
      let outcome: LineOutcome = "continue";
      let streamEnded = false;

      while (outcome === "continue" && !streamEnded) {
        const { done, value } = await reader.read();
        if (cancelled) return;
        streamEnded = done;

        // At end of stream, flush the decoder so a trailing partial sequence is not lost.
        buffer += done ? decoder.decode() : decoder.decode(value, { stream: true });

        const lines = buffer.split("\n");
        // Keep the incomplete last line for the next read, except at end of stream,
        // where whatever is left is the final line.
        buffer = done ? "" : lines.pop() ?? "";

        for (const line of lines) {
          outcome = processLine(line);
          if (outcome !== "continue") break;
        }
      }

      if (outcome === "fallback") {
        needsFallback = true;
      } else if (outcome === "continue" && !playbackStarted.current) {
        // Stream closed without a `done` event and nothing is playing:
        // do not leave the player stuck in "loading".
        setStatus("idle");
      }
    }
  } catch {
    // Network or decode failure: fall back only if no sentence is playable yet.
    needsFallback = !receivedAudio;
  } finally {
    // Best-effort release of the connection on every exit path; a failure to
    // cancel an already-closed or errored stream is of no consequence.
    reader?.cancel().catch(() => {});
  }

  if (needsFallback && !cancelled) {
    await fetchFullAudio();
  }
}
async function fetchFullAudio() {
if (cancelled) return;
setStatus("loading");
let objectUrl: string | null = null;
try {
const res = await fetch("/api/synthesize", {
method: "POST",
@ -35,33 +177,30 @@ export function ChatVoicePlayer({ text, autoPlay = false }: ChatVoicePlayerProps
const blob = await res.blob();
if (cancelled) return;
objectUrl = URL.createObjectURL(blob);
setAudioUrl(objectUrl);
setStatus("idle");
} catch {
if (!cancelled) {
setStatus("idle");
allObjectUrls.current.push(objectUrl);
if (audioRef.current) {
audioRef.current.src = objectUrl;
}
setStatus("idle");
if (autoPlay && audioRef.current) {
audioRef.current.play().catch(() => setStatus("idle"));
}
} catch {
if (!cancelled) setStatus("idle");
}
}
fetchAudio();
if (streaming) {
fetchStreamingAudio();
} else {
fetchFullAudio();
}
return () => {
cancelled = true;
if (objectUrl) {
URL.revokeObjectURL(objectUrl);
}
revokeAllUrls();
};
}, [text]);
useEffect(() => {
if (autoPlay && audioUrl && audioRef.current) {
audioRef.current.play().catch(() => {
// Browser may block autoplay; fall back to idle state
setStatus("idle");
});
}
}, [autoPlay, audioUrl]);
}, [text, streaming, autoPlay, revokeAllUrls]);
function handlePlay() {
if (audioRef.current) {
@ -76,10 +215,13 @@ export function ChatVoicePlayer({ text, autoPlay = false }: ChatVoicePlayerProps
}
function handleAudioEnded() {
setStatus("idle");
if (audioUrl) {
URL.revokeObjectURL(audioUrl);
setAudioUrl(null);
// Check if there are more sentences in the queue
if (audioQueue.current.length > 0) {
setCurrentSentence((prev: number) => prev + 1);
playNext();
} else {
setStatus("idle");
playbackStarted.current = false;
}
}
@ -93,25 +235,41 @@ export function ChatVoicePlayer({ text, autoPlay = false }: ChatVoicePlayerProps
}
return (
<span className="inline-flex items-center">
{status === "playing" ? (
<Button variant="ghost" size="sm" onClick={handlePause} aria-label="Pause voice response">
<Pause className="h-3 w-3" />
</Button>
) : (
<Button
variant="ghost"
size="sm"
onClick={handlePlay}
disabled={!audioUrl}
aria-label="Play voice response"
>
<Play className="h-3 w-3" />
</Button>
<span className="inline-flex flex-col items-start gap-0.5">
<span className="inline-flex items-center">
{status === "playing" ? (
<Button variant="ghost" size="sm" onClick={handlePause} aria-label="Pause voice response">
<Pause className="h-3 w-3" />
</Button>
) : (
<Button
variant="ghost"
size="sm"
onClick={handlePlay}
disabled={!audioRef.current?.src}
aria-label="Play voice response"
>
<Play className="h-3 w-3" />
</Button>
)}
{streaming && status === "playing" && totalSentences > 1 && (
<span className="text-xs text-muted-foreground ml-1">
Sentence {currentSentence} of {totalSentences}
</span>
)}
</span>
{streaming && status === "playing" && totalSentences > 1 && (
<span className="inline-flex gap-0.5 ml-1">
{Array.from({ length: totalSentences }, (_, i) => (
<span
key={i}
className={`h-1 w-3 rounded-full ${i < currentSentence ? "bg-primary" : "bg-muted"}`}
/>
))}
</span>
)}
<audio
ref={audioRef}
src={audioUrl ?? undefined}
aria-label="Voice response"
onPlay={() => setStatus("playing")}
onPause={() => setStatus("paused")}