feat(39-01): ChatVoicePlayer sentence-buffered streaming playback
- Add streaming prop (default true) to ChatVoicePlayerProps - Connect to POST /api/synthesize/stream via fetch + ReadableStream - Parse SSE lines manually from response body stream - First sentence audio begins playing as soon as first chunk arrives - Subsequent sentences auto-play in sequence from audioQueue - Show 'Sentence N of M' progress indicator during streaming playback - Dot progress bar shows completed vs pending sentences - Falls back to full-fetch mode on stream error or streaming=false - Clean up all object URLs on unmount or new text
This commit is contained in:
parent
b95634c61a
commit
08e6b72d99
1 changed files with 202 additions and 44 deletions
|
|
@ -1,25 +1,167 @@
|
|||
import { useEffect, useRef, useState } from "react";
|
||||
import { useEffect, useRef, useState, useCallback } from "react";
|
||||
import { Button } from "@/components/ui/button";
|
||||
import { Loader2, Pause, Play } from "lucide-react";
|
||||
|
||||
/** Props accepted by ChatVoicePlayer. */
interface ChatVoicePlayerProps {
|
||||
// Text to synthesize; it is posted as the JSON `text` field of the synthesize request.
text: string;
|
||||
// When true, the full-fetch path starts playback as soon as the audio is loaded. Defaults to false.
autoPlay?: boolean;
|
||||
// Selects sentence-streamed playback (true) or a single full-audio fetch (false). Defaults to true.
streaming?: boolean;
|
||||
}
|
||||
|
||||
// Playback state shown by the player; the component's initial state is "loading".
type PlayerStatus = "idle" | "loading" | "playing" | "paused";
|
||||
|
||||
export function ChatVoicePlayer({ text, autoPlay = false }: ChatVoicePlayerProps) {
|
||||
/** One sentence of synthesized audio as parsed from a `data:` line of the streaming response. */
interface SentenceChunk {
|
||||
// Position of this sentence; the UI shows `index + 1`, so presumably zero-based — confirm against the server.
index: number;
|
||||
// Number of sentences in the whole response; drives the "Sentence N of M" indicator.
total: number;
|
||||
// Audio payload; decoded with base64ToBlob as "audio/wav" before playback.
audio: string; // base64
|
||||
}
|
||||
|
||||
export function ChatVoicePlayer({ text, autoPlay = false, streaming = true }: ChatVoicePlayerProps) {
|
||||
const [status, setStatus] = useState<PlayerStatus>("loading");
|
||||
const [audioUrl, setAudioUrl] = useState<string | null>(null);
|
||||
const [currentSentence, setCurrentSentence] = useState<number>(0);
|
||||
const [totalSentences, setTotalSentences] = useState<number>(0);
|
||||
const audioRef = useRef<HTMLAudioElement | null>(null);
|
||||
|
||||
// Queue of object URLs waiting to play
|
||||
const audioQueue = useRef<string[]>([]);
|
||||
// All object URLs created (for cleanup)
|
||||
const allObjectUrls = useRef<string[]>([]);
|
||||
// Whether playback has started (first chunk playing)
|
||||
const playbackStarted = useRef<boolean>(false);
|
||||
|
||||
// Releases every object URL this player has created, then resets the pending
// queue and the "playback started" flag so the next request starts clean.
// Memoized with no dependencies because it only touches refs.
const revokeAllUrls = useCallback(() => {
  for (const objectUrl of allObjectUrls.current) {
    URL.revokeObjectURL(objectUrl);
  }
  allObjectUrls.current = [];
  audioQueue.current = [];
  playbackStarted.current = false;
}, []);
|
||||
|
||||
/**
 * Decodes a base64 string into a Blob tagged with the given MIME type.
 *
 * @param base64 Base64-encoded binary payload.
 * @param mimeType MIME type recorded on the resulting Blob.
 * @returns A Blob holding the decoded bytes.
 */
function base64ToBlob(base64: string, mimeType: string): Blob {
  // atob yields one character per byte, so each char code is the byte value.
  const decoded = atob(base64);
  const bytes = Uint8Array.from(decoded, (ch) => ch.charCodeAt(0));
  return new Blob([bytes], { type: mimeType });
}
|
||||
|
||||
// Takes the next queued object URL, if any, and starts playing it on the
// shared <audio> element. The URL is removed from the queue even when no
// element is mounted. A rejected play() drops the status back to "idle".
function playNext() {
  const player = audioRef.current;
  const nextUrl = audioQueue.current.shift();
  if (nextUrl && player) {
    player.src = nextUrl;
    player.play().catch(() => {
      setStatus("idle");
    });
  }
}
|
||||
|
||||
useEffect(() => {
|
||||
let objectUrl: string | null = null;
|
||||
let cancelled = false;
|
||||
|
||||
async function fetchAudio() {
|
||||
/**
 * Streams sentence-sized audio from POST /api/synthesize/stream and starts
 * playback as soon as the first sentence arrives; later sentences are queued
 * and played in order as each one ends.
 *
 * The response body is read as an event stream. Each `data:` line carries a
 * JSON payload that is one of:
 *   - `{ index, total, audio }` — base64 audio for one sentence,
 *   - `{ done: true }`          — no more sentences will follow,
 *   - `{ error }`               — the server gave up on the stream.
 *
 * Falls back to fetchFullAudio() when the request is rejected, the server
 * sends an error event, or reading/decoding throws. Does nothing further once
 * the enclosing effect has been cancelled.
 */
async function fetchStreamingAudio() {
  type LineOutcome = "continue" | "done" | "error";

  setStatus("loading");
  setCurrentSentence(0);
  setTotalSentences(0);
  revokeAllUrls();

  // Handles a single line of the event stream and reports whether the read
  // loop should keep going, stop normally, or stop because of a server error.
  const handleLine = (line: string): LineOutcome => {
    // The event-stream format allows "data:" with or without one following space.
    if (!line.startsWith("data:")) return "continue";
    const jsonStr = line.slice("data:".length).trim();
    if (!jsonStr) return "continue";

    let parsed: unknown;
    try {
      parsed = JSON.parse(jsonStr);
    } catch {
      // Skip a malformed payload rather than abandoning the whole stream.
      return "continue";
    }
    // Payloads such as `null` or `5` are valid JSON but have no fields to read.
    if (typeof parsed !== "object" || parsed === null) return "continue";

    const evt = parsed as {
      done?: unknown;
      error?: unknown;
      index?: unknown;
      total?: unknown;
      audio?: unknown;
    };

    if (evt.error) return "error";
    if (evt.done) return "done";

    if (
      typeof evt.audio === "string" &&
      evt.audio !== "" &&
      typeof evt.index === "number" &&
      typeof evt.total === "number"
    ) {
      setTotalSentences(evt.total);

      // Decode the base64 audio into a blob URL and remember it for cleanup.
      const url = URL.createObjectURL(base64ToBlob(evt.audio, "audio/wav"));
      allObjectUrls.current.push(url);

      if (!playbackStarted.current) {
        // Nothing is playing yet — start this sentence immediately.
        playbackStarted.current = true;
        setCurrentSentence(evt.index + 1);
        if (audioRef.current) {
          audioRef.current.src = url;
          setStatus("playing");
          audioRef.current.play().catch(() => {
            setStatus("idle");
          });
        }
      } else {
        // Something is already playing — queue this sentence behind it.
        audioQueue.current.push(url);
      }
    }
    return "continue";
  };

  let shouldFallBack = false;

  try {
    const res = await fetch("/api/synthesize/stream", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      credentials: "include",
      body: JSON.stringify({ text }),
    });

    if (cancelled) return;

    if (!res.ok || !res.body) {
      shouldFallBack = true;
    } else {
      const reader = res.body.getReader();
      try {
        const decoder = new TextDecoder();
        let buffer = "";
        let outcome: LineOutcome = "continue";

        while (outcome === "continue") {
          const { done, value } = await reader.read();
          if (cancelled) return; // the finally block below cancels the reader

          // At end-of-stream, flush the decoder so a trailing partial character is not lost.
          buffer += done ? decoder.decode() : decoder.decode(value, { stream: true });

          const lines = buffer.split("\n");
          // Mid-stream the last element may be an incomplete line and is kept for
          // the next read; at end-of-stream it is final and must be parsed too.
          buffer = done ? "" : (lines.pop() ?? "");

          for (const line of lines) {
            outcome = handleLine(line);
            if (outcome !== "continue") break;
          }

          if (done) break;
        }

        shouldFallBack = outcome === "error";
      } finally {
        // Release the connection on every exit path (done event, error event,
        // cancellation, exception). Cancelling a finished stream is harmless.
        void reader.cancel().catch(() => undefined);
      }
    }
  } catch {
    // Network failure or a chunk that could not be decoded.
    shouldFallBack = true;
  }

  if (cancelled) return;

  if (shouldFallBack) {
    // Drop sentences that were queued before the failure so they are not
    // replayed once the full clip ends. Their URLs stay in allObjectUrls and
    // are revoked with everything else.
    audioQueue.current = [];
    playbackStarted.current = false;
    await fetchFullAudio();
    return;
  }

  // The stream ended, with or without an explicit `done` event. If nothing is
  // playing, leave the "loading" state instead of spinning forever.
  if (!playbackStarted.current) {
    setStatus("idle");
  }
}
|
||||
|
||||
async function fetchFullAudio() {
|
||||
if (cancelled) return;
|
||||
setStatus("loading");
|
||||
let objectUrl: string | null = null;
|
||||
try {
|
||||
const res = await fetch("/api/synthesize", {
|
||||
method: "POST",
|
||||
|
|
@ -35,33 +177,30 @@ export function ChatVoicePlayer({ text, autoPlay = false }: ChatVoicePlayerProps
|
|||
const blob = await res.blob();
|
||||
if (cancelled) return;
|
||||
objectUrl = URL.createObjectURL(blob);
|
||||
setAudioUrl(objectUrl);
|
||||
setStatus("idle");
|
||||
} catch {
|
||||
if (!cancelled) {
|
||||
setStatus("idle");
|
||||
allObjectUrls.current.push(objectUrl);
|
||||
if (audioRef.current) {
|
||||
audioRef.current.src = objectUrl;
|
||||
}
|
||||
setStatus("idle");
|
||||
if (autoPlay && audioRef.current) {
|
||||
audioRef.current.play().catch(() => setStatus("idle"));
|
||||
}
|
||||
} catch {
|
||||
if (!cancelled) setStatus("idle");
|
||||
}
|
||||
}
|
||||
|
||||
fetchAudio();
|
||||
if (streaming) {
|
||||
fetchStreamingAudio();
|
||||
} else {
|
||||
fetchFullAudio();
|
||||
}
|
||||
|
||||
return () => {
|
||||
cancelled = true;
|
||||
if (objectUrl) {
|
||||
URL.revokeObjectURL(objectUrl);
|
||||
}
|
||||
revokeAllUrls();
|
||||
};
|
||||
}, [text]);
|
||||
|
||||
useEffect(() => {
|
||||
if (autoPlay && audioUrl && audioRef.current) {
|
||||
audioRef.current.play().catch(() => {
|
||||
// Browser may block autoplay; fall back to idle state
|
||||
setStatus("idle");
|
||||
});
|
||||
}
|
||||
}, [autoPlay, audioUrl]);
|
||||
}, [text, streaming, autoPlay, revokeAllUrls]);
|
||||
|
||||
function handlePlay() {
|
||||
if (audioRef.current) {
|
||||
|
|
@ -76,10 +215,13 @@ export function ChatVoicePlayer({ text, autoPlay = false }: ChatVoicePlayerProps
|
|||
}
|
||||
|
||||
function handleAudioEnded() {
|
||||
setStatus("idle");
|
||||
if (audioUrl) {
|
||||
URL.revokeObjectURL(audioUrl);
|
||||
setAudioUrl(null);
|
||||
// Check if there are more sentences in the queue
|
||||
if (audioQueue.current.length > 0) {
|
||||
setCurrentSentence((prev: number) => prev + 1);
|
||||
playNext();
|
||||
} else {
|
||||
setStatus("idle");
|
||||
playbackStarted.current = false;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -93,25 +235,41 @@ export function ChatVoicePlayer({ text, autoPlay = false }: ChatVoicePlayerProps
|
|||
}
|
||||
|
||||
return (
|
||||
<span className="inline-flex items-center">
|
||||
{status === "playing" ? (
|
||||
<Button variant="ghost" size="sm" onClick={handlePause} aria-label="Pause voice response">
|
||||
<Pause className="h-3 w-3" />
|
||||
</Button>
|
||||
) : (
|
||||
<Button
|
||||
variant="ghost"
|
||||
size="sm"
|
||||
onClick={handlePlay}
|
||||
disabled={!audioUrl}
|
||||
aria-label="Play voice response"
|
||||
>
|
||||
<Play className="h-3 w-3" />
|
||||
</Button>
|
||||
<span className="inline-flex flex-col items-start gap-0.5">
|
||||
<span className="inline-flex items-center">
|
||||
{status === "playing" ? (
|
||||
<Button variant="ghost" size="sm" onClick={handlePause} aria-label="Pause voice response">
|
||||
<Pause className="h-3 w-3" />
|
||||
</Button>
|
||||
) : (
|
||||
<Button
|
||||
variant="ghost"
|
||||
size="sm"
|
||||
onClick={handlePlay}
|
||||
disabled={!audioRef.current?.src}
|
||||
aria-label="Play voice response"
|
||||
>
|
||||
<Play className="h-3 w-3" />
|
||||
</Button>
|
||||
)}
|
||||
{streaming && status === "playing" && totalSentences > 1 && (
|
||||
<span className="text-xs text-muted-foreground ml-1">
|
||||
Sentence {currentSentence} of {totalSentences}
|
||||
</span>
|
||||
)}
|
||||
</span>
|
||||
{streaming && status === "playing" && totalSentences > 1 && (
|
||||
<span className="inline-flex gap-0.5 ml-1">
|
||||
{Array.from({ length: totalSentences }, (_, i) => (
|
||||
<span
|
||||
key={i}
|
||||
className={`h-1 w-3 rounded-full ${i < currentSentence ? "bg-primary" : "bg-muted"}`}
|
||||
/>
|
||||
))}
|
||||
</span>
|
||||
)}
|
||||
<audio
|
||||
ref={audioRef}
|
||||
src={audioUrl ?? undefined}
|
||||
aria-label="Voice response"
|
||||
onPlay={() => setStatus("playing")}
|
||||
onPause={() => setStatus("paused")}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue