- Add streaming prop (default true) to ChatVoicePlayerProps
- Connect to POST /api/synthesize/stream via fetch + ReadableStream
- Parse SSE lines manually from response body stream
- First sentence audio begins playing as soon as first chunk arrives
- Subsequent sentences auto-play in sequence from audioQueue
- Show 'Sentence N of M' progress indicator during streaming playback
- Dot progress bar shows completed vs pending sentences
- Falls back to full-fetch mode on stream error or streaming=false
- Clean up all object URLs on unmount or new text
280 lines
8.1 KiB
TypeScript
280 lines
8.1 KiB
TypeScript
import { useEffect, useRef, useState, useCallback } from "react";
|
|
import { Button } from "@/components/ui/button";
|
|
import { Loader2, Pause, Play } from "lucide-react";
|
|
|
|
/** Props accepted by {@link ChatVoicePlayer}. */
interface ChatVoicePlayerProps {
  /** Text sent to the synthesis endpoints as the JSON body field `text`. */
  text: string;
  /**
   * When true, audio fetched in full-fetch mode starts playing as soon as it
   * has been downloaded. Defaults to `false` in the component.
   */
  autoPlay?: boolean;
  /**
   * When true, audio is requested from the sentence-by-sentence streaming
   * endpoint; when false, the whole clip is fetched in a single request.
   * Defaults to `true` in the component.
   */
  streaming?: boolean;
}
|
|
|
|
/**
 * UI state of the player:
 * - `"loading"`: audio is being requested and nothing is playable yet;
 * - `"playing"` / `"paused"`: mirrors the `<audio>` element's play/pause events;
 * - `"idle"`: nothing is playing (request failed, audio is ready but not
 *   started, or playback has finished).
 */
type PlayerStatus = "idle" | "loading" | "playing" | "paused";
|
|
|
|
/** One synthesized sentence as delivered by the streaming endpoint. */
interface SentenceChunk {
  /** Zero-based position of the sentence (the UI displays `index + 1`). */
  index: number;
  /** Total number of sentences the stream reports for the response. */
  total: number;
  /** Base64-encoded audio for the sentence. */
  audio: string;
}
|
|
|
|
/** Shape of one JSON payload carried on a `data:` line of the synthesis stream. */
interface StreamMessage {
  done?: boolean;
  error?: string;
  index?: number;
  total?: number;
  audio?: string;
}

/** What the stream reader should do after one line has been handled. */
type LineOutcome = "continue" | "done" | "error";

/**
 * Decodes a base64 string into a Blob of the given MIME type.
 *
 * @throws if `base64` is not valid base64 (propagated from `atob`).
 */
function base64ToBlob(base64: string, mimeType: string): Blob {
  const binary = atob(base64);
  const bytes = new Uint8Array(binary.length);
  for (let i = 0; i < binary.length; i++) {
    bytes[i] = binary.charCodeAt(i);
  }
  return new Blob([bytes], { type: mimeType });
}

/**
 * Inline player for the synthesized voice version of a chat message.
 *
 * In streaming mode the component POSTs `text` to `/api/synthesize/stream`,
 * parses the `data:` lines of the response body by hand, starts playing the
 * first sentence as soon as it arrives and plays the following sentences in
 * order from an internal queue. If the stream cannot be used before any
 * sentence has been received (HTTP error, missing body, network failure or an
 * `error` message), it falls back to fetching the whole clip from
 * `/api/synthesize`, which is also what `streaming={false}` does.
 *
 * All object URLs created for audio are revoked when `text`/`streaming`/
 * `autoPlay` change or the component unmounts.
 *
 * @param props.text      Text to synthesize.
 * @param props.autoPlay  Start playback automatically in full-fetch mode (default `false`).
 * @param props.streaming Use the sentence-streaming endpoint (default `true`).
 */
export function ChatVoicePlayer({ text, autoPlay = false, streaming = true }: ChatVoicePlayerProps) {
  const [status, setStatus] = useState<PlayerStatus>("loading");
  const [currentSentence, setCurrentSentence] = useState(0);
  const [totalSentences, setTotalSentences] = useState(0);
  // Whether the <audio> element currently has a source. Kept in state because
  // reading `audioRef.current.src` during render does not trigger re-renders.
  const [hasAudio, setHasAudio] = useState(false);
  const audioRef = useRef<HTMLAudioElement | null>(null);

  // Object URLs of sentences that have arrived but have not started playing yet.
  const audioQueue = useRef<string[]>([]);
  // Every object URL created for the current text (for cleanup).
  const allObjectUrls = useRef<string[]>([]);
  // True while a sentence is loaded into the element and the queue is being consumed.
  const playbackStarted = useRef(false);

  /** Revokes every object URL created so far and resets the playback queue. */
  const revokeAllUrls = useCallback(() => {
    for (const url of allObjectUrls.current) {
      URL.revokeObjectURL(url);
    }
    allObjectUrls.current = [];
    audioQueue.current = [];
    playbackStarted.current = false;
  }, []);

  /** Loads the next queued sentence into the audio element and plays it. */
  function playNext() {
    const audio = audioRef.current;
    const next = audioQueue.current.shift();
    if (!next || !audio) return;
    audio.src = next;
    audio.play().catch(() => {
      setStatus("idle");
    });
  }

  useEffect(() => {
    let cancelled = false;
    // Captured once so the cleanup can still reach the element on unmount,
    // when React has already cleared the ref.
    const audio = audioRef.current;
    // Number of sentences decoded from the stream in this run of the effect.
    let chunksReceived = 0;

    setHasAudio(false);

    /** Fetches the whole clip in one request and loads it into the audio element. */
    async function fetchFullAudio(): Promise<void> {
      if (cancelled) return;
      setStatus("loading");
      try {
        const res = await fetch("/api/synthesize", {
          method: "POST",
          headers: { "Content-Type": "application/json" },
          credentials: "include",
          body: JSON.stringify({ text }),
        });
        if (cancelled) return;
        if (!res.ok) {
          setStatus("idle");
          return;
        }
        const blob = await res.blob();
        if (cancelled) return;
        const objectUrl = URL.createObjectURL(blob);
        allObjectUrls.current.push(objectUrl);
        setStatus("idle");
        if (!audio) return;
        audio.src = objectUrl;
        setHasAudio(true);
        if (autoPlay) {
          // Autoplay may be rejected by the browser; stay idle so the user can press Play.
          audio.play().catch(() => {
            if (!cancelled) setStatus("idle");
          });
        }
      } catch {
        // Best effort: a failed request just leaves the player idle.
        if (!cancelled) setStatus("idle");
      }
    }

    /**
     * Reacts to a broken stream. Falls back to the full-fetch endpoint only
     * when no sentence has been received; otherwise the sentences already
     * delivered are left to play out undisturbed.
     */
    async function recoverFromStreamFailure(): Promise<void> {
      if (cancelled || chunksReceived > 0) return;
      await fetchFullAudio();
    }

    /** Turns a sentence into an object URL and either plays it or queues it. */
    function acceptChunk(chunk: SentenceChunk): void {
      const url = URL.createObjectURL(base64ToBlob(chunk.audio, "audio/wav"));
      allObjectUrls.current.push(url);
      chunksReceived += 1;
      setTotalSentences(chunk.total);

      if (playbackStarted.current) {
        audioQueue.current.push(url);
        return;
      }

      // Nothing is loaded (first sentence, or the previous one finished before
      // this one arrived): start playing immediately.
      playbackStarted.current = true;
      setCurrentSentence(chunk.index + 1);
      if (!audio) return;
      audio.src = url;
      setHasAudio(true);
      setStatus("playing");
      audio.play().catch(() => {
        if (!cancelled) setStatus("idle");
      });
    }

    /** Handles a single line of the response body and reports what to do next. */
    function processLine(line: string): LineOutcome {
      // The space after "data:" is optional, so it is removed by trim().
      if (!line.startsWith("data:")) return "continue";
      const jsonStr = line.slice("data:".length).trim();
      if (!jsonStr) return "continue";

      let raw: unknown;
      try {
        raw = JSON.parse(jsonStr);
      } catch {
        return "continue"; // skip malformed payloads
      }
      if (typeof raw !== "object" || raw === null) return "continue";
      const message = raw as StreamMessage;

      if (message.error) return "error";
      if (message.done) return "done";

      if (
        typeof message.audio === "string" &&
        message.audio &&
        typeof message.index === "number" &&
        typeof message.total === "number"
      ) {
        acceptChunk({ index: message.index, total: message.total, audio: message.audio });
      }
      return "continue";
    }

    /** Reads the streaming endpoint and feeds each sentence to the player. */
    async function fetchStreamingAudio(): Promise<void> {
      setStatus("loading");
      setCurrentSentence(0);
      setTotalSentences(0);

      try {
        const res = await fetch("/api/synthesize/stream", {
          method: "POST",
          headers: { "Content-Type": "application/json" },
          credentials: "include",
          body: JSON.stringify({ text }),
        });
        if (cancelled) return;

        if (!res.ok || !res.body) {
          await fetchFullAudio();
          return;
        }

        const reader = res.body.getReader();
        const decoder = new TextDecoder();
        let buffer = "";
        let outcome: LineOutcome = "continue";

        while (outcome === "continue") {
          const { done, value } = await reader.read();
          if (cancelled) {
            void reader.cancel().catch(() => undefined);
            return;
          }

          // On the final read, flush the decoder and treat whatever is left in
          // the buffer as a complete line.
          buffer += done ? decoder.decode() : decoder.decode(value, { stream: true });
          const lines = buffer.split("\n");
          buffer = done ? "" : lines.pop() ?? "";

          for (const line of lines) {
            outcome = processLine(line);
            if (outcome !== "continue") break;
          }
          if (done) break;
        }

        if (outcome !== "continue") {
          // We stopped before the body was exhausted; release the connection.
          void reader.cancel().catch(() => undefined);
        }
        if (outcome === "error") {
          await recoverFromStreamFailure();
          return;
        }
        // Stream finished (with or without a `done` message) and delivered
        // nothing: leave the loading state instead of spinning forever.
        if (chunksReceived === 0) setStatus("idle");
      } catch {
        await recoverFromStreamFailure();
      }
    }

    if (streaming) {
      void fetchStreamingAudio();
    } else {
      void fetchFullAudio();
    }

    return () => {
      cancelled = true;
      if (audio) {
        // Stop the old clip before its object URL is revoked.
        audio.pause();
        audio.removeAttribute("src");
        audio.load();
      }
      revokeAllUrls();
    };
  }, [text, streaming, autoPlay, revokeAllUrls]);

  /** Starts or resumes playback of whatever is loaded in the audio element. */
  function handlePlay() {
    const audio = audioRef.current;
    if (!audio) return;
    audio.play().catch(() => {
      setStatus("idle");
    });
  }

  /** Pauses playback. */
  function handlePause() {
    audioRef.current?.pause();
  }

  /** Advances to the next queued sentence, or goes idle when the queue is empty. */
  function handleAudioEnded() {
    if (audioQueue.current.length > 0) {
      setCurrentSentence((prev) => prev + 1);
      playNext();
      return;
    }
    setStatus("idle");
    // Lets a sentence that arrives later start playing immediately.
    playbackStarted.current = false;
  }

  const showProgress = streaming && status === "playing" && totalSentences > 1;

  // The <audio> element is rendered in every state, so `audioRef` is populated
  // while audio is still being fetched.
  return (
    <span className="inline-flex flex-col items-start gap-0.5">
      {status === "loading" ? (
        <span className="inline-flex items-center gap-1 text-xs text-muted-foreground">
          <Loader2 className="h-3 w-3 animate-spin" />
          Loading audio...
        </span>
      ) : (
        <span className="inline-flex items-center">
          {status === "playing" ? (
            <Button variant="ghost" size="sm" onClick={handlePause} aria-label="Pause voice response">
              <Pause className="h-3 w-3" />
            </Button>
          ) : (
            <Button
              variant="ghost"
              size="sm"
              onClick={handlePlay}
              disabled={!hasAudio}
              aria-label="Play voice response"
            >
              <Play className="h-3 w-3" />
            </Button>
          )}
          {showProgress && (
            <span className="text-xs text-muted-foreground ml-1">
              Sentence {currentSentence} of {totalSentences}
            </span>
          )}
        </span>
      )}
      {showProgress && (
        <span className="inline-flex gap-0.5 ml-1">
          {Array.from({ length: totalSentences }, (_, i) => (
            <span
              key={i}
              className={`h-1 w-3 rounded-full ${i < currentSentence ? "bg-primary" : "bg-muted"}`}
            />
          ))}
        </span>
      )}
      <audio
        ref={audioRef}
        aria-label="Voice response"
        onPlay={() => setStatus("playing")}
        // Only a playing clip can become paused; this keeps the pause event
        // fired by the cleanup from overwriting "loading".
        onPause={() => setStatus((prev) => (prev === "playing" ? "paused" : prev))}
        onEnded={handleAudioEnded}
      />
    </span>
  );
}
|