- encodeWav: 44-byte WAV header encoder (RIFF/WAVE/fmt/data), PCM mono 16-bit
- useVadRecorder: wraps useMicVAD with startOnLoad:false, auto-stop on speech end, POSTs to /api/transcribe
- useVoiceMode: reads/writes voiceMode from GET/PATCH /api/nexus/settings with optimistic update
98 lines
2.6 KiB
TypeScript
98 lines
2.6 KiB
TypeScript
import { useState, useRef, useCallback, useEffect } from "react";
import { useMicVAD } from "@ricky0123/vad-react";
import { encodeWav } from "../lib/encodeWav";
|
|
|
|
/** Signature of the callback that receives a finished transcript. */
type TranscriptHandler = (text: string) => void;

/** Options accepted by `useVadRecorder`. */
interface UseVadRecorderOptions {
  /**
   * Receives the trimmed transcript text returned by the transcription
   * endpoint. Very short results are not delivered.
   */
  onTranscript: TranscriptHandler;
}
|
|
|
|
/** Lifecycle phases reported by the recorder. */
type VadRecorderState = "idle" | "recording" | "processing";

/** Value returned by `useVadRecorder`. */
interface UseVadRecorderReturn {
  /**
   * Current phase: `"idle"` initially and after a session ends,
   * `"recording"` once `start` has succeeded, `"processing"` while the
   * captured audio is being transcribed.
   */
  state: VadRecorderState;
  /** Requests microphone access and begins voice-activity detection. */
  start: () => void;
  /** Pauses detection, stops the microphone stream and returns to `"idle"`. */
  stop: () => void;
  /**
   * The separately acquired microphone stream (intended for waveform
   * visualisation), or `null` when no stream is held.
   */
  mediaStream: MediaStream | null;
}
|
|
|
|
/** Transcripts shorter than this (measured after trimming) are discarded. */
const MIN_TRANSCRIPT_LENGTH = 2;

/** Stops every track of `stream`, releasing the capture device; no-op for `null`. */
function stopStreamTracks(stream: MediaStream | null): void {
  if (stream) {
    stream.getTracks().forEach((track) => track.stop());
  }
}

/**
 * Extracts a deliverable transcript from the `/api/transcribe` JSON body.
 * Returns the trimmed text, or `null` when the payload has no string `text`
 * field or the trimmed text is shorter than `MIN_TRANSCRIPT_LENGTH`.
 */
function extractTranscript(payload: unknown): string | null {
  const text = (payload as { text?: unknown } | null | undefined)?.text;
  if (typeof text !== "string") {
    return null;
  }
  const trimmed = text.trim();
  return trimmed.length >= MIN_TRANSCRIPT_LENGTH ? trimmed : null;
}

/**
 * Voice-activity-driven recorder hook.
 *
 * `start()` acquires a microphone stream (exposed as `mediaStream`) and starts
 * the VAD. When the VAD reports the end of speech, the captured audio is
 * WAV-encoded, POSTed to `/api/transcribe`, and the resulting text is handed
 * to `opts.onTranscript`. `stop()` aborts listening at any time.
 *
 * The separately acquired stream is released whenever a session ends
 * (`stop()`, end of speech, a new `start()`, or unmount) so the capture device
 * is never left open.
 *
 * @param opts - Recorder options; `onTranscript` receives each usable transcript.
 * @returns The current `state`, the `start`/`stop` controls and the active
 *   `mediaStream` (or `null`).
 */
export function useVadRecorder(opts: UseVadRecorderOptions): UseVadRecorderReturn {
  const [state, setState] = useState<"idle" | "recording" | "processing">("idle");
  // Held in state so consumers re-render when the stream changes; the ref
  // mirrors it for imperative access from callbacks and unmount cleanup.
  const [mediaStream, setMediaStream] = useState<MediaStream | null>(null);
  const mediaStreamRef = useRef<MediaStream | null>(null);
  const mountedRef = useRef(true);

  // "Latest value" refs: the speech-end handler is itself an input to
  // useMicVAD, so it cannot list `vad` as a dependency. Reading through refs
  // keeps the callbacks stable without acting on stale objects.
  const vadRef = useRef<ReturnType<typeof useMicVAD> | null>(null);
  const onTranscriptRef = useRef(opts.onTranscript);

  /** Stops and forgets the separately acquired microphone stream. */
  const releaseStream = useCallback(() => {
    stopStreamTracks(mediaStreamRef.current);
    mediaStreamRef.current = null;
    setMediaStream(null);
  }, []);

  const handleSpeechEnd = useCallback(
    async (audio: Float32Array) => {
      // Auto-stop: recording is over, so pause detection and free the mic.
      vadRef.current?.pause();
      releaseStream();
      setState("processing");

      try {
        const formData = new FormData();
        formData.append("audio", encodeWav(audio), "recording.wav");

        const res = await fetch("/api/transcribe", {
          method: "POST",
          credentials: "include",
          body: formData,
        });

        if (!res.ok) {
          console.error(
            "[useVadRecorder] Transcription request failed:",
            res.status,
            res.statusText,
          );
          return;
        }

        const transcript = extractTranscript(await res.json());
        if (transcript !== null) {
          onTranscriptRef.current(transcript);
        }
      } catch (err) {
        console.error("[useVadRecorder] Transcription error:", err);
      } finally {
        // Only leave "processing"; do not clobber a session that was
        // started (or stopped) while the request was in flight.
        setState((prev) => (prev === "processing" ? "idle" : prev));
      }
    },
    [releaseStream],
  );

  const vad = useMicVAD({
    startOnLoad: false,
    baseAssetPath: "/",
    onnxWASMBasePath: "/",
    positiveSpeechThreshold: 0.8,
    negativeSpeechThreshold: 0.65,
    redemptionFrames: 8,
    minSpeechFrames: 5,
    onSpeechStart: () => {
      // VAD detected start of speech — no action needed, state was set to "recording" in start()
    },
    onSpeechEnd: handleSpeechEnd,
  });

  // Keep the refs pointing at the values from the most recent render.
  useEffect(() => {
    vadRef.current = vad;
    onTranscriptRef.current = opts.onTranscript;
  });

  // Release the microphone if the component unmounts mid-session.
  useEffect(() => {
    mountedRef.current = true;
    return () => {
      mountedRef.current = false;
      stopStreamTracks(mediaStreamRef.current);
      mediaStreamRef.current = null;
    };
  }, []);

  const start = useCallback(async () => {
    let stream: MediaStream;
    try {
      // Request a separate stream reference for VoiceWaveform AnalyserNode
      stream = await navigator.mediaDevices.getUserMedia({ audio: true });
    } catch (err) {
      console.error("[useVadRecorder] Microphone access denied:", err);
      return;
    }

    if (!mountedRef.current) {
      // Unmounted while the permission prompt was open: nothing will ever
      // call stop(), so release the device immediately.
      stopStreamTracks(stream);
      return;
    }

    // Drop any stream left over from an earlier start() before replacing it.
    stopStreamTracks(mediaStreamRef.current);
    mediaStreamRef.current = stream;
    setMediaStream(stream);

    vadRef.current?.start();
    setState("recording");
  }, []);

  const stop = useCallback(() => {
    vadRef.current?.pause();
    // Stop the separate stream tracks
    releaseStream();
    setState("idle");
  }, [releaseStream]);

  return {
    state,
    start,
    stop,
    mediaStream,
  };
}
|