nexus/ui/src/hooks/useVadRecorder.ts
Nexus Dev 3676c9c349 feat(37-02): encodeWav utility, useVadRecorder + useVoiceMode hooks
- encodeWav: 44-byte WAV header encoder (RIFF/WAVE/fmt/data), PCM mono 16-bit
- useVadRecorder: wraps useMicVAD with startOnLoad:false, auto-stop on speech end, POSTs to /api/transcribe
- useVoiceMode: reads/writes voiceMode from GET/PATCH /api/nexus/settings with optimistic update
2026-04-04 02:35:27 +00:00

98 lines
2.6 KiB
TypeScript

import { useState, useRef, useCallback } from "react";
import { useMicVAD } from "@ricky0123/vad-react";
import { encodeWav } from "../lib/encodeWav";
/** Options accepted by {@link useVadRecorder}. */
interface UseVadRecorderOptions {
  /**
   * Invoked with the transcript text returned by `/api/transcribe`
   * once a recorded utterance has been transcribed.
   */
  onTranscript: (text: string) => void;
}
/** Values and controls exposed by {@link useVadRecorder}. */
interface UseVadRecorderReturn {
  /**
   * Current phase of the recorder: `"idle"` when nothing is running,
   * `"recording"` after `start()` succeeds, `"processing"` while the
   * captured audio is being transcribed.
   */
  state: "idle" | "recording" | "processing";

  /** Requests microphone access and begins listening for speech. */
  start: () => void;

  /** Pauses voice-activity detection and returns the recorder to `"idle"`. */
  stop: () => void;

  /**
   * The microphone stream obtained by `start()` (intended for a waveform
   * visualiser), or `null` when no stream is held.
   */
  mediaStream: MediaStream | null;
}
/**
 * React hook that records one utterance using voice-activity detection and
 * sends it to `/api/transcribe`.
 *
 * Flow: `start()` requests a microphone stream (exposed as `mediaStream` so a
 * waveform visualiser can attach to it), starts the VAD and moves to
 * `"recording"`. When the VAD reports the end of speech the audio is encoded
 * as WAV, POSTed as multipart form data, and the trimmed transcript is handed
 * to `opts.onTranscript` if it is at least two characters long. `stop()`
 * aborts listening at any time.
 *
 * The visualiser stream is released (tracks stopped) whenever recording ends,
 * whether through `stop()`, through automatic stop on speech end, or because
 * `start()` is called again while a stream is still held.
 *
 * NOTE(review): nothing here releases the stream when the component unmounts;
 * callers should call `stop()` before unmounting.
 *
 * @param opts - Hook options; see {@link UseVadRecorderOptions}.
 * @returns The recorder state, `start`/`stop` controls and the current
 *   visualiser stream.
 */
export function useVadRecorder(opts: UseVadRecorderOptions): UseVadRecorderReturn {
  const { onTranscript } = opts;

  const [state, setState] = useState<"idle" | "recording" | "processing">("idle");
  // Mirrored in state so consumers re-render when the stream changes; the ref
  // gives callbacks synchronous access to the stream currently held.
  const [mediaStream, setMediaStream] = useState<MediaStream | null>(null);
  const mediaStreamRef = useRef<MediaStream | null>(null);
  // Always points at the VAD handle from the latest render, so memoized
  // callbacks never act on a handle captured by an earlier render.
  const vadRef = useRef<ReturnType<typeof useMicVAD> | null>(null);

  /** Swaps the held visualiser stream, stopping the tracks of the old one. */
  const replaceStream = useCallback((next: MediaStream | null) => {
    const previous = mediaStreamRef.current;
    if (previous && previous !== next) {
      previous.getTracks().forEach((track) => track.stop());
    }
    mediaStreamRef.current = next;
    setMediaStream(next);
  }, []);

  const handleSpeechEnd = useCallback(
    async (audio: Float32Array) => {
      vadRef.current?.pause();
      // Recording is over: release the microphone instead of leaving the
      // visualiser stream live until the next explicit stop().
      replaceStream(null);
      setState("processing");
      try {
        const formData = new FormData();
        formData.append("audio", encodeWav(audio), "recording.wav");
        const res = await fetch("/api/transcribe", {
          method: "POST",
          credentials: "include",
          body: formData,
        });
        if (!res.ok) {
          console.error("[useVadRecorder] Transcription failed:", res.status);
          return;
        }
        const data = (await res.json()) as { text?: unknown } | null;
        // Trim before applying the minimum length so whitespace-only
        // results are not delivered as an empty transcript.
        const text = typeof data?.text === "string" ? data.text.trim() : "";
        if (text.length >= 2) {
          onTranscript(text);
        }
      } catch (err) {
        console.error("[useVadRecorder] Transcription error:", err);
      } finally {
        // Only leave "processing"; do not clobber a recording that was
        // restarted while the request was in flight.
        setState((current) => (current === "processing" ? "idle" : current));
      }
    },
    [onTranscript, replaceStream],
  );

  const vad = useMicVAD({
    startOnLoad: false,
    baseAssetPath: "/",
    onnxWASMBasePath: "/",
    positiveSpeechThreshold: 0.8,
    negativeSpeechThreshold: 0.65,
    redemptionFrames: 8,
    minSpeechFrames: 5,
    onSpeechStart: () => {
      // VAD detected start of speech — no action needed, state was set to "recording" in start()
    },
    onSpeechEnd: handleSpeechEnd,
  });
  vadRef.current = vad;

  const start = useCallback(async () => {
    let stream: MediaStream;
    try {
      // Request a separate stream reference for VoiceWaveform AnalyserNode
      stream = await navigator.mediaDevices.getUserMedia({ audio: true });
    } catch (err) {
      console.error("[useVadRecorder] Microphone access denied:", err);
      return;
    }
    // Also stops any stream left over from a previous start().
    replaceStream(stream);
    vadRef.current?.start();
    setState("recording");
  }, [replaceStream]);

  const stop = useCallback(() => {
    vadRef.current?.pause();
    // Stop the separate stream tracks
    replaceStream(null);
    setState("idle");
  }, [replaceStream]);

  return {
    state,
    start,
    stop,
    mediaStream,
  };
}