feat(37-02): encodeWav utility, useVadRecorder + useVoiceMode hooks

- encodeWav: 44-byte WAV header encoder (RIFF/WAVE/fmt/data), PCM mono 16-bit
- useVadRecorder: wraps useMicVAD with startOnLoad:false, auto-stop on speech end, POSTs to /api/transcribe
- useVoiceMode: reads/writes voiceMode from GET/PATCH /api/nexus/settings with optimistic update
This commit is contained in:
Nexus Dev 2026-04-04 02:35:27 +00:00
parent 16371f01f5
commit 0d0b17c8a0
3 changed files with 225 additions and 0 deletions

View file

@ -0,0 +1,98 @@
import { useState, useRef, useCallback } from "react";
import { useMicVAD } from "@ricky0123/vad-react";
import { encodeWav } from "../lib/encodeWav";
/** Options accepted by `useVadRecorder`. */
interface UseVadRecorderOptions {
/** Invoked with the trimmed transcript text returned by POST /api/transcribe. */
onTranscript: (text: string) => void;
}
/** Value returned by `useVadRecorder`. */
interface UseVadRecorderReturn {
/**
 * Current phase of the recorder: "idle", "recording" (set by `start`), or
 * "processing" (set while captured audio is being sent for transcription).
 */
state: "idle" | "recording" | "processing";
/** Requests microphone access and starts voice-activity detection. */
start: () => void;
/** Pauses detection, stops the tracks of the held stream, and returns to "idle". */
stop: () => void;
/** Microphone stream obtained by `start`, or null when none is held. */
mediaStream: MediaStream | null;
}
/**
 * React hook that records one utterance with voice-activity detection (VAD)
 * and sends it to POST /api/transcribe.
 *
 * Lifecycle: `start()` acquires a microphone stream (exposed as `mediaStream`
 * so a waveform view can attach an AnalyserNode) and starts the VAD. When the
 * VAD reports end of speech, the audio is WAV-encoded, uploaded, and the
 * trimmed transcript is passed to `opts.onTranscript`. `stop()` aborts
 * listening at any time.
 *
 * @param opts - Callbacks for the recorder; see `UseVadRecorderOptions`.
 * @returns The current recorder state, `start`/`stop` controls and the live
 *   microphone stream (or null when none is held).
 */
export function useVadRecorder(opts: UseVadRecorderOptions): UseVadRecorderReturn {
  const [state, setState] = useState<"idle" | "recording" | "processing">("idle");
  // The stream is mirrored in state so the returned `mediaStream` always
  // reflects the latest value; the ref gives callbacks synchronous access.
  const [mediaStream, setMediaStream] = useState<MediaStream | null>(null);
  const mediaStreamRef = useRef<MediaStream | null>(null);

  // Swaps the held stream for `next`, stopping the tracks of the one being
  // replaced so the browser releases the microphone.
  const replaceStream = useCallback((next: MediaStream | null) => {
    const previous = mediaStreamRef.current;
    if (previous && previous !== next) {
      previous.getTracks().forEach((t) => t.stop());
    }
    mediaStreamRef.current = next;
    setMediaStream(next);
  }, []);

  const handleSpeechEnd = useCallback(
    async (audio: Float32Array) => {
      vad.pause();
      // Recording is over: release the waveform stream instead of leaving the
      // microphone open until the caller happens to invoke stop().
      replaceStream(null);
      setState("processing");
      try {
        const wavBlob = encodeWav(audio);
        const formData = new FormData();
        formData.append("audio", wavBlob, "recording.wav");
        const res = await fetch("/api/transcribe", {
          method: "POST",
          credentials: "include",
          body: formData,
        });
        if (!res.ok) {
          console.error(`[useVadRecorder] POST /api/transcribe returned ${res.status}`);
          return;
        }
        const data = (await res.json()) as { text?: unknown };
        // Trim before the length check so whitespace padding cannot let a
        // one-character transcript through.
        const text = typeof data.text === "string" ? data.text.trim() : "";
        if (text.length >= 2) {
          opts.onTranscript(text);
        }
      } catch (err) {
        console.error("[useVadRecorder] Transcription error:", err);
      } finally {
        // Only leave "processing"; do not clobber a newer "recording" state
        // if the caller restarted while the upload was in flight.
        setState((current) => (current === "processing" ? "idle" : current));
      }
    },
    // `vad` is declared below and intentionally omitted from the deps.
    // eslint-disable-next-line react-hooks/exhaustive-deps
    [opts.onTranscript, replaceStream],
  );

  const vad = useMicVAD({
    startOnLoad: false,
    baseAssetPath: "/",
    onnxWASMBasePath: "/",
    positiveSpeechThreshold: 0.8,
    negativeSpeechThreshold: 0.65,
    redemptionFrames: 8,
    minSpeechFrames: 5,
    onSpeechStart: () => {
      // VAD detected start of speech — no action needed, state was set to "recording" in start()
    },
    onSpeechEnd: handleSpeechEnd,
  });

  const start = useCallback(async () => {
    let stream: MediaStream;
    try {
      // Request a separate stream reference for VoiceWaveform AnalyserNode
      stream = await navigator.mediaDevices.getUserMedia({ audio: true });
    } catch (err) {
      console.error("[useVadRecorder] Microphone access denied:", err);
      return;
    }
    // Stops any stream left over from an earlier start() before storing the new one.
    replaceStream(stream);
    vad.start();
    setState("recording");
  }, [vad, replaceStream]);

  const stop = useCallback(() => {
    vad.pause();
    // Stop the separate stream tracks
    replaceStream(null);
    setState("idle");
  }, [vad, replaceStream]);

  return {
    state,
    start,
    stop,
    mediaStream,
  };
}

View file

@ -0,0 +1,71 @@
import { useState, useEffect } from "react";
/** Allowed values of the `voiceMode` field read from and written to /api/nexus/settings. */
type VoiceMode = "text" | "voice_input" | "full_voice";
/** Value returned by `useVoiceMode`. */
interface UseVoiceModeReturn {
/** Current voice mode; starts as "text" until the settings request supplies a valid value. */
mode: VoiceMode;
/** Applies `next` immediately, PATCHes it to /api/nexus/settings, and reverts if the request fails. */
setMode: (next: VoiceMode) => Promise<void>;
/** True until the initial settings request has finished (successfully or not). */
isLoading: boolean;
}
/**
 * React hook exposing the user's persisted voice mode.
 *
 * On mount it loads `voiceMode` from GET /api/nexus/settings. `setMode`
 * performs an optimistic update followed by PATCH /api/nexus/settings and
 * rolls back when the request fails.
 *
 * @returns The current mode, a setter that persists the mode, and a flag that
 *   is true while the initial load is in flight.
 */
export function useVoiceMode(): UseVoiceModeReturn {
  const [mode, setModeState] = useState<VoiceMode>("text");
  const [isLoading, setIsLoading] = useState(true);

  // Load current voiceMode from nexus-settings on mount
  useEffect(() => {
    let cancelled = false;
    const load = async (): Promise<void> => {
      try {
        const res = await fetch("/api/nexus/settings", {
          credentials: "include",
        });
        if (!res.ok || cancelled) {
          return;
        }
        const data = (await res.json()) as { voiceMode?: string };
        // The component may have unmounted while the body was being parsed.
        if (cancelled) {
          return;
        }
        const raw = data.voiceMode;
        // Ignore anything that is not a known mode and keep the default.
        if (raw === "voice_input" || raw === "full_voice" || raw === "text") {
          setModeState(raw);
        }
      } catch (err) {
        console.error("[useVoiceMode] Failed to load settings:", err);
      } finally {
        if (!cancelled) {
          setIsLoading(false);
        }
      }
    };
    void load();
    return () => {
      cancelled = true;
    };
  }, []);

  const setMode = async (next: VoiceMode): Promise<void> => {
    const previous = mode;
    // Optimistic update
    setModeState(next);
    try {
      const res = await fetch("/api/nexus/settings", {
        method: "PATCH",
        credentials: "include",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({ voiceMode: next }),
      });
      if (!res.ok) {
        throw new Error(`PATCH /api/nexus/settings returned ${res.status}`);
      }
    } catch (err) {
      console.error("[useVoiceMode] Failed to update voiceMode:", err);
      // Revert on error, but only if our optimistic value is still the one
      // shown; a later setMode call may already have replaced it.
      setModeState((current) => (current === next ? previous : current));
    }
  };

  return { mode, setMode, isLoading };
}

56
ui/src/lib/encodeWav.ts Normal file
View file

@ -0,0 +1,56 @@
/**
 * Stores each UTF-16 code unit of `str` as one byte in `view`, beginning at
 * byte position `offset`. Used for the four-character chunk tags of the WAV
 * header.
 */
function writeString(view: DataView, offset: number, str: string): void {
  let position = offset;
  for (const unit of str.split("")) {
    view.setUint8(position, unit.charCodeAt(0));
    position += 1;
  }
}

/**
 * Packs mono floating-point audio into a WAV file held in a Blob.
 *
 * Layout: a 44-byte header followed by little-endian signed 16-bit PCM.
 * - RIFF chunk: "RIFF", file size minus 8, "WAVE"
 * - fmt chunk: PCM format (1), one channel, `sampleRate`, 16-bit depth
 * - data chunk: "data", payload byte length, then the samples
 *
 * @param samples - Audio samples; values outside [-1, 1] are clamped.
 * @param sampleRate - Sample rate written to the header (default 16 kHz).
 * @returns A Blob of type "audio/wav".
 */
export function encodeWav(samples: Float32Array, sampleRate = 16000): Blob {
  const channelCount = 1;
  const bitDepth = 16;
  const headerBytes = 44;
  const payloadBytes = samples.length * 2; // two bytes per 16-bit sample
  const bytesPerSecond = (sampleRate * channelCount * bitDepth) / 8;
  const bytesPerFrame = (channelCount * bitDepth) / 8;

  const raw = new ArrayBuffer(headerBytes + payloadBytes);
  const out = new DataView(raw);

  // Sequential little-endian writers that share one cursor.
  let cursor = 0;
  const tag = (text: string): void => {
    writeString(out, cursor, text);
    cursor += text.length;
  };
  const u16 = (value: number): void => {
    out.setUint16(cursor, value, true);
    cursor += 2;
  };
  const u32 = (value: number): void => {
    out.setUint32(cursor, value, true);
    cursor += 4;
  };

  // RIFF chunk
  tag("RIFF");
  u32(36 + payloadBytes); // file size - 8
  tag("WAVE");

  // fmt chunk
  tag("fmt ");
  u32(16); // chunk size (16 for PCM)
  u16(1); // PCM format
  u16(channelCount);
  u32(sampleRate);
  u32(bytesPerSecond);
  u16(bytesPerFrame);
  u16(bitDepth);

  // data chunk
  tag("data");
  u32(payloadBytes);

  // Samples: clamp to [-1, 1], then scale asymmetrically so that -1 maps to
  // -32768 and +1 maps to 32767.
  for (const sample of samples) {
    const bounded = Math.max(-1, Math.min(1, sample));
    let scaled: number;
    if (bounded < 0) {
      scaled = bounded * 0x8000;
    } else {
      scaled = bounded * 0x7fff;
    }
    out.setInt16(cursor, Math.round(scaled), true);
    cursor += 2;
  }

  return new Blob([raw], { type: "audio/wav" });
}