From b32e8029c064822c7feb79f8b8f7b10f0ef567ea Mon Sep 17 00:00:00 2001 From: Nexus Dev Date: Sat, 4 Apr 2026 02:52:55 +0000 Subject: [PATCH] fix(37): pass voiceMode in ChatPanel handleEdit path + add verification --- .../37-web-chat-voice-ui/37-VERIFICATION.md | 200 ++++++++++++++++++ ui/src/components/ChatPanel.tsx | 2 +- 2 files changed, 201 insertions(+), 1 deletion(-) create mode 100644 .planning/phases/37-web-chat-voice-ui/37-VERIFICATION.md diff --git a/.planning/phases/37-web-chat-voice-ui/37-VERIFICATION.md b/.planning/phases/37-web-chat-voice-ui/37-VERIFICATION.md new file mode 100644 index 00000000..435c0857 --- /dev/null +++ b/.planning/phases/37-web-chat-voice-ui/37-VERIFICATION.md @@ -0,0 +1,200 @@ +--- +phase: 37-web-chat-voice-ui +verified: 2026-04-03T12:00:00Z +status: gaps_found +score: 7/8 must-haves verified +re_verification: false +gaps: + - truth: "Full voice flow works end-to-end: mic -> VAD -> transcribe -> stream -> voice badge + audio" + status: partial + reason: "One of five startStream call sites (in-place edit path in handleEdit) is missing the voiceMode argument. When a user edits a message in place (no subsequent messages), the stream is initiated without voiceMode, so voice mode is not sent to the server for that specific interaction path." + artifacts: + - path: "ui/src/components/ChatPanel.tsx" + issue: "Line 231: startStream(newContent, activeAgentId ?? undefined) — missing third voiceMode argument in the else branch of handleEdit (in-place edit, no branching)" + missing: + - "Pass voiceMode as third argument: startStream(newContent, activeAgentId ?? 
undefined, voiceMode) in the in-place edit branch of handleEdit" +human_verification: + - test: "Verify waveform renders during recording" + expected: "After clicking the mic button, an animated canvas with vertical bars appears inside the button ring during recording" + why_human: "VoiceWaveform reads from a Web Audio API AnalyserNode in a requestAnimationFrame loop — cannot verify animated canvas output programmatically" + - test: "Verify VAD auto-stop triggers transcription" + expected: "After speaking and then being silent for ~1 second, recording stops automatically, the mic button shows a spinner, and the transcribed text appears in the input field" + why_human: "Requires actual microphone input and silence detection from the Silero VAD ONNX model running in an AudioWorklet — not testable without a browser" + - test: "Verify voice_full response plays audio automatically" + expected: "In Full Voice mode, after receiving an assistant response, the audio player auto-plays and the spoken text is shown above a collapsible 'Show full response' section" + why_human: "Requires TTS (synthesize endpoint via whisper/piper binaries), browser audio playback, and localStorage autoplay toggle — not testable statically" + - test: "Verify VoiceModeToggle persists across page refresh" + expected: "Selecting the 'Full Voice' pill, refreshing the page, and re-opening chat shows 'Full Voice' still selected" + why_human: "Requires server-side nexus-settings PATCH round-trip and page reload — not testable statically" +--- + +# Phase 37: Web Chat Voice UI Verification Report + +**Phase Goal:** Users can speak to any agent in web chat — recording auto-stops on silence, a live waveform confirms the mic is active, responses play back automatically (toggleable), and voice mode is a first-class setting + +**Verified:** 2026-04-03T12:00:00Z +**Status:** gaps_found (1 minor gap + 4 human verification items) +**Re-verification:** No — initial verification + +**Note on worktrees:** All phase 37 code 
was committed to the `gsd/phase-37-web-chat-voice-ui` branch, not the current worktree branch. All verification was performed against `gsd/phase-37-web-chat-voice-ui` via `git show` and `git cat-file`. The branch exists and contains all phase commits up through `c294277b` (docs(37-04)). + +--- + +## Goal Achievement + +### Observable Truths + +| # | Truth | Status | Evidence | +|---|-------|--------|----------| +| 1 | POST /api/transcribe accepts audio upload and returns { text } | VERIFIED | `server/src/routes/voice.ts` uses multer memoryStorage, calls `voicePipelineService().transcribe()`, returns `res.json(result)` where result is `{ text: string; language?: string }` | +| 2 | POST /api/synthesize accepts { text } and returns audio/wav | VERIFIED | `server/src/routes/voice.ts` calls `voicePipelineService().synthesize(text, voiceId)`, sends buffer with `Content-Type: audio/wav` | +| 3 | Recording auto-stops on silence via VAD onSpeechEnd callback | VERIFIED | `useVadRecorder.ts` uses `useMicVAD` with `startOnLoad: false`, `onSpeechEnd: handleSpeechEnd` — handler calls `vad.pause()`, POSTs WAV to `/api/transcribe`, calls `opts.onTranscript` | +| 4 | Live waveform canvas renders animated bars during recording | VERIFIED | `VoiceWaveform.tsx` uses `canvas`, `createAnalyser()` with `fftSize=64`, `getByteFrequencyData()`, `requestAnimationFrame` loop — wired into `VoiceMicButton` recording state | +| 5 | Voice response audio plays inline with play/pause and auto-play | VERIFIED | `ChatVoicePlayer.tsx` POSTs to `/api/synthesize`, creates object URL, renders `