From a50daa2129f75b970a5c772eeb816eda63836c7f Mon Sep 17 00:00:00 2001 From: Nexus Dev Date: Sat, 4 Apr 2026 02:38:38 +0000 Subject: [PATCH] docs(37-03): complete voice output components plan --- .planning/REQUIREMENTS.md | 4 +- .planning/ROADMAP.md | 2 +- .planning/STATE.md | 14 +- .../37-web-chat-voice-ui/37-03-SUMMARY.md | 120 ++++++++++++++++++ 4 files changed, 131 insertions(+), 9 deletions(-) create mode 100644 .planning/phases/37-web-chat-voice-ui/37-03-SUMMARY.md diff --git a/.planning/REQUIREMENTS.md b/.planning/REQUIREMENTS.md index a73dec3d..848e980c 100644 --- a/.planning/REQUIREMENTS.md +++ b/.planning/REQUIREMENTS.md @@ -23,7 +23,7 @@ - [x] **WCHAT-03**: Real-time waveform/amplitude visualization displays while recording - [x] **WCHAT-04**: Voice response audio plays inline in chat message with audio player controls - [x] **WCHAT-05**: User can toggle voice mode: text only / voice input only / full voice (input + output) -- [ ] **WCHAT-06**: Auto-play of voice responses is configurable (on/off in settings) +- [x] **WCHAT-06**: Auto-play of voice responses is configurable (on/off in settings) ### Telegram Bridge @@ -85,7 +85,7 @@ | WCHAT-03 | Phase 37 | Complete | | WCHAT-04 | Phase 37 | Complete | | WCHAT-05 | Phase 37 | Complete | -| WCHAT-06 | Phase 37 | Pending | +| WCHAT-06 | Phase 37 | Complete | | TGRAM-01 | Phase 38 | Pending | | TGRAM-02 | Phase 38 | Pending | | TGRAM-03 | Phase 38 | Pending | diff --git a/.planning/ROADMAP.md b/.planning/ROADMAP.md index 94a19671..877dbb27 100644 --- a/.planning/ROADMAP.md +++ b/.planning/ROADMAP.md @@ -222,6 +222,6 @@ All 23 v1.6 requirements are mapped to exactly one phase. No orphans. | 34. Voice | v1.5 | 2/2 | Complete | 2026-04-03 | | 35. npx buildthis CLI | v1.5 | 1/1 | Complete | 2026-04-03 | | 36. Voice Pipeline Foundation | v1.6 | 2/3 | Complete | 2026-04-04 | -| 37. Web Chat Voice UI | v1.6 | 2/4 | In Progress| | +| 37. Web Chat Voice UI | v1.6 | 3/4 | In Progress| | | 38. Telegram Bridge | v1.6 | 0/TBD | Not started | - | | 39. Voice Polish | v1.6 | 0/TBD | Not started | - | diff --git a/.planning/STATE.md b/.planning/STATE.md index 08b0bd69..0b0d37be 100644 --- a/.planning/STATE.md +++ b/.planning/STATE.md @@ -3,14 +3,14 @@ gsd_state_version: 1.0 milestone: v1.6 milestone_name: Voice Pipeline + Minimal Message Bridge status: executing -stopped_at: Completed 37-02-PLAN.md — Voice Recording Components (encodeWav, useVadRecorder, useVoiceMode, VoiceWaveform, VoiceMicButton) -last_updated: "2026-04-04T02:37:14.447Z" +stopped_at: Completed 37-03-PLAN.md — Voice Output Components (ChatVoicePlayer, ChatVoiceBadge, VoiceModeToggle) +last_updated: "2026-04-04T02:38:29.166Z" last_activity: 2026-04-04 progress: total_phases: 4 completed_phases: 1 total_plans: 7 - completed_plans: 5 + completed_plans: 6 percent: 0 --- @@ -26,7 +26,7 @@ See: .planning/PROJECT.md (updated 2026-04-03) ## Current Position Phase: 37 (web-chat-voice-ui) — EXECUTING -Plan: 3 of 4 +Plan: 4 of 4 Status: Ready to execute Last activity: 2026-04-04 @@ -63,6 +63,8 @@ Key constraints for v1.6: - [Phase 37]: VAD ONNX assets served from ui/public/ same-origin to avoid COEP blocking CDN-served binary files - [Phase 37]: useVadRecorder requests separate MediaStream ref for VoiceWaveform AnalyserNode — useMicVAD manages its own stream internally - [Phase 37]: AudioContext not closed on cleanup in VoiceWaveform — reused across recording cycles to avoid repeated autoplay unlock prompts +- [Phase 37]: useVoiceMode hook created in plan 37-03 to unblock VoiceModeToggle during parallel execution +- [Phase 37]: Auto-play preference stored in localStorage (nexus:voice:autoplay), not nexus-settings — avoids server round-trip for fast UX ### Pending Todos @@ -76,6 +78,6 @@ None yet. ## Session Continuity -Last session: 2026-04-04T02:37:14.444Z -Stopped at: Completed 37-02-PLAN.md — Voice Recording Components (encodeWav, useVadRecorder, useVoiceMode, VoiceWaveform, VoiceMicButton) +Last session: 2026-04-04T02:38:29.163Z +Stopped at: Completed 37-03-PLAN.md — Voice Output Components (ChatVoicePlayer, ChatVoiceBadge, VoiceModeToggle) Resume file: None diff --git a/.planning/phases/37-web-chat-voice-ui/37-03-SUMMARY.md b/.planning/phases/37-web-chat-voice-ui/37-03-SUMMARY.md new file mode 100644 index 00000000..40b92467 --- /dev/null +++ b/.planning/phases/37-web-chat-voice-ui/37-03-SUMMARY.md @@ -0,0 +1,120 @@ +--- +phase: 37-web-chat-voice-ui +plan: 03 +subsystem: ui +tags: [react, voice, audio, shadcn, lucide-react, localStorage, tailwind] + +# Dependency graph +requires: + - phase: 36-voice-pipeline-foundation + provides: POST /api/synthesize and POST /api/transcribe endpoints; voiceMode persisted via nexus-settings + - phase: 37-01 + provides: COOP/COEP headers for SharedArrayBuffer; VAD ONNX assets +provides: + - ChatVoicePlayer: inline audio player with play/pause, auto-play, blob URL cleanup + - ChatVoiceBadge: Voice badge + SPOKEN/DETAILED parsing + collapsible full markdown + - VoiceModeToggle: three-pill Text/Voice In/Full Voice toggle with nexus-settings persistence + - useVoiceMode: hook for reading/writing voiceMode via PATCH /api/nexus/settings +affects: [37-04, ChatMessage integration, ChatInput integration] + +# Tech tracking +tech-stack: + added: [] + patterns: + - Blob URL lifecycle: createObjectURL on fetch response, revokeObjectURL in onEnded and cleanup + - SPOKEN/DETAILED regex parsing for dual-format voice responses + - Three-pill toggle pattern with active/inactive Tailwind classes + role="group" + aria-label + +key-files: + created: + - ui/src/components/ChatVoicePlayer.tsx + - ui/src/components/ChatVoiceBadge.tsx + - ui/src/components/VoiceModeToggle.tsx + - ui/src/hooks/useVoiceMode.ts + modified: [] + +key-decisions: + - "useVoiceMode hook created in plan 37-03 (not 37-02) to unblock VoiceModeToggle — parallel execution" + - "ChatVoicePlayer fetches audio on text change, not on play — pre-loads audio for smoother UX" + - "Auto-play preference stored in localStorage (nexus:voice:autoplay), not nexus-settings — avoids server round-trip for fast UX" + +patterns-established: + - "Pattern 1: blob URL cleanup — always revokeObjectURL in both onEnded and effect cleanup to prevent memory leaks" + - "Pattern 2: voice badge renders differently per messageType — voice_input shows badge+text only; voice_full adds player+collapsible" + +requirements-completed: [WCHAT-04, WCHAT-05, WCHAT-06] + +# Metrics +duration: 2min +completed: 2026-04-04 +--- + +# Phase 37 Plan 03: Voice Output Components Summary + +**Inline audio player (ChatVoicePlayer), voice badge with collapsible markdown (ChatVoiceBadge), and three-pill mode toggle (VoiceModeToggle) — complete output-side voice UI** + +## Performance + +- **Duration:** 2 min +- **Started:** 2026-04-04T02:35:25Z +- **Completed:** 2026-04-04T02:37:21Z +- **Tasks:** 2 +- **Files modified:** 4 + +## Accomplishments +- ChatVoicePlayer: POST /api/synthesize → blob URL → native `