From c0710c7100d6091c03e72e1b7998f5b14885c46c Mon Sep 17 00:00:00 2001 From: Nexus Dev Date: Fri, 3 Apr 2026 22:29:45 +0000 Subject: [PATCH] docs(34-voice): create phase plan --- .planning/ROADMAP.md | 9 +- .planning/phases/34-voice/34-01-PLAN.md | 332 ++++++++++++++++++++ .planning/phases/34-voice/34-02-PLAN.md | 396 ++++++++++++++++++++++++ 3 files changed, 735 insertions(+), 2 deletions(-) create mode 100644 .planning/phases/34-voice/34-01-PLAN.md create mode 100644 .planning/phases/34-voice/34-02-PLAN.md diff --git a/.planning/ROADMAP.md b/.planning/ROADMAP.md index e21bb7cb..58a62af9 100644 --- a/.planning/ROADMAP.md +++ b/.planning/ROADMAP.md @@ -172,7 +172,12 @@ Plans: 1. On a CPU-only machine (no GPU), enabling Piper TTS in the assistant produces audible speech output within a reasonable time after the first synthesis (not a silent hang) 2. When Piper's WASM voice model is downloading for the first time, a visible progress indicator is shown before the TTS toggle is enabled; the download completes and TTS works without a page reload 3. The onboarding voice step offers Whisper STT and Piper TTS toggles only when the hardware detection step has confirmed sufficient capability; on hardware below the threshold, the step is skipped or shows a capability warning -**Plans**: TBD +**Plans**: 2 plans + +Plans: +- [ ] 34-01-PLAN.md — Fix /transcribe route registration, Piper TTS hook + TtsButton, voiceEnabled in nexus-settings +- [ ] 34-02-PLAN.md — VoiceStep onboarding component, wizard step insertion, PersonalAssistant voice wiring +**UI hint**: yes ### Phase 35: npx buildthis CLI **Goal**: A developer can run `npx buildthis` on a fresh machine and either open an already-running Nexus or be guided through install — with the same hardware detection and provider tiering as the web onboarding @@ -233,5 +238,5 @@ All 21 v1.5 requirements are mapped to exactly one phase. No orphans. | 31. Puter.js Zero-Config Cloud | v1.5 | 4/4 | Complete | 2026-04-03 | | 32. 
Multi-Step Onboarding Wizard | v1.5 | 1/1 | Complete | 2026-04-03 | | 33. Persistent Memory + Personal Assistant Mode | v1.5 | 3/3 | Complete | 2026-04-03 | -| 34. Voice | v1.5 | 0/TBD | Not started | - | +| 34. Voice | v1.5 | 0/2 | Not started | - | | 35. npx buildthis CLI | v1.5 | 0/TBD | Not started | - | diff --git a/.planning/phases/34-voice/34-01-PLAN.md b/.planning/phases/34-voice/34-01-PLAN.md new file mode 100644 index 00000000..8e7dac45 --- /dev/null +++ b/.planning/phases/34-voice/34-01-PLAN.md @@ -0,0 +1,332 @@ +--- +phase: 34-voice +plan: 01 +type: execute +wave: 1 +depends_on: [] +files_modified: + - server/src/app.ts + - server/src/services/nexus-settings.ts + - server/src/routes/nexus-settings.ts + - ui/src/api/hardware.ts + - ui/src/hooks/usePiperTts.ts + - ui/src/components/TtsButton.tsx +autonomous: true +requirements: + - VOICE-01 + - VOICE-02 + +must_haves: + truths: + - "POST /api/transcribe is reachable and returns 503 with descriptive error when no Whisper CLI is installed" + - "usePiperTts hook exposes prewarm/speak/status/progress and transitions idle->downloading->ready->speaking" + - "TtsButton renders a speaker icon that calls speak() and shows download progress during prewarm" + - "voiceEnabled boolean is persisted in nexus-settings.json and exposed via GET/PATCH /nexus/settings" + artifacts: + - path: "ui/src/hooks/usePiperTts.ts" + provides: "Piper TTS hook with prewarm, speak, status, progress" + exports: ["usePiperTts"] + - path: "ui/src/components/TtsButton.tsx" + provides: "Speaker button component for TTS playback" + exports: ["TtsButton"] + key_links: + - from: "server/src/app.ts" + to: "server/src/routes/chat-files.ts" + via: "api.use(chatFileRoutes(db, opts.storageService))" + pattern: "chatFileRoutes" + - from: "ui/src/hooks/usePiperTts.ts" + to: "@mintplex-labs/piper-tts-web" + via: "import { tts }" + pattern: "tts\\.download|tts\\.predict" +--- + + +Fix the broken /transcribe route registration, create the Piper TTS 
browser hook and button component, and add voiceEnabled to nexus-settings persistence. + +Purpose: VOICE-01 requires TTS on CPU-only hardware (browser WASM satisfies this). VOICE-02 requires visible download progress before first synthesis. The /transcribe route exists but is never mounted — a 1-line fix. voiceEnabled persistence is needed so onboarding voice opt-in survives sessions. + +Output: Working /api/transcribe endpoint, usePiperTts hook, TtsButton component, voiceEnabled in nexus-settings. + + + +@$HOME/.claude/get-shit-done/workflows/execute-plan.md +@$HOME/.claude/get-shit-done/templates/summary.md + + + +@.planning/PROJECT.md +@.planning/ROADMAP.md +@.planning/STATE.md +@.planning/phases/34-voice/34-RESEARCH.md + +@server/src/app.ts +@server/src/routes/chat-files.ts +@server/src/services/nexus-settings.ts +@ui/src/api/hardware.ts +@ui/src/components/VoiceRecordButton.tsx + + + + +From server/src/routes/chat-files.ts: +```typescript +export function chatFileRoutes(db: Db, storage: StorageService) { ... 
} +// POST /transcribe — accepts multipart audio, returns { text: string } or 503 +``` + +From server/src/app.ts (line 147 pattern): +```typescript +api.use(assetRoutes(db, opts.storageService)); +// chatFileRoutes uses the same (db, opts.storageService) signature +``` + +From server/src/services/nexus-settings.ts: +```typescript +export const NEXUS_MODES = ["personal_ai", "project_builder", "both"] as const; +export type NexusMode = (typeof NEXUS_MODES)[number]; +const nexusSettingsSchema = z.object({ + mode: z.enum(NEXUS_MODES).default("both"), +}); +export function nexusSettingsService() { get(), set(patch) } +``` + +From ui/src/api/hardware.ts: +```typescript +export type NexusMode = "personal_ai" | "project_builder" | "both"; +export interface NexusSettings { mode: NexusMode; } +export function fetchNexusSettings(): Promise; +export function updateNexusSettings(settings: Partial): Promise; +``` + + + + + + + Task 1: Register chatFileRoutes in app.ts and add voiceEnabled to nexus-settings + server/src/app.ts, server/src/services/nexus-settings.ts, server/src/routes/nexus-settings.ts, ui/src/api/hardware.ts + + - server/src/app.ts (full file — find insertion point after assistantHandoffRoutes) + - server/src/services/nexus-settings.ts (full file — understand schema) + - server/src/routes/nexus-settings.ts (full file — understand PATCH handler) + - ui/src/api/hardware.ts (full file — understand client types) + + +**1. Register chatFileRoutes in app.ts:** +- Add import at top with other route imports: `import { chatFileRoutes } from "./routes/chat-files.js";` +- Add `api.use(chatFileRoutes(db, opts.storageService));` after the `api.use(assistantHandoffRoutes(db));` line (around line 161). Mirror the `assetRoutes(db, opts.storageService)` pattern exactly. +- Do NOT place it before boardMutationGuard — the /transcribe route calls assertBoard(req) and needs to be inside the guarded api sub-router. + +**2. 
Add voiceEnabled to nexusSettingsSchema (server/src/services/nexus-settings.ts):** +- Add `voiceEnabled: z.boolean().default(false)` to the nexusSettingsSchema z.object. +- This is a file-backed JSON field, NOT a DB migration — acceptable under the "no DB schema changes" constraint. + +**3. Update NexusSettings type on client (ui/src/api/hardware.ts):** +- Add `voiceEnabled?: boolean` to the `NexusSettings` interface. +- No changes to API functions needed — they already handle Partial. + +**4. Check nexus-settings route handler (server/src/routes/nexus-settings.ts):** +- Read the file. The PATCH handler should already forward arbitrary fields to `nexusSettingsService().set(patch)` since it uses the Zod schema. If it manually picks fields, add voiceEnabled to the pick list. If it passes req.body through, no change needed. + + + cd /opt/nexus && npx vitest run server/src/__tests__/chat-file-routes.test.ts 2>&1 | tail -5 + + + - grep -q "chatFileRoutes" server/src/app.ts returns 0 + - grep -q "voiceEnabled" server/src/services/nexus-settings.ts returns 0 + - grep -q "voiceEnabled" ui/src/api/hardware.ts returns 0 + + POST /api/transcribe is reachable (returns 503 when no Whisper CLI installed, not 404). voiceEnabled persists in nexus-settings.json via the existing settings route. + + + + Task 2: Create usePiperTts hook and TtsButton component + ui/src/hooks/usePiperTts.ts, ui/src/components/TtsButton.tsx + + - ui/src/components/VoiceRecordButton.tsx (reference for button style patterns) + - ui/src/components/ui/button.tsx (Button component API) + + +**0. Install piper-tts-web:** +```bash +pnpm --filter @paperclipai/ui add @mintplex-labs/piper-tts-web +``` + +**1. 
Create ui/src/hooks/usePiperTts.ts:** +```typescript +import { useState, useCallback, useRef } from "react"; +import { tts } from "@mintplex-labs/piper-tts-web"; + +const DEFAULT_VOICE = "en_US-hfc_female-medium"; + +export type TtsStatus = "idle" | "downloading" | "ready" | "speaking" | "error"; + +export function usePiperTts() { + const [status, setStatus] = useState<TtsStatus>("idle"); + const [progress, setProgress] = useState(0); + const audioRef = useRef<HTMLAudioElement | null>(null); + + const prewarm = useCallback(async () => { + if (status === "ready" || status === "downloading") return; + setStatus("downloading"); + setProgress(0); + try { + const stored = await tts.stored(); + if (!stored.includes(DEFAULT_VOICE)) { + await tts.download(DEFAULT_VOICE, (p: { loaded: number; total: number }) => { + setProgress(Math.round((p.loaded / p.total) * 100)); + }); + } + setStatus("ready"); + setProgress(100); + } catch { + setStatus("error"); + } + }, [status]); + + const speak = useCallback(async (text: string) => { + if (status !== "ready") return; + // Stop any currently playing audio + if (audioRef.current) { + audioRef.current.pause(); + audioRef.current = null; + } + setStatus("speaking"); + try { + const wav = await tts.predict({ text, voiceId: DEFAULT_VOICE }); + const audio = new Audio(URL.createObjectURL(wav)); + audioRef.current = audio; + audio.onended = () => { + audioRef.current = null; + setStatus("ready"); + }; + audio.onerror = () => { + audioRef.current = null; + setStatus("ready"); + }; + await audio.play(); + } catch { + setStatus("ready"); + } + }, [status]); + + const stop = useCallback(() => { + if (audioRef.current) { + audioRef.current.pause(); + audioRef.current = null; + } + if (status === "speaking") setStatus("ready"); + }, [status]); + + return { status, progress, prewarm, speak, stop }; +} +``` + +Key points: +- `tts.stored()` lists the voice models already cached in the browser's Origin Private File System (OPFS) — skips download if the model is already present (VOICE-02). 
+- `tts.download()` with progress callback provides visible download progress (VOICE-02). +- `tts.predict()` resolves to a WAV `Blob` (not a URL) — wrap it with `URL.createObjectURL(wav)` before passing it to `new Audio(...)` and calling `.play()` (VOICE-01, CPU-safe WASM). +- `stop()` allows interrupting playback. +- Do NOT import this in any server-side or test file running in Node — browser-only. + +**2. Create ui/src/components/TtsButton.tsx:** +```typescript +import { Volume2, VolumeX, Loader2 } from "lucide-react"; +import { Button } from "./ui/button"; +import type { TtsStatus } from "../hooks/usePiperTts"; + +interface TtsButtonProps { + status: TtsStatus; + progress: number; + onSpeak: () => void; + onStop: () => void; + onPrewarm: () => void; + disabled?: boolean; +} + +export function TtsButton({ status, progress, onSpeak, onStop, onPrewarm, disabled }: TtsButtonProps) { + if (status === "downloading") { + return ( + + ); + } + + if (status === "speaking") { + return ( + + ); + } + + // idle or error: clicking triggers prewarm only (voice download); a later click, once ready, speaks + // ready: clicking triggers speak directly + const handleClick = () => { + if (status === "ready") { + onSpeak(); + } else { + onPrewarm(); + } + }; + + return ( + + ); +} +``` + +The TtsButton receives status/progress from the hook and delegates actions. It does NOT import piper-tts-web directly — all TTS logic stays in the hook. The button is reusable: PersonalAssistant (Plan 02) will place it next to assistant messages. 
+ + + cd /opt/nexus && grep -q "usePiperTts" ui/src/hooks/usePiperTts.ts && grep -q "TtsButton" ui/src/components/TtsButton.tsx && grep -q "piper-tts-web" ui/package.json 2>/dev/null || grep -q "piper-tts-web" pnpm-lock.yaml && echo "PASS" || echo "FAIL" + + + - grep -q "tts.download" ui/src/hooks/usePiperTts.ts returns 0 + - grep -q "tts.predict" ui/src/hooks/usePiperTts.ts returns 0 + - grep -q "tts.stored" ui/src/hooks/usePiperTts.ts returns 0 + - grep -q "TtsButton" ui/src/components/TtsButton.tsx returns 0 + - grep -q "piper-tts-web" pnpm-lock.yaml returns 0 + - grep -q "Volume2" ui/src/components/TtsButton.tsx returns 0 + + usePiperTts hook handles download progress (VOICE-02) and CPU-safe WASM synthesis (VOICE-01). TtsButton shows download progress during prewarm and speaker icon for playback. piper-tts-web is installed as a UI dependency. + + + + + +- `grep -q "chatFileRoutes" server/src/app.ts` — route is registered +- `grep -q "voiceEnabled" server/src/services/nexus-settings.ts` — settings schema extended +- `ls ui/src/hooks/usePiperTts.ts ui/src/components/TtsButton.tsx` — both files exist +- `npx vitest run server/src/__tests__/chat-file-routes.test.ts` — existing route tests pass + + + +1. POST /api/transcribe returns 503 (not 404) when no Whisper CLI is installed — route is mounted +2. usePiperTts hook exports prewarm(), speak(), stop(), status, progress +3. TtsButton renders download progress during prewarm and speaker icon for playback +4. 
voiceEnabled persists in nexus-settings.json + + + +After completion, create `.planning/phases/34-voice/34-01-SUMMARY.md` + diff --git a/.planning/phases/34-voice/34-02-PLAN.md b/.planning/phases/34-voice/34-02-PLAN.md new file mode 100644 index 00000000..07c363ba --- /dev/null +++ b/.planning/phases/34-voice/34-02-PLAN.md @@ -0,0 +1,396 @@ +--- +phase: 34-voice +plan: 02 +type: execute +wave: 2 +depends_on: ["34-01"] +files_modified: + - ui/src/components/onboarding/VoiceStep.tsx + - ui/src/components/NexusOnboardingWizard.tsx + - ui/src/pages/PersonalAssistant.tsx +autonomous: true +requirements: + - VOICE-03 + +must_haves: + truths: + - "Onboarding wizard shows a voice opt-in step (step 4) with enable/skip buttons" + - "Step numbering shifts correctly: rootDir is step 5, summary is step 6" + - "VoiceRecordButton appears in PersonalAssistant input bar for STT" + - "TtsButton appears next to assistant messages in PersonalAssistant for TTS playback" + - "Voice step detects microphone availability and shows appropriate messaging" + artifacts: + - path: "ui/src/components/onboarding/VoiceStep.tsx" + provides: "Voice opt-in onboarding step" + exports: ["VoiceStep"] + - path: "ui/src/components/NexusOnboardingWizard.tsx" + provides: "6-step wizard with voice at step 4" + - path: "ui/src/pages/PersonalAssistant.tsx" + provides: "Full-page assistant chat with voice input and TTS" + key_links: + - from: "ui/src/components/NexusOnboardingWizard.tsx" + to: "ui/src/components/onboarding/VoiceStep.tsx" + via: "import and render at step 4" + pattern: "VoiceStep" + - from: "ui/src/pages/PersonalAssistant.tsx" + to: "ui/src/components/VoiceRecordButton.tsx" + via: "import and render in input bar" + pattern: "VoiceRecordButton" + - from: "ui/src/pages/PersonalAssistant.tsx" + to: "ui/src/hooks/usePiperTts.ts" + via: "usePiperTts hook for TTS playback" + pattern: "usePiperTts" +--- + + +Add the onboarding voice step and wire VoiceRecordButton + TtsButton into PersonalAssistant. 
+ +Purpose: VOICE-03 requires voice features offered during onboarding based on hardware capability. The PersonalAssistant is the primary chat surface for v1.5 and must have both STT (VoiceRecordButton) and TTS (TtsButton) controls. + +Output: VoiceStep component, updated 6-step wizard, PersonalAssistant with voice I/O. + + + +@$HOME/.claude/get-shit-done/workflows/execute-plan.md +@$HOME/.claude/get-shit-done/templates/summary.md + + + +@.planning/PROJECT.md +@.planning/ROADMAP.md +@.planning/STATE.md +@.planning/phases/34-voice/34-RESEARCH.md +@.planning/phases/34-voice/34-01-SUMMARY.md + +@ui/src/components/NexusOnboardingWizard.tsx +@ui/src/pages/PersonalAssistant.tsx +@ui/src/components/VoiceRecordButton.tsx + + + + +From ui/src/hooks/usePiperTts.ts (created in Plan 01): +```typescript +export type TtsStatus = "idle" | "downloading" | "ready" | "speaking" | "error"; +export function usePiperTts(): { + status: TtsStatus; + progress: number; + prewarm: () => Promise; + speak: (text: string) => Promise; + stop: () => void; +}; +``` + +From ui/src/components/TtsButton.tsx (created in Plan 01): +```typescript +interface TtsButtonProps { + status: TtsStatus; + progress: number; + onSpeak: () => void; + onStop: () => void; + onPrewarm: () => void; + disabled?: boolean; +} +export function TtsButton(props: TtsButtonProps): JSX.Element; +``` + +From ui/src/api/hardware.ts (updated in Plan 01): +```typescript +export interface NexusSettings { + mode: NexusMode; + voiceEnabled?: boolean; +} +export function updateNexusSettings(settings: Partial): Promise; +``` + +From ui/src/components/VoiceRecordButton.tsx (existing): +```typescript +interface VoiceRecordButtonProps { + onTranscription: (text: string) => void; + disabled?: boolean; +} +export function VoiceRecordButton(props: VoiceRecordButtonProps): JSX.Element; +``` + +From ui/src/components/NexusOnboardingWizard.tsx (existing step structure): +- Step 1: hardware detection +- Step 2: mode selection +- Step 3: provider 
selection +- Step 4: root directory (will become step 5) +- Step 5: summary (will become step 6) +- Step state: `const [step, setStep] = useState(1);` +- Label: `{step === 5 ? "Summary" : \`Step ${step} of 4\`}` + + + + + + + Task 1: Create VoiceStep component and insert into NexusOnboardingWizard as step 4 + ui/src/components/onboarding/VoiceStep.tsx, ui/src/components/NexusOnboardingWizard.tsx + + - ui/src/components/NexusOnboardingWizard.tsx (full file — understand all setStep calls and step rendering) + - ui/src/components/onboarding/ModeSelector.tsx (reference for onboarding step component patterns) + - ui/src/components/onboarding/HardwareSummaryStep.tsx (reference for step component patterns) + - ui/src/api/hardware.ts (NexusSettings type with voiceEnabled) + + +**1. Create ui/src/components/onboarding/VoiceStep.tsx:** + +```tsx +import { useEffect, useState } from "react"; +import { Mic, Volume2 } from "lucide-react"; +import { Button } from "@/components/ui/button"; + +interface VoiceStepProps { + onEnable: () => void; + onSkip: () => void; +} + +export function VoiceStep({ onEnable, onSkip }: VoiceStepProps) { + const [micAvailable, setMicAvailable] = useState(null); + + useEffect(() => { + navigator.mediaDevices?.enumerateDevices() + .then(devices => setMicAvailable(devices.some(d => d.kind === "audioinput"))) + .catch(() => setMicAvailable(false)); + }, []); + + return ( +
+
+
+ +
+

Speech-to-Text (Whisper)

+

+ {micAvailable === false + ? "No microphone detected — unavailable" + : micAvailable === true + ? "Microphone detected — speak to your assistant" + : "Checking microphone..."} +

+
+
+ +
+ +
+

Text-to-Speech (Piper)

+

+ Hear responses read aloud. Runs entirely on your device — no server needed. +

+
+
+
+ +
+ + +
+
+ ); +} +``` + +**2. Update NexusOnboardingWizard.tsx — insert step 4 (voice), shift steps:** + +This is a precise step-number shift. Do a full audit of all `setStep(N)` calls and update: + +a. Add imports at top: + - `import { VoiceStep } from "./onboarding/VoiceStep";` + - `import { updateNexusSettings } from "../api/hardware";` (already imported) + +b. Add voiceEnabled state: + ```typescript + const [voiceEnabled, setVoiceEnabled] = useState(false); + ``` + +c. Step number shift — ALL occurrences: + - Old step 4 (rootDir) becomes step 5 + - Old step 5 (summary) becomes step 6 + - Every `setStep(4)` that meant "go to rootDir" becomes `setStep(5)` — EXCEPT the step 3 (provider) calls covered by the last bullet of this list + - Every `setStep(5)` that meant "go to summary" becomes `setStep(6)` + - The Back button on old step 4 (rootDir) that said `setStep(3)` becomes `setStep(4)` (back to voice) + - Old step 3 (provider) onSkip/onContinue `setStep(4)` stays `setStep(4)` (it now goes to voice instead of rootDir) — no change needed here since 4 IS the voice step + +d. Step indicator label: + - Change `{step === 5 ? "Summary" : \`Step ${step} of 4\`}` to `{step === 6 ? "Summary" : \`Step ${step} of 5\`}` + +e. Reset voiceEnabled in the cleanup useEffect: + - Add `setVoiceEnabled(false);` alongside other resets + +f. Add step 4 rendering block (voice) — insert between step 3 and the rootDir step: + ```tsx + {/* Step 4 — Voice */} + {step === 4 && ( + <> +
+

+ Voice features +

+

+ Speak to your assistant and hear responses read aloud. Runs entirely on your device. +

+
+ + { + setVoiceEnabled(true); + setStep(5); + }} + onSkip={() => setStep(5)} + /> + + + + )} + ``` + +g. Persist voiceEnabled in createWorkspace() — add after the existing mode save: + ```typescript + // Persist voice preference — non-blocking + if (voiceEnabled) { + try { + await updateNexusSettings({ voiceEnabled: true }); + } catch { + // Non-blocking + } + } + ``` + +h. Update old step 4 comment to say "Step 5 — Root Directory (was step 4)" + Update old step 5 comment to say "Step 6 — Summary (was step 5)" + +**Step number audit checklist (verify each):** +- Step 3 provider: onSkip → `setStep(4)` (voice) -- was already 4, now means voice +- Step 3 provider: onContinue → `setStep(4)` (voice) -- same +- Step 4 (NEW voice): Enable → `setStep(5)`, Skip → `setStep(5)`, Back → `setStep(3)` +- Step 5 (was 4, rootDir): "Review & finish" → `setStep(6)`, Back → `setStep(4)` (voice), "Skip to summary" → `setStep(6)` +- Step 6 (was 5, summary): onBack → `setStep(5)` (rootDir) +- Step rendering: `{step === 4 && ...}` for rootDir becomes `{step === 5 && ...}` +- Step rendering: `{step === 5 && ...}` for summary becomes `{step === 6 && ...}` +
+ + cd /opt/nexus && grep -c "setStep" ui/src/components/NexusOnboardingWizard.tsx && grep -q "VoiceStep" ui/src/components/NexusOnboardingWizard.tsx && grep -q "step === 4" ui/src/components/NexusOnboardingWizard.tsx && grep -q "Step 4" ui/src/components/NexusOnboardingWizard.tsx && grep -q "VoiceStep" ui/src/components/onboarding/VoiceStep.tsx && echo "PASS" || echo "FAIL" + + + - grep -q "VoiceStep" ui/src/components/onboarding/VoiceStep.tsx returns 0 + - grep -q "VoiceStep" ui/src/components/NexusOnboardingWizard.tsx returns 0 + - grep -q "step === 6" ui/src/components/NexusOnboardingWizard.tsx returns 0 (summary is now step 6) + - grep -q "Step.*of 5" ui/src/components/NexusOnboardingWizard.tsx returns 0 (label updated from "of 4") + - grep -q "voiceEnabled" ui/src/components/NexusOnboardingWizard.tsx returns 0 + - grep -q "enumerateDevices" ui/src/components/onboarding/VoiceStep.tsx returns 0 + + Onboarding wizard has 6 steps with voice at step 4. VoiceStep probes mic availability and offers enable/skip. voiceEnabled is persisted on workspace creation. All setStep() calls use correct updated numbers. +
+ + + Task 2: Wire VoiceRecordButton and TtsButton into PersonalAssistant + ui/src/pages/PersonalAssistant.tsx + + - ui/src/pages/PersonalAssistant.tsx (full file — understand input bar and message rendering) + - ui/src/components/VoiceRecordButton.tsx (props interface) + - ui/src/components/TtsButton.tsx (props interface, from Plan 01) + - ui/src/hooks/usePiperTts.ts (hook API, from Plan 01) + + +**1. Add imports to PersonalAssistant.tsx:** +```typescript +import { VoiceRecordButton } from "@/components/VoiceRecordButton"; +import { TtsButton } from "@/components/TtsButton"; +import { usePiperTts } from "../hooks/usePiperTts"; +import { Volume2 } from "lucide-react"; +``` + +**2. Add usePiperTts hook in PersonalAssistant component body:** +```typescript +const { status: ttsStatus, progress: ttsProgress, prewarm, speak, stop } = usePiperTts(); +``` + +**3. Add VoiceRecordButton to the input bar:** +In the input bar section (`{selectedConvId && (
...
)}`), add VoiceRecordButton inside the `
` container, between the textarea and Send button: + +```tsx +<VoiceRecordButton onTranscription={(text) => setInputValue((prev) => prev ? prev + " " + text : text)} + disabled={isSending} +/> +``` + +The onTranscription callback appends transcribed text to the input field (does not auto-send). This lets users review before sending. + +**4. Add TtsButton next to assistant messages (inline in the message list, NOT inside MessageBubble):** +Do NOT modify the MessageBubble component: giving it an optional `onSpeak` callback would mean prop drilling the hook through it. + +Instead, add the TtsButton inline where messages are rendered. In the messages.map section, render a small TTS button after each assistant message: + +```tsx +{messages.map((msg) => ( +
+ + {msg.role === "assistant" && msg.content && ( +
+ speak(msg.content)} + onStop={stop} + onPrewarm={prewarm} + /> +
+ )} +
+))} +``` + +The `pl-10` aligns the button under the message bubble (past the avatar). The `-mt-1 mb-1` tucks it close. + +**5. Auto-prewarm TTS when PersonalAssistant mounts (optional optimization):** +Do NOT auto-prewarm. Let the user trigger it on first click of any TtsButton. This avoids unexpected downloads. + + + cd /opt/nexus && grep -q "VoiceRecordButton" ui/src/pages/PersonalAssistant.tsx && grep -q "TtsButton" ui/src/pages/PersonalAssistant.tsx && grep -q "usePiperTts" ui/src/pages/PersonalAssistant.tsx && echo "PASS" || echo "FAIL" + + + - grep -q "VoiceRecordButton" ui/src/pages/PersonalAssistant.tsx returns 0 + - grep -q "onTranscription" ui/src/pages/PersonalAssistant.tsx returns 0 + - grep -q "TtsButton" ui/src/pages/PersonalAssistant.tsx returns 0 + - grep -q "usePiperTts" ui/src/pages/PersonalAssistant.tsx returns 0 + - grep -q "speak" ui/src/pages/PersonalAssistant.tsx returns 0 + + PersonalAssistant has VoiceRecordButton in the input bar (STT via /api/transcribe) and TtsButton next to each assistant message (TTS via Piper WASM). Voice input appends to textarea for review before sending. + + + + + +- `grep -q "VoiceStep" ui/src/components/NexusOnboardingWizard.tsx` — voice step integrated +- `grep -q "step === 6" ui/src/components/NexusOnboardingWizard.tsx` — summary correctly at step 6 +- `grep -q "VoiceRecordButton" ui/src/pages/PersonalAssistant.tsx` — STT wired +- `grep -q "TtsButton" ui/src/pages/PersonalAssistant.tsx` — TTS wired +- `grep -q "enumerateDevices" ui/src/components/onboarding/VoiceStep.tsx` — mic detection + + + +1. Onboarding wizard has voice at step 4 with mic detection and enable/skip (VOICE-03) +2. Steps 5 (rootDir) and 6 (summary) work with correct Back/Continue navigation +3. PersonalAssistant has VoiceRecordButton for STT input +4. PersonalAssistant has TtsButton for TTS playback on assistant messages +5. 
voiceEnabled preference is persisted when user enables voice during onboarding + + + +After completion, create `.planning/phases/34-voice/34-02-SUMMARY.md` +