nexus/.planning/milestones/v1.5-phases/34-voice/34-02-PLAN.md

---
phase: 34-voice
plan: 02
type: execute
wave: 2
depends_on: ["34-01"]
files_modified:
  - ui/src/components/onboarding/VoiceStep.tsx
  - ui/src/components/NexusOnboardingWizard.tsx
  - ui/src/pages/PersonalAssistant.tsx
autonomous: true
requirements:
  - VOICE-03

must_haves:
  truths:
    - "Onboarding wizard shows a voice opt-in step (step 4) with enable/skip buttons"
    - "Step numbering shifts correctly: rootDir is step 5, summary is step 6"
    - "VoiceRecordButton appears in PersonalAssistant input bar for STT"
    - "TtsButton appears next to assistant messages in PersonalAssistant for TTS playback"
    - "Voice step detects microphone availability and shows appropriate messaging"
  artifacts:
    - path: "ui/src/components/onboarding/VoiceStep.tsx"
      provides: "Voice opt-in onboarding step"
      exports: ["VoiceStep"]
    - path: "ui/src/components/NexusOnboardingWizard.tsx"
      provides: "6-step wizard with voice at step 4"
    - path: "ui/src/pages/PersonalAssistant.tsx"
      provides: "Full-page assistant chat with voice input and TTS"
  key_links:
    - from: "ui/src/components/NexusOnboardingWizard.tsx"
      to: "ui/src/components/onboarding/VoiceStep.tsx"
      via: "import and render at step 4"
      pattern: "VoiceStep"
    - from: "ui/src/pages/PersonalAssistant.tsx"
      to: "ui/src/components/VoiceRecordButton.tsx"
      via: "import and render in input bar"
      pattern: "VoiceRecordButton"
    - from: "ui/src/pages/PersonalAssistant.tsx"
      to: "ui/src/hooks/usePiperTts.ts"
      via: "usePiperTts hook for TTS playback"
      pattern: "usePiperTts"
---

<objective>
Add the onboarding voice step and wire VoiceRecordButton + TtsButton into PersonalAssistant.

Purpose: VOICE-03 requires voice features offered during onboarding based on hardware capability. The PersonalAssistant is the primary chat surface for v1.5 and must have both STT (VoiceRecordButton) and TTS (TtsButton) controls.

Output: VoiceStep component, updated 6-step wizard, PersonalAssistant with voice I/O.
</objective>

<execution_context>
@$HOME/.claude/get-shit-done/workflows/execute-plan.md
@$HOME/.claude/get-shit-done/templates/summary.md
</execution_context>

<context>
@.planning/PROJECT.md
@.planning/ROADMAP.md
@.planning/STATE.md
@.planning/phases/34-voice/34-RESEARCH.md
@.planning/phases/34-voice/34-01-SUMMARY.md

@ui/src/components/NexusOnboardingWizard.tsx
@ui/src/pages/PersonalAssistant.tsx
@ui/src/components/VoiceRecordButton.tsx

<interfaces>
<!-- From Plan 01 outputs -->

From ui/src/hooks/usePiperTts.ts (created in Plan 01):
```typescript
export type TtsStatus = "idle" | "downloading" | "ready" | "speaking" | "error";
export function usePiperTts(): {
  status: TtsStatus;
  progress: number;
  prewarm: () => Promise<void>;
  speak: (text: string) => Promise<void>;
  stop: () => void;
};
```

From ui/src/components/TtsButton.tsx (created in Plan 01):
```typescript
interface TtsButtonProps {
  status: TtsStatus;
  progress: number;
  onSpeak: () => void;
  onStop: () => void;
  onPrewarm: () => void;
  disabled?: boolean;
}
export function TtsButton(props: TtsButtonProps): JSX.Element;
```

From ui/src/api/hardware.ts (updated in Plan 01):
```typescript
export interface NexusSettings {
  mode: NexusMode;
  voiceEnabled?: boolean;
}
export function updateNexusSettings(settings: Partial<NexusSettings>): Promise<NexusSettings>;
```

From ui/src/components/VoiceRecordButton.tsx (existing):
```typescript
interface VoiceRecordButtonProps {
  onTranscription: (text: string) => void;
  disabled?: boolean;
}
export function VoiceRecordButton(props: VoiceRecordButtonProps): JSX.Element;
```

From ui/src/components/NexusOnboardingWizard.tsx (existing step structure):
- Step 1: hardware detection
- Step 2: mode selection
- Step 3: provider selection
- Step 4: root directory (will become step 5)
- Step 5: summary (will become step 6)
- Step state: `const [step, setStep] = useState(1);`
- Label: `{step === 5 ? "Summary" : \`Step ${step} of 4\`}`
</interfaces>
</context>

<tasks>

<task type="auto">
  <name>Task 1: Create VoiceStep component and insert into NexusOnboardingWizard as step 4</name>
  <files>ui/src/components/onboarding/VoiceStep.tsx, ui/src/components/NexusOnboardingWizard.tsx</files>
  <read_first>
    - ui/src/components/NexusOnboardingWizard.tsx (full file — understand all setStep calls and step rendering)
    - ui/src/components/onboarding/ModeSelector.tsx (reference for onboarding step component patterns)
    - ui/src/components/onboarding/HardwareSummaryStep.tsx (reference for step component patterns)
    - ui/src/api/hardware.ts (NexusSettings type with voiceEnabled)
  </read_first>
  <action>
**1. Create ui/src/components/onboarding/VoiceStep.tsx:**

```tsx
import { useEffect, useState } from "react";
import { Mic, Volume2 } from "lucide-react";
import { Button } from "@/components/ui/button";

/** Callbacks supplied by the onboarding wizard to the voice opt-in step. */
interface VoiceStepProps {
  /** Invoked when the user clicks the "Enable voice" button. */
  onEnable: () => void;
  /** Invoked when the user clicks the "Skip" button. */
  onSkip: () => void;
}

/**
 * Onboarding step that presents the voice features (speech-to-text and
 * text-to-speech) and lets the user enable or skip them.
 *
 * On mount it probes for an audio input device via
 * `navigator.mediaDevices.enumerateDevices()` and shows one of three
 * messages: checking, microphone detected, or no microphone detected.
 *
 * @param onEnable - Called when the user clicks "Enable voice".
 * @param onSkip - Called when the user clicks "Skip".
 */
export function VoiceStep({ onEnable, onSkip }: VoiceStepProps) {
  // null = probe still running; true/false = probe finished.
  const [micAvailable, setMicAvailable] = useState<boolean | null>(null);

  useEffect(() => {
    // Guards against setting state after the step has been unmounted
    // (e.g. the user clicks Back before the probe resolves).
    let cancelled = false;

    // `navigator.mediaDevices` is undefined in insecure (non-HTTPS,
    // non-localhost) contexts and in some older browsers. Without this guard
    // an optional-chained call would short-circuit to `undefined`, neither
    // `.then` nor `.catch` would run, and the UI would be stuck on
    // "Checking microphone..." forever.
    const mediaDevices: MediaDevices | undefined = navigator.mediaDevices;
    if (!mediaDevices) {
      setMicAvailable(false);
      return;
    }

    mediaDevices
      .enumerateDevices()
      .then((devices) => {
        if (!cancelled) {
          setMicAvailable(devices.some((d) => d.kind === "audioinput"));
        }
      })
      .catch(() => {
        // Any probe failure is treated as "no microphone available".
        if (!cancelled) {
          setMicAvailable(false);
        }
      });

    return () => {
      cancelled = true;
    };
  }, []);

  return (
    <div className="flex flex-col gap-4">
      <div className="flex flex-col gap-3">
        <div className="flex items-center gap-3 rounded-lg border p-3">
          <Mic className="h-5 w-5 text-primary shrink-0" />
          <div>
            <p className="text-sm font-medium">Speech-to-Text (Whisper)</p>
            <p className="text-xs text-muted-foreground">
              {micAvailable === false
                ? "No microphone detected — unavailable"
                : micAvailable === true
                  ? "Microphone detected — speak to your assistant"
                  : "Checking microphone..."}
            </p>
          </div>
        </div>

        <div className="flex items-center gap-3 rounded-lg border p-3">
          <Volume2 className="h-5 w-5 text-primary shrink-0" />
          <div>
            <p className="text-sm font-medium">Text-to-Speech (Piper)</p>
            <p className="text-xs text-muted-foreground">
              Hear responses read aloud. Runs entirely on your device — no server needed.
            </p>
          </div>
        </div>
      </div>

      <div className="flex flex-col gap-2">
        <Button onClick={onEnable} className="w-full">
          Enable voice
        </Button>
        <Button variant="ghost" onClick={onSkip} className="w-full">
          Skip
        </Button>
      </div>
    </div>
  );
}
```

**2. Update NexusOnboardingWizard.tsx — insert step 4 (voice), shift steps:**

This is a precise step-number shift. Do a full audit of all `setStep(N)` calls and update:

a. Add imports at top:
   - `import { VoiceStep } from "./onboarding/VoiceStep";`
   - Confirm `import { updateNexusSettings } from "../api/hardware";` is already present (it should be) — do not add a duplicate import

b. Add voiceEnabled state:
   ```typescript
   const [voiceEnabled, setVoiceEnabled] = useState(false);
   ```

c. Step number shift — ALL occurrences:
   - Old step 4 (rootDir) becomes step 5
   - Old step 5 (summary) becomes step 6
   - Every `setStep(4)` that meant "go to rootDir" becomes `setStep(5)`
   - Every `setStep(5)` that meant "go to summary" becomes `setStep(6)`
   - The Back button on old step 4 (rootDir) that said `setStep(3)` becomes `setStep(4)` (back to voice)
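   - Old step 3 (provider) onSkip/onContinue `setStep(4)` stays `setStep(4)` — no change needed, because step 4 is now the voice step instead of rootDir
   - Apply the shifts from the highest number down (5 → 6 first, then 4 → 5) so no call is shifted twice, and leave the step 3 `setStep(4)` calls untouched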

d. Step indicator label:
   - Change `{step === 5 ? "Summary" : \`Step ${step} of 4\`}` to `{step === 6 ? "Summary" : \`Step ${step} of 5\`}`

e. Reset voiceEnabled in the cleanup useEffect:
   - Add `setVoiceEnabled(false);` alongside other resets

f. Add step 4 rendering block (voice) — insert between step 3 and the rootDir step:
   ```tsx
   {/* Step 4 — Voice */}
   {step === 4 && (
     <>
       <div className="flex flex-col gap-2 text-center">
         <h1 className="text-2xl font-semibold tracking-tight">
           Voice features
         </h1>
         <p className="text-sm text-muted-foreground">
           Speak to your assistant and hear responses read aloud. Runs entirely on your device.
         </p>
       </div>

       <VoiceStep
         onEnable={() => {
           setVoiceEnabled(true);
           setStep(5);
         }}
         onSkip={() => setStep(5)}
       />

       <Button
         type="button"
         variant="ghost"
         onClick={() => setStep(3)}
         className="w-full"
       >
         Back
       </Button>
     </>
   )}
   ```

g. Persist voiceEnabled in createWorkspace() — add after the existing mode save:
   ```typescript
   // Persist voice preference — non-blocking
   if (voiceEnabled) {
     try {
       await updateNexusSettings({ voiceEnabled: true });
     } catch {
       // Non-blocking
     }
   }
   ```

h. Update old step 4 comment to say "Step 5 — Root Directory (was step 4)"
   Update old step 5 comment to say "Step 6 — Summary (was step 5)"

**Step number audit checklist (verify each):**
- Step 3 provider: onSkip → `setStep(4)` (voice) -- was already 4, now means voice
- Step 3 provider: onContinue → `setStep(4)` (voice) -- same
- Step 4 (NEW voice): Enable → `setStep(5)`, Skip → `setStep(5)`, Back → `setStep(3)`
- Step 5 (was 4, rootDir): "Review & finish" → `setStep(6)`, Back → `setStep(4)` (voice), "Skip to summary" → `setStep(6)`
- Step 6 (was 5, summary): onBack → `setStep(5)` (rootDir)
- Step rendering: `{step === 4 && ...}` for rootDir becomes `{step === 5 && ...}`
- Step rendering: `{step === 5 && ...}` for summary becomes `{step === 6 && ...}`
  </action>
  <verify>
    <automated>cd /opt/nexus && grep -c "setStep" ui/src/components/NexusOnboardingWizard.tsx && grep -q "VoiceStep" ui/src/components/NexusOnboardingWizard.tsx && grep -q "step === 4" ui/src/components/NexusOnboardingWizard.tsx && grep -q "step === 6" ui/src/components/NexusOnboardingWizard.tsx && grep -q "enumerateDevices" ui/src/components/onboarding/VoiceStep.tsx && grep -q "VoiceStep" ui/src/components/onboarding/VoiceStep.tsx && echo "PASS" || echo "FAIL"</automated>
  </verify>
  <acceptance_criteria>
    - grep -q "VoiceStep" ui/src/components/onboarding/VoiceStep.tsx returns 0
    - grep -q "VoiceStep" ui/src/components/NexusOnboardingWizard.tsx returns 0
    - grep -q "step === 6" ui/src/components/NexusOnboardingWizard.tsx returns 0 (summary is now step 6)
    - grep -q "Step.*of 5" ui/src/components/NexusOnboardingWizard.tsx returns 0 (label updated from "of 4")
    - grep -q "voiceEnabled" ui/src/components/NexusOnboardingWizard.tsx returns 0
    - grep -q "enumerateDevices" ui/src/components/onboarding/VoiceStep.tsx returns 0
  </acceptance_criteria>
  <done>Onboarding wizard has 6 steps with voice at step 4. VoiceStep probes mic availability and offers enable/skip. voiceEnabled is persisted on workspace creation. All setStep() calls use correct updated numbers.</done>
</task>

<task type="auto">
  <name>Task 2: Wire VoiceRecordButton and TtsButton into PersonalAssistant</name>
  <files>ui/src/pages/PersonalAssistant.tsx</files>
  <read_first>
    - ui/src/pages/PersonalAssistant.tsx (full file — understand input bar and message rendering)
    - ui/src/components/VoiceRecordButton.tsx (props interface)
    - ui/src/components/TtsButton.tsx (props interface, from Plan 01)
    - ui/src/hooks/usePiperTts.ts (hook API, from Plan 01)
  </read_first>
  <action>
**1. Add imports to PersonalAssistant.tsx:**
```typescript
import { VoiceRecordButton } from "@/components/VoiceRecordButton";
import { TtsButton } from "@/components/TtsButton";
import { usePiperTts } from "../hooks/usePiperTts";
// (Do not import Volume2 from "lucide-react" here — nothing in this task references it, so it would be an unused import.)
```

**2. Add usePiperTts hook in PersonalAssistant component body:**
```typescript
const { status: ttsStatus, progress: ttsProgress, prewarm, speak, stop } = usePiperTts();
```

**3. Add VoiceRecordButton to the input bar:**
In the input bar section (`{selectedConvId && ( <div className="px-6 py-4 ..."> ... </div> )}`), add VoiceRecordButton inside the `<div className="flex gap-3 items-end">` container, between the textarea and Send button:

```tsx
<VoiceRecordButton
  onTranscription={(text) => setInputValue((prev) => prev ? prev + " " + text : text)}
  disabled={isSending}
/>
```

The onTranscription callback appends transcribed text to the input field (does not auto-send). This lets users review before sending.

**4. Add TtsButton next to assistant messages (rendered beside MessageBubble, not inside it):**
Do NOT modify the MessageBubble component (for example by adding an optional `onSpeak` callback) — that would require prop drilling the hook through it.

Instead, add the TtsButton inline where messages are rendered. In the messages.map section, render a small TTS button after each assistant message:

```tsx
{messages.map((msg) => (
  <div key={msg.id}>
    <MessageBubble message={msg} />
    {msg.role === "assistant" && msg.content && (
      <div className="flex justify-start pl-10 -mt-1 mb-1">
        <TtsButton
          status={ttsStatus}
          progress={ttsProgress}
          onSpeak={() => speak(msg.content)}
          onStop={stop}
          onPrewarm={prewarm}
        />
      </div>
    )}
  </div>
))}
```

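The `pl-10` aligns the button under the message bubble (past the avatar). The `-mt-1 mb-1` tucks it close.

Note: every TtsButton receives the same `ttsStatus` from the single `usePiperTts()` call, so while one message is being spoken all buttons reflect the speaking state. Per-message state is out of scope for this plan; if it is needed later, track the id of the message being spoken and pass `"speaking"` only to that message's button.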

**5. Do NOT auto-prewarm TTS when PersonalAssistant mounts:**
Auto-prewarming on mount was considered as an optional optimization and rejected. Let the user trigger it on first click of any TtsButton. This avoids unexpected downloads.
  </action>
  <verify>
    <automated>cd /opt/nexus && grep -q "VoiceRecordButton" ui/src/pages/PersonalAssistant.tsx && grep -q "TtsButton" ui/src/pages/PersonalAssistant.tsx && grep -q "usePiperTts" ui/src/pages/PersonalAssistant.tsx && echo "PASS" || echo "FAIL"</automated>
  </verify>
  <acceptance_criteria>
    - grep -q "VoiceRecordButton" ui/src/pages/PersonalAssistant.tsx returns 0
    - grep -q "onTranscription" ui/src/pages/PersonalAssistant.tsx returns 0
    - grep -q "TtsButton" ui/src/pages/PersonalAssistant.tsx returns 0
    - grep -q "usePiperTts" ui/src/pages/PersonalAssistant.tsx returns 0
    - grep -q "speak" ui/src/pages/PersonalAssistant.tsx returns 0
  </acceptance_criteria>
  <done>PersonalAssistant has VoiceRecordButton in the input bar (STT via /api/transcribe) and TtsButton next to each assistant message (TTS via Piper WASM). Voice input appends to textarea for review before sending.</done>
</task>

</tasks>

<verification>
- `grep -q "VoiceStep" ui/src/components/NexusOnboardingWizard.tsx` — voice step integrated
- `grep -q "step === 6" ui/src/components/NexusOnboardingWizard.tsx` — summary correctly at step 6
- `grep -q "VoiceRecordButton" ui/src/pages/PersonalAssistant.tsx` — STT wired
- `grep -q "TtsButton" ui/src/pages/PersonalAssistant.tsx` — TTS wired
- `grep -q "enumerateDevices" ui/src/components/onboarding/VoiceStep.tsx` — mic detection
</verification>

<success_criteria>
1. Onboarding wizard has voice at step 4 with mic detection and enable/skip (VOICE-03)
2. Steps 5 (rootDir) and 6 (summary) work with correct Back/Continue navigation
3. PersonalAssistant has VoiceRecordButton for STT input
4. PersonalAssistant has TtsButton for TTS playback on assistant messages
5. voiceEnabled preference is persisted when user enables voice during onboarding
</success_criteria>

<output>
After completion, create `.planning/phases/34-voice/34-02-SUMMARY.md`
</output>