docs(34-voice): create phase plan

2026-04-03 22:29:45 +00:00 · 2026-04-03 22:29:45 +00:00 · c0710c7100
commit c0710c7100
parent 1d8f1c5912
3 changed files with 735 additions and 2 deletions
--- a/.planning/ROADMAP.md
+++ b/.planning/ROADMAP.md
@ -172,7 +172,12 @@ Plans:
  1. On a CPU-only machine (no GPU), enabling Piper TTS in the assistant produces audible speech output within a reasonable time after the first synthesis (not a silent hang)
  2. When Piper's WASM voice model is downloading for the first time, a visible progress indicator is shown before the TTS toggle is enabled; the download completes and TTS works without a page reload
  3. The onboarding voice step offers Whisper STT and Piper TTS toggles only when the hardware detection step has confirmed sufficient capability; on hardware below the threshold, the step is skipped or shows a capability warning
-**Plans**: TBD
+**Plans**: 2 plans
+
+Plans:
+- [ ] 34-01-PLAN.md — Fix /transcribe route registration, Piper TTS hook + TtsButton, voiceEnabled in nexus-settings
+- [ ] 34-02-PLAN.md — VoiceStep onboarding component, wizard step insertion, PersonalAssistant voice wiring
+**UI hint**: yes

 ### Phase 35: npx buildthis CLI
 **Goal**: A developer can run `npx buildthis` on a fresh machine and either open an already-running Nexus or be guided through install — with the same hardware detection and provider tiering as the web onboarding
@ -233,5 +238,5 @@ All 21 v1.5 requirements are mapped to exactly one phase. No orphans.
 | 31. Puter.js Zero-Config Cloud | v1.5 | 4/4 | Complete    | 2026-04-03 |
 | 32. Multi-Step Onboarding Wizard | v1.5 | 1/1 | Complete    | 2026-04-03 |
 | 33. Persistent Memory + Personal Assistant Mode | v1.5 | 3/3 | Complete    | 2026-04-03 |
-| 34. Voice | v1.5 | 0/TBD | Not started | - |
+| 34. Voice | v1.5 | 0/2 | Not started | - |
 | 35. npx buildthis CLI | v1.5 | 0/TBD | Not started | - |
--- a/.planning/phases/34-voice/34-01-PLAN.md
+++ b/.planning/phases/34-voice/34-01-PLAN.md
@ -0,0 +1,332 @@
+---
+phase: 34-voice
+plan: 01
+type: execute
+wave: 1
+depends_on: []
+files_modified:
+  - server/src/app.ts
+  - server/src/services/nexus-settings.ts
+  - server/src/routes/nexus-settings.ts
+  - ui/src/api/hardware.ts
+  - ui/src/hooks/usePiperTts.ts
+  - ui/src/components/TtsButton.tsx
+autonomous: true
+requirements:
+  - VOICE-01
+  - VOICE-02
+
+must_haves:
+  truths:
+    - "POST /api/transcribe is reachable and returns 503 with descriptive error when no Whisper CLI is installed"
+    - "usePiperTts hook exposes prewarm/speak/status/progress and transitions idle->downloading->ready->speaking"
+    - "TtsButton renders a speaker icon that calls speak() and shows download progress during prewarm"
+    - "voiceEnabled boolean is persisted in nexus-settings.json and exposed via GET/PATCH /nexus/settings"
+  artifacts:
+    - path: "ui/src/hooks/usePiperTts.ts"
+      provides: "Piper TTS hook with prewarm, speak, status, progress"
+      exports: ["usePiperTts"]
+    - path: "ui/src/components/TtsButton.tsx"
+      provides: "Speaker button component for TTS playback"
+      exports: ["TtsButton"]
+  key_links:
+    - from: "server/src/app.ts"
+      to: "server/src/routes/chat-files.ts"
+      via: "api.use(chatFileRoutes(db, opts.storageService))"
+      pattern: "chatFileRoutes"
+    - from: "ui/src/hooks/usePiperTts.ts"
+      to: "@mintplex-labs/piper-tts-web"
+      via: "import { tts }"
+      pattern: "tts\\.download|tts\\.predict"
+---
+
+<objective>
+Fix the broken /transcribe route registration, create the Piper TTS browser hook and button component, and add voiceEnabled to nexus-settings persistence.
+
+Purpose: VOICE-01 requires TTS on CPU-only hardware (browser WASM satisfies this). VOICE-02 requires visible download progress before first synthesis. The /transcribe route exists but is never mounted — a 1-line fix. voiceEnabled persistence is needed so onboarding voice opt-in survives sessions.
+
+Output: Working /api/transcribe endpoint, usePiperTts hook, TtsButton component, voiceEnabled in nexus-settings.
+</objective>
+
+<execution_context>
+@$HOME/.claude/get-shit-done/workflows/execute-plan.md
+@$HOME/.claude/get-shit-done/templates/summary.md
+</execution_context>
+
+<context>
+@.planning/PROJECT.md
+@.planning/ROADMAP.md
+@.planning/STATE.md
+@.planning/phases/34-voice/34-RESEARCH.md
+
+@server/src/app.ts
+@server/src/routes/chat-files.ts
+@server/src/services/nexus-settings.ts
+@ui/src/api/hardware.ts
+@ui/src/components/VoiceRecordButton.tsx
+
+<interfaces>
+<!-- Existing interfaces the executor needs -->
+
+From server/src/routes/chat-files.ts:
+```typescript
+export function chatFileRoutes(db: Db, storage: StorageService) { ... }
+// POST /transcribe — accepts multipart audio, returns { text: string } or 503
+```
+
+From server/src/app.ts (line 147 pattern):
+```typescript
+api.use(assetRoutes(db, opts.storageService));
+// chatFileRoutes uses the same (db, opts.storageService) signature
+```
+
+From server/src/services/nexus-settings.ts:
+```typescript
+export const NEXUS_MODES = ["personal_ai", "project_builder", "both"] as const;
+export type NexusMode = (typeof NEXUS_MODES)[number];
+const nexusSettingsSchema = z.object({
+  mode: z.enum(NEXUS_MODES).default("both"),
+});
+export function nexusSettingsService() { get(), set(patch) }
+```
+
+From ui/src/api/hardware.ts:
+```typescript
+export type NexusMode = "personal_ai" | "project_builder" | "both";
+export interface NexusSettings { mode: NexusMode; }
+export function fetchNexusSettings(): Promise<NexusSettings>;
+export function updateNexusSettings(settings: Partial<NexusSettings>): Promise<NexusSettings>;
+```
+</interfaces>
+</context>
+
+<tasks>
+
+<task type="auto">
+  <name>Task 1: Register chatFileRoutes in app.ts and add voiceEnabled to nexus-settings</name>
+  <files>server/src/app.ts, server/src/services/nexus-settings.ts, server/src/routes/nexus-settings.ts, ui/src/api/hardware.ts</files>
+  <read_first>
+    - server/src/app.ts (full file — find insertion point after assistantHandoffRoutes)
+    - server/src/services/nexus-settings.ts (full file — understand schema)
+    - server/src/routes/nexus-settings.ts (full file — understand PATCH handler)
+    - ui/src/api/hardware.ts (full file — understand client types)
+  </read_first>
+  <action>
+**1. Register chatFileRoutes in app.ts:**
+- Add import at top with other route imports: `import { chatFileRoutes } from "./routes/chat-files.js";`
+- Add `api.use(chatFileRoutes(db, opts.storageService));` after the `api.use(assistantHandoffRoutes(db));` line (around line 161). Mirror the `assetRoutes(db, opts.storageService)` pattern exactly.
+- Do NOT place it before boardMutationGuard — the /transcribe route calls assertBoard(req) and needs to be inside the guarded api sub-router.
+
+**2. Add voiceEnabled to nexusSettingsSchema (server/src/services/nexus-settings.ts):**
+- Add `voiceEnabled: z.boolean().default(false)` to the nexusSettingsSchema z.object.
+- This is a file-backed JSON field, NOT a DB migration — acceptable under the "no DB schema changes" constraint.
+
+**3. Update NexusSettings type on client (ui/src/api/hardware.ts):**
+- Add `voiceEnabled?: boolean` to the `NexusSettings` interface.
+- No changes to API functions needed — they already handle Partial<NexusSettings>.
+
+**4. Check nexus-settings route handler (server/src/routes/nexus-settings.ts):**
+- Read the file. The PATCH handler should already forward arbitrary fields to `nexusSettingsService().set(patch)` since it uses the Zod schema. If it manually picks fields, add voiceEnabled to the pick list. If it passes req.body through, no change needed.
+  </action>
+  <verify>
+    <automated>cd /opt/nexus && npx vitest run server/src/__tests__/chat-file-routes.test.ts 2>&1 | tail -5</automated>
+  </verify>
+  <acceptance_criteria>
+    - grep -q "chatFileRoutes" server/src/app.ts returns 0
+    - grep -q "voiceEnabled" server/src/services/nexus-settings.ts returns 0
+    - grep -q "voiceEnabled" ui/src/api/hardware.ts returns 0
+  </acceptance_criteria>
+  <done>POST /api/transcribe is reachable (returns 503 when no Whisper CLI installed, not 404). voiceEnabled persists in nexus-settings.json via the existing settings route.</done>
+</task>
+
+<task type="auto">
+  <name>Task 2: Create usePiperTts hook and TtsButton component</name>
+  <files>ui/src/hooks/usePiperTts.ts, ui/src/components/TtsButton.tsx</files>
+  <read_first>
+    - ui/src/components/VoiceRecordButton.tsx (reference for button style patterns)
+    - ui/src/components/ui/button.tsx (Button component API)
+  </read_first>
+  <action>
+**0. Install piper-tts-web:**
+```bash
+pnpm --filter @paperclipai/ui add @mintplex-labs/piper-tts-web
+```
+
+**1. Create ui/src/hooks/usePiperTts.ts:**
+```typescript
+import { useState, useCallback, useRef } from "react";
+import { tts } from "@mintplex-labs/piper-tts-web";
+
+const DEFAULT_VOICE = "en_US-hfc_female-medium";
+
+export type TtsStatus = "idle" | "downloading" | "ready" | "speaking" | "error";
+
+export function usePiperTts() {
+  const [status, setStatus] = useState<TtsStatus>("idle");
+  const [progress, setProgress] = useState(0);
+  const audioRef = useRef<HTMLAudioElement | null>(null);
+
+  const prewarm = useCallback(async () => {
+    if (status === "ready" || status === "downloading") return;
+    setStatus("downloading");
+    setProgress(0);
+    try {
+      const stored = await tts.stored();
+      if (!stored.includes(DEFAULT_VOICE)) {
+        await tts.download(DEFAULT_VOICE, (p: { loaded: number; total: number }) => {
+          setProgress(Math.round((p.loaded / p.total) * 100));
+        });
+      }
+      setStatus("ready");
+      setProgress(100);
+    } catch {
+      setStatus("error");
+    }
+  }, [status]);
+
+  const speak = useCallback(async (text: string) => {
+    if (status !== "ready") return;
+    // Stop any currently playing audio
+    if (audioRef.current) {
+      audioRef.current.pause();
+      audioRef.current = null;
+    }
+    setStatus("speaking");
+    try {
+      const wav = await tts.predict({ text, voiceId: DEFAULT_VOICE });
+      const audio = new Audio(wav);
+      audioRef.current = audio;
+      audio.onended = () => {
+        audioRef.current = null;
+        setStatus("ready");
+      };
+      audio.onerror = () => {
+        audioRef.current = null;
+        setStatus("ready");
+      };
+      await audio.play();
+    } catch {
+      setStatus("ready");
+    }
+  }, [status]);
+
+  const stop = useCallback(() => {
+    if (audioRef.current) {
+      audioRef.current.pause();
+      audioRef.current = null;
+    }
+    if (status === "speaking") setStatus("ready");
+  }, [status]);
+
+  return { status, progress, prewarm, speak, stop };
+}
+```
+
+Key points:
+- `tts.stored()` lists the voice models already cached in the browser's Origin Private File System (OPFS) — the download is skipped if the model is already present (VOICE-02).
+- `tts.download()` with progress callback provides visible download progress (VOICE-02).
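+- `tts.predict()` resolves to a WAV `Blob`, not a URL — create an object URL with `URL.createObjectURL(wav)` and pass that to `new Audio(url)`, then release it with `URL.revokeObjectURL(url)` when playback ends or fails (VOICE-01, CPU-safe WASM).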
+- `stop()` allows interrupting playback.
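+- Do NOT import this in any server-side or test file running in Node — browser-only.
+- Verify the import form against the installed package before committing: the library's README imports the API as a namespace (`import * as tts from "@mintplex-labs/piper-tts-web"`); a named `tts` export may not exist.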
+
+**2. Create ui/src/components/TtsButton.tsx:**
+```typescript
+import { Volume2, VolumeX, Loader2 } from "lucide-react";
+import { Button } from "./ui/button";
+import type { TtsStatus } from "../hooks/usePiperTts";
+
+interface TtsButtonProps {
+  status: TtsStatus;
+  progress: number;
+  onSpeak: () => void;
+  onStop: () => void;
+  onPrewarm: () => void;
+  disabled?: boolean;
+}
+
+export function TtsButton({ status, progress, onSpeak, onStop, onPrewarm, disabled }: TtsButtonProps) {
+  if (status === "downloading") {
+    return (
+      <Button variant="ghost" size="icon" className="h-8 w-8 relative" disabled title={`Downloading voice model: ${progress}%`}>
+        <Loader2 className="h-4 w-4 animate-spin" />
+        <span className="absolute -bottom-1 text-[10px] text-muted-foreground">{progress}%</span>
+      </Button>
+    );
+  }
+
+  if (status === "speaking") {
+    return (
+      <Button
+        variant="ghost"
+        size="icon"
+        className="h-8 w-8 text-primary"
+        onClick={onStop}
+        aria-label="Stop speaking"
+        title="Stop speaking"
+      >
+        <VolumeX className="h-4 w-4" />
+      </Button>
+    );
+  }
+
+  // idle or error: clicking triggers prewarm then speak
+  // ready: clicking triggers speak directly
+  const handleClick = () => {
+    if (status === "ready") {
+      onSpeak();
+    } else {
+      onPrewarm();
+    }
+  };
+
+  return (
+    <Button
+      variant="ghost"
+      size="icon"
+      className="h-8 w-8"
+      onClick={handleClick}
+      disabled={disabled || status === "error"}
+      aria-label="Read aloud"
+      title={status === "error" ? "TTS unavailable" : status === "idle" ? "Download voice model and read aloud" : "Read aloud"}
+    >
+      <Volume2 className="h-4 w-4" />
+    </Button>
+  );
+}
+```
+
+The TtsButton receives status/progress from the hook and delegates actions. It does NOT import piper-tts-web directly — all TTS logic stays in the hook. The button is reusable: PersonalAssistant (Plan 02) will place it next to assistant messages.
+  </action>
+  <verify>
+    <automated>cd /opt/nexus && grep -q "usePiperTts" ui/src/hooks/usePiperTts.ts && grep -q "TtsButton" ui/src/components/TtsButton.tsx && { grep -q "piper-tts-web" ui/package.json 2>/dev/null || grep -q "piper-tts-web" pnpm-lock.yaml; } && echo "PASS" || echo "FAIL"</automated>
+  </verify>
+  <acceptance_criteria>
+    - grep -q "tts.download" ui/src/hooks/usePiperTts.ts returns 0
+    - grep -q "tts.predict" ui/src/hooks/usePiperTts.ts returns 0
+    - grep -q "tts.stored" ui/src/hooks/usePiperTts.ts returns 0
+    - grep -q "TtsButton" ui/src/components/TtsButton.tsx returns 0
+    - grep -q "piper-tts-web" pnpm-lock.yaml returns 0
+    - grep -q "Volume2" ui/src/components/TtsButton.tsx returns 0
+  </acceptance_criteria>
+  <done>usePiperTts hook handles download progress (VOICE-02) and CPU-safe WASM synthesis (VOICE-01). TtsButton shows download progress during prewarm and speaker icon for playback. piper-tts-web is installed as a UI dependency.</done>
+</task>
+
+</tasks>
+
+<verification>
+- `grep -q "chatFileRoutes" server/src/app.ts` — route is registered
+- `grep -q "voiceEnabled" server/src/services/nexus-settings.ts` — settings schema extended
+- `ls ui/src/hooks/usePiperTts.ts ui/src/components/TtsButton.tsx` — both files exist
+- `npx vitest run server/src/__tests__/chat-file-routes.test.ts` — existing route tests pass
+</verification>
+
+<success_criteria>
+1. POST /api/transcribe returns 503 (not 404) when no Whisper CLI is installed — route is mounted
+2. usePiperTts hook exports prewarm(), speak(), stop(), status, progress
+3. TtsButton renders download progress during prewarm and speaker icon for playback
+4. voiceEnabled persists in nexus-settings.json
+</success_criteria>
+
+<output>
+After completion, create `.planning/phases/34-voice/34-01-SUMMARY.md`
+</output>
--- a/.planning/phases/34-voice/34-02-PLAN.md
+++ b/.planning/phases/34-voice/34-02-PLAN.md
@ -0,0 +1,396 @@
+---
+phase: 34-voice
+plan: 02
+type: execute
+wave: 2
+depends_on: ["34-01"]
+files_modified:
+  - ui/src/components/onboarding/VoiceStep.tsx
+  - ui/src/components/NexusOnboardingWizard.tsx
+  - ui/src/pages/PersonalAssistant.tsx
+autonomous: true
+requirements:
+  - VOICE-03
+
+must_haves:
+  truths:
+    - "Onboarding wizard shows a voice opt-in step (step 4) with enable/skip buttons"
+    - "Step numbering shifts correctly: rootDir is step 5, summary is step 6"
+    - "VoiceRecordButton appears in PersonalAssistant input bar for STT"
+    - "TtsButton appears next to assistant messages in PersonalAssistant for TTS playback"
+    - "Voice step detects microphone availability and shows appropriate messaging"
+  artifacts:
+    - path: "ui/src/components/onboarding/VoiceStep.tsx"
+      provides: "Voice opt-in onboarding step"
+      exports: ["VoiceStep"]
+    - path: "ui/src/components/NexusOnboardingWizard.tsx"
+      provides: "6-step wizard with voice at step 4"
+    - path: "ui/src/pages/PersonalAssistant.tsx"
+      provides: "Full-page assistant chat with voice input and TTS"
+  key_links:
+    - from: "ui/src/components/NexusOnboardingWizard.tsx"
+      to: "ui/src/components/onboarding/VoiceStep.tsx"
+      via: "import and render at step 4"
+      pattern: "VoiceStep"
+    - from: "ui/src/pages/PersonalAssistant.tsx"
+      to: "ui/src/components/VoiceRecordButton.tsx"
+      via: "import and render in input bar"
+      pattern: "VoiceRecordButton"
+    - from: "ui/src/pages/PersonalAssistant.tsx"
+      to: "ui/src/hooks/usePiperTts.ts"
+      via: "usePiperTts hook for TTS playback"
+      pattern: "usePiperTts"
+---
+
+<objective>
+Add the onboarding voice step and wire VoiceRecordButton + TtsButton into PersonalAssistant.
+
+Purpose: VOICE-03 requires voice features offered during onboarding based on hardware capability. The PersonalAssistant is the primary chat surface for v1.5 and must have both STT (VoiceRecordButton) and TTS (TtsButton) controls.
+
+Output: VoiceStep component, updated 6-step wizard, PersonalAssistant with voice I/O.
+</objective>
+
+<execution_context>
+@$HOME/.claude/get-shit-done/workflows/execute-plan.md
+@$HOME/.claude/get-shit-done/templates/summary.md
+</execution_context>
+
+<context>
+@.planning/PROJECT.md
+@.planning/ROADMAP.md
+@.planning/STATE.md
+@.planning/phases/34-voice/34-RESEARCH.md
+@.planning/phases/34-voice/34-01-SUMMARY.md
+
+@ui/src/components/NexusOnboardingWizard.tsx
+@ui/src/pages/PersonalAssistant.tsx
+@ui/src/components/VoiceRecordButton.tsx
+
+<interfaces>
+<!-- From Plan 01 outputs -->
+
+From ui/src/hooks/usePiperTts.ts (created in Plan 01):
+```typescript
+export type TtsStatus = "idle" | "downloading" | "ready" | "speaking" | "error";
+export function usePiperTts(): {
+  status: TtsStatus;
+  progress: number;
+  prewarm: () => Promise<void>;
+  speak: (text: string) => Promise<void>;
+  stop: () => void;
+};
+```
+
+From ui/src/components/TtsButton.tsx (created in Plan 01):
+```typescript
+interface TtsButtonProps {
+  status: TtsStatus;
+  progress: number;
+  onSpeak: () => void;
+  onStop: () => void;
+  onPrewarm: () => void;
+  disabled?: boolean;
+}
+export function TtsButton(props: TtsButtonProps): JSX.Element;
+```
+
+From ui/src/api/hardware.ts (updated in Plan 01):
+```typescript
+export interface NexusSettings {
+  mode: NexusMode;
+  voiceEnabled?: boolean;
+}
+export function updateNexusSettings(settings: Partial<NexusSettings>): Promise<NexusSettings>;
+```
+
+From ui/src/components/VoiceRecordButton.tsx (existing):
+```typescript
+interface VoiceRecordButtonProps {
+  onTranscription: (text: string) => void;
+  disabled?: boolean;
+}
+export function VoiceRecordButton(props: VoiceRecordButtonProps): JSX.Element;
+```
+
+From ui/src/components/NexusOnboardingWizard.tsx (existing step structure):
+- Step 1: hardware detection
+- Step 2: mode selection
+- Step 3: provider selection
+- Step 4: root directory (will become step 5)
+- Step 5: summary (will become step 6)
+- Step state: `const [step, setStep] = useState(1);`
+- Label: `{step === 5 ? "Summary" : \`Step ${step} of 4\`}`
+</interfaces>
+</context>
+
+<tasks>
+
+<task type="auto">
+  <name>Task 1: Create VoiceStep component and insert into NexusOnboardingWizard as step 4</name>
+  <files>ui/src/components/onboarding/VoiceStep.tsx, ui/src/components/NexusOnboardingWizard.tsx</files>
+  <read_first>
+    - ui/src/components/NexusOnboardingWizard.tsx (full file — understand all setStep calls and step rendering)
+    - ui/src/components/onboarding/ModeSelector.tsx (reference for onboarding step component patterns)
+    - ui/src/components/onboarding/HardwareSummaryStep.tsx (reference for step component patterns)
+    - ui/src/api/hardware.ts (NexusSettings type with voiceEnabled)
+  </read_first>
+  <action>
+**1. Create ui/src/components/onboarding/VoiceStep.tsx:**
+
+```tsx
+import { useEffect, useState } from "react";
+import { Mic, Volume2 } from "lucide-react";
+import { Button } from "@/components/ui/button";
+
+interface VoiceStepProps {
+  onEnable: () => void;
+  onSkip: () => void;
+}
+
+export function VoiceStep({ onEnable, onSkip }: VoiceStepProps) {
+  const [micAvailable, setMicAvailable] = useState<boolean | null>(null);
+
+  useEffect(() => {
+    navigator.mediaDevices?.enumerateDevices()
+      .then(devices => setMicAvailable(devices.some(d => d.kind === "audioinput")))
+      .catch(() => setMicAvailable(false));
+  }, []);
+
+  return (
+    <div className="flex flex-col gap-4">
+      <div className="flex flex-col gap-3">
+        <div className="flex items-center gap-3 rounded-lg border p-3">
+          <Mic className="h-5 w-5 text-primary shrink-0" />
+          <div>
+            <p className="text-sm font-medium">Speech-to-Text (Whisper)</p>
+            <p className="text-xs text-muted-foreground">
+              {micAvailable === false
+                ? "No microphone detected — unavailable"
+                : micAvailable === true
+                  ? "Microphone detected — speak to your assistant"
+                  : "Checking microphone..."}
+            </p>
+          </div>
+        </div>
+
+        <div className="flex items-center gap-3 rounded-lg border p-3">
+          <Volume2 className="h-5 w-5 text-primary shrink-0" />
+          <div>
+            <p className="text-sm font-medium">Text-to-Speech (Piper)</p>
+            <p className="text-xs text-muted-foreground">
+              Hear responses read aloud. Runs entirely on your device — no server needed.
+            </p>
+          </div>
+        </div>
+      </div>
+
+      <div className="flex flex-col gap-2">
+        <Button onClick={onEnable} className="w-full">
+          Enable voice
+        </Button>
+        <Button variant="ghost" onClick={onSkip} className="w-full">
+          Skip
+        </Button>
+      </div>
+    </div>
+  );
+}
+```
+
+**2. Update NexusOnboardingWizard.tsx — insert step 4 (voice), shift steps:**
+
+This is a precise step-number shift. Do a full audit of all `setStep(N)` calls and update:
+
+a. Add imports at top:
+   - `import { VoiceStep } from "./onboarding/VoiceStep";`
+   - `updateNexusSettings` from `"../api/hardware"` — already imported by the wizard; confirm it is present and do NOT add a duplicate import
+
+b. Add voiceEnabled state:
+   ```typescript
+   const [voiceEnabled, setVoiceEnabled] = useState(false);
+   ```
+
+c. Step number shift — ALL occurrences:
+   - Old step 4 (rootDir) becomes step 5
+   - Old step 5 (summary) becomes step 6
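+   - Every `setStep(4)` that meant "go to rootDir" becomes `setStep(5)` — EXCEPT the forward navigation out of step 3 (provider), which stays `setStep(4)` because it must now land on the new voice step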
+   - Every `setStep(5)` that meant "go to summary" becomes `setStep(6)`
+   - The Back button on old step 4 (rootDir) that said `setStep(3)` becomes `setStep(4)` (back to voice)
+   - Step 3 (provider) onSkip/onContinue keep `setStep(4)` unchanged — step 4 is now the voice step, so the provider step flows into voice instead of rootDir
+
+d. Step indicator label:
+   - Change `{step === 5 ? "Summary" : \`Step ${step} of 4\`}` to `{step === 6 ? "Summary" : \`Step ${step} of 5\`}`
+
+e. Reset voiceEnabled in the cleanup useEffect:
+   - Add `setVoiceEnabled(false);` alongside other resets
+
+f. Add step 4 rendering block (voice) — insert between step 3 and the rootDir step:
+   ```tsx
+   {/* Step 4 — Voice */}
+   {step === 4 && (
+     <>
+       <div className="flex flex-col gap-2 text-center">
+         <h1 className="text-2xl font-semibold tracking-tight">
+           Voice features
+         </h1>
+         <p className="text-sm text-muted-foreground">
+           Speak to your assistant and hear responses read aloud. Runs entirely on your device.
+         </p>
+       </div>
+
+       <VoiceStep
+         onEnable={() => {
+           setVoiceEnabled(true);
+           setStep(5);
+         }}
+         onSkip={() => setStep(5)}
+       />
+
+       <Button
+         type="button"
+         variant="ghost"
+         onClick={() => setStep(3)}
+         className="w-full"
+       >
+         Back
+       </Button>
+     </>
+   )}
+   ```
+
+g. Persist voiceEnabled in createWorkspace() — add after the existing mode save:
+   ```typescript
+   // Persist voice preference — non-blocking
+   if (voiceEnabled) {
+     try {
+       await updateNexusSettings({ voiceEnabled: true });
+     } catch {
+       // Non-blocking
+     }
+   }
+   ```
+
+h. Update old step 4 comment to say "Step 5 — Root Directory (was step 4)"
+   Update old step 5 comment to say "Step 6 — Summary (was step 5)"
+
+**Step number audit checklist (verify each):**
+- Step 3 provider: onSkip → `setStep(4)` (voice) -- was already 4, now means voice
+- Step 3 provider: onContinue → `setStep(4)` (voice) -- same
+- Step 4 (NEW voice): Enable → `setStep(5)`, Skip → `setStep(5)`, Back → `setStep(3)`
+- Step 5 (was 4, rootDir): "Review & finish" → `setStep(6)`, Back → `setStep(4)` (voice), "Skip to summary" → `setStep(6)`
+- Step 6 (was 5, summary): onBack → `setStep(5)` (rootDir)
+- Step rendering: `{step === 4 && ...}` for rootDir becomes `{step === 5 && ...}`
+- Step rendering: `{step === 5 && ...}` for summary becomes `{step === 6 && ...}`
+  </action>
+  <verify>
+    <automated>cd /opt/nexus && grep -c "setStep" ui/src/components/NexusOnboardingWizard.tsx && grep -q "VoiceStep" ui/src/components/NexusOnboardingWizard.tsx && grep -q "step === 4" ui/src/components/NexusOnboardingWizard.tsx && grep -q "step === 6" ui/src/components/NexusOnboardingWizard.tsx && grep -q "VoiceStep" ui/src/components/onboarding/VoiceStep.tsx && echo "PASS" || echo "FAIL"</automated>
+  </verify>
+  <acceptance_criteria>
+    - grep -q "VoiceStep" ui/src/components/onboarding/VoiceStep.tsx returns 0
+    - grep -q "VoiceStep" ui/src/components/NexusOnboardingWizard.tsx returns 0
+    - grep -q "step === 6" ui/src/components/NexusOnboardingWizard.tsx returns 0 (summary is now step 6)
+    - grep -q "Step.*of 5" ui/src/components/NexusOnboardingWizard.tsx returns 0 (label updated from "of 4")
+    - grep -q "voiceEnabled" ui/src/components/NexusOnboardingWizard.tsx returns 0
+    - grep -q "enumerateDevices" ui/src/components/onboarding/VoiceStep.tsx returns 0
+  </acceptance_criteria>
+  <done>Onboarding wizard has 6 steps with voice at step 4. VoiceStep probes mic availability and offers enable/skip. voiceEnabled is persisted on workspace creation. All setStep() calls use correct updated numbers.</done>
+</task>
+
+<task type="auto">
+  <name>Task 2: Wire VoiceRecordButton and TtsButton into PersonalAssistant</name>
+  <files>ui/src/pages/PersonalAssistant.tsx</files>
+  <read_first>
+    - ui/src/pages/PersonalAssistant.tsx (full file — understand input bar and message rendering)
+    - ui/src/components/VoiceRecordButton.tsx (props interface)
+    - ui/src/components/TtsButton.tsx (props interface, from Plan 01)
+    - ui/src/hooks/usePiperTts.ts (hook API, from Plan 01)
+  </read_first>
+  <action>
+**1. Add imports to PersonalAssistant.tsx:**
+```typescript
+import { VoiceRecordButton } from "@/components/VoiceRecordButton";
+import { TtsButton } from "@/components/TtsButton";
+import { usePiperTts } from "../hooks/usePiperTts";
+import { Volume2 } from "lucide-react";
+```
+
+**2. Add usePiperTts hook in PersonalAssistant component body:**
+```typescript
+const { status: ttsStatus, progress: ttsProgress, prewarm, speak, stop } = usePiperTts();
+```
+
+**3. Add VoiceRecordButton to the input bar:**
+In the input bar section (`{selectedConvId && ( <div className="px-6 py-4 ..."> ... </div> )}`), add VoiceRecordButton inside the `<div className="flex gap-3 items-end">` container, between the textarea and Send button:
+
+```tsx
+<VoiceRecordButton
+  onTranscription={(text) => setInputValue((prev) => prev ? prev + " " + text : text)}
+  disabled={isSending}
+/>
+```
+
+The onTranscription callback appends transcribed text to the input field (does not auto-send). This lets users review before sending.
+
+**4. Add TtsButton next to assistant messages (rendered inline, not inside MessageBubble):**
+Do NOT modify the MessageBubble component or add an `onSpeak` prop to it — that would require prop-drilling the hook state through the component.
+
+Instead, add the TtsButton inline where messages are rendered. In the messages.map section, render a small TTS button after each assistant message:
+
+```tsx
+{messages.map((msg) => (
+  <div key={msg.id}>
+    <MessageBubble message={msg} />
+    {msg.role === "assistant" && msg.content && (
+      <div className="flex justify-start pl-10 -mt-1 mb-1">
+        <TtsButton
+          status={ttsStatus}
+          progress={ttsProgress}
+          onSpeak={() => speak(msg.content)}
+          onStop={stop}
+          onPrewarm={prewarm}
+        />
+      </div>
+    )}
+  </div>
+))}
+```
+
+The `pl-10` aligns the button under the message bubble (past the avatar). The `-mt-1 mb-1` tucks it close.
+
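+**5. Do NOT auto-prewarm TTS when PersonalAssistant mounts:**
+Let the user trigger the download on the first click of any TtsButton; this avoids unexpected downloads. Note that the first click only downloads the voice model (TtsButton calls `onPrewarm` while status is `idle`) — the user clicks again to hear the message once status is `ready`.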
+  </action>
+  <verify>
+    <automated>cd /opt/nexus && grep -q "VoiceRecordButton" ui/src/pages/PersonalAssistant.tsx && grep -q "TtsButton" ui/src/pages/PersonalAssistant.tsx && grep -q "usePiperTts" ui/src/pages/PersonalAssistant.tsx && echo "PASS" || echo "FAIL"</automated>
+  </verify>
+  <acceptance_criteria>
+    - grep -q "VoiceRecordButton" ui/src/pages/PersonalAssistant.tsx returns 0
+    - grep -q "onTranscription" ui/src/pages/PersonalAssistant.tsx returns 0
+    - grep -q "TtsButton" ui/src/pages/PersonalAssistant.tsx returns 0
+    - grep -q "usePiperTts" ui/src/pages/PersonalAssistant.tsx returns 0
+    - grep -q "speak" ui/src/pages/PersonalAssistant.tsx returns 0
+  </acceptance_criteria>
+  <done>PersonalAssistant has VoiceRecordButton in the input bar (STT via /api/transcribe) and TtsButton next to each assistant message (TTS via Piper WASM). Voice input appends to textarea for review before sending.</done>
+</task>
+
+</tasks>
+
+<verification>
+- `grep -q "VoiceStep" ui/src/components/NexusOnboardingWizard.tsx` — voice step integrated
+- `grep -q "step === 6" ui/src/components/NexusOnboardingWizard.tsx` — summary correctly at step 6
+- `grep -q "VoiceRecordButton" ui/src/pages/PersonalAssistant.tsx` — STT wired
+- `grep -q "TtsButton" ui/src/pages/PersonalAssistant.tsx` — TTS wired
+- `grep -q "enumerateDevices" ui/src/components/onboarding/VoiceStep.tsx` — mic detection
+</verification>
+
+<success_criteria>
+1. Onboarding wizard has voice at step 4 with mic detection and enable/skip (VOICE-03)
+2. Steps 5 (rootDir) and 6 (summary) work with correct Back/Continue navigation
+3. PersonalAssistant has VoiceRecordButton for STT input
+4. PersonalAssistant has TtsButton for TTS playback on assistant messages
+5. voiceEnabled preference is persisted when user enables voice during onboarding
+</success_criteria>
+
+<output>
+After completion, create `.planning/phases/34-voice/34-02-SUMMARY.md`
+</output>