nexus/.planning/phases/25-file-system/25-08-PLAN.md

---
phase: 25-file-system
plan: 08
type: execute
wave: 1
depends_on: ["25-02"]
files_modified:
  - ui/src/components/VoiceRecordButton.tsx
  - ui/src/components/ChatInput.tsx
  - ui/src/components/ChatPanel.tsx
  - server/src/routes/chat-files.ts
  - .planning/REQUIREMENTS.md
autonomous: true
gap_closure: true
requirements: [INPUT-02, INPUT-03, INPUT-04]

must_haves:
  truths:
    - "User can press a record button to start capturing voice audio and press it again to stop"
    - "Recorded audio is sent to the server for transcription"
    - "Transcription preview appears in the chat input before sending"
  artifacts:
    - path: "ui/src/components/VoiceRecordButton.tsx"
      provides: "Voice recording button with MediaRecorder API, preview, and confirm flow"
      min_lines: 60
    - path: "server/src/routes/chat-files.ts"
      provides: "POST /transcribe endpoint for audio transcription"
  key_links:
    - from: "ui/src/components/ChatInput.tsx"
      to: "ui/src/components/VoiceRecordButton.tsx"
      via: "import and render in input toolbar"
      pattern: "VoiceRecordButton"
    - from: "ui/src/components/VoiceRecordButton.tsx"
      to: "/api/transcribe"
      via: "fetch POST with audio blob"
      pattern: "fetch.*transcribe"
---

<objective>
Add voice input with transcription preview to the chat input.

Purpose: INPUT-04 requires voice input via Whisper when local AI is enabled: user can hold a record button, speak, see a transcription preview, and confirm to send. This plan creates a VoiceRecordButton component using the browser MediaRecorder API, a server transcription endpoint that calls a local Whisper process, and wires the transcription result into the chat input textarea.

Output: VoiceRecordButton component, server transcription endpoint, ChatInput integration
</objective>

<execution_context>
@$HOME/.claude/get-shit-done/workflows/execute-plan.md
@$HOME/.claude/get-shit-done/templates/summary.md
</execution_context>

<context>
@.planning/PROJECT.md
@.planning/ROADMAP.md
@.planning/STATE.md
@.planning/phases/25-file-system/25-02-SUMMARY.md

<interfaces>
From ui/src/components/ChatInput.tsx:
- Props include onSend(content: string), onFilesPicked, pendingFiles, onRemoveFile
- Contains Paperclip button for file upload in the button row
- Textarea with handleKeyDown and handlePaste

From server/src/routes/chat-files.ts:
- chatFileRoutes(db, storage) returns Express Router
- Uses multer for file upload, assertBoard for auth
</interfaces>
</context>

<tasks>

<task type="auto">
  <name>Task 1: Create VoiceRecordButton and server transcription endpoint</name>
  <files>ui/src/components/VoiceRecordButton.tsx, server/src/routes/chat-files.ts</files>
  <read_first>
    - ui/src/components/ChatInput.tsx
    - server/src/routes/chat-files.ts
    - server/src/attachment-types.ts
  </read_first>
  <action>
1. Create ui/src/components/VoiceRecordButton.tsx:

```typescript
import { useCallback, useEffect, useRef, useState } from "react";
import { Mic, Square, Loader2 } from "lucide-react";
import { Button } from "./ui/button";

interface VoiceRecordButtonProps {
  /** Called with the trimmed transcript when the server returns non-empty text. */
  onTranscription: (text: string) => void;
  /**
   * Optional. Called with a short, user-presentable message when the microphone
   * cannot be used or the transcription request fails (including non-2xx
   * responses such as the 503 returned when Whisper is unavailable).
   */
  onError?: (message: string) => void;
  /** Disables the idle (microphone) button. */
  disabled?: boolean;
}

/** Stops every track of `stream` so the browser releases the microphone. */
function stopTracks(stream: MediaStream | null): void {
  stream?.getTracks().forEach((track) => track.stop());
}

/**
 * Toggle button that records microphone audio with MediaRecorder, uploads the
 * recording to POST /api/transcribe, and hands the resulting text to
 * `onTranscription`.
 *
 * Renders one of three states: idle (Mic), recording (Square, click to stop),
 * or transcribing (disabled spinner).
 */
export function VoiceRecordButton({ onTranscription, onError, disabled }: VoiceRecordButtonProps) {
  const [recording, setRecording] = useState(false);
  const [transcribing, setTranscribing] = useState(false);
  const mediaRecorderRef = useRef<MediaRecorder | null>(null);
  const streamRef = useRef<MediaStream | null>(null);
  const chunksRef = useRef<Blob[]>([]);
  const mountedRef = useRef(true);

  // Release the recorder and microphone if the component unmounts mid-recording.
  useEffect(() => {
    mountedRef.current = true;
    return () => {
      mountedRef.current = false;
      const recorder = mediaRecorderRef.current;
      if (recorder) {
        // Detach handlers first so stopping here does not start an upload.
        recorder.onstop = null;
        recorder.ondataavailable = null;
        if (recorder.state !== "inactive") recorder.stop();
        mediaRecorderRef.current = null;
      }
      stopTracks(streamRef.current);
      streamRef.current = null;
    };
  }, []);

  const startRecording = useCallback(async () => {
    let stream: MediaStream | null = null;
    try {
      stream = await navigator.mediaDevices.getUserMedia({ audio: true });
      if (!mountedRef.current) {
        // Permission prompt resolved after unmount: do not keep the mic open.
        stopTracks(stream);
        return;
      }

      const activeStream = stream;
      const mediaRecorder = new MediaRecorder(activeStream, {
        mimeType: MediaRecorder.isTypeSupported("audio/webm;codecs=opus")
          ? "audio/webm;codecs=opus"
          : "audio/webm",
      });

      chunksRef.current = [];
      mediaRecorder.ondataavailable = (e) => {
        if (e.data.size > 0) chunksRef.current.push(e.data);
      };

      mediaRecorder.onstop = async () => {
        stopTracks(activeStream);
        streamRef.current = null;
        const blob = new Blob(chunksRef.current, { type: "audio/webm" });
        chunksRef.current = [];
        if (blob.size === 0) return;

        setTranscribing(true);
        try {
          const formData = new FormData();
          formData.append("audio", blob, "recording.webm");

          const res = await fetch("/api/transcribe", {
            method: "POST",
            credentials: "include",
            body: formData,
          });

          if (!res.ok) {
            // Prefer the server's `error` message when the body is JSON.
            const body = (await res.json().catch(() => null)) as { error?: string } | null;
            onError?.(body?.error ?? `Transcription failed (HTTP ${res.status})`);
            return;
          }

          const data = (await res.json()) as { text?: string };
          const text = data.text?.trim();
          if (text) onTranscription(text);
        } catch {
          // Network failure or a non-JSON success body; without this catch the
          // rejection from this async event handler would go unhandled.
          onError?.("Could not reach the transcription service.");
        } finally {
          if (mountedRef.current) setTranscribing(false);
        }
      };

      mediaRecorderRef.current = mediaRecorder;
      streamRef.current = activeStream;
      mediaRecorder.start(250); // 250ms chunks
      setRecording(true);
    } catch {
      // Permission denied, no device, or MediaRecorder rejected the MIME type.
      // If the stream was already acquired, stop it so the mic is not left on.
      stopTracks(stream);
      streamRef.current = null;
      mediaRecorderRef.current = null;
      onError?.("Microphone is unavailable or permission was denied.");
    }
  }, [onTranscription, onError]);

  const stopRecording = useCallback(() => {
    if (mediaRecorderRef.current?.state === "recording") {
      // stop() fires `onstop`, which performs the upload.
      mediaRecorderRef.current.stop();
      mediaRecorderRef.current = null;
    }
    setRecording(false);
  }, []);

  if (transcribing) {
    return (
      <Button variant="ghost" size="icon" className="h-8 w-8" disabled>
        <Loader2 className="h-4 w-4 animate-spin" />
      </Button>
    );
  }

  if (recording) {
    return (
      <Button
        variant="ghost"
        size="icon"
        className="h-8 w-8 text-destructive"
        onClick={stopRecording}
        aria-label="Stop recording"
        title="Stop recording"
      >
        <Square className="h-4 w-4" />
      </Button>
    );
  }

  return (
    <Button
      variant="ghost"
      size="icon"
      className="h-8 w-8"
      onClick={startRecording}
      disabled={disabled}
      aria-label="Voice input"
      title="Voice input"
    >
      <Mic className="h-4 w-4" />
    </Button>
  );
}
```

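2. In server/src/routes/chat-files.ts, add a transcription endpoint. This endpoint receives audio via multer, writes it to a temp file, and runs a local Whisper binary via execFile (no shell): first `whisper-cpp` (whisper.cpp), then the `whisper` CLI (openai-whisper) as a fallback. If neither is available, return a 503 with a helpful message. The client uploads the recording under the multipart field name `audio`; confirm that the multer middleware used by this route accepts that field name, because multer rejects unexpected file fields.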

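Add this route inside chatFileRoutes, after the file upload routes. The client calls `/api/transcribe`, so confirm that the path the router is mounted at makes this route reachable at that URL: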

```typescript
// POST /transcribe -- Transcribe audio via local Whisper
//
// Accepts one uploaded audio file, writes it to a uniquely named temp file, and
// runs a local Whisper binary on it: `whisper-cpp` first, then the
// openai-whisper `whisper` CLI if that fails for any reason.
//
// Responses:
//   200 { text }  -- transcript (trimmed)
//   400 { error } -- multer rejected the upload, or no file was received
//   503 { error } -- neither Whisper binary is installed
//   500 { error } -- a Whisper binary was found but transcription failed
router.post("/transcribe", async (req, res) => {
  assertBoard(req);

  // NOTE(review): the client uploads under the field name "audio"; confirm that
  // runSingleFileUpload/fileUpload accept that field name.
  try {
    await runSingleFileUpload(fileUpload, req, res);
  } catch (err) {
    if (err instanceof multer.MulterError) {
      res.status(400).json({ error: err.message });
      return;
    }
    throw err;
  }

  const file = (req as Request & { file?: { buffer: Buffer; mimetype: string } }).file;
  if (!file) {
    res.status(400).json({ error: "Missing audio field" });
    return;
  }

  const { readFile, unlink, writeFile } = await import("node:fs/promises");
  const { tmpdir } = await import("node:os");
  const { randomUUID } = await import("node:crypto");
  const { promisify } = await import("node:util");
  const { execFile: execFileCb } = await import("node:child_process");
  const execFileAsync = promisify(execFileCb);

  // execFile reports a missing executable as an error with code "ENOENT".
  const isMissingBinary = (err: unknown): boolean =>
    typeof err === "object" && err !== null && (err as { code?: unknown }).code === "ENOENT";

  // A random name (rather than a timestamp) keeps concurrent requests from
  // sharing, overwriting, or deleting each other's temp files.
  const baseName = `nexus-audio-${randomUUID()}`;
  const tmpPath = path.join(tmpdir(), `${baseName}.webm`);
  // openai-whisper writes its transcript to <output_dir>/<input stem>.txt.
  const txtPath = path.join(tmpdir(), `${baseName}.txt`);

  try {
    await writeFile(tmpPath, file.buffer);

    // Attempt 1: whisper.cpp. The transcript is read from stdout, so no output
    // file is requested (which also means there is no sidecar file to clean up).
    // NOTE(review): whisper.cpp typically expects --model to be a path to a ggml
    // model file and WAV input -- confirm this invocation against the installed build.
    let cppError: unknown;
    try {
      const { stdout } = await execFileAsync("whisper-cpp", [
        "--model", "base.en",
        "--file", tmpPath,
        "--no-timestamps",
      ], { timeout: 30000 });
      res.json({ text: stdout.trim() });
      return;
    } catch (err) {
      cppError = err;
    }

    // Attempt 2: openai-whisper Python CLI.
    try {
      const { stdout } = await execFileAsync("whisper", [
        tmpPath,
        "--model", "base.en",
        "--output_format", "txt",
        "--output_dir", tmpdir(),
      ], { timeout: 60000 });
      // Prefer the generated .txt file; fall back to stdout if it cannot be read.
      const text = await readFile(txtPath, "utf-8").catch(() => stdout);
      res.json({ text: text.trim() });
    } catch (pyError) {
      if (isMissingBinary(cppError) && isMissingBinary(pyError)) {
        res.status(503).json({
          error: "Whisper not available. Install whisper-cpp or openai-whisper for voice input.",
        });
      } else {
        // A binary exists but failed (timeout, unsupported audio, non-zero exit).
        console.error("Transcription failed", { cppError, pyError });
        res.status(500).json({ error: "Transcription failed." });
      }
    }
  } finally {
    // Best-effort cleanup of the audio file and any transcript file.
    await Promise.all([
      unlink(tmpPath).catch(() => {}),
      unlink(txtPath).catch(() => {}),
    ]);
  }
});
```

Note: This uses execFileAsync (promisified execFile) -- NOT exec -- to avoid shell injection. The tmpPath is system-generated and safe.
  </action>
  <verify>
    <automated>cd /opt/nexus && test -f ui/src/components/VoiceRecordButton.tsx && echo "VoiceRecordButton exists" && grep -n "transcribe" server/src/routes/chat-files.ts | head -5</automated>
  </verify>
  <acceptance_criteria>
    - File ui/src/components/VoiceRecordButton.tsx exists
    - Contains MediaRecorder API usage (navigator.mediaDevices.getUserMedia)
    - Contains fetch("/api/transcribe") call
    - Contains recording/transcribing/idle states with Mic/Square/Loader2 icons
    - server/src/routes/chat-files.ts contains router.post("/transcribe") endpoint
    - Transcription endpoint uses execFileAsync (safe, no shell) for whisper CLI
    - Returns 503 with helpful message if whisper is not installed
  </acceptance_criteria>
  <done>VoiceRecordButton captures audio and sends to /api/transcribe; server transcription endpoint processes via local Whisper</done>
</task>

<task type="auto">
  <name>Task 2: Wire VoiceRecordButton into ChatInput and update REQUIREMENTS.md</name>
  <files>ui/src/components/ChatInput.tsx, ui/src/components/ChatPanel.tsx, .planning/REQUIREMENTS.md</files>
  <read_first>
    - ui/src/components/ChatInput.tsx
    - ui/src/components/ChatPanel.tsx
    - ui/src/components/VoiceRecordButton.tsx
    - .planning/REQUIREMENTS.md
  </read_first>
  <action>
1. Update ui/src/components/ChatInput.tsx:
   - Import VoiceRecordButton: `import { VoiceRecordButton } from "./VoiceRecordButton";`
   - Add an optional prop `enableVoiceInput?: boolean` to ChatInput's props interface
   - Add a handler that inserts transcription text into the textarea:
     ```typescript
     // Appends a transcript to the textarea's current text and focuses the textarea.
     const handleTranscription = useCallback((text: string) => {
       const textarea = textareaRef.current;
       if (!textarea) return;

       const current = textarea.value;
       // Insert a separating space only when the existing text does not already
       // end in whitespace, so dictation after a space or newline is not padded.
       const separator = current === "" || /\s$/.test(current) ? "" : " ";
       const newValue = `${current}${separator}${text}`;

       // Trigger onChange through a native input event for controlled components:
       // set the value via the prototype's setter, then dispatch a bubbling event.
       const nativeInputValueSetter = Object.getOwnPropertyDescriptor(
         window.HTMLTextAreaElement.prototype, "value"
       )?.set;
       nativeInputValueSetter?.call(textarea, newValue);
       textarea.dispatchEvent(new Event("input", { bubbles: true }));
       textarea.focus();
     }, []);
     ```
     Alternatively, if ChatInput uses a state variable for the input value, just update that state directly. Read ChatInput.tsx first to determine the correct approach.
   - Render VoiceRecordButton in the button row (next to the Paperclip button), only when `enableVoiceInput` is true:
     ```tsx
     {enableVoiceInput && (
       <VoiceRecordButton
         onTranscription={handleTranscription}
         disabled={/* same disabled condition as send button if any */}
       />
     )}
     ```

2. Update ui/src/components/ChatPanel.tsx:
   - Pass `enableVoiceInput={true}` to the `<ChatInput>` component. This makes the voice button visible to users. (If a `localAIEnabled` flag or config exists, gate on that instead; otherwise pass `true` unconditionally — the server returns 503 gracefully if whisper is not installed.)

3. Update .planning/REQUIREMENTS.md:
   - Change INPUT-04 from `- [ ] **INPUT-04**` to `- [x] **INPUT-04**`
   - In Traceability table, change INPUT-04 from Pending to Complete
   - Also mark INPUT-02 and INPUT-03 complete if they are not already (they were implemented in Plan 25-02): set their checkboxes to `- [x]` and change their Traceability table status from Pending to Complete
  </action>
  <verify>
    <automated>cd /opt/nexus && grep -n "VoiceRecordButton\|enableVoiceInput\|handleTranscription" ui/src/components/ChatInput.tsx | head -5 && grep "INPUT-02\|INPUT-03\|INPUT-04" .planning/REQUIREMENTS.md | head -6</automated>
  </verify>
  <acceptance_criteria>
    - ui/src/components/ChatInput.tsx imports VoiceRecordButton
    - Contains enableVoiceInput prop in the props interface
    - Contains handleTranscription callback that inserts text into textarea
    - Renders VoiceRecordButton conditionally when enableVoiceInput is true
    - ui/src/components/ChatPanel.tsx passes enableVoiceInput={true} to ChatInput
    - .planning/REQUIREMENTS.md contains `- [x] **INPUT-02**`
    - .planning/REQUIREMENTS.md contains `- [x] **INPUT-03**`
    - .planning/REQUIREMENTS.md contains `- [x] **INPUT-04**`
  </acceptance_criteria>
  <done>Voice input button in chat input; transcription inserts into textarea; INPUT-02/03/04 marked Complete</done>
</task>

</tasks>

<verification>
- npx tsc --noEmit -p ui/tsconfig.json passes
- grep "VoiceRecordButton" ui/src/components/ChatInput.tsx matches
- grep "\[x\].*INPUT-04" .planning/REQUIREMENTS.md matches
</verification>

<success_criteria>
- VoiceRecordButton appears in ChatInput when voice input is enabled
- Recording captures audio, sends to /api/transcribe, and inserts result into textarea
- Server returns transcription via local Whisper (or 503 if not installed)
- INPUT-02, INPUT-03, INPUT-04 marked Complete in REQUIREMENTS.md
- TypeScript compiles without errors
</success_criteria>

<output>
After completion, create `.planning/phases/25-file-system/25-08-SUMMARY.md`
</output>