377 lines
16 KiB
Markdown
377 lines
16 KiB
Markdown
---
|
|
phase: 37-web-chat-voice-ui
|
|
plan: 04
|
|
type: execute
|
|
wave: 3
|
|
depends_on: ["37-02", "37-03"]
|
|
files_modified:
|
|
- ui/src/components/ChatInput.tsx
|
|
- ui/src/components/ChatMessage.tsx
|
|
- ui/src/components/ChatPanel.tsx
|
|
- ui/src/hooks/useStreamingChat.ts
|
|
- ui/src/api/chat.ts
|
|
autonomous: false
|
|
requirements:
|
|
- WCHAT-01
|
|
- WCHAT-02
|
|
- WCHAT-03
|
|
- WCHAT-04
|
|
- WCHAT-05
|
|
- WCHAT-06
|
|
|
|
must_haves:
|
|
truths:
|
|
- "ChatInput renders VoiceMicButton instead of VoiceRecordButton"
|
|
- "ChatInput shows VoiceModeToggle above the input whenever enableVoiceInput is true"
|
|
- "ChatMessage renders ChatVoiceBadge for voice_input and voice_full messageTypes"
|
|
- "ChatMessage renders ChatVoicePlayer for voice_full messages with auto-play from localStorage"
|
|
- "useStreamingChat.startStream accepts voiceMode parameter"
|
|
- "chatApi.postMessageAndStream sends voiceMode in request body"
|
|
- "ChatPanel passes voiceMode from useVoiceMode to startStream calls"
|
|
- "Full voice flow works end-to-end: mic -> VAD -> transcribe -> stream -> voice badge + audio"
|
|
artifacts:
|
|
- path: "ui/src/components/ChatInput.tsx"
|
|
provides: "Voice-enhanced chat input with VoiceMicButton + VoiceModeToggle"
|
|
contains: "VoiceMicButton"
|
|
- path: "ui/src/components/ChatMessage.tsx"
|
|
provides: "Voice-aware message rendering"
|
|
contains: "ChatVoiceBadge"
|
|
- path: "ui/src/hooks/useStreamingChat.ts"
|
|
provides: "Voice-mode-aware streaming"
|
|
contains: "voiceMode"
|
|
- path: "ui/src/api/chat.ts"
|
|
provides: "Voice mode in stream request"
|
|
contains: "voiceMode"
|
|
key_links:
|
|
- from: "ui/src/components/ChatPanel.tsx"
|
|
to: "ui/src/hooks/useVoiceMode.ts"
|
|
via: "useVoiceMode() hook call"
|
|
pattern: "useVoiceMode"
|
|
- from: "ui/src/components/ChatPanel.tsx"
|
|
to: "ui/src/hooks/useStreamingChat.ts"
|
|
via: "startStream(content, agentId, voiceMode)"
|
|
pattern: "startStream.*voiceMode"
|
|
- from: "ui/src/hooks/useStreamingChat.ts"
|
|
to: "ui/src/api/chat.ts"
|
|
via: "chatApi.postMessageAndStream with voiceMode"
|
|
pattern: "voiceMode"
|
|
- from: "ui/src/components/ChatInput.tsx"
|
|
to: "ui/src/components/VoiceMicButton.tsx"
|
|
via: "VoiceMicButton replaces VoiceRecordButton"
|
|
pattern: "VoiceMicButton"
|
|
- from: "ui/src/components/ChatMessage.tsx"
|
|
to: "ui/src/components/ChatVoiceBadge.tsx"
|
|
via: "ChatVoiceBadge for voice messageTypes"
|
|
pattern: "ChatVoiceBadge"
|
|
---
|
|
|
|
<objective>
|
|
Wire all voice components into the existing chat system: replace VoiceRecordButton with VoiceMicButton in ChatInput, add VoiceModeToggle, render ChatVoiceBadge in ChatMessage, and thread voiceMode through useStreamingChat and chatApi.
|
|
|
|
Purpose: This is the integration plan that connects all Phase 37 components to the existing chat UI. Without this wiring, the components exist but aren't used.
|
|
|
|
Output: 5 modified files connecting voice I/O to the chat system
|
|
</objective>
|
|
|
|
<execution_context>
|
|
@$HOME/.claude/get-shit-done/workflows/execute-plan.md
|
|
@$HOME/.claude/get-shit-done/templates/summary.md
|
|
</execution_context>
|
|
|
|
<context>
|
|
@.planning/phases/37-web-chat-voice-ui/37-RESEARCH.md
|
|
@.planning/phases/37-web-chat-voice-ui/37-02-SUMMARY.md
|
|
@.planning/phases/37-web-chat-voice-ui/37-03-SUMMARY.md
|
|
|
|
<interfaces>
|
|
<!-- VoiceMicButton (from Plan 02) -->
|
|
```typescript
|
|
interface VoiceMicButtonProps {
|
|
onTranscript: (text: string) => void;
|
|
disabled?: boolean;
|
|
}
|
|
export function VoiceMicButton({ onTranscript, disabled }: VoiceMicButtonProps)
|
|
```
|
|
|
|
<!-- VoiceModeToggle (from Plan 03) -->
|
|
```typescript
|
|
export function VoiceModeToggle()
|
|
// Uses useVoiceMode() internally; renders three pills + auto-play checkbox
|
|
```
|
|
|
|
<!-- ChatVoiceBadge (from Plan 03) -->
|
|
```typescript
|
|
interface ChatVoiceBadgeProps {
|
|
content: string;
|
|
messageType: string; // "voice_input" | "voice_full"
|
|
autoPlayVoice?: boolean;
|
|
}
|
|
export function ChatVoiceBadge({ content, messageType, autoPlayVoice }: ChatVoiceBadgeProps)
|
|
```
|
|
|
|
<!-- useVoiceMode (from Plan 02) -->
|
|
```typescript
|
|
type VoiceMode = "text" | "voice_input" | "full_voice";
|
|
export function useVoiceMode(): { mode: VoiceMode; setMode: (v: VoiceMode) => Promise<void>; isLoading: boolean }
|
|
```
|
|
|
|
<!-- Current ChatInput props (parent branch) -->
|
|
```typescript
|
|
interface ChatInputProps {
|
|
onSend: (content: string) => void;
|
|
isSubmitting?: boolean;
|
|
disabled?: boolean;
|
|
placeholder?: string;
|
|
agents?: Agent[];
|
|
agentsLoading?: boolean;
|
|
onFilesPicked?: (files: File[]) => void;
|
|
pendingFiles?: PendingFile[];
|
|
onRemoveFile?: (id: string) => void;
|
|
enableVoiceInput?: boolean; // Controls VoiceRecordButton visibility
|
|
}
|
|
```
|
|
|
|
<!-- Current useStreamingChat (parent branch) -->
|
|
```typescript
|
|
export function useStreamingChat(conversationId: string | null) {
|
|
// startStream(userMessage: string, agentId?: string) — needs voiceMode param added
|
|
return { streamingContent, isStreaming, startStream, stop };
|
|
}
|
|
```
|
|
|
|
<!-- Current chatApi.postMessageAndStream (parent branch) -->
|
|
```typescript
|
|
async postMessageAndStream(
|
|
conversationId: string,
|
|
data: { content: string; agentId?: string }, // needs voiceMode added
|
|
callbacks: { onToken, onDone, onError },
|
|
signal?: AbortSignal,
|
|
): Promise<void>
|
|
```
|
|
|
|
<!-- Current ChatPanel send handler (parent branch) -->
|
|
```typescript
|
|
// handleSend calls startStream(content, resolvedAgentId) — needs voiceMode
|
|
```
|
|
</interfaces>
|
|
</context>
|
|
|
|
<tasks>
|
|
|
|
<task type="auto">
|
|
<name>Task 1: Thread voiceMode through chatApi and useStreamingChat</name>
|
|
<files>
|
|
ui/src/api/chat.ts,
|
|
ui/src/hooks/useStreamingChat.ts
|
|
</files>
|
|
<read_first>
|
|
ui/src/api/chat.ts,
|
|
ui/src/hooks/useStreamingChat.ts
|
|
</read_first>
|
|
<action>
|
|
1. **ui/src/api/chat.ts** — Extend postMessageAndStream data parameter:
|
|
- Change the `data` parameter type from `{ content: string; agentId?: string }` to `{ content: string; agentId?: string; voiceMode?: string }`
|
|
- The body is already sent as `JSON.stringify(data)`, so voiceMode will be included automatically when present
|
|
- No other changes needed — the server's chat.ts stream handler already reads voiceMode from req.body (added in Plan 01)
|
|
|
|
2. **ui/src/hooks/useStreamingChat.ts** — Extend startStream to accept voiceMode:
|
|
- Change `startStream` signature from `(userMessage: string, agentId?: string)` to `(userMessage: string, agentId?: string, voiceMode?: string)`
|
|
- Pass voiceMode through to chatApi.postMessageAndStream:
|
|
```typescript
|
|
chatApi.postMessageAndStream(
|
|
conversationId,
|
|
{ content: userMessage, agentId, voiceMode },
|
|
{ onToken, onDone, onError },
|
|
abort.signal,
|
|
);
|
|
```
|
|
- Do not add `voiceMode` to the useCallback dependency array: it is a parameter of startStream, not a value captured from the enclosing scope, so the existing dependencies stay unchanged
|
|
</action>
|
|
<verify>
|
|
<automated>cd /opt/nexus/.claude/worktrees/agent-a009558f && grep -q "voiceMode" ui/src/api/chat.ts && grep -q "voiceMode" ui/src/hooks/useStreamingChat.ts && grep -A 6 "postMessageAndStream" ui/src/api/chat.ts | grep -q "voiceMode" && echo "PASS" || echo "FAIL"</automated>
|
|
</verify>
|
|
<acceptance_criteria>
|
|
- grep "voiceMode" ui/src/api/chat.ts returns match in postMessageAndStream data type
|
|
- grep "voiceMode" ui/src/hooks/useStreamingChat.ts returns match in startStream signature
|
|
- grep "voiceMode" ui/src/hooks/useStreamingChat.ts returns match in postMessageAndStream call
|
|
</acceptance_criteria>
|
|
<done>chatApi.postMessageAndStream sends voiceMode in request body. useStreamingChat.startStream accepts and forwards voiceMode parameter.</done>
|
|
</task>
|
|
|
|
<task type="auto">
|
|
<name>Task 2: Wire VoiceMicButton + VoiceModeToggle into ChatInput, ChatVoiceBadge into ChatMessage, voiceMode into ChatPanel</name>
|
|
<files>
|
|
ui/src/components/ChatInput.tsx,
|
|
ui/src/components/ChatMessage.tsx,
|
|
ui/src/components/ChatPanel.tsx
|
|
</files>
|
|
<read_first>
|
|
ui/src/components/ChatInput.tsx,
|
|
ui/src/components/ChatMessage.tsx,
|
|
ui/src/components/ChatPanel.tsx,
|
|
ui/src/components/VoiceMicButton.tsx,
|
|
ui/src/components/VoiceModeToggle.tsx,
|
|
ui/src/components/ChatVoiceBadge.tsx,
|
|
ui/src/hooks/useVoiceMode.ts,
|
|
ui/src/hooks/useStreamingChat.ts
|
|
</read_first>
|
|
<action>
|
|
1. **ui/src/components/ChatInput.tsx** — Replace VoiceRecordButton with VoiceMicButton:
|
|
- Remove import of VoiceRecordButton: `import { VoiceRecordButton } from "./VoiceRecordButton";`
|
|
- Add import: `import { VoiceMicButton } from "./VoiceMicButton";`
|
|
- Add import: `import { VoiceModeToggle } from "./VoiceModeToggle";`
|
|
- In the JSX, find the VoiceRecordButton rendering block:
|
|
```tsx
|
|
{enableVoiceInput && (
|
|
<VoiceRecordButton
|
|
onTranscription={handleTranscription}
|
|
disabled={disabled}
|
|
/>
|
|
)}
|
|
```
|
|
Replace with:
|
|
```tsx
|
|
{enableVoiceInput && (
|
|
<VoiceMicButton
|
|
onTranscript={handleTranscription}
|
|
disabled={disabled}
|
|
/>
|
|
)}
|
|
```
|
|
- Add VoiceModeToggle ABOVE the input form, inside the ChatInput component, after ChatFileDropZone opens but before the form:
|
|
```tsx
|
|
<ChatFileDropZone ...>
|
|
{enableVoiceInput && <VoiceModeToggle />}
|
|
<form ...>
|
|
```
|
|
This places the toggle above the input row so it doesn't crowd the send button area.
|
|
|
|
2. **ui/src/components/ChatMessage.tsx** — Add ChatVoiceBadge for voice messages:
|
|
- Add import:
|
|
```typescript
|
|
import { ChatVoiceBadge } from "./ChatVoiceBadge";
|
|
```
|
|
- In the messageType dispatch block (after the existing spec_card, handoff, task_created, status_update checks), add:
|
|
```typescript
|
|
if (messageType === "voice_input" || messageType === "voice_full") {
|
|
const autoPlay = typeof window !== "undefined"
|
|
? localStorage.getItem("nexus:voice:autoplay") === "true"
|
|
: false;
|
|
return (
|
|
<div className="max-w-full group relative">
|
|
{agentName && (
|
|
<ChatMessageIdentityBar
|
|
agentName={agentName}
|
|
agentIcon={agentIcon}
|
|
agentRole={agentRole}
|
|
timestamp={timestamp}
|
|
isStreaming={isStreaming}
|
|
/>
|
|
)}
|
|
<ChatVoiceBadge
|
|
content={content}
|
|
messageType={messageType}
|
|
autoPlayVoice={autoPlay}
|
|
/>
|
|
{isStreaming && <ChatStreamingCursor />}
|
|
<ChatMessageActions
|
|
role="assistant"
|
|
isStreaming={isAnyStreaming}
|
|
onRetry={id && onRetry ? () => onRetry(id) : undefined}
|
|
onBookmark={id && onBookmark ? () => onBookmark(id) : undefined}
|
|
isBookmarked={isBookmarked}
|
|
/>
|
|
</div>
|
|
);
|
|
}
|
|
```
|
|
- Place this BEFORE the general "fall through to default system message rendering" comment, but AFTER the status_update check
|
|
|
|
3. **ui/src/components/ChatPanel.tsx** — Connect useVoiceMode and pass voiceMode to startStream:
|
|
- Add import:
|
|
```typescript
|
|
import { useVoiceMode } from "../hooks/useVoiceMode";
|
|
```
|
|
- Inside the ChatPanel component, call the hook:
|
|
```typescript
|
|
const { mode: voiceMode } = useVoiceMode();
|
|
```
|
|
- Find ALL calls to `startStream(content, agentId)` (there are ~5 of them per the read_first scan). Add voiceMode as third argument:
|
|
```typescript
|
|
startStream(content, resolvedAgentId ?? undefined, voiceMode);
|
|
```
|
|
- The five locations are approximately:
|
|
- In handleSend: `startStream(content, resolvedAgentId ?? undefined)` (two calls — online and offline branches)
|
|
- In handleEdit callback: `startStream(newContent, activeAgentId ?? undefined)`
|
|
- In handleRetry: `startStream(newContent, activeAgentId ?? undefined)`
|
|
- In retry from error: `startStream(lastUserContent, activeAgentId ?? undefined)`
|
|
- Update each to include `voiceMode` as the third argument
|
|
- Do not change how `enableVoiceInput` is passed to ChatInput: keep it `true` (or however it is currently set) and do not derive it from `voiceMode`. The VoiceModeToggle handles mode selection independently, and the mic button should always be visible when voice is available.
|
|
- Check how enableVoiceInput is currently set in ChatPanel. If it's hardcoded or conditional, ensure it stays true so VoiceMicButton renders.
|
|
</action>
|
|
<verify>
|
|
<automated>cd /opt/nexus/.claude/worktrees/agent-a009558f && grep -q "VoiceMicButton" ui/src/components/ChatInput.tsx && grep -q "VoiceModeToggle" ui/src/components/ChatInput.tsx && ! grep -q "VoiceRecordButton" ui/src/components/ChatInput.tsx && grep -q "ChatVoiceBadge" ui/src/components/ChatMessage.tsx && grep -q "voice_input" ui/src/components/ChatMessage.tsx && grep -q "voice_full" ui/src/components/ChatMessage.tsx && grep -q "useVoiceMode" ui/src/components/ChatPanel.tsx && grep -q "voiceMode" ui/src/components/ChatPanel.tsx && echo "PASS" || echo "FAIL"</automated>
|
|
</verify>
|
|
<acceptance_criteria>
|
|
- grep "VoiceMicButton" ui/src/components/ChatInput.tsx returns match
|
|
- grep "VoiceModeToggle" ui/src/components/ChatInput.tsx returns match
|
|
- grep "VoiceRecordButton" ui/src/components/ChatInput.tsx returns NO match (replaced)
|
|
- grep "ChatVoiceBadge" ui/src/components/ChatMessage.tsx returns match
|
|
- grep "voice_input" ui/src/components/ChatMessage.tsx returns match
|
|
- grep "voice_full" ui/src/components/ChatMessage.tsx returns match
|
|
- grep "nexus:voice:autoplay" ui/src/components/ChatMessage.tsx returns match (reads localStorage)
|
|
- grep "useVoiceMode" ui/src/components/ChatPanel.tsx returns match
|
|
- grep "voiceMode" ui/src/components/ChatPanel.tsx returns matches, including every startStream call
|
|
- grep "startStream.*voiceMode" ui/src/components/ChatPanel.tsx returns match
|
|
</acceptance_criteria>
|
|
<done>ChatInput uses VoiceMicButton (VAD-powered) instead of VoiceRecordButton. VoiceModeToggle shown above input. ChatMessage renders ChatVoiceBadge for voice messages. ChatPanel passes voiceMode to all startStream calls.</done>
|
|
</task>
|
|
|
|
<task type="checkpoint:human-verify" gate="blocking">
|
|
<name>Task 3: Verify voice flow end-to-end</name>
|
|
<files>ui/src/components/ChatPanel.tsx</files>
|
|
<read_first>ui/src/components/ChatPanel.tsx</read_first>
|
|
<action>
|
|
Human verification of the complete voice I/O integration. No code changes in this task — all implementation was done in Tasks 1-2. This checkpoint confirms the full voice flow works visually and functionally in the browser.
|
|
|
|
What was built across all Phase 37 plans:
|
|
- VoiceMicButton with VAD auto-stop replacing VoiceRecordButton
|
|
- VoiceWaveform canvas animation during recording
|
|
- VoiceModeToggle (Text / Voice In / Full Voice) with nexus-settings persistence
|
|
- ChatVoiceBadge with collapsible full markdown for voice_full messages
|
|
- ChatVoicePlayer with play/pause and auto-play from localStorage
|
|
- voiceMode threaded through ChatPanel -> useStreamingChat -> chatApi -> server chat.ts
|
|
</action>
|
|
<verify>
|
|
<automated>cd /opt/nexus/.claude/worktrees/agent-a009558f && grep -q "VoiceMicButton" ui/src/components/ChatInput.tsx && grep -q "ChatVoiceBadge" ui/src/components/ChatMessage.tsx && grep -q "voiceMode" ui/src/components/ChatPanel.tsx && echo "PASS" || echo "FAIL"</automated>
|
|
</verify>
|
|
<acceptance_criteria>
|
|
- VoiceModeToggle visible above chat input with three pills
|
|
- Mic button starts recording with waveform animation
|
|
- Recording auto-stops on silence detection
|
|
- Transcribed text populates input field
|
|
- Voice badge appears on agent responses in voice modes
|
|
- Audio player works for voice_full messages
|
|
- Auto-play toggle persists across page refresh
|
|
</acceptance_criteria>
|
|
<done>End-to-end voice flow verified by human: recording, VAD auto-stop, transcription, voice mode toggle, voice badge, audio playback, and auto-play setting all working correctly.</done>
|
|
</task>
|
|
|
|
</tasks>
|
|
|
|
<verification>
|
|
- VoiceRecordButton fully replaced by VoiceMicButton in ChatInput
|
|
- VoiceModeToggle renders above chat input
|
|
- ChatMessage dispatches voice_input and voice_full to ChatVoiceBadge
|
|
- voiceMode flows: ChatPanel -> useStreamingChat -> chatApi -> server chat.ts
|
|
- Auto-play reads from localStorage
|
|
- TypeScript compiles without errors: pnpm --filter @paperclipai/ui typecheck
|
|
</verification>
|
|
|
|
<success_criteria>
|
|
Complete voice I/O working in browser chat: VAD-powered recording with waveform, auto-stop on silence, voice mode toggle with persistence, voice badge on responses, inline audio player with auto-play setting. User can have a full voice conversation with their agent.
|
|
</success_criteria>
|
|
|
|
<output>
|
|
After completion, create `.planning/phases/37-web-chat-voice-ui/37-04-SUMMARY.md`
|
|
</output>
|