feat(01-03): add timing profiler for response latency debugging

Instruments the full message pipeline with [TIMING] log entries:
- Message age (Telegram delivery delay)
- Subprocess state (cold-start vs reused)
- Process spawn, first stdout, first assistant text
- Telegram send latency, total wall time

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Mikkel Georgsen 2026-02-04 18:30:05 +00:00
parent 2302b75e14
commit d878931e79
2 changed files with 35 additions and 9 deletions

View file

@ -8,6 +8,7 @@ import asyncio
import logging
import os
import subprocess
import time
from datetime import datetime
from pathlib import Path
@ -66,10 +67,13 @@ def is_authorized(user_id: int) -> bool:
def make_callbacks(bot, chat_id):
"""Create callbacks for ClaudeSubprocess bound to specific chat."""
async def on_output(text):
t0 = time.monotonic()
# Truncate to Telegram limit
if len(text) > 4000:
text = text[:4000] + "\n... (truncated)"
await bot.send_message(chat_id=chat_id, text=text)
elapsed = time.monotonic() - t0
logger.info(f"[TIMING] Telegram send: {elapsed:.3f}s ({len(text)} chars)")
async def on_error(error):
await bot.send_message(chat_id=chat_id, text=f"Error: {error}")
@ -454,11 +458,15 @@ async def handle_message(update: Update, context: ContextTypes.DEFAULT_TYPE):
return
message = update.message.text
logger.info(f"Routing message to session '{active_session}': {message[:50]}...")
t_start = time.monotonic()
# Telegram message timestamp vs now = delivery delay
msg_age = (datetime.utcnow() - update.message.date.replace(tzinfo=None)).total_seconds()
logger.info(f"[TIMING] Message received: session='{active_session}', age={msg_age:.1f}s, text={message[:50]}...")
try:
# Get or create subprocess for active session
if active_session not in subprocesses or not subprocesses[active_session].is_alive:
already_alive = active_session in subprocesses and subprocesses[active_session].is_alive
if not already_alive:
session_dir = session_manager.get_session_dir(active_session)
session_data = session_manager.get_session(active_session)
persona_name = session_data.get('persona', 'default')
@ -477,7 +485,8 @@ async def handle_message(update: Update, context: ContextTypes.DEFAULT_TYPE):
on_status=callbacks[3],
)
logger.info(f"Auto-spawned subprocess for active session '{active_session}'")
subprocess_state = "reused" if already_alive else "cold-start"
logger.info(f"[TIMING] Subprocess: {subprocess_state}, busy={subprocesses[active_session].is_busy}")
# Send message to Claude
await subprocesses[active_session].send_message(message)

View file

@ -25,6 +25,7 @@ import inspect
import json
import logging
import os
import time
from pathlib import Path
from typing import Callable, Optional
@ -139,6 +140,9 @@ class ClaudeSubprocess:
Args:
message: Initial message to send
"""
self._spawn_time = time.monotonic()
self._first_output_time = None
# Build command
cmd = [
"claude",
@ -179,9 +183,11 @@ class ClaudeSubprocess:
# Ensure session directory exists
self._session_dir.mkdir(parents=True, exist_ok=True)
# Log full command (with model) for debugging
cmd_flags = [c for c in cmd if c.startswith("--")]
logger.info(
f"Spawning Claude Code: cwd={self._session_dir}, "
f"cmd={' '.join(cmd[:4])}... (truncated)"
f"[TIMING] Spawning: cwd={self._session_dir.name}, "
f"flags={' '.join(cmd_flags)}, msg={message[:60]}..."
)
try:
@ -195,7 +201,8 @@ class ClaudeSubprocess:
)
self._busy = True
logger.info(f"Claude Code spawned: PID={self._process.pid}")
elapsed = time.monotonic() - self._spawn_time
logger.info(f"[TIMING] Process spawned: PID={self._process.pid} (+{elapsed:.3f}s)")
# Launch concurrent stream readers
self._reader_task = asyncio.create_task(self._read_streams())
@ -229,7 +236,8 @@ class ClaudeSubprocess:
# Wait for process to terminate
returncode = await self._process.wait()
logger.info(f"Claude Code process exited: returncode={returncode}")
elapsed = time.monotonic() - self._spawn_time
logger.info(f"[TIMING] Process exited: returncode={returncode}, total={elapsed:.3f}s")
# Handle crash (non-zero exit, but not signals from our own termination)
# Negative return codes are signals (e.g., -15 = SIGTERM, -9 = SIGKILL)
@ -271,6 +279,7 @@ class ClaudeSubprocess:
if not self._process or not self._process.stdout:
return
first_line = True
try:
while True:
line = await self._process.stdout.readline()
@ -278,6 +287,10 @@ class ClaudeSubprocess:
break
line_str = line.decode().rstrip()
if line_str:
if first_line:
elapsed = time.monotonic() - self._spawn_time
logger.info(f"[TIMING] First stdout line: +{elapsed:.3f}s")
first_line = False
self._handle_stdout_line(line_str)
except Exception as e:
logger.error(f"Error reading stdout: {e}")
@ -322,6 +335,10 @@ class ClaudeSubprocess:
if block.get("type") == "text":
text = block.get("text", "")
if text and self.on_output:
if not self._first_output_time:
self._first_output_time = time.monotonic()
elapsed = self._first_output_time - self._spawn_time
logger.info(f"[TIMING] First assistant text: +{elapsed:.3f}s ({len(text)} chars)")
try:
if inspect.iscoroutinefunction(self.on_output):
asyncio.create_task(self.on_output(text))
@ -332,9 +349,9 @@ class ClaudeSubprocess:
elif event_type == "result":
# Turn complete
elapsed = time.monotonic() - self._spawn_time
session_id = event.get("session_id")
if session_id:
logger.debug(f"Turn complete: session_id={session_id}")
logger.info(f"[TIMING] Result event: +{elapsed:.3f}s (session={session_id})")
# Check for error
if event.get("is_error") and self.on_error: