feat(01-03): add timing profiler for response latency debugging
Instruments the full message pipeline with [TIMING] log entries: - Message age (Telegram delivery delay) - Subprocess state (cold-start vs reused) - Process spawn, first stdout, first assistant text - Telegram send latency, total wall time Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
2302b75e14
commit
d878931e79
2 changed files with 35 additions and 9 deletions
|
|
@ -8,6 +8,7 @@ import asyncio
|
|||
import logging
|
||||
import os
|
||||
import subprocess
|
||||
import time
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
|
|
@ -66,10 +67,13 @@ def is_authorized(user_id: int) -> bool:
|
|||
def make_callbacks(bot, chat_id):
|
||||
"""Create callbacks for ClaudeSubprocess bound to specific chat."""
|
||||
async def on_output(text):
|
||||
t0 = time.monotonic()
|
||||
# Truncate to Telegram limit
|
||||
if len(text) > 4000:
|
||||
text = text[:4000] + "\n... (truncated)"
|
||||
await bot.send_message(chat_id=chat_id, text=text)
|
||||
elapsed = time.monotonic() - t0
|
||||
logger.info(f"[TIMING] Telegram send: {elapsed:.3f}s ({len(text)} chars)")
|
||||
|
||||
async def on_error(error):
|
||||
await bot.send_message(chat_id=chat_id, text=f"Error: {error}")
|
||||
|
|
@ -454,11 +458,15 @@ async def handle_message(update: Update, context: ContextTypes.DEFAULT_TYPE):
|
|||
return
|
||||
|
||||
message = update.message.text
|
||||
logger.info(f"Routing message to session '{active_session}': {message[:50]}...")
|
||||
t_start = time.monotonic()
|
||||
# Telegram message timestamp vs now = delivery delay
|
||||
msg_age = (datetime.utcnow() - update.message.date.replace(tzinfo=None)).total_seconds()
|
||||
logger.info(f"[TIMING] Message received: session='{active_session}', age={msg_age:.1f}s, text={message[:50]}...")
|
||||
|
||||
try:
|
||||
# Get or create subprocess for active session
|
||||
if active_session not in subprocesses or not subprocesses[active_session].is_alive:
|
||||
already_alive = active_session in subprocesses and subprocesses[active_session].is_alive
|
||||
if not already_alive:
|
||||
session_dir = session_manager.get_session_dir(active_session)
|
||||
session_data = session_manager.get_session(active_session)
|
||||
persona_name = session_data.get('persona', 'default')
|
||||
|
|
@ -477,7 +485,8 @@ async def handle_message(update: Update, context: ContextTypes.DEFAULT_TYPE):
|
|||
on_status=callbacks[3],
|
||||
)
|
||||
|
||||
logger.info(f"Auto-spawned subprocess for active session '{active_session}'")
|
||||
subprocess_state = "reused" if already_alive else "cold-start"
|
||||
logger.info(f"[TIMING] Subprocess: {subprocess_state}, busy={subprocesses[active_session].is_busy}")
|
||||
|
||||
# Send message to Claude
|
||||
await subprocesses[active_session].send_message(message)
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@ import inspect
|
|||
import json
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Callable, Optional
|
||||
|
||||
|
|
@ -139,6 +140,9 @@ class ClaudeSubprocess:
|
|||
Args:
|
||||
message: Initial message to send
|
||||
"""
|
||||
self._spawn_time = time.monotonic()
|
||||
self._first_output_time = None
|
||||
|
||||
# Build command
|
||||
cmd = [
|
||||
"claude",
|
||||
|
|
@ -179,9 +183,11 @@ class ClaudeSubprocess:
|
|||
# Ensure session directory exists
|
||||
self._session_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Log full command (with model) for debugging
|
||||
cmd_flags = [c for c in cmd if c.startswith("--")]
|
||||
logger.info(
|
||||
f"Spawning Claude Code: cwd={self._session_dir}, "
|
||||
f"cmd={' '.join(cmd[:4])}... (truncated)"
|
||||
f"[TIMING] Spawning: cwd={self._session_dir.name}, "
|
||||
f"flags={' '.join(cmd_flags)}, msg={message[:60]}..."
|
||||
)
|
||||
|
||||
try:
|
||||
|
|
@ -195,7 +201,8 @@ class ClaudeSubprocess:
|
|||
)
|
||||
|
||||
self._busy = True
|
||||
logger.info(f"Claude Code spawned: PID={self._process.pid}")
|
||||
elapsed = time.monotonic() - self._spawn_time
|
||||
logger.info(f"[TIMING] Process spawned: PID={self._process.pid} (+{elapsed:.3f}s)")
|
||||
|
||||
# Launch concurrent stream readers
|
||||
self._reader_task = asyncio.create_task(self._read_streams())
|
||||
|
|
@ -229,7 +236,8 @@ class ClaudeSubprocess:
|
|||
|
||||
# Wait for process to terminate
|
||||
returncode = await self._process.wait()
|
||||
logger.info(f"Claude Code process exited: returncode={returncode}")
|
||||
elapsed = time.monotonic() - self._spawn_time
|
||||
logger.info(f"[TIMING] Process exited: returncode={returncode}, total={elapsed:.3f}s")
|
||||
|
||||
# Handle crash (non-zero exit, but not signals from our own termination)
|
||||
# Negative return codes are signals (e.g., -15 = SIGTERM, -9 = SIGKILL)
|
||||
|
|
@ -271,6 +279,7 @@ class ClaudeSubprocess:
|
|||
if not self._process or not self._process.stdout:
|
||||
return
|
||||
|
||||
first_line = True
|
||||
try:
|
||||
while True:
|
||||
line = await self._process.stdout.readline()
|
||||
|
|
@ -278,6 +287,10 @@ class ClaudeSubprocess:
|
|||
break
|
||||
line_str = line.decode().rstrip()
|
||||
if line_str:
|
||||
if first_line:
|
||||
elapsed = time.monotonic() - self._spawn_time
|
||||
logger.info(f"[TIMING] First stdout line: +{elapsed:.3f}s")
|
||||
first_line = False
|
||||
self._handle_stdout_line(line_str)
|
||||
except Exception as e:
|
||||
logger.error(f"Error reading stdout: {e}")
|
||||
|
|
@ -322,6 +335,10 @@ class ClaudeSubprocess:
|
|||
if block.get("type") == "text":
|
||||
text = block.get("text", "")
|
||||
if text and self.on_output:
|
||||
if not self._first_output_time:
|
||||
self._first_output_time = time.monotonic()
|
||||
elapsed = self._first_output_time - self._spawn_time
|
||||
logger.info(f"[TIMING] First assistant text: +{elapsed:.3f}s ({len(text)} chars)")
|
||||
try:
|
||||
if inspect.iscoroutinefunction(self.on_output):
|
||||
asyncio.create_task(self.on_output(text))
|
||||
|
|
@ -332,9 +349,9 @@ class ClaudeSubprocess:
|
|||
|
||||
elif event_type == "result":
|
||||
# Turn complete
|
||||
elapsed = time.monotonic() - self._spawn_time
|
||||
session_id = event.get("session_id")
|
||||
if session_id:
|
||||
logger.debug(f"Turn complete: session_id={session_id}")
|
||||
logger.info(f"[TIMING] Result event: +{elapsed:.3f}s (session={session_id})")
|
||||
|
||||
# Check for error
|
||||
if event.get("is_error") and self.on_error:
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue