feat(01-03): add timing profiler for response latency debugging

Instruments the full message pipeline with [TIMING] log entries: - Message age (Telegram delivery delay) - Subprocess state (cold-start vs reused) - Process spawn, first stdout, first assistant text - Telegram send latency, total wall time Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-04 18:30:05 +00:00 · 2026-02-04 18:30:05 +00:00 · d878931e79
commit d878931e79
parent 2302b75e14
2 changed files with 35 additions and 9 deletions
--- a/telegram/bot.py
+++ b/telegram/bot.py
@ -8,6 +8,7 @@ import asyncio
 import logging
 import os
 import subprocess
+import time
 from datetime import datetime
 from pathlib import Path

@ -66,10 +67,13 @@ def is_authorized(user_id: int) -> bool:
 def make_callbacks(bot, chat_id):
    """Create callbacks for ClaudeSubprocess bound to specific chat."""
    async def on_output(text):
+        t0 = time.monotonic()
        # Truncate to Telegram limit
        if len(text) > 4000:
            text = text[:4000] + "\n... (truncated)"
        await bot.send_message(chat_id=chat_id, text=text)
+        elapsed = time.monotonic() - t0
+        logger.info(f"[TIMING] Telegram send: {elapsed:.3f}s ({len(text)} chars)")

    async def on_error(error):
        await bot.send_message(chat_id=chat_id, text=f"Error: {error}")
@ -454,11 +458,15 @@ async def handle_message(update: Update, context: ContextTypes.DEFAULT_TYPE):
        return

    message = update.message.text
-    logger.info(f"Routing message to session '{active_session}': {message[:50]}...")
+    t_start = time.monotonic()
+    # Telegram message timestamp vs now = delivery delay
+    msg_age = (datetime.utcnow() - update.message.date.replace(tzinfo=None)).total_seconds()
+    logger.info(f"[TIMING] Message received: session='{active_session}', age={msg_age:.1f}s, text={message[:50]}...")

    try:
        # Get or create subprocess for active session
-        if active_session not in subprocesses or not subprocesses[active_session].is_alive:
+        already_alive = active_session in subprocesses and subprocesses[active_session].is_alive
+        if not already_alive:
            session_dir = session_manager.get_session_dir(active_session)
            session_data = session_manager.get_session(active_session)
            persona_name = session_data.get('persona', 'default')
@ -477,7 +485,8 @@ async def handle_message(update: Update, context: ContextTypes.DEFAULT_TYPE):
                on_status=callbacks[3],
            )

-            logger.info(f"Auto-spawned subprocess for active session '{active_session}'")
+        subprocess_state = "reused" if already_alive else "cold-start"
+        logger.info(f"[TIMING] Subprocess: {subprocess_state}, busy={subprocesses[active_session].is_busy}")

        # Send message to Claude
        await subprocesses[active_session].send_message(message)
--- a/telegram/claude_subprocess.py
+++ b/telegram/claude_subprocess.py
@ -25,6 +25,7 @@ import inspect
 import json
 import logging
 import os
+import time
 from pathlib import Path
 from typing import Callable, Optional

@ -139,6 +140,9 @@ class ClaudeSubprocess:
        Args:
            message: Initial message to send
        """
+        self._spawn_time = time.monotonic()
+        self._first_output_time = None
+
        # Build command
        cmd = [
            "claude",
@ -179,9 +183,11 @@ class ClaudeSubprocess:
        # Ensure session directory exists
        self._session_dir.mkdir(parents=True, exist_ok=True)

+        # Log full command (with model) for debugging
+        cmd_flags = [c for c in cmd if c.startswith("--")]
        logger.info(
-            f"Spawning Claude Code: cwd={self._session_dir}, "
-            f"cmd={' '.join(cmd[:4])}... (truncated)"
+            f"[TIMING] Spawning: cwd={self._session_dir.name}, "
+            f"flags={' '.join(cmd_flags)}, msg={message[:60]}..."
        )

        try:
@ -195,7 +201,8 @@ class ClaudeSubprocess:
            )

            self._busy = True
-            logger.info(f"Claude Code spawned: PID={self._process.pid}")
+            elapsed = time.monotonic() - self._spawn_time
+            logger.info(f"[TIMING] Process spawned: PID={self._process.pid} (+{elapsed:.3f}s)")

            # Launch concurrent stream readers
            self._reader_task = asyncio.create_task(self._read_streams())
@ -229,7 +236,8 @@ class ClaudeSubprocess:

            # Wait for process to terminate
            returncode = await self._process.wait()
-            logger.info(f"Claude Code process exited: returncode={returncode}")
+            elapsed = time.monotonic() - self._spawn_time
+            logger.info(f"[TIMING] Process exited: returncode={returncode}, total={elapsed:.3f}s")

            # Handle crash (non-zero exit, but not signals from our own termination)
            # Negative return codes are signals (e.g., -15 = SIGTERM, -9 = SIGKILL)
@ -271,6 +279,7 @@ class ClaudeSubprocess:
        if not self._process or not self._process.stdout:
            return

+        first_line = True
        try:
            while True:
                line = await self._process.stdout.readline()
@ -278,6 +287,10 @@ class ClaudeSubprocess:
                    break
                line_str = line.decode().rstrip()
                if line_str:
+                    if first_line:
+                        elapsed = time.monotonic() - self._spawn_time
+                        logger.info(f"[TIMING] First stdout line: +{elapsed:.3f}s")
+                        first_line = False
                    self._handle_stdout_line(line_str)
        except Exception as e:
            logger.error(f"Error reading stdout: {e}")
@ -322,6 +335,10 @@ class ClaudeSubprocess:
                    if block.get("type") == "text":
                        text = block.get("text", "")
                        if text and self.on_output:
+                            if not self._first_output_time:
+                                self._first_output_time = time.monotonic()
+                                elapsed = self._first_output_time - self._spawn_time
+                                logger.info(f"[TIMING] First assistant text: +{elapsed:.3f}s ({len(text)} chars)")
                            try:
                                if inspect.iscoroutinefunction(self.on_output):
                                    asyncio.create_task(self.on_output(text))
@ -332,9 +349,9 @@ class ClaudeSubprocess:

            elif event_type == "result":
                # Turn complete
+                elapsed = time.monotonic() - self._spawn_time
                session_id = event.get("session_id")
-                if session_id:
-                    logger.debug(f"Turn complete: session_id={session_id}")
+                logger.info(f"[TIMING] Result event: +{elapsed:.3f}s (session={session_id})")

                # Check for error
                if event.get("is_error") and self.on_error: