#!/usr/bin/env python3 """ Homelab Telegram Bot Two-way interactive bot for homelab management and notifications. """ import asyncio import logging import os import signal import subprocess import time from datetime import datetime, timezone from pathlib import Path from telegram import Update from telegram.ext import Application, CommandHandler, ContextTypes, MessageHandler, filters from session_manager import SessionManager from claude_subprocess import ClaudeSubprocess from telegram_utils import split_message_smart, escape_markdown_v2, typing_indicator_loop from message_batcher import MessageBatcher from idle_timer import SessionIdleTimer # Setup logging logging.basicConfig( format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', level=logging.INFO ) logger = logging.getLogger(__name__) # Load credentials CREDENTIALS_FILE = Path(__file__).parent / 'credentials' config = {} with open(CREDENTIALS_FILE) as f: for line in f: if '=' in line: key, value = line.strip().split('=', 1) config[key] = value TOKEN = config['TELEGRAM_BOT_TOKEN'] # Authorized users file (stores chat IDs that are allowed to use the bot) AUTHORIZED_FILE = Path(__file__).parent / 'authorized_users' # Inbox file for messages to Claude INBOX_FILE = Path(__file__).parent / 'inbox' # Session manager, subprocess tracking, and message batchers session_manager = SessionManager() subprocesses: dict[str, ClaudeSubprocess] = {} # Persistent subprocess per session batchers: dict[str, MessageBatcher] = {} # Message batcher per session typing_tasks: dict[str, tuple[asyncio.Task, asyncio.Event]] = {} # Typing indicator per session idle_timers: dict[str, SessionIdleTimer] = {} # Idle timer per session subprocess_locks: dict[str, asyncio.Lock] = {} # Lock per session to prevent races def get_authorized_users(): """Load authorized user IDs.""" if AUTHORIZED_FILE.exists(): with open(AUTHORIZED_FILE) as f: return set(int(line.strip()) for line in f if line.strip()) return set() def add_authorized_user(user_id: int): """Add a user to authorized list.""" users = get_authorized_users() users.add(user_id) with open(AUTHORIZED_FILE, 'w') as f: for uid in users: f.write(f"{uid}\n") def is_authorized(user_id: int) -> bool: """Check if user is authorized.""" return user_id in get_authorized_users() def get_subprocess_lock(session_name: str) -> asyncio.Lock: """Get or create subprocess lock for a session.""" return subprocess_locks.setdefault(session_name, asyncio.Lock()) async def suspend_session(session_name: str): """Suspend a session after idle timeout (callback for idle timer).""" async with get_subprocess_lock(session_name): # Check if subprocess exists and is alive if session_name not in subprocesses or not subprocesses[session_name].is_alive: logger.debug(f"Session '{session_name}' already not running, updating metadata only") session_manager.update_session(session_name, status='suspended', pid=None) return # Check if subprocess is busy (Claude is mid-processing) if subprocesses[session_name].is_busy: logger.info(f"Session '{session_name}' is busy, deferring suspension") # Reset timer to try again later if session_name in idle_timers: idle_timers[session_name].reset() return # Store PID for logging pid = subprocesses[session_name].pid logger.info(f"Suspending session '{session_name}' (PID {pid}) after idle timeout") # Terminate subprocess await subprocesses[session_name].terminate() del subprocesses[session_name] # Flush and remove batcher if exists if session_name in batchers: await batchers[session_name].flush_immediately() del batchers[session_name] # Update session metadata session_manager.update_session(session_name, status='suspended', pid=None) # Cancel and remove idle timer if session_name in idle_timers: idle_timers[session_name].cancel() del idle_timers[session_name] logger.info(f"Session '{session_name}' suspended after idle timeout") MODEL_ALIASES = { "sonnet": "claude-sonnet-4-5-20250929", "opus": "claude-opus-4-5-20251101", "haiku": "claude-haiku-4-5-20251001", } def load_persona_for_session(session_name: str) -> dict: """Load persona with session-level model override applied.""" session_data = session_manager.get_session(session_name) persona_name = session_data.get('persona', 'default') persona = session_manager.load_persona(persona_name) # Apply session model override if set model_override = session_data.get('model_override') if model_override: if 'settings' not in persona: persona['settings'] = {} persona['settings']['model'] = model_override return persona def make_callbacks(bot, chat_id, session_name: str): """Create callbacks for ClaudeSubprocess bound to specific chat with dynamic typing control. Typing events are looked up dynamically from typing_tasks dict so callbacks always reference the CURRENT typing indicator, not a stale one from creation time. """ def _stop_typing(): """Stop the current typing indicator for this session (if any).""" if session_name in typing_tasks: _, stop_event = typing_tasks[session_name] stop_event.set() async def on_output(text): t0 = time.monotonic() # Split message using smart splitting chunks = split_message_smart(text) for chunk in chunks: try: # Try sending with MarkdownV2 escaped = escape_markdown_v2(chunk) await bot.send_message(chat_id=chat_id, text=escaped, parse_mode='MarkdownV2') except Exception as e: # Fall back to plain text if MarkdownV2 fails logger.warning(f"MarkdownV2 parse failed, falling back to plain text: {e}") await bot.send_message(chat_id=chat_id, text=chunk) # Stop typing indicator (dynamic lookup) _stop_typing() elapsed = time.monotonic() - t0 logger.info(f"[TIMING] Telegram send: {elapsed:.3f}s ({len(text)} chars, {len(chunks)} chunks)") async def on_error(error): await bot.send_message(chat_id=chat_id, text=f"Error: {error}") # Stop typing indicator on error _stop_typing() async def on_complete(): # Stop typing indicator on completion _stop_typing() # Reset idle timer (only start counting AFTER Claude finishes) if session_name in idle_timers: idle_timers[session_name].reset() async def on_status(status): await bot.send_message(chat_id=chat_id, text=f"[{status}]") async def on_tool_use(tool_name: str, tool_input: dict): """Format and send tool call progress notification.""" # Extract meaningful target from tool_input target = "" if tool_name == "Bash": cmd = tool_input.get("command", "") target = cmd[:50] + "..." if len(cmd) > 50 else cmd elif tool_name == "Read": target = tool_input.get("file_path", "") elif tool_name == "Edit": target = tool_input.get("file_path", "") elif tool_name == "Write": target = tool_input.get("file_path", "") elif tool_name == "Grep": pattern = tool_input.get("pattern", "") target = f"pattern: {pattern}" elif tool_name == "Glob": pattern = tool_input.get("pattern", "") target = f"pattern: {pattern}" else: target = str(tool_input)[:50] # Format as italic message message = f"_{tool_name}: {target}_" try: await bot.send_message(chat_id=chat_id, text=message, parse_mode='MarkdownV2') except Exception as e: # Fall back to plain text logger.warning(f"Failed to send tool notification with MarkdownV2: {e}") await bot.send_message(chat_id=chat_id, text=f"{tool_name}: {target}") return { 'on_output': on_output, 'on_error': on_error, 'on_complete': on_complete, 'on_status': on_status, 'on_tool_use': on_tool_use, } def run_command(cmd: list, timeout: int = 30) -> str: """Run a shell command and return output.""" try: result = subprocess.run( cmd, capture_output=True, text=True, timeout=timeout, env={**os.environ, 'PATH': f"/home/mikkel/bin:{os.environ.get('PATH', '')}"} ) output = result.stdout or result.stderr or "No output" # Telegram has 4096 char limit per message if len(output) > 4000: output = output[:4000] + "\n... (truncated)" return output except subprocess.TimeoutExpired: return "Command timed out" except Exception as e: return f"Error: {e}" # Command handlers async def start(update: Update, context: ContextTypes.DEFAULT_TYPE): """Handle /start command - first contact with bot.""" user = update.effective_user chat_id = update.effective_chat.id if not is_authorized(user.id): # First user becomes authorized automatically if not get_authorized_users(): add_authorized_user(user.id) await update.message.reply_text( f"Welcome {user.first_name}! You're now authorized as the admin.\n" f"Your chat ID: {chat_id}\n\n" f"Use /help to see available commands." ) else: await update.message.reply_text( f"Unauthorized. Your user ID: {user.id}\n" "Contact the admin to get access." ) return await update.message.reply_text( f"Welcome back {user.first_name}!\n" f"Use /help to see available commands." ) async def help_command(update: Update, context: ContextTypes.DEFAULT_TYPE): """Handle /help command.""" if not is_authorized(update.effective_user.id): return help_text = """ *Homelab Bot Commands* *Claude Sessions:* /new [persona] - Create new Claude session /session - Switch to a session /sessions - List all sessions with status /model - Switch model (sonnet/opus/haiku) /timeout - Set idle timeout (1-120) /archive - Archive and remove a session *Status & Monitoring:* /status - Quick service overview /pbs - PBS backup status /backups - Last backup per VM/CT /beszel - Server metrics /kuma - Uptime Kuma status *PBS Commands:* /pbs\\_status - Full PBS overview /pbs\\_errors - Recent errors /pbs\\_gc - Garbage collection status *System:* /ping - Ping a host /help - This help message /chatid - Show your chat ID """ await update.message.reply_text(help_text, parse_mode='Markdown') async def chatid(update: Update, context: ContextTypes.DEFAULT_TYPE): """Show chat ID (useful for notifications).""" await update.message.reply_text(f"Chat ID: `{update.effective_chat.id}`", parse_mode='Markdown') async def status(update: Update, context: ContextTypes.DEFAULT_TYPE): """Quick status overview.""" if not is_authorized(update.effective_user.id): return await update.message.reply_text("Checking services...") # Quick checks output_lines = ["*Homelab Status*\n"] # Check key services via ping services = [ ("NPM", "10.5.0.1"), ("DNS", "10.5.0.2"), ("PBS", "10.5.0.6"), ("Dockge", "10.5.0.10"), ("Forgejo", "10.5.0.14"), ] for name, ip in services: result = subprocess.run( ["ping", "-c", "1", "-W", "1", ip], capture_output=True ) status = "up" if result.returncode == 0 else "down" output_lines.append(f"{'✅' if status == 'up' else '❌'} {name} ({ip})") await update.message.reply_text("\n".join(output_lines), parse_mode='Markdown') async def pbs(update: Update, context: ContextTypes.DEFAULT_TYPE): """PBS quick status.""" if not is_authorized(update.effective_user.id): return await update.message.reply_text("Fetching PBS status...") output = run_command(["/home/mikkel/bin/pbs", "status"]) await update.message.reply_text(f"```\n{output}\n```", parse_mode='Markdown') async def pbs_status(update: Update, context: ContextTypes.DEFAULT_TYPE): """PBS full status.""" if not is_authorized(update.effective_user.id): return await update.message.reply_text("Fetching PBS status...") output = run_command(["/home/mikkel/bin/pbs", "status"]) await update.message.reply_text(f"```\n{output}\n```", parse_mode='Markdown') async def backups(update: Update, context: ContextTypes.DEFAULT_TYPE): """PBS backups per VM/CT.""" if not is_authorized(update.effective_user.id): return await update.message.reply_text("Fetching backup status...") output = run_command(["/home/mikkel/bin/pbs", "backups"], timeout=60) await update.message.reply_text(f"```\n{output}\n```", parse_mode='Markdown') async def pbs_errors(update: Update, context: ContextTypes.DEFAULT_TYPE): """PBS errors.""" if not is_authorized(update.effective_user.id): return output = run_command(["/home/mikkel/bin/pbs", "errors"]) await update.message.reply_text(f"```\n{output}\n```", parse_mode='Markdown') async def pbs_gc(update: Update, context: ContextTypes.DEFAULT_TYPE): """PBS garbage collection status.""" if not is_authorized(update.effective_user.id): return output = run_command(["/home/mikkel/bin/pbs", "gc"]) await update.message.reply_text(f"```\n{output}\n```", parse_mode='Markdown') async def beszel(update: Update, context: ContextTypes.DEFAULT_TYPE): """Beszel server metrics.""" if not is_authorized(update.effective_user.id): return await update.message.reply_text("Fetching server metrics...") output = run_command(["/home/mikkel/bin/beszel", "status"]) await update.message.reply_text(f"```\n{output}\n```", parse_mode='Markdown') async def kuma(update: Update, context: ContextTypes.DEFAULT_TYPE): """Uptime Kuma status.""" if not is_authorized(update.effective_user.id): return output = run_command(["/home/mikkel/bin/kuma", "list"]) await update.message.reply_text(f"```\n{output}\n```", parse_mode='Markdown') async def ping_host(update: Update, context: ContextTypes.DEFAULT_TYPE): """Ping a host.""" if not is_authorized(update.effective_user.id): return if not context.args: await update.message.reply_text("Usage: /ping ") return host = context.args[0] # Basic validation if not host.replace('.', '').replace('-', '').isalnum(): await update.message.reply_text("Invalid hostname") return result = subprocess.run( ["ping", "-c", "3", "-W", "2", host], capture_output=True, text=True, timeout=10 ) output = result.stdout or result.stderr await update.message.reply_text(f"```\n{output}\n```", parse_mode='Markdown') async def new_session(update: Update, context: ContextTypes.DEFAULT_TYPE): """Create a new Claude session.""" if not is_authorized(update.effective_user.id): return if not context.args: await update.message.reply_text("Usage: /new [persona]") return name = context.args[0] persona = context.args[1] if len(context.args) > 1 else None try: # Create session session_manager.create_session(name, persona=persona) # Auto-switch to new session session_manager.switch_session(name) # Prepare reply if persona: reply = f"Session '{name}' created with persona '{persona}'." else: reply = f"Session '{name}' created." await update.message.reply_text(reply) # Spawn subprocess for the new session (but don't send message yet) session_dir = session_manager.get_session_dir(name) persona_data = load_persona_for_session(name) # Create callbacks bound to this chat (typing looked up dynamically) callbacks = make_callbacks(context.bot, update.effective_chat.id, name) # Create subprocess subprocesses[name] = ClaudeSubprocess( session_dir=session_dir, persona=persona_data, on_output=callbacks['on_output'], on_error=callbacks['on_error'], on_complete=callbacks['on_complete'], on_status=callbacks['on_status'], on_tool_use=callbacks['on_tool_use'], ) # Initialize idle timer for the new session timeout_secs = session_manager.get_session_timeout(name) idle_timers[name] = SessionIdleTimer(name, timeout_secs, on_timeout=suspend_session) logger.info(f"Created session '{name}' with persona '{persona or 'default'}'") except ValueError as e: # Session already exists await update.message.reply_text( f"Session '{name}' already exists. Use /session to switch to it." ) except FileNotFoundError as e: # Persona not found await update.message.reply_text(f"Error: {e}") except Exception as e: logger.error(f"Error creating session: {e}") await update.message.reply_text(f"Error creating session: {e}") async def switch_session_cmd(update: Update, context: ContextTypes.DEFAULT_TYPE): """Switch to a different Claude session.""" if not is_authorized(update.effective_user.id): return if not context.args: # Show usage and list available sessions sessions = session_manager.list_sessions() if not sessions: await update.message.reply_text("No sessions found. Use /new to create one.") return session_list = "\n".join( f"- {s['name']} ({s['status']}) - {s.get('persona', 'default')}" for s in sessions ) await update.message.reply_text( f"Usage: /session \n\nAvailable sessions:\n{session_list}" ) return name = context.args[0] try: # Check if session exists if not session_manager.session_exists(name): await update.message.reply_text( f"Session '{name}' doesn't exist. Use /new to create it." ) return # Stop typing indicator for previous session if running prev_session = session_manager.get_active_session() if prev_session and prev_session in typing_tasks: task, stop_event = typing_tasks[prev_session] stop_event.set() try: await task except Exception: pass del typing_tasks[prev_session] # Flush batcher for previous session immediately if prev_session and prev_session in batchers: await batchers[prev_session].flush_immediately() # Switch session session_manager.switch_session(name) # Auto-spawn subprocess if not alive if name not in subprocesses or not subprocesses[name].is_alive: session_dir = session_manager.get_session_dir(name) persona_data = load_persona_for_session(name) # Create callbacks bound to this chat (typing looked up dynamically) callbacks = make_callbacks(context.bot, update.effective_chat.id, name) # Create subprocess subprocesses[name] = ClaudeSubprocess( session_dir=session_dir, persona=persona_data, on_output=callbacks['on_output'], on_error=callbacks['on_error'], on_complete=callbacks['on_complete'], on_status=callbacks['on_status'], on_tool_use=callbacks['on_tool_use'], ) # Initialize idle timer for the switched-to session timeout_secs = session_manager.get_session_timeout(name) if name not in idle_timers: idle_timers[name] = SessionIdleTimer(name, timeout_secs, on_timeout=suspend_session) logger.info(f"Auto-spawned subprocess for session '{name}'") # Get persona for reply session_data = session_manager.get_session(name) persona_name = session_data.get('persona', 'default') if persona_name and persona_name != 'default': reply = f"Switched to session '{name}' (persona: {persona_name})." else: reply = f"Switched to session '{name}'." await update.message.reply_text(reply) except Exception as e: logger.error(f"Error switching session: {e}") await update.message.reply_text(f"Error switching session: {e}") async def archive_session_cmd(update: Update, context: ContextTypes.DEFAULT_TYPE): """Archive a Claude session (compress and remove).""" if not is_authorized(update.effective_user.id): return if not context.args: await update.message.reply_text("Usage: /archive ") return name = context.args[0] try: # Stop typing indicator if running if name in typing_tasks: task, stop_event = typing_tasks[name] stop_event.set() try: await task except Exception: pass del typing_tasks[name] # Flush and remove batcher if name in batchers: await batchers[name].flush_immediately() del batchers[name] # Terminate subprocess if running if name in subprocesses: if subprocesses[name].is_alive: await subprocesses[name].terminate() del subprocesses[name] # Cancel idle timer if exists if name in idle_timers: idle_timers[name].cancel() del idle_timers[name] # Remove subprocess lock if exists subprocess_locks.pop(name, None) # Archive the session archive_path = session_manager.archive_session(name) size_mb = archive_path.stat().st_size / (1024 * 1024) await update.message.reply_text( f"Session '{name}' archived ({size_mb:.1f} MB)\n{archive_path.name}" ) logger.info(f"Archived session '{name}' to {archive_path}") except ValueError as e: await update.message.reply_text(str(e)) except Exception as e: logger.error(f"Error archiving session: {e}") await update.message.reply_text(f"Error archiving session: {e}") async def handle_message(update: Update, context: ContextTypes.DEFAULT_TYPE): """Handle free text messages - route to active Claude session with batching.""" if not is_authorized(update.effective_user.id): return # Check for active session active_session = session_manager.get_active_session() if not active_session: await update.message.reply_text( "No active session. Use /new to start one." ) return message = update.message.text t_start = time.monotonic() # Telegram message timestamp vs now = delivery delay msg_age = (datetime.utcnow() - update.message.date.replace(tzinfo=None)).total_seconds() logger.info(f"[TIMING] Message received: session='{active_session}', age={msg_age:.1f}s, text={message[:50]}...") try: # Reset idle timer on user activity (even before processing message) if active_session in idle_timers: idle_timers[active_session].reset() # Clean up stale typing task (from previous message completion) if active_session in typing_tasks: old_task, old_event = typing_tasks[active_session] if old_task.done(): del typing_tasks[active_session] # Start fresh typing indicator for this message if active_session not in typing_tasks: stop_typing = asyncio.Event() typing_task = asyncio.create_task( typing_indicator_loop(context.bot, update.effective_chat.id, stop_typing) ) typing_tasks[active_session] = (typing_task, stop_typing) # Acquire lock to prevent race with suspend_session lock = get_subprocess_lock(active_session) async with lock: # Get or create subprocess for active session (avoid double-start) already_alive = active_session in subprocesses and subprocesses[active_session].is_alive if not already_alive: session_dir = session_manager.get_session_dir(active_session) persona_data = load_persona_for_session(active_session) # Check if this is a resume (has .claude/ dir with history) is_resume = (session_dir / ".claude").exists() if is_resume: # Calculate idle duration session_data = session_manager.get_session(active_session) last_active_str = session_data.get('last_active') if last_active_str: last_active = datetime.fromisoformat(last_active_str) idle_duration = (datetime.now(timezone.utc) - last_active).total_seconds() if idle_duration > 60: # Show idle duration if >1 min idle_minutes = int(idle_duration / 60) await update.message.reply_text(f"Resuming session (idle for {idle_minutes} min)...") else: await update.message.reply_text("Resuming session...") else: await update.message.reply_text("Resuming session...") # Create callbacks bound to this chat (typing looked up dynamically) callbacks = make_callbacks(context.bot, update.effective_chat.id, active_session) # Create subprocess subprocess_inst = ClaudeSubprocess( session_dir=session_dir, persona=persona_data, on_output=callbacks['on_output'], on_error=callbacks['on_error'], on_complete=callbacks['on_complete'], on_status=callbacks['on_status'], on_tool_use=callbacks['on_tool_use'], ) await subprocess_inst.start() subprocesses[active_session] = subprocess_inst # Update metadata with PID and status now_iso = datetime.now(timezone.utc).isoformat() session_manager.update_session( active_session, status='active', last_active=now_iso, pid=subprocess_inst.pid ) else: subprocess_inst = subprocesses[active_session] subprocess_state = "reused" if already_alive else "cold-start" logger.info(f"[TIMING] Subprocess: {subprocess_state}, busy={subprocess_inst.is_busy}") # Get or create message batcher for this session if active_session not in batchers: # Batcher callback sends message to subprocess batchers[active_session] = MessageBatcher( callback=subprocess_inst.send_message, debounce_seconds=2.0 ) # Create/reset idle timer for the session (outside lock) timeout_secs = session_manager.get_session_timeout(active_session) if active_session not in idle_timers: idle_timers[active_session] = SessionIdleTimer(active_session, timeout_secs, on_timeout=suspend_session) # Add message to batcher (typing indicator already running) await batchers[active_session].add_message(message) except Exception as e: logger.error(f"Error handling message: {e}") await update.message.reply_text(f"Error: {e}") # Stop typing indicator on error if active_session in typing_tasks: task, stop_event = typing_tasks[active_session] stop_event.set() async def handle_photo(update: Update, context: ContextTypes.DEFAULT_TYPE): """Handle photo messages - save to session directory and auto-analyze.""" if not is_authorized(update.effective_user.id): return # Check for active session active_session = session_manager.get_active_session() if not active_session: await update.message.reply_text( "No active session. Use /new to start one, then send the photo again." ) return user = update.effective_user file_timestamp = datetime.now().strftime('%Y%m%d_%H%M%S') # Get session directory session_dir = session_manager.get_session_dir(active_session) # Get the largest photo (best quality) photo = update.message.photo[-1] file = await context.bot.get_file(photo.file_id) # Download to session directory filename = f"photo_{file_timestamp}.jpg" filepath = session_dir / filename await file.download_to_drive(filepath) logger.info(f"Photo saved to session '{active_session}': {filepath}") # Get caption if any caption = update.message.caption or "" # Reset idle timer on user activity if active_session in idle_timers: idle_timers[active_session].reset() # Clean up stale typing task if active_session in typing_tasks: old_task, old_event = typing_tasks[active_session] if old_task.done(): del typing_tasks[active_session] # Start typing indicator if active_session not in typing_tasks: stop_typing = asyncio.Event() typing_task = asyncio.create_task( typing_indicator_loop(context.bot, update.effective_chat.id, stop_typing) ) typing_tasks[active_session] = (typing_task, stop_typing) # Auto-analyze: send message to Claude if caption: analysis_message = f"I've attached a photo: {filename}. {caption}" else: analysis_message = f"I've attached a photo: {filename}. Please describe what you see." # Acquire lock to prevent race with suspend_session lock = get_subprocess_lock(active_session) async with lock: # Get or create subprocess if active_session not in subprocesses or not subprocesses[active_session].is_alive: # Check if this is a resume is_resume = (session_dir / ".claude").exists() if is_resume: session_data = session_manager.get_session(active_session) last_active_str = session_data.get('last_active') if last_active_str: last_active = datetime.fromisoformat(last_active_str) idle_duration = (datetime.now(timezone.utc) - last_active).total_seconds() if idle_duration > 60: idle_minutes = int(idle_duration / 60) await update.message.reply_text(f"Resuming session (idle for {idle_minutes} min)...") else: await update.message.reply_text("Resuming session...") else: await update.message.reply_text("Resuming session...") persona_data = load_persona_for_session(active_session) callbacks = make_callbacks(context.bot, update.effective_chat.id, active_session) subprocess_inst = ClaudeSubprocess( session_dir=session_dir, persona=persona_data, on_output=callbacks['on_output'], on_error=callbacks['on_error'], on_complete=callbacks['on_complete'], on_status=callbacks['on_status'], on_tool_use=callbacks['on_tool_use'], ) await subprocess_inst.start() subprocesses[active_session] = subprocess_inst # Update metadata with PID and status now_iso = datetime.now(timezone.utc).isoformat() session_manager.update_session( active_session, status='active', last_active=now_iso, pid=subprocess_inst.pid ) # Create/reset idle timer (outside lock) timeout_secs = session_manager.get_session_timeout(active_session) if active_session not in idle_timers: idle_timers[active_session] = SessionIdleTimer(active_session, timeout_secs, on_timeout=suspend_session) # Send analysis message directly (not batched) await subprocesses[active_session].send_message(analysis_message) async def handle_document(update: Update, context: ContextTypes.DEFAULT_TYPE): """Handle document/file messages - save to session directory and notify Claude.""" if not is_authorized(update.effective_user.id): return # Check for active session active_session = session_manager.get_active_session() if not active_session: await update.message.reply_text( "No active session. Use /new to start one, then send the file again." ) return user = update.effective_user file_timestamp = datetime.now().strftime('%Y%m%d_%H%M%S') # Get session directory session_dir = session_manager.get_session_dir(active_session) # Get document info doc = update.message.document original_name = doc.file_name or "unknown" file = await context.bot.get_file(doc.file_id) # Download with timestamp prefix to avoid collisions filename = f"{file_timestamp}_{original_name}" filepath = session_dir / filename await file.download_to_drive(filepath) logger.info(f"Document saved to session '{active_session}': {filepath}") # Get caption if any caption = update.message.caption or "" # Reset idle timer on user activity if active_session in idle_timers: idle_timers[active_session].reset() # Clean up stale typing task if active_session in typing_tasks: old_task, old_event = typing_tasks[active_session] if old_task.done(): del typing_tasks[active_session] # Start typing indicator if active_session not in typing_tasks: stop_typing = asyncio.Event() typing_task = asyncio.create_task( typing_indicator_loop(context.bot, update.effective_chat.id, stop_typing) ) typing_tasks[active_session] = (typing_task, stop_typing) # Notify Claude if caption: notify_message = f"{caption}\nThe file {filename} has been saved to your session." else: notify_message = f"User uploaded file: {filename}" # Acquire lock to prevent race with suspend_session lock = get_subprocess_lock(active_session) async with lock: # Get or create subprocess if active_session not in subprocesses or not subprocesses[active_session].is_alive: # Check if this is a resume is_resume = (session_dir / ".claude").exists() if is_resume: session_data = session_manager.get_session(active_session) last_active_str = session_data.get('last_active') if last_active_str: last_active = datetime.fromisoformat(last_active_str) idle_duration = (datetime.now(timezone.utc) - last_active).total_seconds() if idle_duration > 60: idle_minutes = int(idle_duration / 60) await update.message.reply_text(f"Resuming session (idle for {idle_minutes} min)...") else: await update.message.reply_text("Resuming session...") else: await update.message.reply_text("Resuming session...") persona_data = load_persona_for_session(active_session) callbacks = make_callbacks(context.bot, update.effective_chat.id, active_session) subprocess_inst = ClaudeSubprocess( session_dir=session_dir, persona=persona_data, on_output=callbacks['on_output'], on_error=callbacks['on_error'], on_complete=callbacks['on_complete'], on_status=callbacks['on_status'], on_tool_use=callbacks['on_tool_use'], ) await subprocess_inst.start() subprocesses[active_session] = subprocess_inst # Update metadata with PID and status now_iso = datetime.now(timezone.utc).isoformat() session_manager.update_session( active_session, status='active', last_active=now_iso, pid=subprocess_inst.pid ) # Create/reset idle timer (outside lock) timeout_secs = session_manager.get_session_timeout(active_session) if active_session not in idle_timers: idle_timers[active_session] = SessionIdleTimer(active_session, timeout_secs, on_timeout=suspend_session) # Send notification directly (not batched) await subprocesses[active_session].send_message(notify_message) async def model_cmd(update: Update, context: ContextTypes.DEFAULT_TYPE): """Switch model for current session. Persists across session switches.""" if not is_authorized(update.effective_user.id): return active_session = session_manager.get_active_session() if not active_session: await update.message.reply_text("No active session. Use /new to start one.") return if not context.args: # Show current model session_data = session_manager.get_session(active_session) model_override = session_data.get('model_override') persona_name = session_data.get('persona', 'default') persona = session_manager.load_persona(persona_name) current = model_override or persona.get('settings', {}).get('model', 'default') aliases = "\n".join(f" {k} → {v}" for k, v in MODEL_ALIASES.items()) await update.message.reply_text( f"Current model: {current}\n\nUsage: /model \n\nAliases:\n{aliases}" ) return model = context.args[0] # Resolve alias resolved = MODEL_ALIASES.get(model, model) # Persist to session metadata session_manager.update_session(active_session, model_override=resolved) # Terminate current subprocess so next message spawns with new model if active_session in subprocesses: if subprocesses[active_session].is_alive: await subprocesses[active_session].terminate() del subprocesses[active_session] # Clean up batcher too if active_session in batchers: await batchers[active_session].flush_immediately() del batchers[active_session] # Cancel idle timer (will be recreated on next message) if active_session in idle_timers: idle_timers[active_session].cancel() del idle_timers[active_session] await update.message.reply_text(f"Model set to {resolved} for session '{active_session}'.") logger.info(f"Model changed to {resolved} for session '{active_session}'") async def timeout_cmd(update: Update, context: ContextTypes.DEFAULT_TYPE): """Set idle timeout for current session.""" if not is_authorized(update.effective_user.id): return active_session = session_manager.get_active_session() if not active_session: await update.message.reply_text("No active session. Use /new to start one.") return if not context.args: # Show current timeout timeout_secs = session_manager.get_session_timeout(active_session) minutes = timeout_secs // 60 await update.message.reply_text( f"Idle timeout: {minutes} minutes\n\nUsage: /timeout (1-120)" ) return # Parse and validate timeout value try: minutes = int(context.args[0]) if minutes < 1 or minutes > 120: await update.message.reply_text("Timeout must be between 1 and 120 minutes") return except ValueError: await update.message.reply_text("Invalid number. Usage: /timeout ") return # Convert to seconds and update timeout_seconds = minutes * 60 session_manager.update_session(active_session, idle_timeout=timeout_seconds) # Update existing idle timer if present if active_session in idle_timers: idle_timers[active_session].timeout_seconds = timeout_seconds idle_timers[active_session].reset() await update.message.reply_text( f"Idle timeout set to {minutes} minutes for session '{active_session}'." ) logger.info(f"Idle timeout set to {minutes} minutes for session '{active_session}'") async def sessions_cmd(update: Update, context: ContextTypes.DEFAULT_TYPE): """List all sessions with status.""" if not is_authorized(update.effective_user.id): return sessions = session_manager.list_sessions() if not sessions: await update.message.reply_text("No sessions. Use /new to create one.") return active_session = session_manager.get_active_session() def format_relative_time(iso_str): """Format ISO timestamp as relative time (e.g., '2m ago').""" dt = datetime.fromisoformat(iso_str) delta = datetime.now(timezone.utc) - dt secs = delta.total_seconds() if secs < 60: return "just now" if secs < 3600: return f"{int(secs/60)}m ago" if secs < 86400: return f"{int(secs/3600)}h ago" return f"{int(secs/86400)}d ago" lines = [] for s in sessions: name = s['name'] persona = s.get('persona', 'default') # Determine status if name in subprocesses and subprocesses[name].is_alive: status = "🟢 LIVE" elif s.get('status') == 'suspended': status = "⚪ IDLE" else: status = s.get('status', 'unknown').upper() # Format last active time last_active = s.get('last_active', '') rel_time = format_relative_time(last_active) if last_active else "unknown" # Mark current active session marker = "→ " if name == active_session else " " lines.append(f"{marker}{status} `{name}` ({persona}) - {rel_time}") await update.message.reply_text("\n".join(lines), parse_mode='Markdown') async def unknown(update: Update, context: ContextTypes.DEFAULT_TYPE): """Handle unknown commands.""" if not is_authorized(update.effective_user.id): return await update.message.reply_text("Unknown command. Use /help for available commands.") async def cleanup_orphaned_subprocesses(): """Clean up orphaned subprocesses at bot startup.""" orphan_count = 0 sessions = session_manager.list_sessions() for session in sessions: name = session['name'] pid = session.get('pid') if pid is not None: # Check if process exists try: os.kill(pid, 0) # Process exists - verify it's a claude process try: with open(f"/proc/{pid}/cmdline", "r") as f: cmdline = f.read() if "claude" in cmdline: # It's a claude process - kill it logger.info(f"Killing orphaned claude process for session '{name}': PID {pid}") try: os.kill(pid, signal.SIGTERM) await asyncio.sleep(2) try: os.kill(pid, signal.SIGKILL) except ProcessLookupError: pass # Already dead except ProcessLookupError: pass # Process died between checks orphan_count += 1 else: logger.warning(f"PID {pid} for session '{name}' exists but is not a claude process") except (FileNotFoundError, PermissionError): # Can't read cmdline - assume it's not our process logger.warning(f"Cannot verify PID {pid} for session '{name}'") except ProcessLookupError: # Process doesn't exist pass # Update metadata - clear PID and set status to suspended session_manager.update_session(name, pid=None, status='suspended') # Also suspend any sessions that are marked active but have no PID elif session.get('status') == 'active': session_manager.update_session(name, status='suspended') if orphan_count > 0: logger.info(f"Cleaned up {orphan_count} orphaned subprocess(es)") else: logger.info("No orphaned subprocesses found") async def post_init(application): """Run startup cleanup.""" await cleanup_orphaned_subprocesses() async def post_shutdown(application): """Clean up subprocesses and timers on bot shutdown.""" logger.info("Bot shutting down, cleaning up...") # Cancel all idle timers for name, timer in idle_timers.items(): timer.cancel() # Terminate all subprocesses for name, proc in list(subprocesses.items()): if proc.is_alive: logger.info(f"Terminating subprocess for '{name}'") await proc.terminate() logger.info("Cleanup complete") def main(): """Start the bot.""" # Create application with lifecycle callbacks app = Application.builder().token(TOKEN).post_init(post_init).post_shutdown(post_shutdown).build() # Add handlers app.add_handler(CommandHandler("start", start)) app.add_handler(CommandHandler("help", help_command)) app.add_handler(CommandHandler("chatid", chatid)) app.add_handler(CommandHandler("new", new_session)) app.add_handler(CommandHandler("session", switch_session_cmd)) app.add_handler(CommandHandler("sessions", sessions_cmd)) app.add_handler(CommandHandler("archive", archive_session_cmd)) app.add_handler(CommandHandler("model", model_cmd)) app.add_handler(CommandHandler("timeout", timeout_cmd)) app.add_handler(CommandHandler("status", status)) app.add_handler(CommandHandler("pbs", pbs)) app.add_handler(CommandHandler("pbs_status", pbs_status)) app.add_handler(CommandHandler("backups", backups)) app.add_handler(CommandHandler("pbs_errors", pbs_errors)) app.add_handler(CommandHandler("pbs_gc", pbs_gc)) app.add_handler(CommandHandler("beszel", beszel)) app.add_handler(CommandHandler("kuma", kuma)) app.add_handler(CommandHandler("ping", ping_host)) # Unknown command handler app.add_handler(MessageHandler(filters.COMMAND, unknown)) # Free text message handler (for messages to Claude) app.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND, handle_message, block=False)) # Photo handler app.add_handler(MessageHandler(filters.PHOTO, handle_photo)) # Document/file handler app.add_handler(MessageHandler(filters.Document.ALL, handle_document)) # Start polling logger.info("Starting Homelab Bot...") app.run_polling(allowed_updates=Update.ALL_TYPES) if __name__ == '__main__': main()