# homelab/telegram/telegram_utils.py

"""
Telegram message formatting and UX utilities.

Provides smart message splitting, MarkdownV2 escaping, and typing indicator
management for the Telegram Claude Code bridge.

Based on research in: .planning/phases/02-telegram-integration/02-RESEARCH.md
"""

import asyncio
import logging
import re
from telegram.constants import ChatAction

logger = logging.getLogger(__name__)

TELEGRAM_MAX_LENGTH = 4096
SAFE_LENGTH = 4000  # Leave room for MarkdownV2 escape character expansion

_CODE_FENCE = '```'
# Characters needed to terminate an open code block: a newline plus the fence.
_FENCE_CLOSE_COST = len(_CODE_FENCE) + 1


def _hard_split(line: str, width: int) -> list[str]:
    """Cut *line* into consecutive pieces of at most *width* characters."""
    return [line[i:i + width] for i in range(0, len(line), width)] or ['']


class _ChunkBuffer:
    """Collect lines into newline-joined chunks of at most ``limit`` characters."""

    def __init__(self, limit: int) -> None:
        self.limit = limit
        self.chunks: list[str] = []   # finished chunks, in order
        self.lines: list[str] = []    # lines of the chunk being assembled
        self.size = 0                 # len('\n'.join(self.lines))

    def fits(self, new_lines: list[str], reserve: int = 0) -> bool:
        """Return True if *new_lines* plus *reserve* spare characters fit."""
        extra = sum(len(item) + 1 for item in new_lines)
        if not self.lines:
            extra -= 1  # the first line of a chunk has no newline before it
        return self.size + extra + reserve <= self.limit

    def add(self, new_lines: list[str]) -> None:
        """Append *new_lines* to the current chunk (caller has checked the fit)."""
        for item in new_lines:
            self.size += len(item) + (1 if self.lines else 0)
            self.lines.append(item)

    def flush(self, close_fence: bool = False) -> None:
        """Finish the current chunk, optionally terminating an open code block."""
        if self.lines:
            if close_fence:
                self.lines.append(_CODE_FENCE)
            chunk = '\n'.join(self.lines)
            if chunk.strip():  # never emit an empty / whitespace-only message
                self.chunks.append(chunk)
        self.lines = []
        self.size = 0

    def add_plain(self, line: str) -> None:
        """Place a non-code line, starting a new chunk or hard-splitting if needed."""
        if not self.fits([line]):
            self.flush()
        if not self.lines and not line.strip():
            return  # do not start a chunk with a blank line
        *full, last = _hard_split(line, self.limit)
        self.chunks.extend(piece for piece in full if piece.strip())
        self.add([last])


def split_message_smart(text: str, max_length: int = SAFE_LENGTH) -> list[str]:
    """
    Split a long message into chunks that each fit within ``max_length``.

    Text that already fits is returned unchanged as a single chunk. Otherwise
    the text is packed greedily line by line, so chunks are cut at line breaks
    whenever possible. A single line that is longer than the limit is cut at
    character boundaries as a last resort.

    Triple-backtick code blocks are kept well-formed: when a block does not fit
    in one chunk, the chunk is terminated with a closing fence and the next
    chunk is reopened with the block's original opening fence line (so a
    language tag is preserved). The space needed for those fences is counted
    against ``max_length``.

    The default limit is 4000 (not 4096) to leave room for MarkdownV2 escape
    character expansion.

    Args:
        text: Message text to split
        max_length: Maximum length per chunk (default: 4000)

    Returns:
        List of message chunks, each <= max_length. Whitespace-only chunks
        are dropped, so the list can be empty for whitespace-only input that
        exceeds the limit.

    Raises:
        ValueError: If max_length is smaller than 1.

    Example:
        >>> [len(c) for c in split_message_smart("a" * 5000)]
        [4000, 1000]
    """
    if max_length < 1:
        raise ValueError("max_length must be at least 1")
    if len(text) <= max_length:
        return [text]

    out = _ChunkBuffer(max_length)
    fence = None     # opening fence line while inside a code block, else None
    pending = False  # opening fence seen but not yet written to any chunk

    for line in text.split('\n'):
        is_fence = line.strip().startswith(_CODE_FENCE)

        if fence is None:
            # A fence only counts as a block opener if a chunk can hold the
            # fence, at least one character of code and the closing fence.
            if is_fence and len(line) + _FENCE_CLOSE_COST + 2 <= max_length:
                # Defer writing the opener until the first code line arrives,
                # so it never dangles alone at the end of a chunk.
                fence, pending = line, True
            else:
                out.add_plain(line)
            continue

        if not is_fence:
            # Code line: always keep room to close the block in this chunk.
            group = [fence, line] if pending else [line]
            if not out.fits(group, reserve=_FENCE_CLOSE_COST):
                out.flush(close_fence=not pending)
                width = max_length - len(fence) - 1 - _FENCE_CLOSE_COST
                *full, last = _hard_split(line, width)
                out.chunks.extend(
                    f"{fence}\n{piece}\n{_CODE_FENCE}" for piece in full
                )
                group = [fence, last]
            out.add(group)
            pending = False
            continue

        # Closing fence line.
        if pending:
            # Empty block: opener and closer travel together as plain lines.
            if not out.fits([fence, line]):
                out.flush()
            out.add_plain(fence)
            out.add_plain(line)
        elif out.fits([line]):
            out.add([line])
        else:
            # Only a closer carrying extra characters can fail to fit; close
            # the chunk with a bare fence and carry over what followed it.
            out.flush(close_fence=True)
            out.add_plain(line.lstrip()[len(_CODE_FENCE):])
        fence, pending = None, False

    if fence is not None and pending:
        # Text ended right after an opening fence; keep that line.
        out.add_plain(fence)
    out.flush()
    return out.chunks


# Characters with a markup meaning in ordinary MarkdownV2 text, plus the
# backslash itself: an unescaped backslash would act as an escape and swallow
# the character that follows it.
_MDV2_TEXT_SPECIALS = re.compile(r'([\\_*\[\]()~`>#+\-=|{}.!])')

# Inside code entities only the backtick and the backslash need escaping.
_MDV2_CODE_SPECIALS = re.compile(r'([\\`])')

# Fenced blocks are tried first (```...```), then inline spans (`...`).
_MDV2_CODE_SPAN = re.compile(r'```[\s\S]*?```|`[^`]*?`')


def _escape_mdv2_code(span: str) -> str:
    """Escape the interior of a matched code span, leaving its delimiters alone."""
    fence = '```' if span.startswith('```') else '`'
    inner = span[len(fence):len(span) - len(fence)]
    return fence + _MDV2_CODE_SPECIALS.sub(r'\\\1', inner) + fence


def escape_markdown_v2(text: str) -> str:
    """
    Escape MarkdownV2 special characters, treating code regions separately.

    Outside code, the 18 markup characters _ * [ ] ( ) ~ ` > # + - = | { } . !
    and the backslash are each prefixed with a backslash.

    Inside code (triple-backtick blocks and single-backtick spans) the
    delimiters are kept and the markup characters are left alone; only
    backslashes, and stray backticks within a fenced block, are escaped.

    Strategy: locate code regions with a regex, escape each region with the
    rule set that applies to it, and rejoin in order.

    Args:
        text: Text to escape

    Returns:
        Text that is safe to send with parse mode MarkdownV2

    Example:
        >>> escape_markdown_v2("hello_world")
        'hello\\_world'
        >>> escape_markdown_v2("`hello_world`")
        '`hello_world`'
    """
    parts = []
    cursor = 0

    for match in _MDV2_CODE_SPAN.finditer(text):
        # Ordinary text between the previous code span and this one.
        parts.append(_MDV2_TEXT_SPECIALS.sub(r'\\\1', text[cursor:match.start()]))
        parts.append(_escape_mdv2_code(match.group(0)))
        cursor = match.end()

    # Ordinary text after the last code span (or the whole input if none).
    parts.append(_MDV2_TEXT_SPECIALS.sub(r'\\\1', text[cursor:]))

    return ''.join(parts)


async def typing_indicator_loop(bot, chat_id: int, stop_event: asyncio.Event):
    """
    Keep the "typing..." indicator visible in a chat until told to stop.

    Each round sends ChatAction.TYPING and then waits up to 4 seconds for
    stop_event. If the event fires during the wait the coroutine returns;
    if the wait times out, the event is checked again and another round
    starts. A failure to send the action is logged as a warning and does not
    end the loop. If stop_event is already set on entry, nothing is sent.

    Args:
        bot: Telegram bot instance (must provide an awaitable send_chat_action)
        chat_id: Chat ID to send typing indicator to
        stop_event: asyncio.Event that signals the loop to finish

    Example:
        >>> stop_typing = asyncio.Event()
        >>> task = asyncio.create_task(typing_indicator_loop(bot, chat_id, stop_typing))
        >>> # ... long operation ...
        >>> stop_typing.set()
        >>> await task
    """
    finished = stop_event.is_set()

    while not finished:
        # Best effort: a failed refresh must not abort the caller's operation.
        try:
            await bot.send_chat_action(chat_id=chat_id, action=ChatAction.TYPING)
        except Exception as e:
            logger.warning(f"Failed to send typing indicator: {e}")

        # Sleep for up to 4s, waking early if stop_event is set.
        try:
            await asyncio.wait_for(stop_event.wait(), timeout=4.0)
        except asyncio.TimeoutError:
            # Timed out: re-check the event before sending again.
            finished = stop_event.is_set()
        else:
            # The event was set while waiting.
            finished = True