feat(02-01): create telegram_utils with message formatting

- Add split_message_smart: code-block-aware message splitting at 4000 chars - Add escape_markdown_v2: escape 18 special chars outside code blocks - Add typing_indicator_loop: re-send typing every 4s with asyncio.Event - Smart splitting respects triple-backtick and single-backtick boundaries - Never splits inside code blocks (preserves syntax) - Regex pattern identifies code regions for selective escaping
2026-02-04 19:17:20 +00:00 · 2026-02-04 19:17:20 +00:00 · 6b624d7f80
commit 6b624d7f80
parent 6a115a4947
1 changed files with 165 additions and 0 deletions
--- a/telegram/telegram_utils.py
+++ b/telegram/telegram_utils.py
@ -0,0 +1,165 @@
 """
 Telegram message formatting and UX utilities.
 Provides smart message splitting, MarkdownV2 escaping, and typing indicator
 management for the Telegram Claude Code bridge.
 Based on research in: .planning/phases/02-telegram-integration/02-RESEARCH.md
 """
 import asyncio
 import logging
 import re
 from telegram.constants import ChatAction
 # Module-level logger; used below to report failed typing-indicator sends.
 logger = logging.getLogger(__name__)
 # Telegram's hard limit on the text of a single message, in characters.
 TELEGRAM_MAX_LENGTH = 4096
 SAFE_LENGTH = 4000  # Leave room for MarkdownV2 escape character expansion
 _FENCE = '```'


 def _is_fence_line(line: str) -> bool:
    """Return True when *line* opens or closes a triple-backtick block."""
    return line.strip().startswith(_FENCE)


 def _hard_split(text: str, size: int) -> list[str]:
    """Cut *text* into consecutive slices of at most *size* characters."""
    return [text[start:start + size] for start in range(0, len(text), size)]


 def _group_fenced_units(lines: list[str]) -> list[list[str]]:
    """Group lines into units: one plain line each, or one whole fenced block."""
    units = []
    open_block = None  # Lines of the fenced block being collected, if any
    for line in lines:
        if open_block is None:
            if _is_fence_line(line):
                open_block = [line]
            else:
                units.append([line])
            continue
        open_block.append(line)
        if _is_fence_line(line):
            units.append(open_block)
            open_block = None
    if open_block is not None:
        # Unterminated block: everything up to the end of the text belongs to it.
        units.append(open_block)
    return units


 def _split_oversized_unit(unit: list[str], max_length: int) -> list[str]:
    """Break a unit that cannot fit in one chunk into pieces <= max_length."""
    unit_text = '\n'.join(unit)
    if len(unit) == 1:
        # A plain line (or a lone fence line): nothing smarter than slicing.
        return _hard_split(unit_text, max_length)
    opener = unit[0]
    closed = _is_fence_line(unit[-1])
    closer = unit[-1] if closed else _FENCE
    body = unit[1:-1] if closed else unit[1:]
    # Every piece is re-wrapped as opener + "\n" + body + "\n" + closing fence,
    # so that much room has to be reserved out of max_length.
    budget = max_length - (len(opener) + len(closer) + 2)
    if budget < 1:
        # No room left for content once the fences are added.
        return _hard_split(unit_text, max_length)
    fragments = []
    for line in body:
        # "or ['']" keeps blank lines, for which _hard_split returns nothing.
        fragments.extend(_hard_split(line, budget) or [''])
    groups = []
    group = []
    used = 0
    for fragment in fragments:
        cost = len(fragment) + (1 if group else 0)  # +1 for the joining "\n"
        if group and used + cost > budget:
            groups.append(group)
            group, used = [fragment], len(fragment)
        else:
            group.append(fragment)
            used += cost
    if group:
        groups.append(group)
    if not groups:
        return _hard_split(unit_text, max_length)
    # Intermediate pieces get a plain closing fence; the final piece keeps the
    # original closing line (or gets a plain fence if the block was unclosed).
    pieces = ['\n'.join([opener, *g, _FENCE]) for g in groups[:-1]]
    pieces.append('\n'.join([opener, *groups[-1], closer]))
    return pieces


 def split_message_smart(text: str, max_length: int = SAFE_LENGTH) -> list[str]:
    """
    Split a long message into chunks of at most max_length characters.

    Text that already fits is returned unchanged as a single chunk. Otherwise
    the text is split at line breaks and lines are packed greedily into chunks.
    A triple-backtick code block is treated as one indivisible unit: if it does
    not fit in the current chunk it is moved, whole, to the next one.

    Last-resort behaviour when a unit cannot fit in any chunk by itself:
      * a code block is split between lines, and every piece is re-wrapped
        with the opening fence line and a closing fence so each chunk stays
        balanced (lines longer than the available room are sliced);
      * a plain line is sliced into max_length-sized pieces.

    Uses 4000 as default max (not 4096) to leave room for MarkdownV2 escape
    character expansion.

    Args:
        text: Message text to split
        max_length: Maximum length per chunk (default: 4000)
    Returns:
        List of message chunks, each <= max_length
    Raises:
        ValueError: If max_length is smaller than 1
    Example:
        >>> [len(c) for c in split_message_smart("a" * 5000)]
        [4000, 1000]
    """
    if max_length < 1:
        raise ValueError("max_length must be at least 1")
    if len(text) <= max_length:
        return [text]
    chunks = []
    current = ""
    for unit in _group_fenced_units(text.split('\n')):
        unit_text = '\n'.join(unit)
        candidate = f"{current}\n{unit_text}" if current else unit_text
        if len(candidate) <= max_length:
            current = candidate
            continue
        # The unit does not fit behind what we have: close the current chunk.
        if current:
            chunks.append(current)
        if len(unit_text) <= max_length:
            current = unit_text
            continue
        # The unit is too large even alone; keep the last piece open so the
        # following lines can still be packed behind it.
        pieces = _split_oversized_unit(unit, max_length)
        chunks.extend(pieces[:-1])
        current = pieces[-1]
    if current:
        chunks.append(current)
    return chunks
 # Code regions: triple-backtick blocks first, then single-backtick inline code.
 _MDV2_CODE_REGION = re.compile(r'(```[\s\S]*?```|`[^`]*?`)')
 # Characters escaped outside code: the 18 MarkdownV2 specials plus backslash.
 _MDV2_SPECIAL = re.compile(r'([\\_*\[\]()~`>#+\-=|{}.!])')
 # Characters escaped inside code regions: only backslash and backtick.
 _MDV2_CODE_SPECIAL = re.compile(r'([\\`])')


 def _escape_code_region(region: str) -> str:
    """Escape backslashes and backticks in a code region, keeping its fences."""
    fence = '```' if region.startswith('```') else '`'
    inner = region[len(fence):-len(fence)]
    return fence + _MDV2_CODE_SPECIAL.sub(r'\\\1', inner) + fence


 def escape_markdown_v2(text: str) -> str:
    """
    Escape MarkdownV2 special characters, treating code regions separately.

    Outside code regions the 18 special characters
    _ * [ ] ( ) ~ ` > # + - = | { } . ! and the backslash itself are each
    prefixed with a backslash. A literal backslash has to be escaped too,
    otherwise it would act as the escape for whatever character follows it.

    Inside code regions (triple-backtick blocks and single-backtick inline
    code) the delimiting backticks are kept and only backslashes and embedded
    backticks are escaped, which is what MarkdownV2 requires for code entities.

    Strategy: split text by code regions, escape each kind of segment, rejoin.
    A backtick without a matching partner is not a code region and is escaped
    like any other special character.

    Args:
        text: Text to escape
    Returns:
        Text with MarkdownV2 special characters escaped
    Example:
        >>> escape_markdown_v2("hello_world")
        'hello\\_world'
        >>> escape_markdown_v2("`hello_world`")
        '`hello_world`'
    """
    # With one capture group, split() alternates: plain, code, plain, code, ...
    segments = _MDV2_CODE_REGION.split(text)
    escaped = []
    for index, segment in enumerate(segments):
        if index % 2:
            escaped.append(_escape_code_region(segment))
        else:
            escaped.append(_MDV2_SPECIAL.sub(r'\\\1', segment))
    return ''.join(escaped)
 async def typing_indicator_loop(bot, chat_id: int, stop_event: asyncio.Event,
                                 *, interval: float = 4.0):
    """
    Maintain typing indicator until stop_event is set.

    Sends ChatAction.TYPING, then waits up to `interval` seconds for
    stop_event; if the wait times out the action is sent again. The default of
    4 seconds keeps the indicator alive for operations longer than 5 seconds
    (Telegram expires typing after 5s).

    A failed send is logged as a warning and does not end the loop, so a
    transient API error cannot kill the indicator for the rest of the
    operation.

    Args:
        bot: Telegram bot instance
        chat_id: Chat ID to send typing indicator to
        stop_event: asyncio.Event to signal when to stop
        interval: Seconds between re-sends (keyword-only, default: 4.0)
    Raises:
        ValueError: If interval is not positive (the loop would otherwise
            re-send without pausing)
    Example:
        >>> stop_typing = asyncio.Event()
        >>> task = asyncio.create_task(typing_indicator_loop(bot, chat_id, stop_typing))
        >>> # ... long operation ...
        >>> stop_typing.set()
        >>> await task
    """
    if interval <= 0:
        raise ValueError("interval must be positive")
    while not stop_event.is_set():
        try:
            await bot.send_chat_action(chat_id=chat_id, action=ChatAction.TYPING)
        except Exception as e:
            # Best effort: the indicator is cosmetic, so log and keep going.
            logger.warning("Failed to send typing indicator: %s", e)
        # Wait for the interval or until stop_event (whichever comes first)
        try:
            await asyncio.wait_for(stop_event.wait(), timeout=interval)
        except asyncio.TimeoutError:
            continue  # Timeout, re-send typing indicator
        else:
            break  # stop_event was set