"""
Telegram message formatting and UX utilities.

Provides smart message splitting, MarkdownV2 escaping, and typing indicator
management for the Telegram Claude Code bridge.

Based on research in: .planning/phases/02-telegram-integration/02-RESEARCH.md
"""

import asyncio
import logging
import re

logger = logging.getLogger(__name__)

TELEGRAM_MAX_LENGTH = 4096
SAFE_LENGTH = 4000  # Leave room for MarkdownV2 escape character expansion

# Marker that opens/closes a fenced code block.
_FENCE = "```"

# MarkdownV2 characters that must be backslash-escaped in plain text. The
# backslash itself is included: an unescaped "\" would otherwise be consumed
# as the escape prefix of whatever character follows it.
_MDV2_SPECIAL_RE = re.compile(r"([\\_*\[\]()~`>#+\-=|{}.!])")

# Fenced blocks (```...```) are tried first, then inline code (`...`).
_MDV2_CODE_RE = re.compile(r"(```[\s\S]*?```|`[^`]*?`)")


def _hard_split(text: str, size: int) -> list[str]:
    """Cut *text* into consecutive pieces of at most *size* characters."""
    return [text[start:start + size] for start in range(0, len(text), size)]


def _fit_code_block(lines: list[str], closed: bool, max_length: int) -> list[str]:
    """Return one fenced code block as units that each fit in *max_length*.

    Args:
        lines: Lines of the block, starting with the opening fence line.
        closed: True when the last entry of *lines* is the closing fence.
        max_length: Maximum length of every returned unit.

    Returns:
        ``[block]`` unchanged when it fits. Otherwise several pieces, each
        wrapped in its own opening and closing fence so that every piece is
        valid on its own.
    """
    whole = "\n".join(lines)
    if len(whole) <= max_length:
        return [whole]

    opener = lines[0]
    closer = lines[-1] if closed else _FENCE
    body_lines = lines[1:-1] if closed else lines[1:]

    # Room left for code once "<opener>\n" and "\n<closer>" are accounted for.
    budget = max_length - len(opener) - len(closer) - 2
    if budget < 1:
        # The fences alone do not fit; the length limit wins over fence balance.
        units: list[str] = []
        for line in lines:
            units.extend(_hard_split(line, max_length) if len(line) > max_length else [line])
        return units

    pieces: list[str] = []
    body: list[str] = []
    body_len = 0  # len("\n".join(body))
    for line in body_lines:
        parts = _hard_split(line, budget) if len(line) > budget else [line]
        for part in parts:
            added = len(part) + (1 if body else 0)
            if body and body_len + added > budget:
                # Close this piece; the next one re-opens with the same fence.
                pieces.append("\n".join([opener, *body, _FENCE]))
                body, body_len = [], 0
                added = len(part)
            body.append(part)
            body_len += added
    pieces.append("\n".join([opener, *body, closer]))
    return pieces


def split_message_smart(text: str, max_length: int = SAFE_LENGTH) -> list[str]:
    """
    Split a long message into chunks that never exceed ``max_length``.

    Splitting rules, in order of preference:

    1. Split at line breaks (\\n) outside of triple-backtick code blocks.
    2. A code block that fits in one chunk is never split; the chunk boundary
       is placed before or after the whole block.
    3. A code block larger than ``max_length`` is split at line breaks and
       every piece is re-wrapped in its own fences, so each chunk stays
       balanced.
    4. A single line longer than the limit is hard-split by character.

    Uses 4000 as default max (not 4096) to leave room for MarkdownV2 escape
    character expansion.

    Args:
        text: Message text to split
        max_length: Maximum length per chunk (default: 4000)

    Returns:
        List of message chunks, each <= max_length. Text that already fits is
        returned unchanged as a single chunk.

    Raises:
        ValueError: If max_length is not positive.

    Example:
        >>> [len(c) for c in split_message_smart("a" * 5000)]
        [4000, 1000]
    """
    if max_length < 1:
        raise ValueError("max_length must be a positive integer")
    if len(text) <= max_length:
        return [text]

    # Pass 1: turn the text into units that each fit on their own - plain
    # lines (hard-split if needed) and code blocks (whole, or re-fenced pieces).
    units: list[str] = []
    block: list[str] | None = None  # lines of the code block being collected
    for line in text.split("\n"):
        is_fence = line.strip().startswith(_FENCE)
        if block is not None:
            block.append(line)
            if is_fence:
                units.extend(_fit_code_block(block, True, max_length))
                block = None
        elif is_fence:
            block = [line]
        elif len(line) > max_length:
            units.extend(_hard_split(line, max_length))
        else:
            units.append(line)
    if block is not None:
        # Unterminated fence: treat everything up to the end as code.
        units.extend(_fit_code_block(block, False, max_length))

    # Pass 2: greedily pack units into chunks joined by newlines.
    chunks: list[str] = []
    current = ""
    for unit in units:
        if not current:
            # Never open a chunk with a blank line.
            if unit.strip():
                current = unit
            continue
        candidate = current + "\n" + unit
        if len(candidate) <= max_length:
            current = candidate
        else:
            chunks.append(current)
            current = unit if unit.strip() else ""
    if current:
        chunks.append(current)

    return chunks


def escape_markdown_v2(text: str) -> str:
    """
    Escape text for Telegram MarkdownV2 while keeping code regions as code.

    Outside code regions, the 18 reserved characters
    ``_ * [ ] ( ) ~ ` > # + - = | { } . !`` and the backslash are escaped
    with a preceding backslash.

    Inside code regions (triple-backtick blocks and single-backtick inline
    code) the delimiters are kept and only backslashes and backticks in the
    content are escaped, which is what MarkdownV2 requires inside ``pre`` and
    ``code`` entities.

    Strategy: Split text by code regions, escape each region by its own
    rules, rejoin.

    Args:
        text: Text to escape

    Returns:
        Text that is safe to send with parse_mode MarkdownV2

    Example:
        >>> escape_markdown_v2("hello_world")
        'hello\\_world'
        >>> escape_markdown_v2("`hello_world`")
        '`hello_world`'  # Inside backticks, not escaped
    """
    escaped_parts = []
    # re.split with one capture group alternates: plain, code, plain, code, ...
    for index, segment in enumerate(_MDV2_CODE_RE.split(text)):
        if index % 2:
            fence = _FENCE if segment.startswith(_FENCE) else "`"
            inner = segment[len(fence):len(segment) - len(fence)]
            inner = inner.replace("\\", "\\\\").replace("`", "\\`")
            escaped_parts.append(fence + inner + fence)
        else:
            escaped_parts.append(_MDV2_SPECIAL_RE.sub(r"\\\1", segment))
    return "".join(escaped_parts)


async def typing_indicator_loop(
    bot,
    chat_id: int,
    stop_event: asyncio.Event,
    interval: float = 4.0,
    action=None,
) -> None:
    """
    Maintain a chat action (typing indicator) until stop_event is set.

    Re-sends the chat action every ``interval`` seconds. The default of 4
    seconds keeps the indicator alive for long operations (Telegram expires
    typing after 5s).

    Failures to send the indicator are logged and otherwise ignored: the
    indicator is best-effort and must not break the operation it decorates.

    Args:
        bot: Telegram bot instance
        chat_id: Chat ID to send typing indicator to
        stop_event: asyncio.Event to signal when to stop
        interval: Seconds between re-sends (default: 4.0)
        action: Chat action to send; defaults to ChatAction.TYPING

    Raises:
        ValueError: If interval is not positive.

    Example:
        >>> stop_typing = asyncio.Event()
        >>> task = asyncio.create_task(typing_indicator_loop(bot, chat_id, stop_typing))
        >>> # ... long operation ...
        >>> stop_typing.set()
        >>> await task
    """
    if interval <= 0:
        raise ValueError("interval must be positive")
    if action is None:
        # Imported lazily so the text helpers in this module can be used
        # without python-telegram-bot installed.
        from telegram.constants import ChatAction

        action = ChatAction.TYPING

    while not stop_event.is_set():
        try:
            await bot.send_chat_action(chat_id=chat_id, action=action)
        except Exception as e:
            logger.warning("Failed to send typing indicator: %s", e)

        # Wait for the interval or until stop_event (whichever comes first)
        try:
            await asyncio.wait_for(stop_event.wait(), timeout=interval)
            break  # stop_event was set
        except asyncio.TimeoutError:
            continue  # Timeout, re-send typing indicator