homelab/telegram/telegram_utils.py
Mikkel Georgsen 6b624d7f80 feat(02-01): create telegram_utils with message formatting
- Add split_message_smart: code-block-aware message splitting at 4000 chars
- Add escape_markdown_v2: escape 17 special chars outside code blocks
- Add typing_indicator_loop: re-send typing every 4s with asyncio.Event
- Smart splitting respects triple-backtick and single-backtick boundaries
- Never splits inside code blocks (preserves syntax)
- Regex pattern identifies code regions for selective escaping
2026-02-04 19:17:20 +00:00

165 lines
5.2 KiB
Python

"""
Telegram message formatting and UX utilities.
Provides smart message splitting, MarkdownV2 escaping, and typing indicator
management for the Telegram Claude Code bridge.
Based on research in: .planning/phases/02-telegram-integration/02-RESEARCH.md
"""
import asyncio
import logging
import re
from telegram.constants import ChatAction
logger = logging.getLogger(__name__)
TELEGRAM_MAX_LENGTH = 4096
SAFE_LENGTH = 4000  # Leave room for MarkdownV2 escape character expansion

# Marker that opens/closes a fenced code block.
_FENCE = '```'
# Text appended to terminate a code block that has to continue in the next chunk.
_FENCE_CLOSE = '\n' + _FENCE
# Below this size there is no room for fence bookkeeping (reopen + content + close),
# so the splitter falls back to plain fixed-width slicing.
_MIN_FENCE_AWARE_LENGTH = 16


def _slice_fixed(text: str, size: int) -> list[str]:
    """Cut text into consecutive pieces of at most size characters."""
    return [text[i:i + size] for i in range(0, len(text), size)] or ['']


def split_message_smart(text: str, max_length: int = SAFE_LENGTH) -> list[str]:
    r"""
    Split a long message into chunks of at most max_length characters.

    Lines are packed greedily, so chunks normally end at a line break (\n).
    A single line that is longer than the limit is hard-split by character
    count as a last resort.

    Triple-backtick code blocks are kept syntactically valid: when a block
    does not fit into one chunk, the current chunk is terminated with a
    closing fence and the next chunk starts by repeating the opening fence
    line (e.g. "```python"), so every chunk renders as code on its own.
    A block that is still unterminated at the end of the text is left
    unterminated in the final chunk.

    Uses 4000 as default max (not 4096) to leave room for MarkdownV2 escape
    character expansion.

    Args:
        text: Message text to split
        max_length: Maximum length per chunk (default: 4000)

    Returns:
        List of message chunks, each <= max_length. Text that already fits
        is returned unchanged as a single-element list.

    Raises:
        ValueError: If max_length is not positive.

    Example:
        >>> [len(c) for c in split_message_smart("a" * 5000)]
        [4000, 1000]
    """
    if max_length <= 0:
        raise ValueError("max_length must be a positive integer")
    if len(text) <= max_length:
        return [text]
    if max_length < _MIN_FENCE_AWARE_LENGTH:
        # Too small to hold fence overhead; plain slicing still honours the limit.
        return _slice_fixed(text, max_length)

    # While a code block is open, always keep room for the closing fence.
    fenced_limit = max_length - len(_FENCE_CLOSE)

    chunks = []
    current = ''        # chunk being assembled
    header = None       # fence line used to reopen the block; None outside code
    fence_start = None  # offset in `current` of a just-opened, still-empty block

    for line in text.split('\n'):
        stripped = line.strip()
        # An even number of markers means the line opens AND closes a block,
        # which leaves the state unchanged.
        toggles = stripped.startswith(_FENCE) and stripped.count(_FENCE) % 2 == 1
        inside_before = header is not None
        inside_after = inside_before != toggles
        limit = fenced_limit if inside_after else max_length

        joined = f'{current}\n{line}' if current else line
        if toggles and inside_before and len(joined) > limit:
            # An indented closing fence may not fit; the bare fence always does
            # because its space was reserved while the block was open.
            line = stripped
            joined = f'{current}\n{line}' if current else line

        if len(joined) <= limit:
            line_start = len(joined) - len(line)
            current = joined
            placed_whole = True
        else:
            # Flush what we have, based on the state BEFORE this line.
            if current:
                if not inside_before:
                    chunks.append(current)
                elif fence_start is None:
                    chunks.append(current + _FENCE_CLOSE)
                elif fence_start > 0:
                    # The block was opened but holds no code yet: drop the
                    # dangling fence instead of emitting an empty code block.
                    chunks.append(current[:fence_start - 1])
            prefix = f'{header}\n' if inside_before else ''
            suffix = _FENCE_CLOSE if inside_before else ''
            in_code = inside_before or inside_after
            room = (fenced_limit if in_code else max_length) - len(prefix)
            # Hard-split lines that do not fit even into a fresh chunk.
            *full, last = _slice_fixed(line, room)
            chunks.extend(prefix + piece + suffix for piece in full)
            current = prefix + last
            line_start = len(prefix)
            placed_whole = not full

        if toggles and not inside_before:
            opener = stripped[stripped.rindex(_FENCE):]
            # Very long fence lines are not repeated; reopen with a bare fence.
            header = opener if len(opener) <= max_length // 4 else _FENCE
            fence_start = line_start if placed_whole and opener == stripped else None
        else:
            fence_start = None
            if toggles:
                header = None

    if current:
        chunks.append(current)
    return chunks
# Characters that must be backslash-escaped in ordinary MarkdownV2 text
# (the 18 formatting characters plus the backslash itself).
_MDV2_TEXT_SPECIALS = re.compile(r'([\\_*\[\]()~`>#+\-=|{}.!])')
# Inside code/pre entities only the backtick and the backslash are special.
_MDV2_CODE_SPECIALS = re.compile(r'([\\`])')
# Fenced blocks (```...```) are tried first, then non-empty inline spans (`...`).
_MDV2_CODE_REGION = re.compile(r'```[\s\S]*?```|`[^`]+`')


def escape_markdown_v2(text: str) -> str:
    r"""
    Escape MarkdownV2 special characters, treating code regions separately.

    Outside code, the 18 formatting characters
    _ * [ ] ( ) ~ ` > # + - = | { } . ! and the backslash itself are
    prefixed with a backslash.

    Inside code regions (triple-backtick blocks and non-empty single-backtick
    spans) the delimiters are kept and only backslashes and backticks in the
    content are escaped, which is what Telegram requires for code/pre
    entities. All other characters in code are left untouched.

    Backticks that do not form a complete code region (for example an
    unterminated fence) are treated as plain text and escaped.

    Args:
        text: Text to escape

    Returns:
        Text that is safe to send with parse_mode MarkdownV2

    Example:
        >>> escape_markdown_v2("hello_world")
        'hello\\_world'
        >>> escape_markdown_v2("`hello_world`")
        '`hello_world`'
    """
    parts = []
    last_end = 0
    for match in _MDV2_CODE_REGION.finditer(text):
        # Plain text preceding this code region.
        parts.append(_MDV2_TEXT_SPECIALS.sub(r'\\\1', text[last_end:match.start()]))

        region = match.group(0)
        fence = '```' if region.startswith('```') else '`'
        inner = region[len(fence):-len(fence)]
        # Keep the delimiters, escape only what is special inside code.
        parts.append(fence + _MDV2_CODE_SPECIALS.sub(r'\\\1', inner) + fence)
        last_end = match.end()

    # Plain text after the last code region (or the whole text if none matched).
    parts.append(_MDV2_TEXT_SPECIALS.sub(r'\\\1', text[last_end:]))
    return ''.join(parts)
async def typing_indicator_loop(
    bot,
    chat_id: int,
    stop_event: asyncio.Event,
    *,
    interval: float = 4.0,
) -> None:
    """
    Maintain typing indicator until stop_event is set.

    Sends ChatAction.TYPING immediately and then again every `interval`
    seconds. The default of 4 seconds keeps the indicator alive for
    operations longer than 5 seconds (Telegram expires typing after 5s).

    Failures to send the indicator are logged as warnings and do not stop
    the loop. Returns immediately, without sending anything, if stop_event
    is already set on entry.

    Args:
        bot: Telegram bot instance; must provide an awaitable
            send_chat_action(chat_id=..., action=...)
        chat_id: Chat ID to send typing indicator to
        stop_event: asyncio.Event to signal when to stop
        interval: Seconds to wait between indicator refreshes (default: 4.0)

    Raises:
        ValueError: If interval is not positive (a non-positive wait would
            re-send the indicator in a tight loop).

    Example:
        >>> stop_typing = asyncio.Event()
        >>> task = asyncio.create_task(typing_indicator_loop(bot, chat_id, stop_typing))
        >>> # ... long operation ...
        >>> stop_typing.set()
        >>> await task
    """
    if interval <= 0:
        raise ValueError("interval must be positive")

    while not stop_event.is_set():
        try:
            await bot.send_chat_action(chat_id=chat_id, action=ChatAction.TYPING)
        except Exception as e:
            # Best effort: a failed indicator must never abort the real work.
            logger.warning(f"Failed to send typing indicator: {e}")

        # Sleep until the next refresh is due, waking early if asked to stop.
        try:
            await asyncio.wait_for(stop_event.wait(), timeout=interval)
        except asyncio.TimeoutError:
            continue  # Interval elapsed, re-send typing indicator
        break  # stop_event was set