feat(02-01): create telegram_utils with message formatting
- Add split_message_smart: code-block-aware message splitting at 4000 chars
- Add escape_markdown_v2: escape 17 special chars outside code blocks
- Add typing_indicator_loop: re-send typing every 4s with asyncio.Event
- Smart splitting respects triple-backtick and single-backtick boundaries
- Never splits inside code blocks (preserves syntax)
- Regex pattern identifies code regions for selective escaping
This commit is contained in:
parent
6a115a4947
commit
6b624d7f80
1 changed files with 165 additions and 0 deletions
165
telegram/telegram_utils.py
Normal file
165
telegram/telegram_utils.py
Normal file
|
|
@ -0,0 +1,165 @@
|
|||
"""
|
||||
Telegram message formatting and UX utilities.
|
||||
|
||||
Provides smart message splitting, MarkdownV2 escaping, and typing indicator
|
||||
management for the Telegram Claude Code bridge.
|
||||
|
||||
Based on research in: .planning/phases/02-telegram-integration/02-RESEARCH.md
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import re
|
||||
from telegram.constants import ChatAction
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
TELEGRAM_MAX_LENGTH = 4096
SAFE_LENGTH = 4000  # Leave room for MarkdownV2 escape character expansion


def _pack_lines(lines: list[str], limit: int) -> list[str]:
    """Greedily join items with newlines into pieces of at most ``limit`` chars.

    An item longer than ``limit`` is hard-split into ``limit``-sized slices, so
    every returned piece is guaranteed to satisfy ``len(piece) <= limit``.
    """
    pieces: list[str] = []
    pending: list[str] = []
    pending_len = 0  # Length of "\n".join(pending)

    for line in lines:
        # ``or [""]`` keeps empty lines: range() yields nothing for them.
        slices = [line[i:i + limit] for i in range(0, len(line), limit)] or [""]
        for part in slices:
            if pending and pending_len + 1 + len(part) > limit:
                pieces.append("\n".join(pending))
                pending, pending_len = [], 0
            pending_len += len(part) + (1 if pending else 0)
            pending.append(part)

    if pending:
        pieces.append("\n".join(pending))
    return pieces


def _fenced_units(block: list[str], closed: bool, limit: int) -> list[str]:
    """Turn the lines of one fenced code block into units that fit ``limit``.

    A block that fits is returned whole. An oversized block is cut at line
    boundaries and every piece is re-wrapped in its own fences, so each piece
    is still a complete code block. ``closed`` tells whether the last line of
    ``block`` is the closing fence (False for a block left open at end of text).
    """
    whole = "\n".join(block)
    if len(whole) <= limit:
        return [whole]

    opener = block[0]
    closer = block[-1] if closed else "```"
    body = block[1:-1] if closed else block[1:]

    # Each piece costs: opener + "\n" + body part + "\n" + closer.
    budget = limit - len(opener) - len(closer) - 2
    if budget < 1:
        # The limit is too small to fit the fences; fall back to a plain
        # bounded split so the length guarantee still holds.
        return _pack_lines(block, limit)

    return [f"{opener}\n{piece}\n{closer}" for piece in _pack_lines(body, budget)]


def split_message_smart(text: str, max_length: int = SAFE_LENGTH) -> list[str]:
    """
    Split a long message into chunks that respect triple-backtick code blocks.

    Chunks are cut at line boundaries. A fenced code block (from a line that
    starts with three backticks up to and including the next such line) is
    kept intact in a single chunk whenever it fits. A code block that is larger
    than max_length on its own is cut at line boundaries and each piece is
    wrapped in its own fences. A single line longer than max_length is
    hard-split by character count as a last resort.

    Uses 4000 as default max (not 4096) to leave room for MarkdownV2 escape
    character expansion.

    Args:
        text: Message text to split
        max_length: Maximum length per chunk (default: 4000)

    Returns:
        List of message chunks, each <= max_length. Text that already fits is
        returned unchanged as a single-element list. Whitespace-only chunks
        produced while splitting are dropped because they carry no content.

    Raises:
        ValueError: If text needs splitting and max_length is smaller than 1.

    Example:
        >>> [len(c) for c in split_message_smart("a" * 5000)]
        [4000, 1000]
    """
    if len(text) <= max_length:
        return [text]
    if max_length < 1:
        raise ValueError("max_length must be at least 1")

    # Pass 1: turn the text into atomic units that each fit max_length.
    # A unit is a plain line (or a slice of an over-long one) or a whole
    # fenced code block.
    units: list[str] = []
    block: list[str] = []
    in_code_block = False

    for line in text.split("\n"):
        is_fence = line.strip().startswith("```")
        if in_code_block:
            block.append(line)
            if is_fence:
                # The closing fence belongs to the block it terminates.
                units.extend(_fenced_units(block, True, max_length))
                block, in_code_block = [], False
        elif is_fence:
            block, in_code_block = [line], True
        else:
            units.extend(_pack_lines([line], max_length))

    if block:
        # Code block still open at end of text.
        units.extend(_fenced_units(block, False, max_length))

    # Pass 2: greedily pack the units into chunks.
    return [chunk for chunk in _pack_lines(units, max_length) if chunk.strip()]
|
||||
|
||||
|
||||
# MarkdownV2 characters that must be backslash-escaped in ordinary text. The
# backslash itself is included so a literal "\" is not read as an escape.
_MDV2_SPECIAL_RE = re.compile('([' + re.escape('\\_*[]()~`>#+-=|{}.!') + '])')

# Code regions: triple-backtick blocks first, then single-backtick inline code.
# The single capturing group makes re.split() return the code regions too.
_MDV2_CODE_RE = re.compile(r'(```[\s\S]*?```|`[^`]*?`)')


def escape_markdown_v2(text: str) -> str:
    r"""
    Escape text for Telegram MarkdownV2 while keeping code regions as code.

    Outside code regions, the 18 reserved characters
    _ * [ ] ( ) ~ ` > # + - = | { } . ! and the backslash itself are prefixed
    with a backslash.

    Inside code regions (triple-backtick blocks and single-backtick inline
    code) the delimiters are kept and only backslashes and backticks in the
    content are escaped, which is what MarkdownV2 requires inside code
    entities. Other characters in code are left untouched.

    Strategy: Split text by code regions, escape each kind of segment with its
    own rule, rejoin.

    Args:
        text: Text to escape

    Returns:
        Text that is safe to send with parse_mode MarkdownV2

    Example:
        >>> escape_markdown_v2("hello_world")
        'hello\\_world'
        >>> escape_markdown_v2("`hello_world`")
        '`hello_world`'
    """

    def escape_code(segment: str) -> str:
        # A segment is either ```...``` or `...`; keep the delimiters verbatim
        # and escape only what is between them.
        fence = '```' if segment.startswith('```') else '`'
        inner = segment[len(fence):len(segment) - len(fence)]
        inner = inner.replace('\\', '\\\\').replace('`', '\\`')
        return fence + inner + fence

    # With one capturing group, split() alternates: even indexes are ordinary
    # text, odd indexes are the matched code regions.
    segments = _MDV2_CODE_RE.split(text)
    return ''.join(
        escape_code(segment) if index % 2 else _MDV2_SPECIAL_RE.sub(r'\\\1', segment)
        for index, segment in enumerate(segments)
    )
|
||||
|
||||
|
||||
async def typing_indicator_loop(bot, chat_id: int, stop_event: asyncio.Event):
    """
    Keep the "typing..." indicator visible in a chat until told to stop.

    Sends ChatAction.TYPING, then waits up to 4 seconds for stop_event. If the
    event is set during the wait the coroutine finishes; if the wait times out
    the action is sent again. A failure to send the action is logged as a
    warning and does not end the loop.

    Args:
        bot: Telegram bot instance (must provide an awaitable send_chat_action)
        chat_id: Chat ID to send typing indicator to
        stop_event: asyncio.Event that signals when to stop

    Example:
        >>> stop_typing = asyncio.Event()
        >>> task = asyncio.create_task(typing_indicator_loop(bot, chat_id, stop_typing))
        >>> # ... long operation ...
        >>> stop_typing.set()
        >>> await task
    """
    refresh_seconds = 4.0

    while not stop_event.is_set():
        # Best effort: a failed send must not kill the indicator loop.
        try:
            await bot.send_chat_action(chat_id=chat_id, action=ChatAction.TYPING)
        except Exception as e:
            logger.warning(f"Failed to send typing indicator: {e}")

        # Sleep until the next refresh, waking early if stop is requested.
        try:
            await asyncio.wait_for(stop_event.wait(), timeout=refresh_seconds)
        except asyncio.TimeoutError:
            pass  # Interval elapsed without a stop request; refresh again.
        else:
            return  # stop_event was set while waiting.
|
||||
Loading…
Add table
Reference in a new issue