feat(02-01): create telegram_utils with message formatting
- Add split_message_smart: code-block-aware message splitting at 4000 chars
- Add escape_markdown_v2: escape 17 special chars outside code blocks
- Add typing_indicator_loop: re-send typing every 4s with asyncio.Event
- Smart splitting respects triple-backtick and single-backtick boundaries
- Never splits inside code blocks (preserves syntax)
- Regex pattern identifies code regions for selective escaping
This commit is contained in:
parent
6a115a4947
commit
6b624d7f80
1 changed files with 165 additions and 0 deletions
165
telegram/telegram_utils.py
Normal file
165
telegram/telegram_utils.py
Normal file
|
|
@ -0,0 +1,165 @@
|
||||||
|
"""
|
||||||
|
Telegram message formatting and UX utilities.
|
||||||
|
|
||||||
|
Provides smart message splitting, MarkdownV2 escaping, and typing indicator
|
||||||
|
management for the Telegram Claude Code bridge.
|
||||||
|
|
||||||
|
Based on research in: .planning/phases/02-telegram-integration/02-RESEARCH.md
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import logging
|
||||||
|
import re
|
||||||
|
from telegram.constants import ChatAction
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)

# Telegram's limit for the text of a single message.
TELEGRAM_MAX_LENGTH = 4096
SAFE_LENGTH = 4000  # Leave room for MarkdownV2 escape character expansion

# Triple-backtick fence and the text needed to close an open block.
_CODE_FENCE = "```"
_CODE_FENCE_CLOSE = "\n" + _CODE_FENCE


def _hard_split(line: str, size: int) -> list[str]:
    """Cut ``line`` into consecutive pieces of at most ``size`` characters.

    Always returns at least one piece, so an empty ``line`` yields ``[""]``.
    """
    return [line[start:start + size] for start in range(0, len(line), size)] or [""]


def _append_plain_line(chunks: list[str], current: str, line: str, max_length: int) -> str:
    """Add a non-code ``line`` to the chunk under construction.

    Finished chunks are appended to ``chunks``; the chunk that is still being
    built afterwards is returned.
    """
    candidate = f"{current}\n{line}" if current else line
    if len(candidate) <= max_length:
        return candidate
    if current:
        chunks.append(current)
    if len(line) <= max_length:
        return line
    # Last resort: the line alone is too long, so cut it by character count.
    *full_pieces, tail = _hard_split(line, max_length)
    chunks.extend(full_pieces)
    return tail


def split_message_smart(text: str, max_length: int = SAFE_LENGTH) -> list[str]:
    """
    Split a long message into chunks of at most ``max_length`` characters.

    Chunks are filled greedily and broken at line boundaries. A line that is
    longer than ``max_length`` on its own is hard-split by character count.

    Triple-backtick code blocks are kept intact whenever they fit into one
    chunk: a block that does not fit into the space left in the current chunk
    is moved to the next one. A block that is larger than a whole chunk is cut
    at a line boundary, the fence is closed at the end of the chunk and
    re-opened (with the same language tag, if any) at the start of the next
    one, so every chunk carries balanced fences.

    A line counts as a fence when, ignoring surrounding whitespace, it starts
    with a triple backtick and contains an odd number of them (so a one-line
    span that opens and closes on the same line does not change the state).
    If ``max_length`` is too small to wrap even one character of code in
    fences, fence lines are treated as ordinary text.

    Uses 4000 as default max (not 4096) to leave room for MarkdownV2 escape
    character expansion.

    Args:
        text: Message text to split
        max_length: Maximum length per chunk (default: 4000)

    Returns:
        List of message chunks, each <= max_length

    Raises:
        ValueError: If max_length is smaller than 1

    Example:
        >>> [len(chunk) for chunk in split_message_smart("a" * 5000)]
        [4000, 1000]
    """
    if max_length < 1:
        raise ValueError("max_length must be a positive integer")
    if len(text) <= max_length:
        return [text]

    chunks: list[str] = []
    current = ""
    in_code_block = False
    has_code = False      # True once the open block has a code line in `current`
    fence_start = 0       # Offset in `current` where the open block's fence begins
    reopen = _CODE_FENCE  # Fence used to continue a block in the next chunk

    for line in text.split("\n"):
        stripped = line.strip()
        is_fence = (
            stripped.startswith(_CODE_FENCE)
            and stripped.count(_CODE_FENCE) % 2 == 1
        )

        if not in_code_block:
            # Only enter code mode when fence + one char of code + closing
            # fence can fit in a chunk; otherwise treat the line as text.
            can_wrap = len(line) + 2 + len(_CODE_FENCE_CLOSE) <= max_length
            if is_fence and can_wrap:
                candidate = f"{current}\n{line}" if current else line
                # While a block is open, always keep room for a closing fence.
                if len(candidate) + len(_CODE_FENCE_CLOSE) > max_length:
                    chunks.append(current)
                    candidate = line
                fence_start = len(candidate) - len(line)
                current = candidate
                info = stripped[len(_CODE_FENCE):]
                if any(ch.isspace() or ch == "`" for ch in info):
                    info = ""  # Not a plain language tag; re-open with a bare fence
                reopen = _CODE_FENCE + info
                in_code_block = True
                has_code = False
            else:
                current = _append_plain_line(chunks, current, line, max_length)
            continue

        if is_fence:
            # Closing fence. A bare fence always fits thanks to the reserved
            # room; overflow is only possible when the line carries extra text.
            candidate = f"{current}\n{line}"
            if len(candidate) <= max_length:
                current = candidate
            else:
                chunks.append(current + _CODE_FENCE_CLOSE)
                trailing = stripped[len(_CODE_FENCE):].lstrip()
                current = _append_plain_line(chunks, "", trailing, max_length)
            in_code_block = False
            continue

        # Ordinary line inside an open code block.
        candidate = f"{current}\n{line}"
        if len(candidate) + len(_CODE_FENCE_CLOSE) <= max_length:
            current = candidate
            has_code = True
            continue

        if has_code:
            # Close the block here and continue it in a fresh chunk.
            chunks.append(current + _CODE_FENCE_CLOSE)
            current = reopen
        else:
            # No code of this block is in the chunk yet: move the opening
            # fence to the next chunk instead of emitting an empty block.
            preceding = current[:max(fence_start - 1, 0)]
            if preceding:
                chunks.append(preceding)
            current = current[fence_start:]
        fence_start = 0

        # Room left for code after the fence line, its newline and the
        # closing fence; at least 1 because of the `can_wrap` check above.
        room = max_length - len(current) - 1 - len(_CODE_FENCE_CLOSE)
        pieces = _hard_split(line, room)
        for piece in pieces[:-1]:
            chunks.append(f"{current}\n{piece}{_CODE_FENCE_CLOSE}")
            current = reopen
        current = f"{current}\n{pieces[-1]}"
        has_code = True

    if current:
        chunks.append(current)

    return chunks
|
||||||
|
|
||||||
|
|
||||||
|
# Characters that must be backslash-escaped in ordinary MarkdownV2 text.
# The backslash comes first so an existing backslash cannot swallow the next
# character or neutralise an escape added for it.
_MDV2_SPECIAL_CHARS = '\\_*[]()~`>#+-=|{}.!'
_MDV2_SPECIAL_RE = re.compile(f'([{re.escape(_MDV2_SPECIAL_CHARS)}])')

# Inside code entities only the backslash and the backtick need escaping.
_MDV2_CODE_SPECIAL_RE = re.compile(r'([\\`])')

# Code regions: a triple-backtick block first, then an inline single-backtick
# span. Both require at least one character of content so that a stray run of
# backticks is escaped as plain text rather than taken as an empty entity.
_MDV2_CODE_SPAN_RE = re.compile(r'(```[\s\S]+?```|`[^`]+`)')


def escape_markdown_v2(text: str) -> str:
    """
    Escape MarkdownV2 special characters, keeping code regions as code.

    Outside code regions the following characters are prefixed with a
    backslash: \\ _ * [ ] ( ) ~ ` > # + - = | { } . !

    Inside code regions (triple-backtick blocks and single-backtick inline
    spans) the delimiters are kept and only backslashes and backticks in the
    content are escaped, which is what MarkdownV2 requires inside code
    entities. All other characters in code are left untouched.

    Backticks that do not form a complete, non-empty code region (for example
    an unclosed fence) are treated as plain text and escaped.

    Strategy: split text by code regions, escape each kind of segment with its
    own rule, rejoin.

    Args:
        text: Text to escape

    Returns:
        Text that is safe to send with MarkdownV2 parse mode

    Example:
        >>> escape_markdown_v2("hello_world")
        'hello\\_world'
        >>> escape_markdown_v2("`hello_world`")
        '`hello_world`'
    """
    # With one capturing group, split() alternates plain text (even indexes)
    # and matched code regions (odd indexes).
    segments = _MDV2_CODE_SPAN_RE.split(text)

    escaped_parts = []
    for index, segment in enumerate(segments):
        if index % 2 == 0:
            escaped_parts.append(_MDV2_SPECIAL_RE.sub(r'\\\1', segment))
            continue

        delimiter = '```' if segment.startswith('```') else '`'
        inner = segment[len(delimiter):-len(delimiter)]
        escaped_parts.append(
            delimiter + _MDV2_CODE_SPECIAL_RE.sub(r'\\\1', inner) + delimiter
        )

    return ''.join(escaped_parts)
|
||||||
|
|
||||||
|
|
||||||
|
async def typing_indicator_loop(
    bot,
    chat_id: int,
    stop_event: asyncio.Event,
    interval: float = 4.0,
) -> None:
    """
    Maintain typing indicator until stop_event is set.

    Sends ChatAction.TYPING, then waits up to ``interval`` seconds for
    stop_event; if the wait times out the indicator is sent again. The default
    of 4 seconds keeps the indicator alive for long operations (Telegram
    expires typing after 5s).

    A failure to send the indicator is logged as a warning and does not stop
    the loop.

    Args:
        bot: Telegram bot instance
        chat_id: Chat ID to send typing indicator to
        stop_event: asyncio.Event to signal when to stop
        interval: Seconds to wait between typing indicators (default: 4.0)

    Example:
        >>> stop_typing = asyncio.Event()
        >>> task = asyncio.create_task(typing_indicator_loop(bot, chat_id, stop_typing))
        >>> # ... long operation ...
        >>> stop_typing.set()
        >>> await task
    """
    while not stop_event.is_set():
        try:
            await bot.send_chat_action(chat_id=chat_id, action=ChatAction.TYPING)
        except Exception as exc:
            # Best effort: the indicator is cosmetic, so keep looping.
            logger.warning("Failed to send typing indicator: %s", exc)

        # Wait for the interval or until stop_event (whichever comes first)
        try:
            await asyncio.wait_for(stop_event.wait(), timeout=interval)
        except asyncio.TimeoutError:
            continue  # Timeout, re-send typing indicator
        break  # stop_event was set
|
||||||
Loading…
Add table
Reference in a new issue