Replace message chunking with upstream's proven implementation
Build Nanobot OAuth / build (push) Successful in 5m52s
Build Nanobot OAuth / cleanup (push) Successful in 1s

- Switch from sentence-boundary splitting to upstream's simpler approach
- Uses max_len=4000 (safer buffer vs 4096 limit)
- Split priority: line breaks → spaces → hard cut
- Battle-tested implementation from HKUDS/nanobot upstream
- Simpler, more maintainable code
- Works better for both prose and code/logs

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-02-28 03:47:08 +00:00
parent 9a7596193f
commit 54255c89c4
2 changed files with 31 additions and 58 deletions
+27 -54
View File
@@ -218,70 +218,43 @@ class TelegramChannel(BaseChannel):
except Exception as e2:
logger.error(f"Error sending Telegram message: {e2}")
@staticmethod
def _split_message(content: str, max_len: int = 4000) -> list[str]:
"""Split content into chunks within max_len, preferring line breaks.
From upstream HKUDS/nanobot - battle-tested implementation.
Uses 4000 char limit (safer than 4096) with split priority: \n → space → hard cut.
"""
if len(content) <= max_len:
return [content]
chunks: list[str] = []
while content:
if len(content) <= max_len:
chunks.append(content)
break
cut = content[:max_len]
pos = cut.rfind('\n')
if pos == -1:
pos = cut.rfind(' ')
if pos == -1:
pos = max_len
chunks.append(content[:pos])
content = content[pos:].lstrip()
return chunks
async def _send_text_chunks(
self,
chat_id: int,
text: str,
parse_mode: str | None = "HTML"
) -> None:
"""Split and send long messages at sentence boundaries.
"""Split and send long messages.
Telegram has a 4096 character limit per message.
Per design doc: split at sentence boundaries, send as multiple messages.
Uses upstream's proven implementation - splits at line breaks, then spaces.
"""
MAX_LENGTH = 4096
chunks = self._split_message(text)
if len(text) <= MAX_LENGTH:
# Single message
await self._app.bot.send_message(
chat_id=chat_id,
text=text,
parse_mode=parse_mode
)
return
# Split at sentence boundaries
import re
# Split on sentence endings: . ! ? followed by space/newline/end
sentences = re.split(r'([.!?]+(?:\s+|$))', text)
# Rejoin sentence with its punctuation
parts = []
for i in range(0, len(sentences) - 1, 2):
parts.append(sentences[i] + (sentences[i+1] if i+1 < len(sentences) else ''))
if len(sentences) % 2 == 1: # Last part without punctuation
parts.append(sentences[-1])
# Group into chunks under MAX_LENGTH
chunks = []
current_chunk = ""
for part in parts:
# If single part exceeds limit, force split it
if len(part) > MAX_LENGTH:
if current_chunk:
chunks.append(current_chunk)
current_chunk = ""
# Hard split at MAX_LENGTH
for i in range(0, len(part), MAX_LENGTH):
chunks.append(part[i:i + MAX_LENGTH])
continue
# Try adding part to current chunk
test_chunk = current_chunk + part
if len(test_chunk) > MAX_LENGTH:
# Save current chunk, start new one
if current_chunk:
chunks.append(current_chunk)
current_chunk = part
else:
current_chunk = test_chunk
# Add final chunk
if current_chunk:
chunks.append(current_chunk)
# Send all chunks as separate messages
for chunk in chunks:
await self._app.bot.send_message(
chat_id=chat_id,
+4 -4
View File
@@ -92,8 +92,8 @@ async def test_long_message_splits_at_sentences():
@pytest.mark.asyncio
async def test_message_at_exactly_4096_chars():
"""Message at exactly 4096 chars should not chunk."""
async def test_message_at_exactly_4000_chars():
"""Message at exactly 4000 chars should not chunk (safer limit)."""
config = MagicMock()
config.token = "test-token"
bus = MagicMock()
@@ -112,8 +112,8 @@ async def test_message_at_exactly_4096_chars():
channel._app = MockApp()
# Exactly 4096 chars
content = "A" * 4096
# Exactly 4000 chars (upstream uses 4000 as safer limit vs 4096)
content = "A" * 4000
msg = OutboundMessage(
channel="telegram",