Files
nanobot/tests/test_telegram_chunking.py
code-server 54255c89c4
Build Nanobot OAuth / build (push) Successful in 5m52s
Build Nanobot OAuth / cleanup (push) Successful in 1s
Replace message chunking with upstream's proven implementation
- Switch from sentence-boundary splitting to upstream's simpler approach
- Uses max_len=4000 (safer buffer vs 4096 limit)
- Split priority: line breaks → spaces → hard cut
- Battle-tested implementation from HKUDS/nanobot upstream
- Simpler, more maintainable code
- Works better for both prose and code/logs

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-02-28 23:43:15 +00:00

171 lines
4.3 KiB
Python

"""Tests for Telegram message chunking.
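Per the original design doc, messages >4096 chars were to split at sentence
boundaries. The chunker now follows the upstream approach instead: it uses a
4000-char limit (a safety buffer under the 4096 cap) and splits at line
breaks first, then spaces, then falls back to a hard cut.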
"""
import pytest
from unittest.mock import AsyncMock, MagicMock
from nanobot.bus.events import OutboundMessage
from nanobot.channels.telegram import TelegramChannel
@pytest.mark.asyncio
async def test_short_message_not_chunked():
    """A message well below the chunking limit is delivered in one piece."""
    delivered = []

    class RecordingBot:
        """Bot stand-in that records each outgoing message in ``delivered``."""

        async def send_message(self, chat_id, text, parse_mode=None):
            delivered.append({"chat_id": chat_id, "text": text})

    class FakeApp:
        """Minimal application object exposing only the ``bot`` attribute."""

        bot = RecordingBot()

    settings = MagicMock()
    settings.token = "test-token"
    channel = TelegramChannel(settings, MagicMock())
    # Swap in the fake app so send() talks to the recording bot.
    channel._app = FakeApp()

    await channel.send(
        OutboundMessage(
            channel="telegram",
            chat_id="123",
            content="Short message.",
        )
    )

    # Exactly one message goes out, and it carries the text unchanged.
    assert [entry["text"] for entry in delivered] == ["Short message."]
@pytest.mark.asyncio
async def test_long_message_splits_at_sentences():
    """A 5000-char message goes out as several chunks of at most 4096 chars.

    The chunks, concatenated in send order, must equal the original text
    once spaces are ignored (the comparison tolerates whitespace trimming
    at the cut points).
    """
    delivered = []

    class RecordingBot:
        """Bot stand-in that records each outgoing message in ``delivered``."""

        async def send_message(self, chat_id, text, parse_mode=None):
            delivered.append({"chat_id": chat_id, "text": text})

    class FakeApp:
        """Minimal application object exposing only the ``bot`` attribute."""

        bot = RecordingBot()

    settings = MagicMock()
    settings.token = "test-token"
    channel = TelegramChannel(settings, MagicMock())
    channel._app = FakeApp()

    # One 200-char "sentence" (195 filler chars + "end. "); 25 repetitions
    # give 5000 chars, comfortably past the 4096 limit.
    one_sentence = "A" * 195 + "end. "
    payload = one_sentence * 25

    await channel.send(
        OutboundMessage(channel="telegram", chat_id="123", content=payload)
    )

    chunks = [entry["text"] for entry in delivered]

    # The message must have been split ...
    assert len(chunks) > 1
    # ... into pieces that each respect the 4096-char ceiling ...
    assert all(len(chunk) <= 4096 for chunk in chunks)
    # ... and nothing but spaces may be lost when the pieces are rejoined.
    assert "".join(chunks).replace(" ", "") == payload.replace(" ", "")
@pytest.mark.asyncio
async def test_message_at_exactly_4000_chars():
    """A message of exactly 4000 chars sits on the limit and is sent whole."""
    delivered = []

    class RecordingBot:
        """Bot stand-in that records each outgoing message in ``delivered``."""

        async def send_message(self, chat_id, text, parse_mode=None):
            delivered.append({"chat_id": chat_id, "text": text})

    class FakeApp:
        """Minimal application object exposing only the ``bot`` attribute."""

        bot = RecordingBot()

    settings = MagicMock()
    settings.token = "test-token"
    channel = TelegramChannel(settings, MagicMock())
    channel._app = FakeApp()

    # Boundary case: 4000 is the safer limit used in place of 4096.
    boundary_text = "A" * 4000

    await channel.send(
        OutboundMessage(channel="telegram", chat_id="123", content=boundary_text)
    )

    # On the limit means no chunking: a single outgoing message.
    assert len(delivered) == 1
@pytest.mark.asyncio
async def test_message_preserves_sentence_boundaries():
    """Chunks should split at sentence endings, not mid-sentence.

    Sends 4017 chars of short, space-separated sentences and checks that
    every non-blank chunk ends with sentence punctuation. The test also
    requires that at least one non-blank chunk was actually delivered, so it
    cannot pass vacuously when nothing is sent.
    """
    config = MagicMock()
    config.token = "test-token"
    bus = MagicMock()
    channel = TelegramChannel(config, bus)

    # Record outgoing messages instead of talking to Telegram.
    sent_messages = []

    class MockBot:
        async def send_message(self, chat_id, text, parse_mode=None):
            sent_messages.append({"chat_id": chat_id, "text": text})

    class MockApp:
        bot = MockBot()

    channel._app = MockApp()

    # "First sentence. " is 16 chars, so part1 is exactly 4000 chars; adding
    # the 17-char part2 brings the total to 4017.
    part1 = "First sentence. " * 250
    part2 = "Second sentence. "
    content = part1 + part2

    msg = OutboundMessage(
        channel="telegram",
        chat_id="123",
        content=content,
    )
    await channel.send(msg)

    # Without this guard the loop below asserts nothing when no chunk (or
    # only blank chunks) was sent, and the test would pass vacuously.
    assert any(sent["text"].strip() for sent in sent_messages)

    # Verify chunks don't break mid-sentence.
    for sent in sent_messages:
        text = sent["text"].strip()
        # Each non-blank chunk should end with sentence punctuation.
        if text:
            assert text[-1] in ".!?"