Replace message chunking with upstream's proven implementation

- Switch from sentence-boundary splitting to upstream's simpler approach
- Uses max_len=4000 (safer buffer vs Telegram's 4096-character limit)
- Split priority: line breaks → spaces → hard cut
- Battle-tested implementation from HKUDS/nanobot upstream
- Simpler, more maintainable code
- Works better for both prose and code/logs

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-02-28 03:47:08 +00:00
parent 9a7596193f
commit 54255c89c4
2 changed files with 31 additions and 58 deletions
@@ -218,70 +218,43 @@ class TelegramChannel(BaseChannel):
            except Exception as e2:
                logger.error(f"Error sending Telegram message: {e2}")

+    @staticmethod
+    def _split_message(content: str, max_len: int = 4000) -> list[str]:
+        """Split content into chunks within max_len, preferring line breaks.
+
+        From upstream HKUDS/nanobot - battle-tested implementation.
+        Uses 4000 char limit (safer than 4096) with split priority: \n → space → hard cut.
+        """
+        if len(content) <= max_len:
+            return [content]
+        chunks: list[str] = []
+        while content:
+            if len(content) <= max_len:
+                chunks.append(content)
+                break
+            cut = content[:max_len]
+            pos = cut.rfind('\n')
+            if pos == -1:
+                pos = cut.rfind(' ')
+            if pos == -1:
+                pos = max_len
+            chunks.append(content[:pos])
+            content = content[pos:].lstrip()
+        return chunks
+
    async def _send_text_chunks(
        self,
        chat_id: int,
        text: str,
        parse_mode: str | None = "HTML"
    ) -> None:
-        """Split and send long messages at sentence boundaries.
+        """Split and send long messages.

        Telegram has a 4096 character limit per message.
-        Per design doc: split at sentence boundaries, send as multiple messages.
+        Uses upstream's proven implementation - splits at line breaks, then spaces.
        """
-        MAX_LENGTH = 4096
+        chunks = self._split_message(text)

-        if len(text) <= MAX_LENGTH:
-            # Single message
-            await self._app.bot.send_message(
-                chat_id=chat_id,
-                text=text,
-                parse_mode=parse_mode
-            )
-            return
-
-        # Split at sentence boundaries
-        import re
-        # Split on sentence endings: . ! ? followed by space/newline/end
-        sentences = re.split(r'([.!?]+(?:\s+|$))', text)
-
-        # Rejoin sentence with its punctuation
-        parts = []
-        for i in range(0, len(sentences) - 1, 2):
-            parts.append(sentences[i] + (sentences[i+1] if i+1 < len(sentences) else ''))
-        if len(sentences) % 2 == 1:  # Last part without punctuation
-            parts.append(sentences[-1])
-
-        # Group into chunks under MAX_LENGTH
-        chunks = []
-        current_chunk = ""
-
-        for part in parts:
-            # If single part exceeds limit, force split it
-            if len(part) > MAX_LENGTH:
-                if current_chunk:
-                    chunks.append(current_chunk)
-                    current_chunk = ""
-                # Hard split at MAX_LENGTH
-                for i in range(0, len(part), MAX_LENGTH):
-                    chunks.append(part[i:i + MAX_LENGTH])
-                continue
-
-            # Try adding part to current chunk
-            test_chunk = current_chunk + part
-            if len(test_chunk) > MAX_LENGTH:
-                # Save current chunk, start new one
-                if current_chunk:
-                    chunks.append(current_chunk)
-                current_chunk = part
-            else:
-                current_chunk = test_chunk
-
-        # Add final chunk
-        if current_chunk:
-            chunks.append(current_chunk)
-
-        # Send all chunks as separate messages
        for chunk in chunks:
            await self._app.bot.send_message(
                chat_id=chat_id,
@@ -92,8 +92,8 @@ async def test_long_message_splits_at_sentences():


@pytest.mark.asyncio
-async def test_message_at_exactly_4096_chars():
-    """Message at exactly 4096 chars should not chunk."""
+async def test_message_at_exactly_4000_chars():
+    """Message at exactly 4000 chars should not chunk (safer limit)."""
    config = MagicMock()
    config.token = "test-token"
    bus = MagicMock()
@@ -112,8 +112,8 @@ async def test_message_at_exactly_4096_chars():

    channel._app = MockApp()

-    # Exactly 4096 chars
-    content = "A" * 4096
+    # Exactly 4000 chars (upstream uses 4000 as safer limit vs 4096)
+    content = "A" * 4000

    msg = OutboundMessage(
        channel="telegram",