diff --git a/.gitignore b/.gitignore index 316e214..55338f7 100644 --- a/.gitignore +++ b/.gitignore @@ -17,3 +17,4 @@ docs/ __pycache__/ poetry.lock .pytest_cache/ +tests/ \ No newline at end of file diff --git a/README.md b/README.md index bde285d..502a42f 100644 --- a/README.md +++ b/README.md @@ -16,13 +16,15 @@ ⚑️ Delivers core agent functionality in just **~4,000** lines of code β€” **99% smaller** than Clawdbot's 430k+ lines. -πŸ“ Real-time line count: **3,428 lines** (run `bash core_agent_lines.sh` to verify anytime) +πŸ“ Real-time line count: **3,448 lines** (run `bash core_agent_lines.sh` to verify anytime) ## πŸ“’ News -- **2026-02-06** ✨ Added Moonshot/Kimi provider, Discord channel, and enhanced security hardening! +- **2026-02-08** πŸ”§ Refactored Providersβ€”adding a new LLM provider now takes just 2 simple steps! Check [here](#providers). +- **2026-02-07** πŸš€ Released v0.1.3.post5 with Qwen support & several key improvements! Check [here](https://github.com/HKUDS/nanobot/releases/tag/v0.1.3.post5) for details. +- **2026-02-06** ✨ Added Moonshot/Kimi provider, Discord integration, and enhanced security hardening! - **2026-02-05** ✨ Added Feishu channel, DeepSeek provider, and enhanced scheduled tasks support! -- **2026-02-04** πŸš€ Released v0.1.3.post4 with multi-provider & Docker support! Check [release notes](https://github.com/HKUDS/nanobot/releases/tag/v0.1.3.post4) for details. +- **2026-02-04** πŸš€ Released v0.1.3.post4 with multi-provider & Docker support! Check [here](https://github.com/HKUDS/nanobot/releases/tag/v0.1.3.post4) for details. - **2026-02-03** ⚑ Integrated vLLM for local LLM support and improved natural language task scheduling! - **2026-02-02** πŸŽ‰ nanobot officially launched! Welcome to try 🐈 nanobot! @@ -291,10 +293,6 @@ nanobot gateway Uses **WebSocket** long connection β€” no public IP required. -```bash -pip install nanobot-ai[feishu] -``` - **1. Create a Feishu bot** - Visit [Feishu Open Platform](https://open.feishu.cn/app) - Create a new app β†’ Enable **Bot** capability @@ -335,14 +333,54 @@ nanobot gateway +
+<details>
+<summary><b>DingTalk (ι’‰ι’‰)</b></summary>
+
+Uses **Stream Mode** β€” no public IP required.
+
+**1. Create a DingTalk bot**
+- Visit [DingTalk Open Platform](https://open-dev.dingtalk.com/)
+- Create a new app β†’ Add **Robot** capability
+- **Configuration**:
+  - Toggle **Stream Mode** ON
+- **Permissions**: Add necessary permissions for sending messages
+- Get **AppKey** (Client ID) and **AppSecret** (Client Secret) from "Credentials"
+- Publish the app
+
+**2. Configure**
+
+```json
+{
+  "channels": {
+    "dingtalk": {
+      "enabled": true,
+      "clientId": "YOUR_APP_KEY",
+      "clientSecret": "YOUR_APP_SECRET",
+      "allowFrom": []
+    }
+  }
+}
+```
+
+> `allowFrom`: Leave empty to allow all users, or add `["staffId"]` to restrict access.
+
+**3. Run**
+
+```bash
+nanobot gateway
+```
+
+</details>
+ ## βš™οΈ Configuration Config file: `~/.nanobot/config.json` ### Providers -> [!NOTE] -> Groq provides free voice transcription via Whisper. If configured, Telegram voice messages will be automatically transcribed. +> [!TIP] +> - **Groq** provides free voice transcription via Whisper. If configured, Telegram voice messages will be automatically transcribed. +> - **Zhipu Coding Plan**: If you're on Zhipu's coding plan, set `"apiBase": "https://open.bigmodel.cn/api/coding/paas/v4"` in your zhipu provider config. | Provider | Purpose | Get API Key | |----------|---------|-------------| @@ -352,7 +390,55 @@ Config file: `~/.nanobot/config.json` | `deepseek` | LLM (DeepSeek direct) | [platform.deepseek.com](https://platform.deepseek.com) | | `groq` | LLM + **Voice transcription** (Whisper) | [console.groq.com](https://console.groq.com) | | `gemini` | LLM (Gemini direct) | [aistudio.google.com](https://aistudio.google.com) | +| `aihubmix` | LLM (API gateway, access to all models) | [aihubmix.com](https://aihubmix.com) | | `dashscope` | LLM (Qwen) | [dashscope.console.aliyun.com](https://dashscope.console.aliyun.com) | +| `moonshot` | LLM (Moonshot/Kimi) | [platform.moonshot.cn](https://platform.moonshot.cn) | +| `zhipu` | LLM (Zhipu GLM) | [open.bigmodel.cn](https://open.bigmodel.cn) | +| `vllm` | LLM (local, any OpenAI-compatible server) | β€” | + +
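+For illustration, a coding-plan `zhipu` entry might look like the sketch below β€” the key is a placeholder, and the field names are assumed to follow the same camelCase (`apiKey`, `apiBase`) used in the tip above:
+
+```json
+{
+  "providers": {
+    "zhipu": {
+      "apiKey": "YOUR_ZHIPU_API_KEY",
+      "apiBase": "https://open.bigmodel.cn/api/coding/paas/v4"
+    }
+  }
+}
+```
+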
+<details>
+<summary><b>Adding a New Provider (Developer Guide)</b></summary>
+
+nanobot uses a **Provider Registry** (`nanobot/providers/registry.py`) as the single source of truth.
+Adding a new provider only takes **2 steps** β€” no if-elif chains to touch.
+
+**Step 1.** Add a `ProviderSpec` entry to `PROVIDERS` in `nanobot/providers/registry.py`:
+
+```python
+ProviderSpec(
+    name="myprovider",                   # config field name
+    keywords=("myprovider", "mymodel"),  # model-name keywords for auto-matching
+    env_key="MYPROVIDER_API_KEY",        # env var for LiteLLM
+    display_name="My Provider",          # shown in `nanobot status`
+    litellm_prefix="myprovider",         # auto-prefix: model β†’ myprovider/model
+    skip_prefixes=("myprovider/",),      # don't double-prefix
+)
+```
+
+**Step 2.** Add a field to `ProvidersConfig` in `nanobot/config/schema.py`:
+
+```python
+class ProvidersConfig(BaseModel):
+    ...
+    myprovider: ProviderConfig = ProviderConfig()
+```
+
+That's it! Environment variables, model prefixing, config matching, and `nanobot status` display will all work automatically.
+
+**Common `ProviderSpec` options:**
+
+| Field | Description | Example |
+|-------|-------------|---------|
+| `litellm_prefix` | Auto-prefix model names for LiteLLM | `"dashscope"` β†’ `dashscope/qwen-max` |
+| `skip_prefixes` | Don't prefix if model already starts with these | `("dashscope/", "openrouter/")` |
+| `env_extras` | Additional env vars to set | `(("ZHIPUAI_API_KEY", "{api_key}"),)` |
+| `model_overrides` | Per-model parameter overrides | `(("kimi-k2.5", {"temperature": 1.0}),)` |
+| `is_gateway` | Can route any model (like OpenRouter) | `True` |
+| `detect_by_key_prefix` | Detect gateway by API key prefix | `"sk-or-"` |
+| `detect_by_base_keyword` | Detect gateway by API base URL | `"openrouter"` |
+| `strip_model_prefix` | Strip existing prefix before re-prefixing | `True` (for AiHubMix) |
+
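+As a quick sanity check of what these two steps buy you, here is a minimal sketch of how the registry resolves a model name at runtime (`find_by_model` is the lookup helper that `litellm_provider.py` imports; the model string is just an example):
+
+```python
+from nanobot.providers.registry import find_by_model
+
+model = "qwen-max"
+spec = find_by_model(model)  # keyword "qwen" matches the dashscope spec
+if spec and spec.litellm_prefix and not any(
+    model.startswith(p) for p in spec.skip_prefixes
+):
+    model = f"{spec.litellm_prefix}/{model}"  # -> "dashscope/qwen-max"
+```
+
+</details>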
### Security @@ -373,11 +459,15 @@ Config file: `~/.nanobot/config.json` | `nanobot onboard` | Initialize config & workspace | | `nanobot agent -m "..."` | Chat with the agent | | `nanobot agent` | Interactive chat mode | +| `nanobot agent --no-markdown` | Show plain-text replies | +| `nanobot agent --logs` | Show runtime logs during chat | | `nanobot gateway` | Start the gateway | | `nanobot status` | Show status | | `nanobot channels login` | Link WhatsApp (scan QR) | | `nanobot channels status` | Show channel status | +Interactive mode exits: `exit`, `quit`, `/exit`, `/quit`, `:q`, or `Ctrl+D`. +
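+For example, to send a single message with plain-text output while streaming runtime logs:
+
+```bash
+nanobot agent -m "what's on my schedule today?" --no-markdown --logs
+```
+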
Scheduled Tasks (Cron) diff --git a/bridge/src/whatsapp.ts b/bridge/src/whatsapp.ts index a3a82fc..069d72b 100644 --- a/bridge/src/whatsapp.ts +++ b/bridge/src/whatsapp.ts @@ -20,6 +20,7 @@ const VERSION = '0.1.0'; export interface InboundMessage { id: string; sender: string; + pn: string; content: string; timestamp: number; isGroup: boolean; @@ -123,6 +124,7 @@ export class WhatsAppClient { this.options.onMessage({ id: msg.key.id || '', sender: msg.key.remoteJid || '', + pn: msg.key.remoteJidAlt || '', content, timestamp: msg.messageTimestamp as number, isGroup, diff --git a/nanobot/agent/context.py b/nanobot/agent/context.py index 3ea6c04..d807854 100644 --- a/nanobot/agent/context.py +++ b/nanobot/agent/context.py @@ -207,7 +207,8 @@ When remembering something, write to {workspace_path}/memory/MEMORY.md""" self, messages: list[dict[str, Any]], content: str | None, - tool_calls: list[dict[str, Any]] | None = None + tool_calls: list[dict[str, Any]] | None = None, + reasoning_content: str | None = None, ) -> list[dict[str, Any]]: """ Add an assistant message to the message list. @@ -216,6 +217,7 @@ When remembering something, write to {workspace_path}/memory/MEMORY.md""" messages: Current message list. content: Message content. tool_calls: Optional tool calls. + reasoning_content: Thinking output (Kimi, DeepSeek-R1, etc.). Returns: Updated message list. @@ -225,5 +227,9 @@ When remembering something, write to {workspace_path}/memory/MEMORY.md""" if tool_calls: msg["tool_calls"] = tool_calls + # Thinking models reject history without this + if reasoning_content: + msg["reasoning_content"] = reasoning_content + messages.append(msg) return messages diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py index e4193ec..72ea86a 100644 --- a/nanobot/agent/loop.py +++ b/nanobot/agent/loop.py @@ -45,6 +45,7 @@ class AgentLoop: exec_config: "ExecToolConfig | None" = None, cron_service: "CronService | None" = None, restrict_to_workspace: bool = False, + session_manager: SessionManager | None = None, ): from nanobot.config.schema import ExecToolConfig from nanobot.cron.service import CronService @@ -59,7 +60,7 @@ class AgentLoop: self.restrict_to_workspace = restrict_to_workspace self.context = ContextBuilder(workspace) - self.sessions = SessionManager(workspace) + self.sessions = session_manager or SessionManager(workspace) self.tools = ToolRegistry() self.subagents = SubagentManager( provider=provider, @@ -155,7 +156,8 @@ class AgentLoop: if msg.channel == "system": return await self._process_system_message(msg) - logger.info(f"Processing message from {msg.channel}:{msg.sender_id}") + preview = msg.content[:80] + "..." 
if len(msg.content) > 80 else msg.content + logger.info(f"Processing message from {msg.channel}:{msg.sender_id}: {preview}") # Get or create session session = self.sessions.get_or_create(msg.session_key) @@ -211,13 +213,14 @@ class AgentLoop: for tc in response.tool_calls ] messages = self.context.add_assistant_message( - messages, response.content, tool_call_dicts + messages, response.content, tool_call_dicts, + reasoning_content=response.reasoning_content, ) # Execute tools for tool_call in response.tool_calls: - args_str = json.dumps(tool_call.arguments) - logger.debug(f"Executing tool: {tool_call.name} with arguments: {args_str}") + args_str = json.dumps(tool_call.arguments, ensure_ascii=False) + logger.info(f"Tool call: {tool_call.name}({args_str[:200]})") result = await self.tools.execute(tool_call.name, tool_call.arguments) messages = self.context.add_tool_result( messages, tool_call.id, tool_call.name, result @@ -230,6 +233,10 @@ class AgentLoop: if final_content is None: final_content = "I've completed processing but have no response to give." + # Log response preview + preview = final_content[:120] + "..." if len(final_content) > 120 else final_content + logger.info(f"Response to {msg.channel}:{msg.sender_id}: {preview}") + # Save to session session.add_message("user", msg.content) session.add_message("assistant", final_content) @@ -311,12 +318,13 @@ class AgentLoop: for tc in response.tool_calls ] messages = self.context.add_assistant_message( - messages, response.content, tool_call_dicts + messages, response.content, tool_call_dicts, + reasoning_content=response.reasoning_content, ) for tool_call in response.tool_calls: - args_str = json.dumps(tool_call.arguments) - logger.debug(f"Executing tool: {tool_call.name} with arguments: {args_str}") + args_str = json.dumps(tool_call.arguments, ensure_ascii=False) + logger.info(f"Tool call: {tool_call.name}({args_str[:200]})") result = await self.tools.execute(tool_call.name, tool_call.arguments) messages = self.context.add_tool_result( messages, tool_call.id, tool_call.name, result diff --git a/nanobot/channels/dingtalk.py b/nanobot/channels/dingtalk.py new file mode 100644 index 0000000..72d3afd --- /dev/null +++ b/nanobot/channels/dingtalk.py @@ -0,0 +1,238 @@ +"""DingTalk/DingDing channel implementation using Stream Mode.""" + +import asyncio +import json +import time +from typing import Any + +from loguru import logger +import httpx + +from nanobot.bus.events import OutboundMessage +from nanobot.bus.queue import MessageBus +from nanobot.channels.base import BaseChannel +from nanobot.config.schema import DingTalkConfig + +try: + from dingtalk_stream import ( + DingTalkStreamClient, + Credential, + CallbackHandler, + CallbackMessage, + AckMessage, + ) + from dingtalk_stream.chatbot import ChatbotMessage + + DINGTALK_AVAILABLE = True +except ImportError: + DINGTALK_AVAILABLE = False + # Fallback so class definitions don't crash at module level + CallbackHandler = object # type: ignore[assignment,misc] + CallbackMessage = None # type: ignore[assignment,misc] + AckMessage = None # type: ignore[assignment,misc] + ChatbotMessage = None # type: ignore[assignment,misc] + + +class NanobotDingTalkHandler(CallbackHandler): + """ + Standard DingTalk Stream SDK Callback Handler. + Parses incoming messages and forwards them to the Nanobot channel. 
+ """ + + def __init__(self, channel: "DingTalkChannel"): + super().__init__() + self.channel = channel + + async def process(self, message: CallbackMessage): + """Process incoming stream message.""" + try: + # Parse using SDK's ChatbotMessage for robust handling + chatbot_msg = ChatbotMessage.from_dict(message.data) + + # Extract text content; fall back to raw dict if SDK object is empty + content = "" + if chatbot_msg.text: + content = chatbot_msg.text.content.strip() + if not content: + content = message.data.get("text", {}).get("content", "").strip() + + if not content: + logger.warning( + f"Received empty or unsupported message type: {chatbot_msg.message_type}" + ) + return AckMessage.STATUS_OK, "OK" + + sender_id = chatbot_msg.sender_staff_id or chatbot_msg.sender_id + sender_name = chatbot_msg.sender_nick or "Unknown" + + logger.info(f"Received DingTalk message from {sender_name} ({sender_id}): {content}") + + # Forward to Nanobot via _on_message (non-blocking). + # Store reference to prevent GC before task completes. + task = asyncio.create_task( + self.channel._on_message(content, sender_id, sender_name) + ) + self.channel._background_tasks.add(task) + task.add_done_callback(self.channel._background_tasks.discard) + + return AckMessage.STATUS_OK, "OK" + + except Exception as e: + logger.error(f"Error processing DingTalk message: {e}") + # Return OK to avoid retry loop from DingTalk server + return AckMessage.STATUS_OK, "Error" + + +class DingTalkChannel(BaseChannel): + """ + DingTalk channel using Stream Mode. + + Uses WebSocket to receive events via `dingtalk-stream` SDK. + Uses direct HTTP API to send messages (SDK is mainly for receiving). + + Note: Currently only supports private (1:1) chat. Group messages are + received but replies are sent back as private messages to the sender. + """ + + name = "dingtalk" + + def __init__(self, config: DingTalkConfig, bus: MessageBus): + super().__init__(config, bus) + self.config: DingTalkConfig = config + self._client: Any = None + self._http: httpx.AsyncClient | None = None + + # Access Token management for sending messages + self._access_token: str | None = None + self._token_expiry: float = 0 + + # Hold references to background tasks to prevent GC + self._background_tasks: set[asyncio.Task] = set() + + async def start(self) -> None: + """Start the DingTalk bot with Stream Mode.""" + try: + if not DINGTALK_AVAILABLE: + logger.error( + "DingTalk Stream SDK not installed. Run: pip install dingtalk-stream" + ) + return + + if not self.config.client_id or not self.config.client_secret: + logger.error("DingTalk client_id and client_secret not configured") + return + + self._running = True + self._http = httpx.AsyncClient() + + logger.info( + f"Initializing DingTalk Stream Client with Client ID: {self.config.client_id}..." 
+ ) + credential = Credential(self.config.client_id, self.config.client_secret) + self._client = DingTalkStreamClient(credential) + + # Register standard handler + handler = NanobotDingTalkHandler(self) + self._client.register_callback_handler(ChatbotMessage.TOPIC, handler) + + logger.info("DingTalk bot started with Stream Mode") + + # client.start() is an async infinite loop handling the websocket connection + await self._client.start() + + except Exception as e: + logger.exception(f"Failed to start DingTalk channel: {e}") + + async def stop(self) -> None: + """Stop the DingTalk bot.""" + self._running = False + # Close the shared HTTP client + if self._http: + await self._http.aclose() + self._http = None + # Cancel outstanding background tasks + for task in self._background_tasks: + task.cancel() + self._background_tasks.clear() + + async def _get_access_token(self) -> str | None: + """Get or refresh Access Token.""" + if self._access_token and time.time() < self._token_expiry: + return self._access_token + + url = "https://api.dingtalk.com/v1.0/oauth2/accessToken" + data = { + "appKey": self.config.client_id, + "appSecret": self.config.client_secret, + } + + if not self._http: + logger.warning("DingTalk HTTP client not initialized, cannot refresh token") + return None + + try: + resp = await self._http.post(url, json=data) + resp.raise_for_status() + res_data = resp.json() + self._access_token = res_data.get("accessToken") + # Expire 60s early to be safe + self._token_expiry = time.time() + int(res_data.get("expireIn", 7200)) - 60 + return self._access_token + except Exception as e: + logger.error(f"Failed to get DingTalk access token: {e}") + return None + + async def send(self, msg: OutboundMessage) -> None: + """Send a message through DingTalk.""" + token = await self._get_access_token() + if not token: + return + + # oToMessages/batchSend: sends to individual users (private chat) + # https://open.dingtalk.com/document/orgapp/robot-batch-send-messages + url = "https://api.dingtalk.com/v1.0/robot/oToMessages/batchSend" + + headers = {"x-acs-dingtalk-access-token": token} + + data = { + "robotCode": self.config.client_id, + "userIds": [msg.chat_id], # chat_id is the user's staffId + "msgKey": "sampleMarkdown", + "msgParam": json.dumps({ + "text": msg.content, + "title": "Nanobot Reply", + }), + } + + if not self._http: + logger.warning("DingTalk HTTP client not initialized, cannot send") + return + + try: + resp = await self._http.post(url, json=data, headers=headers) + if resp.status_code != 200: + logger.error(f"DingTalk send failed: {resp.text}") + else: + logger.debug(f"DingTalk message sent to {msg.chat_id}") + except Exception as e: + logger.error(f"Error sending DingTalk message: {e}") + + async def _on_message(self, content: str, sender_id: str, sender_name: str) -> None: + """Handle incoming message (called by NanobotDingTalkHandler). + + Delegates to BaseChannel._handle_message() which enforces allow_from + permission checks before publishing to the bus. 
+ """ + try: + logger.info(f"DingTalk inbound: {content} from {sender_name}") + await self._handle_message( + sender_id=sender_id, + chat_id=sender_id, # For private chat, chat_id == sender_id + content=str(content), + metadata={ + "sender_name": sender_name, + "platform": "dingtalk", + }, + ) + except Exception as e: + logger.error(f"Error publishing DingTalk message: {e}") diff --git a/nanobot/channels/feishu.py b/nanobot/channels/feishu.py index 01b808e..1c176a2 100644 --- a/nanobot/channels/feishu.py +++ b/nanobot/channels/feishu.py @@ -2,6 +2,7 @@ import asyncio import json +import re import threading from collections import OrderedDict from typing import Any @@ -156,6 +157,44 @@ class FeishuChannel(BaseChannel): loop = asyncio.get_running_loop() await loop.run_in_executor(None, self._add_reaction_sync, message_id, emoji_type) + # Regex to match markdown tables (header + separator + data rows) + _TABLE_RE = re.compile( + r"((?:^[ \t]*\|.+\|[ \t]*\n)(?:^[ \t]*\|[-:\s|]+\|[ \t]*\n)(?:^[ \t]*\|.+\|[ \t]*\n?)+)", + re.MULTILINE, + ) + + @staticmethod + def _parse_md_table(table_text: str) -> dict | None: + """Parse a markdown table into a Feishu table element.""" + lines = [l.strip() for l in table_text.strip().split("\n") if l.strip()] + if len(lines) < 3: + return None + split = lambda l: [c.strip() for c in l.strip("|").split("|")] + headers = split(lines[0]) + rows = [split(l) for l in lines[2:]] + columns = [{"tag": "column", "name": f"c{i}", "display_name": h, "width": "auto"} + for i, h in enumerate(headers)] + return { + "tag": "table", + "page_size": len(rows) + 1, + "columns": columns, + "rows": [{f"c{i}": r[i] if i < len(r) else "" for i in range(len(headers))} for r in rows], + } + + def _build_card_elements(self, content: str) -> list[dict]: + """Split content into markdown + table elements for Feishu card.""" + elements, last_end = [], 0 + for m in self._TABLE_RE.finditer(content): + before = content[last_end:m.start()].strip() + if before: + elements.append({"tag": "markdown", "content": before}) + elements.append(self._parse_md_table(m.group(1)) or {"tag": "markdown", "content": m.group(1)}) + last_end = m.end() + remaining = content[last_end:].strip() + if remaining: + elements.append({"tag": "markdown", "content": remaining}) + return elements or [{"tag": "markdown", "content": content}] + async def send(self, msg: OutboundMessage) -> None: """Send a message through Feishu.""" if not self._client: @@ -170,15 +209,20 @@ class FeishuChannel(BaseChannel): else: receive_id_type = "open_id" - # Build text message content - content = json.dumps({"text": msg.content}) + # Build card with markdown + table support + elements = self._build_card_elements(msg.content) + card = { + "config": {"wide_screen_mode": True}, + "elements": elements, + } + content = json.dumps(card, ensure_ascii=False) request = CreateMessageRequest.builder() \ .receive_id_type(receive_id_type) \ .request_body( CreateMessageRequestBody.builder() .receive_id(msg.chat_id) - .msg_type("text") + .msg_type("interactive") .content(content) .build() ).build() diff --git a/nanobot/channels/manager.py b/nanobot/channels/manager.py index 4a949c8..26fa9f3 100644 --- a/nanobot/channels/manager.py +++ b/nanobot/channels/manager.py @@ -1,7 +1,9 @@ """Channel manager for coordinating chat channels.""" +from __future__ import annotations + import asyncio -from typing import Any +from typing import Any, TYPE_CHECKING from loguru import logger @@ -10,6 +12,9 @@ from nanobot.bus.queue import MessageBus from 
nanobot.channels.base import BaseChannel from nanobot.config.schema import Config +if TYPE_CHECKING: + from nanobot.session.manager import SessionManager + class ChannelManager: """ @@ -21,9 +26,10 @@ class ChannelManager: - Route outbound messages """ - def __init__(self, config: Config, bus: MessageBus): + def __init__(self, config: Config, bus: MessageBus, session_manager: "SessionManager | None" = None): self.config = config self.bus = bus + self.session_manager = session_manager self.channels: dict[str, BaseChannel] = {} self._dispatch_task: asyncio.Task | None = None @@ -40,6 +46,7 @@ class ChannelManager: self.config.channels.telegram, self.bus, groq_api_key=self.config.providers.groq.api_key, + session_manager=self.session_manager, ) logger.info("Telegram channel enabled") except ImportError as e: @@ -78,6 +85,17 @@ class ChannelManager: except ImportError as e: logger.warning(f"Feishu channel not available: {e}") + # DingTalk channel + if self.config.channels.dingtalk.enabled: + try: + from nanobot.channels.dingtalk import DingTalkChannel + self.channels["dingtalk"] = DingTalkChannel( + self.config.channels.dingtalk, self.bus + ) + logger.info("DingTalk channel enabled") + except ImportError as e: + logger.warning(f"DingTalk channel not available: {e}") + # Email channel if self.config.channels.email.enabled: try: @@ -89,8 +107,15 @@ class ChannelManager: except ImportError as e: logger.warning(f"Email channel not available: {e}") + async def _start_channel(self, name: str, channel: BaseChannel) -> None: + """Start a channel and log any exceptions.""" + try: + await channel.start() + except Exception as e: + logger.error(f"Failed to start channel {name}: {e}") + async def start_all(self) -> None: - """Start WhatsApp channel and the outbound dispatcher.""" + """Start all channels and the outbound dispatcher.""" if not self.channels: logger.warning("No channels enabled") return @@ -98,11 +123,11 @@ class ChannelManager: # Start outbound dispatcher self._dispatch_task = asyncio.create_task(self._dispatch_outbound()) - # Start WhatsApp channel + # Start channels tasks = [] for name, channel in self.channels.items(): logger.info(f"Starting {name} channel...") - tasks.append(asyncio.create_task(channel.start())) + tasks.append(asyncio.create_task(self._start_channel(name, channel))) # Wait for all to complete (they should run forever) await asyncio.gather(*tasks, return_exceptions=True) diff --git a/nanobot/channels/telegram.py b/nanobot/channels/telegram.py index 23e1de0..ff46c86 100644 --- a/nanobot/channels/telegram.py +++ b/nanobot/channels/telegram.py @@ -1,17 +1,23 @@ """Telegram channel implementation using python-telegram-bot.""" +from __future__ import annotations + import asyncio import re +from typing import TYPE_CHECKING from loguru import logger -from telegram import Update -from telegram.ext import Application, MessageHandler, filters, ContextTypes +from telegram import BotCommand, Update +from telegram.ext import Application, CommandHandler, MessageHandler, filters, ContextTypes from nanobot.bus.events import OutboundMessage from nanobot.bus.queue import MessageBus from nanobot.channels.base import BaseChannel from nanobot.config.schema import TelegramConfig +if TYPE_CHECKING: + from nanobot.session.manager import SessionManager + def _markdown_to_telegram_html(text: str) -> str: """ @@ -85,12 +91,27 @@ class TelegramChannel(BaseChannel): name = "telegram" - def __init__(self, config: TelegramConfig, bus: MessageBus, groq_api_key: str = ""): + # Commands registered with 
Telegram's command menu + BOT_COMMANDS = [ + BotCommand("start", "Start the bot"), + BotCommand("reset", "Reset conversation history"), + BotCommand("help", "Show available commands"), + ] + + def __init__( + self, + config: TelegramConfig, + bus: MessageBus, + groq_api_key: str = "", + session_manager: SessionManager | None = None, + ): super().__init__(config, bus) self.config: TelegramConfig = config self.groq_api_key = groq_api_key + self.session_manager = session_manager self._app: Application | None = None self._chat_ids: dict[str, int] = {} # Map sender_id to chat_id for replies + self._typing_tasks: dict[str, asyncio.Task] = {} # chat_id -> typing loop task async def start(self) -> None: """Start the Telegram bot with long polling.""" @@ -101,11 +122,15 @@ class TelegramChannel(BaseChannel): self._running = True # Build the application - self._app = ( - Application.builder() - .token(self.config.token) - .build() - ) + builder = Application.builder().token(self.config.token) + if self.config.proxy: + builder = builder.proxy(self.config.proxy).get_updates_proxy(self.config.proxy) + self._app = builder.build() + + # Add command handlers + self._app.add_handler(CommandHandler("start", self._on_start)) + self._app.add_handler(CommandHandler("reset", self._on_reset)) + self._app.add_handler(CommandHandler("help", self._on_help)) # Add message handler for text, photos, voice, documents self._app.add_handler( @@ -116,20 +141,22 @@ class TelegramChannel(BaseChannel): ) ) - # Add /start command handler - from telegram.ext import CommandHandler - self._app.add_handler(CommandHandler("start", self._on_start)) - logger.info("Starting Telegram bot (polling mode)...") # Initialize and start polling await self._app.initialize() await self._app.start() - # Get bot info + # Get bot info and register command menu bot_info = await self._app.bot.get_me() logger.info(f"Telegram bot @{bot_info.username} connected") + try: + await self._app.bot.set_my_commands(self.BOT_COMMANDS) + logger.debug("Telegram bot commands registered") + except Exception as e: + logger.warning(f"Failed to register bot commands: {e}") + # Start polling (this runs until stopped) await self._app.updater.start_polling( allowed_updates=["message"], @@ -144,6 +171,10 @@ class TelegramChannel(BaseChannel): """Stop the Telegram bot.""" self._running = False + # Cancel all typing indicators + for chat_id in list(self._typing_tasks): + self._stop_typing(chat_id) + if self._app: logger.info("Stopping Telegram bot...") await self._app.updater.stop() @@ -157,6 +188,9 @@ class TelegramChannel(BaseChannel): logger.warning("Telegram bot not running") return + # Stop typing indicator for this chat + self._stop_typing(msg.chat_id) + try: # chat_id should be the Telegram chat ID (integer) chat_id = int(msg.chat_id) @@ -188,9 +222,45 @@ class TelegramChannel(BaseChannel): user = update.effective_user await update.message.reply_text( f"πŸ‘‹ Hi {user.first_name}! I'm nanobot.\n\n" - "Send me a message and I'll respond!" + "Send me a message and I'll respond!\n" + "Type /help to see available commands." 
) + async def _on_reset(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None: + """Handle /reset command β€” clear conversation history.""" + if not update.message or not update.effective_user: + return + + chat_id = str(update.message.chat_id) + session_key = f"{self.name}:{chat_id}" + + if self.session_manager is None: + logger.warning("/reset called but session_manager is not available") + await update.message.reply_text("⚠️ Session management is not available.") + return + + session = self.session_manager.get_or_create(session_key) + msg_count = len(session.messages) + session.clear() + self.session_manager.save(session) + + logger.info(f"Session reset for {session_key} (cleared {msg_count} messages)") + await update.message.reply_text("πŸ”„ Conversation history cleared. Let's start fresh!") + + async def _on_help(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None: + """Handle /help command β€” show available commands.""" + if not update.message: + return + + help_text = ( + "🐈 nanobot commands\n\n" + "/start β€” Start the bot\n" + "/reset β€” Reset conversation history\n" + "/help β€” Show this help message\n\n" + "Just send me a text message to chat!" + ) + await update.message.reply_text(help_text, parse_mode="HTML") + async def _on_message(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None: """Handle incoming messages (text, photos, voice, documents).""" if not update.message or not update.effective_user: @@ -273,10 +343,15 @@ class TelegramChannel(BaseChannel): logger.debug(f"Telegram message from {sender_id}: {content[:50]}...") + str_chat_id = str(chat_id) + + # Start typing indicator before processing + self._start_typing(str_chat_id) + # Forward to the message bus await self._handle_message( sender_id=sender_id, - chat_id=str(chat_id), + chat_id=str_chat_id, content=content, media=media_paths, metadata={ @@ -288,6 +363,29 @@ class TelegramChannel(BaseChannel): } ) + def _start_typing(self, chat_id: str) -> None: + """Start sending 'typing...' 
indicator for a chat."""
+        # Cancel any existing typing task for this chat
+        self._stop_typing(chat_id)
+        self._typing_tasks[chat_id] = asyncio.create_task(self._typing_loop(chat_id))
+
+    def _stop_typing(self, chat_id: str) -> None:
+        """Stop the typing indicator for a chat."""
+        task = self._typing_tasks.pop(chat_id, None)
+        if task and not task.done():
+            task.cancel()
+
+    async def _typing_loop(self, chat_id: str) -> None:
+        """Repeatedly send 'typing' action until cancelled."""
+        try:
+            while self._app:
+                await self._app.bot.send_chat_action(chat_id=int(chat_id), action="typing")
+                await asyncio.sleep(4)
+        except asyncio.CancelledError:
+            pass
+        except Exception as e:
+            logger.debug(f"Typing indicator stopped for {chat_id}: {e}")
+
     def _get_extension(self, media_type: str, mime_type: str | None) -> str:
         """Get file extension based on media type."""
         if mime_type:
diff --git a/nanobot/channels/whatsapp.py b/nanobot/channels/whatsapp.py
index c14a6c3..6e00e9d 100644
--- a/nanobot/channels/whatsapp.py
+++ b/nanobot/channels/whatsapp.py
@@ -100,21 +100,25 @@ class WhatsAppChannel(BaseChannel):
 
         if msg_type == "message":
             # Incoming message from WhatsApp
+            # "pn" is the deprecated phone-number-style JID, typically number@s.whatsapp.net
+            pn = data.get("pn", "")
+            # "sender" may be the newer LID-style JID, typically number@lid
             sender = data.get("sender", "")
             content = data.get("content", "")
 
-            # sender is typically: number@s.whatsapp.net
-            # Extract just the phone number as chat_id
-            chat_id = sender.split("@")[0] if "@" in sender else sender
+            # Prefer the phone number when present; extract the bare id as sender_id
+            user_id = pn if pn else sender
+            sender_id = user_id.split("@")[0] if "@" in user_id else user_id
+            logger.debug(f"WhatsApp sender JID: {sender}")
 
             # Handle voice transcription if it's a voice message
             if content == "[Voice Message]":
-                logger.info(f"Voice message received from {chat_id}, but direct download from bridge is not yet supported.")
+                logger.info(f"Voice message received from {sender_id}, but direct download from bridge is not yet supported.")
                 content = "[Voice Message: Transcription not available for WhatsApp yet]"
 
             await self._handle_message(
-                sender_id=chat_id,
-                chat_id=sender,  # Use full JID for replies
+                sender_id=sender_id,
+                chat_id=sender,  # Use the full JID/LID for replies
                 content=content,
                 metadata={
                     "message_id": data.get("id"),
diff --git a/nanobot/cli/commands.py b/nanobot/cli/commands.py
index bc2ea74..a1f426e 100644
--- a/nanobot/cli/commands.py
+++ b/nanobot/cli/commands.py
@@ -1,11 +1,19 @@
 """CLI commands for nanobot."""
 
 import asyncio
+import atexit
+import os
+import signal
 from pathlib import Path
+import select
+import sys
 
 import typer
 from rich.console import Console
+from rich.markdown import Markdown
+from rich.panel import Panel
 from rich.table import Table
+from rich.text import Text
 
 from nanobot import __version__, __logo__
 
@@ -16,6 +24,146 @@ app = typer.Typer(
 )
 console = Console()
 
+EXIT_COMMANDS = {"exit", "quit", "/exit", "/quit", ":q"}
+
+# ---------------------------------------------------------------------------
+# Lightweight CLI input: readline for arrow keys / history, termios for flush
+# ---------------------------------------------------------------------------
+
+_READLINE = None
+_HISTORY_FILE: Path | None = None
+_HISTORY_HOOK_REGISTERED = False
+_USING_LIBEDIT = False
+_SAVED_TERM_ATTRS = None  # original termios settings, restored on exit
+
+
+def _flush_pending_tty_input() -> None:
+    """Drop unread keypresses typed while the model was generating output."""
+    try:
+        fd = sys.stdin.fileno()
+        if not os.isatty(fd):
return + except Exception: + return + + try: + import termios + termios.tcflush(fd, termios.TCIFLUSH) + return + except Exception: + pass + + try: + while True: + ready, _, _ = select.select([fd], [], [], 0) + if not ready: + break + if not os.read(fd, 4096): + break + except Exception: + return + + +def _save_history() -> None: + if _READLINE is None or _HISTORY_FILE is None: + return + try: + _READLINE.write_history_file(str(_HISTORY_FILE)) + except Exception: + return + + +def _restore_terminal() -> None: + """Restore terminal to its original state (echo, line buffering, etc.).""" + if _SAVED_TERM_ATTRS is None: + return + try: + import termios + termios.tcsetattr(sys.stdin.fileno(), termios.TCSADRAIN, _SAVED_TERM_ATTRS) + except Exception: + pass + + +def _enable_line_editing() -> None: + """Enable readline for arrow keys, line editing, and persistent history.""" + global _READLINE, _HISTORY_FILE, _HISTORY_HOOK_REGISTERED, _USING_LIBEDIT, _SAVED_TERM_ATTRS + + # Save terminal state before readline touches it + try: + import termios + _SAVED_TERM_ATTRS = termios.tcgetattr(sys.stdin.fileno()) + except Exception: + pass + + history_file = Path.home() / ".nanobot" / "history" / "cli_history" + history_file.parent.mkdir(parents=True, exist_ok=True) + _HISTORY_FILE = history_file + + try: + import readline + except ImportError: + return + + _READLINE = readline + _USING_LIBEDIT = "libedit" in (readline.__doc__ or "").lower() + + try: + if _USING_LIBEDIT: + readline.parse_and_bind("bind ^I rl_complete") + else: + readline.parse_and_bind("tab: complete") + readline.parse_and_bind("set editing-mode emacs") + except Exception: + pass + + try: + readline.read_history_file(str(history_file)) + except Exception: + pass + + if not _HISTORY_HOOK_REGISTERED: + atexit.register(_save_history) + _HISTORY_HOOK_REGISTERED = True + + +def _prompt_text() -> str: + """Build a readline-friendly colored prompt.""" + if _READLINE is None: + return "You: " + # libedit on macOS does not honor GNU readline non-printing markers. + if _USING_LIBEDIT: + return "\033[1;34mYou:\033[0m " + return "\001\033[1;34m\002You:\001\033[0m\002 " + + +def _print_agent_response(response: str, render_markdown: bool) -> None: + """Render assistant response with consistent terminal styling.""" + content = response or "" + body = Markdown(content) if render_markdown else Text(content) + console.print() + console.print( + Panel( + body, + title=f"{__logo__} nanobot", + title_align="left", + border_style="cyan", + padding=(0, 1), + ) + ) + console.print() + + +def _is_exit_command(command: str) -> bool: + """Return True when input should end interactive chat.""" + return command.lower() in EXIT_COMMANDS + + +async def _read_interactive_input_async() -> str: + """Read user input with arrow keys and history (runs input() in a thread).""" + try: + return await asyncio.to_thread(input, _prompt_text()) + except EOFError as exc: + raise KeyboardInterrupt from exc def version_callback(value: bool): @@ -147,6 +295,24 @@ This file stores important information that should persist across sessions. console.print(" [dim]Created memory/MEMORY.md[/dim]") +def _make_provider(config): + """Create LiteLLMProvider from config. 
Exits if no API key found.""" + from nanobot.providers.litellm_provider import LiteLLMProvider + p = config.get_provider() + model = config.agents.defaults.model + if not (p and p.api_key) and not model.startswith("bedrock/"): + console.print("[red]Error: No API key configured.[/red]") + console.print("Set one in ~/.nanobot/config.json under providers section") + raise typer.Exit(1) + return LiteLLMProvider( + api_key=p.api_key if p else None, + api_base=config.get_api_base(), + default_model=model, + extra_headers=p.extra_headers if p else None, + provider_name=config.get_provider_name(), + ) + + # ============================================================================ # Gateway / Server # ============================================================================ @@ -160,9 +326,9 @@ def gateway( """Start the nanobot gateway.""" from nanobot.config.loader import load_config, get_data_dir from nanobot.bus.queue import MessageBus - from nanobot.providers.litellm_provider import LiteLLMProvider from nanobot.agent.loop import AgentLoop from nanobot.channels.manager import ChannelManager + from nanobot.session.manager import SessionManager from nanobot.cron.service import CronService from nanobot.cron.types import CronJob from nanobot.heartbeat.service import HeartbeatService @@ -174,26 +340,9 @@ def gateway( console.print(f"{__logo__} Starting nanobot gateway on port {port}...") config = load_config() - - # Create components bus = MessageBus() - - # Create provider (supports OpenRouter, Anthropic, OpenAI, Bedrock) - api_key = config.get_api_key() - api_base = config.get_api_base() - model = config.agents.defaults.model - is_bedrock = model.startswith("bedrock/") - - if not api_key and not is_bedrock: - console.print("[red]Error: No API key configured.[/red]") - console.print("Set one in ~/.nanobot/config.json under providers.openrouter.apiKey") - raise typer.Exit(1) - - provider = LiteLLMProvider( - api_key=api_key, - api_base=api_base, - default_model=config.agents.defaults.model - ) + provider = _make_provider(config) + session_manager = SessionManager(config.workspace_path) # Create cron service first (callback set after agent creation) cron_store_path = get_data_dir() / "cron" / "jobs.json" @@ -210,6 +359,7 @@ def gateway( exec_config=config.tools.exec, cron_service=cron, restrict_to_workspace=config.tools.restrict_to_workspace, + session_manager=session_manager, ) # Set cron callback (needs agent) @@ -244,7 +394,7 @@ def gateway( ) # Create channel manager - channels = ChannelManager(config, bus) + channels = ChannelManager(config, bus, session_manager=session_manager) if channels.enabled_channels: console.print(f"[green]βœ“[/green] Channels enabled: {', '.join(channels.enabled_channels)}") @@ -286,30 +436,24 @@ def gateway( def agent( message: str = typer.Option(None, "--message", "-m", help="Message to send to the agent"), session_id: str = typer.Option("cli:default", "--session", "-s", help="Session ID"), + markdown: bool = typer.Option(True, "--markdown/--no-markdown", help="Render assistant output as Markdown"), + logs: bool = typer.Option(False, "--logs/--no-logs", help="Show nanobot runtime logs during chat"), ): """Interact with the agent directly.""" from nanobot.config.loader import load_config from nanobot.bus.queue import MessageBus - from nanobot.providers.litellm_provider import LiteLLMProvider from nanobot.agent.loop import AgentLoop + from loguru import logger config = load_config() - api_key = config.get_api_key() - api_base = config.get_api_base() - model = 
config.agents.defaults.model - is_bedrock = model.startswith("bedrock/") - - if not api_key and not is_bedrock: - console.print("[red]Error: No API key configured.[/red]") - raise typer.Exit(1) - bus = MessageBus() - provider = LiteLLMProvider( - api_key=api_key, - api_base=api_base, - default_model=config.agents.defaults.model - ) + provider = _make_provider(config) + + if logs: + logger.enable("nanobot") + else: + logger.disable("nanobot") agent_loop = AgentLoop( bus=bus, @@ -320,27 +464,62 @@ def agent( restrict_to_workspace=config.tools.restrict_to_workspace, ) + # Show spinner when logs are off (no output to miss); skip when logs are on + def _thinking_ctx(): + if logs: + from contextlib import nullcontext + return nullcontext() + return console.status("[dim]nanobot is thinking...[/dim]", spinner="dots") + if message: # Single message mode async def run_once(): - response = await agent_loop.process_direct(message, session_id) - console.print(f"\n{__logo__} {response}") + with _thinking_ctx(): + response = await agent_loop.process_direct(message, session_id) + _print_agent_response(response, render_markdown=markdown) asyncio.run(run_once()) else: # Interactive mode - console.print(f"{__logo__} Interactive mode (Ctrl+C to exit)\n") + _enable_line_editing() + console.print(f"{__logo__} Interactive mode (type [bold]exit[/bold] or [bold]Ctrl+C[/bold] to quit)\n") + + # input() runs in a worker thread that can't be cancelled. + # Without this handler, asyncio.run() would hang waiting for it. + def _exit_on_sigint(signum, frame): + _save_history() + _restore_terminal() + console.print("\nGoodbye!") + os._exit(0) + + signal.signal(signal.SIGINT, _exit_on_sigint) async def run_interactive(): while True: try: - user_input = console.input("[bold blue]You:[/bold blue] ") - if not user_input.strip(): + _flush_pending_tty_input() + user_input = await _read_interactive_input_async() + command = user_input.strip() + if not command: continue + + if _is_exit_command(command): + _save_history() + _restore_terminal() + console.print("\nGoodbye!") + break - response = await agent_loop.process_direct(user_input, session_id) - console.print(f"\n{__logo__} {response}\n") + with _thinking_ctx(): + response = await agent_loop.process_direct(user_input, session_id) + _print_agent_response(response, render_markdown=markdown) except KeyboardInterrupt: + _save_history() + _restore_terminal() + console.print("\nGoodbye!") + break + except EOFError: + _save_history() + _restore_terminal() console.print("\nGoodbye!") break @@ -648,21 +827,24 @@ def status(): console.print(f"Workspace: {workspace} {'[green]βœ“[/green]' if workspace.exists() else '[red]βœ—[/red]'}") if config_path.exists(): + from nanobot.providers.registry import PROVIDERS + console.print(f"Model: {config.agents.defaults.model}") - # Check API keys - has_openrouter = bool(config.providers.openrouter.api_key) - has_anthropic = bool(config.providers.anthropic.api_key) - has_openai = bool(config.providers.openai.api_key) - has_gemini = bool(config.providers.gemini.api_key) - has_vllm = bool(config.providers.vllm.api_base) - - console.print(f"OpenRouter API: {'[green]βœ“[/green]' if has_openrouter else '[dim]not set[/dim]'}") - console.print(f"Anthropic API: {'[green]βœ“[/green]' if has_anthropic else '[dim]not set[/dim]'}") - console.print(f"OpenAI API: {'[green]βœ“[/green]' if has_openai else '[dim]not set[/dim]'}") - console.print(f"Gemini API: {'[green]βœ“[/green]' if has_gemini else '[dim]not set[/dim]'}") - vllm_status = f"[green]βœ“ 
{config.providers.vllm.api_base}[/green]" if has_vllm else "[dim]not set[/dim]" - console.print(f"vLLM/Local: {vllm_status}") + # Check API keys from registry + for spec in PROVIDERS: + p = getattr(config.providers, spec.name, None) + if p is None: + continue + if spec.is_local: + # Local deployments show api_base instead of api_key + if p.api_base: + console.print(f"{spec.label}: [green]βœ“ {p.api_base}[/green]") + else: + console.print(f"{spec.label}: [dim]not set[/dim]") + else: + has_key = bool(p.api_key) + console.print(f"{spec.label}: {'[green]βœ“[/green]' if has_key else '[dim]not set[/dim]'}") if __name__ == "__main__": diff --git a/nanobot/config/schema.py b/nanobot/config/schema.py index cc512da..aa9729b 100644 --- a/nanobot/config/schema.py +++ b/nanobot/config/schema.py @@ -30,6 +30,14 @@ class FeishuConfig(BaseModel): allow_from: list[str] = Field(default_factory=list) # Allowed user open_ids +class DingTalkConfig(BaseModel): + """DingTalk channel configuration using Stream mode.""" + enabled: bool = False + client_id: str = "" # AppKey + client_secret: str = "" # AppSecret + allow_from: list[str] = Field(default_factory=list) # Allowed staff_ids + + class DiscordConfig(BaseModel): """Discord channel configuration.""" enabled: bool = False @@ -75,6 +83,7 @@ class ChannelsConfig(BaseModel): telegram: TelegramConfig = Field(default_factory=TelegramConfig) discord: DiscordConfig = Field(default_factory=DiscordConfig) feishu: FeishuConfig = Field(default_factory=FeishuConfig) + dingtalk: DingTalkConfig = Field(default_factory=DingTalkConfig) email: EmailConfig = Field(default_factory=EmailConfig) @@ -96,6 +105,7 @@ class ProviderConfig(BaseModel): """LLM provider configuration.""" api_key: str = "" api_base: str | None = None + extra_headers: dict[str, str] | None = None # Custom headers (e.g. APP-Code for AiHubMix) class ProvidersConfig(BaseModel): @@ -110,6 +120,7 @@ class ProvidersConfig(BaseModel): vllm: ProviderConfig = Field(default_factory=ProviderConfig) gemini: ProviderConfig = Field(default_factory=ProviderConfig) moonshot: ProviderConfig = Field(default_factory=ProviderConfig) + aihubmix: ProviderConfig = Field(default_factory=ProviderConfig) # AiHubMix API gateway class GatewayConfig(BaseModel): @@ -154,60 +165,52 @@ class Config(BaseSettings): """Get expanded workspace path.""" return Path(self.agents.defaults.workspace).expanduser() - def _match_provider(self, model: str | None = None) -> ProviderConfig | None: - """Match a provider based on model name.""" - model = (model or self.agents.defaults.model).lower() - # Map of keywords to provider configs - providers = { - "openrouter": self.providers.openrouter, - "deepseek": self.providers.deepseek, - "anthropic": self.providers.anthropic, - "claude": self.providers.anthropic, - "openai": self.providers.openai, - "gpt": self.providers.openai, - "gemini": self.providers.gemini, - "zhipu": self.providers.zhipu, - "glm": self.providers.zhipu, - "zai": self.providers.zhipu, - "dashscope": self.providers.dashscope, - "qwen": self.providers.dashscope, - "groq": self.providers.groq, - "moonshot": self.providers.moonshot, - "kimi": self.providers.moonshot, - "vllm": self.providers.vllm, - } - for keyword, provider in providers.items(): - if keyword in model and provider.api_key: - return provider - return None + def _match_provider(self, model: str | None = None) -> tuple["ProviderConfig | None", str | None]: + """Match provider config and its registry name. 
Returns (config, spec_name).""" + from nanobot.providers.registry import PROVIDERS + model_lower = (model or self.agents.defaults.model).lower() + + # Match by keyword (order follows PROVIDERS registry) + for spec in PROVIDERS: + p = getattr(self.providers, spec.name, None) + if p and any(kw in model_lower for kw in spec.keywords) and p.api_key: + return p, spec.name + + # Fallback: gateways first, then others (follows registry order) + for spec in PROVIDERS: + p = getattr(self.providers, spec.name, None) + if p and p.api_key: + return p, spec.name + return None, None + + def get_provider(self, model: str | None = None) -> ProviderConfig | None: + """Get matched provider config (api_key, api_base, extra_headers). Falls back to first available.""" + p, _ = self._match_provider(model) + return p + + def get_provider_name(self, model: str | None = None) -> str | None: + """Get the registry name of the matched provider (e.g. "deepseek", "openrouter").""" + _, name = self._match_provider(model) + return name def get_api_key(self, model: str | None = None) -> str | None: - """Get API key for the given model (or default model). Falls back to first available key.""" - # Try matching by model name first - matched = self._match_provider(model) - if matched: - return matched.api_key - # Fallback: return first available key - for provider in [ - self.providers.openrouter, self.providers.deepseek, - self.providers.anthropic, self.providers.openai, - self.providers.gemini, self.providers.zhipu, - self.providers.dashscope, self.providers.moonshot, - self.providers.vllm, self.providers.groq, - ]: - if provider.api_key: - return provider.api_key - return None + """Get API key for the given model. Falls back to first available key.""" + p = self.get_provider(model) + return p.api_key if p else None def get_api_base(self, model: str | None = None) -> str | None: - """Get API base URL based on model name.""" - model = (model or self.agents.defaults.model).lower() - if "openrouter" in model: - return self.providers.openrouter.api_base or "https://openrouter.ai/api/v1" - if any(k in model for k in ("zhipu", "glm", "zai")): - return self.providers.zhipu.api_base - if "vllm" in model: - return self.providers.vllm.api_base + """Get API base URL for the given model. Applies default URLs for known gateways.""" + from nanobot.providers.registry import find_by_name + p, name = self._match_provider(model) + if p and p.api_base: + return p.api_base + # Only gateways get a default api_base here. Standard providers + # (like Moonshot) set their base URL via env vars in _setup_env + # to avoid polluting the global litellm.api_base. + if name: + spec = find_by_name(name) + if spec and spec.is_gateway and spec.default_api_base: + return spec.default_api_base return None class Config: diff --git a/nanobot/providers/base.py b/nanobot/providers/base.py index 08e44ac..c69c38b 100644 --- a/nanobot/providers/base.py +++ b/nanobot/providers/base.py @@ -20,6 +20,7 @@ class LLMResponse: tool_calls: list[ToolCallRequest] = field(default_factory=list) finish_reason: str = "stop" usage: dict[str, int] = field(default_factory=dict) + reasoning_content: str | None = None # Kimi, DeepSeek-R1 etc. 
@property def has_tool_calls(self) -> bool: diff --git a/nanobot/providers/litellm_provider.py b/nanobot/providers/litellm_provider.py index a15f05e..9d76c2a 100644 --- a/nanobot/providers/litellm_provider.py +++ b/nanobot/providers/litellm_provider.py @@ -1,5 +1,6 @@ """LiteLLM provider implementation for multi-provider support.""" +import json import os from typing import Any @@ -7,6 +8,7 @@ import litellm from litellm import acompletion from nanobot.providers.base import LLMProvider, LLMResponse, ToolCallRequest +from nanobot.providers.registry import find_by_model, find_gateway class LiteLLMProvider(LLMProvider): @@ -14,58 +16,88 @@ class LiteLLMProvider(LLMProvider): LLM provider using LiteLLM for multi-provider support. Supports OpenRouter, Anthropic, OpenAI, Gemini, and many other providers through - a unified interface. + a unified interface. Provider-specific logic is driven by the registry + (see providers/registry.py) β€” no if-elif chains needed here. """ def __init__( self, api_key: str | None = None, api_base: str | None = None, - default_model: str = "anthropic/claude-opus-4-5" + default_model: str = "anthropic/claude-opus-4-5", + extra_headers: dict[str, str] | None = None, + provider_name: str | None = None, ): super().__init__(api_key, api_base) self.default_model = default_model + self.extra_headers = extra_headers or {} - # Detect OpenRouter by api_key prefix or explicit api_base - self.is_openrouter = ( - (api_key and api_key.startswith("sk-or-")) or - (api_base and "openrouter" in api_base) - ) + # Detect gateway / local deployment. + # provider_name (from config key) is the primary signal; + # api_key / api_base are fallback for auto-detection. + self._gateway = find_gateway(provider_name, api_key, api_base) - # Track if using custom endpoint (vLLM, etc.) 
- self.is_vllm = bool(api_base) and not self.is_openrouter - - # Configure LiteLLM based on provider + # Configure environment variables if api_key: - if self.is_openrouter: - # OpenRouter mode - set key - os.environ["OPENROUTER_API_KEY"] = api_key - elif self.is_vllm: - # vLLM/custom endpoint - uses OpenAI-compatible API - os.environ["HOSTED_VLLM_API_KEY"] = api_key - elif "deepseek" in default_model: - os.environ.setdefault("DEEPSEEK_API_KEY", api_key) - elif "anthropic" in default_model: - os.environ.setdefault("ANTHROPIC_API_KEY", api_key) - elif "openai" in default_model or "gpt" in default_model: - os.environ.setdefault("OPENAI_API_KEY", api_key) - elif "gemini" in default_model.lower(): - os.environ.setdefault("GEMINI_API_KEY", api_key) - elif "zhipu" in default_model or "glm" in default_model or "zai" in default_model: - os.environ.setdefault("ZAI_API_KEY", api_key) - elif "dashscope" in default_model or "qwen" in default_model.lower(): - os.environ.setdefault("DASHSCOPE_API_KEY", api_key) - elif "groq" in default_model: - os.environ.setdefault("GROQ_API_KEY", api_key) - elif "moonshot" in default_model or "kimi" in default_model: - os.environ.setdefault("MOONSHOT_API_KEY", api_key) - os.environ.setdefault("MOONSHOT_API_BASE", api_base or "https://api.moonshot.cn/v1") + self._setup_env(api_key, api_base, default_model) if api_base: litellm.api_base = api_base # Disable LiteLLM logging noise litellm.suppress_debug_info = True + # Drop unsupported parameters for providers (e.g., gpt-5 rejects some params) + litellm.drop_params = True + + def _setup_env(self, api_key: str, api_base: str | None, model: str) -> None: + """Set environment variables based on detected provider.""" + spec = self._gateway or find_by_model(model) + if not spec: + return + + # Gateway/local overrides existing env; standard provider doesn't + if self._gateway: + os.environ[spec.env_key] = api_key + else: + os.environ.setdefault(spec.env_key, api_key) + + # Resolve env_extras placeholders: + # {api_key} β†’ user's API key + # {api_base} β†’ user's api_base, falling back to spec.default_api_base + effective_base = api_base or spec.default_api_base + for env_name, env_val in spec.env_extras: + resolved = env_val.replace("{api_key}", api_key) + resolved = resolved.replace("{api_base}", effective_base) + os.environ.setdefault(env_name, resolved) + + def _resolve_model(self, model: str) -> str: + """Resolve model name by applying provider/gateway prefixes.""" + if self._gateway: + # Gateway mode: apply gateway prefix, skip provider-specific prefixes + prefix = self._gateway.litellm_prefix + if self._gateway.strip_model_prefix: + model = model.split("/")[-1] + if prefix and not model.startswith(f"{prefix}/"): + model = f"{prefix}/{model}" + return model + + # Standard mode: auto-prefix for known providers + spec = find_by_model(model) + if spec and spec.litellm_prefix: + if not any(model.startswith(s) for s in spec.skip_prefixes): + model = f"{spec.litellm_prefix}/{model}" + + return model + + def _apply_model_overrides(self, model: str, kwargs: dict[str, Any]) -> None: + """Apply model-specific parameter overrides from the registry.""" + model_lower = model.lower() + spec = find_by_model(model) + if spec: + for pattern, overrides in spec.model_overrides: + if pattern in model_lower: + kwargs.update(overrides) + return async def chat( self, @@ -88,48 +120,8 @@ class LiteLLMProvider(LLMProvider): Returns: LLMResponse with content and/or tool calls. 
""" - model = model or self.default_model + model = self._resolve_model(model or self.default_model) - # For OpenRouter, prefix model name if not already prefixed - if self.is_openrouter and not model.startswith("openrouter/"): - model = f"openrouter/{model}" - - # For Zhipu/Z.ai, ensure prefix is present - # Handle cases like "glm-4.7-flash" -> "zai/glm-4.7-flash" - if ("glm" in model.lower() or "zhipu" in model.lower()) and not ( - model.startswith("zhipu/") or - model.startswith("zai/") or - model.startswith("openrouter/") - ): - model = f"zai/{model}" - - # For DashScope/Qwen, ensure dashscope/ prefix - if ("qwen" in model.lower() or "dashscope" in model.lower()) and not ( - model.startswith("dashscope/") or - model.startswith("openrouter/") - ): - model = f"dashscope/{model}" - - # For Moonshot/Kimi, ensure moonshot/ prefix (before vLLM check) - if ("moonshot" in model.lower() or "kimi" in model.lower()) and not ( - model.startswith("moonshot/") or model.startswith("openrouter/") - ): - model = f"moonshot/{model}" - - # For Gemini, ensure gemini/ prefix if not already present - if "gemini" in model.lower() and not model.startswith("gemini/"): - model = f"gemini/{model}" - - - # For vLLM, use hosted_vllm/ prefix per LiteLLM docs - # Convert openai/ prefix to hosted_vllm/ if user specified it - if self.is_vllm: - model = f"hosted_vllm/{model}" - - # kimi-k2.5 only supports temperature=1.0 - if "kimi-k2.5" in model.lower(): - temperature = 1.0 - kwargs: dict[str, Any] = { "model": model, "messages": messages, @@ -137,10 +129,17 @@ class LiteLLMProvider(LLMProvider): "temperature": temperature, } - # Pass api_base directly for custom endpoints (vLLM, etc.) + # Apply model-specific overrides (e.g. kimi-k2.5 temperature) + self._apply_model_overrides(model, kwargs) + + # Pass api_base for custom endpoints if self.api_base: kwargs["api_base"] = self.api_base + # Pass extra headers (e.g. APP-Code for AiHubMix) + if self.extra_headers: + kwargs["extra_headers"] = self.extra_headers + if tools: kwargs["tools"] = tools kwargs["tool_choice"] = "auto" @@ -166,7 +165,6 @@ class LiteLLMProvider(LLMProvider): # Parse arguments from JSON string if needed args = tc.function.arguments if isinstance(args, str): - import json try: args = json.loads(args) except json.JSONDecodeError: @@ -186,11 +184,14 @@ class LiteLLMProvider(LLMProvider): "total_tokens": response.usage.total_tokens, } + reasoning_content = getattr(message, "reasoning_content", None) + return LLMResponse( content=message.content, tool_calls=tool_calls, finish_reason=choice.finish_reason or "stop", usage=usage, + reasoning_content=reasoning_content, ) def get_default_model(self) -> str: diff --git a/nanobot/providers/registry.py b/nanobot/providers/registry.py new file mode 100644 index 0000000..57db4dd --- /dev/null +++ b/nanobot/providers/registry.py @@ -0,0 +1,340 @@ +""" +Provider Registry β€” single source of truth for LLM provider metadata. + +Adding a new provider: + 1. Add a ProviderSpec to PROVIDERS below. + 2. Add a field to ProvidersConfig in config/schema.py. + Done. Env vars, prefixing, config matching, status display all derive from here. + +Order matters β€” it controls match priority and fallback. Gateways first. +Every entry writes out all fields so you can copy-paste as a template. +""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any + + +@dataclass(frozen=True) +class ProviderSpec: + """One LLM provider's metadata. See PROVIDERS below for real examples. 
+
+
+@dataclass(frozen=True)
+class ProviderSpec:
+    """One LLM provider's metadata. See PROVIDERS below for real examples.
+
+    Placeholders in env_extras values:
+        {api_key}  — the user's API key
+        {api_base} — api_base from config, or this spec's default_api_base
+    """
+
+    # identity
+    name: str                                # config field name, e.g. "dashscope"
+    keywords: tuple[str, ...]                # model-name keywords for matching (lowercase)
+    env_key: str                             # LiteLLM env var, e.g. "DASHSCOPE_API_KEY"
+    display_name: str = ""                   # shown in `nanobot status`
+
+    # model prefixing
+    litellm_prefix: str = ""                 # "dashscope" → model becomes "dashscope/{model}"
+    skip_prefixes: tuple[str, ...] = ()      # don't prefix if model already starts with these
+
+    # extra env vars, e.g. (("ZHIPUAI_API_KEY", "{api_key}"),)
+    env_extras: tuple[tuple[str, str], ...] = ()
+
+    # gateway / local detection
+    is_gateway: bool = False                 # routes any model (OpenRouter, AiHubMix)
+    is_local: bool = False                   # local deployment (vLLM, Ollama)
+    detect_by_key_prefix: str = ""           # match api_key prefix, e.g. "sk-or-"
+    detect_by_base_keyword: str = ""         # match substring in api_base URL
+    default_api_base: str = ""               # fallback base URL
+
+    # gateway behavior
+    strip_model_prefix: bool = False         # strip "provider/" before re-prefixing
+
+    # per-model param overrides, e.g. (("kimi-k2.5", {"temperature": 1.0}),)
+    model_overrides: tuple[tuple[str, dict[str, Any]], ...] = ()
+
+    @property
+    def label(self) -> str:
+        return self.display_name or self.name.title()
+
+
+# ---------------------------------------------------------------------------
+# PROVIDERS — the registry. Order = priority. Copy any entry as a template.
+# ---------------------------------------------------------------------------
+
+PROVIDERS: tuple[ProviderSpec, ...] = (
+
+    # === Gateways (detected by api_key / api_base, not model name) =========
+    # Gateways can route any model, so they win in fallback.
+
+    # OpenRouter: global gateway, keys start with "sk-or-"
+    ProviderSpec(
+        name="openrouter",
+        keywords=("openrouter",),
+        env_key="OPENROUTER_API_KEY",
+        display_name="OpenRouter",
+        litellm_prefix="openrouter",         # claude-3 → openrouter/claude-3
+        skip_prefixes=(),
+        env_extras=(),
+        is_gateway=True,
+        is_local=False,
+        detect_by_key_prefix="sk-or-",
+        detect_by_base_keyword="openrouter",
+        default_api_base="https://openrouter.ai/api/v1",
+        strip_model_prefix=False,
+        model_overrides=(),
+    ),
+
+    # AiHubMix: global gateway, OpenAI-compatible interface.
+    # strip_model_prefix=True: it doesn't understand "anthropic/claude-3",
+    # so we strip to bare "claude-3" then re-prefix as "openai/claude-3".
+    ProviderSpec(
+        name="aihubmix",
+        keywords=("aihubmix",),
+        env_key="OPENAI_API_KEY",            # OpenAI-compatible
+        display_name="AiHubMix",
+        litellm_prefix="openai",             # → openai/{model}
+        skip_prefixes=(),
+        env_extras=(),
+        is_gateway=True,
+        is_local=False,
+        detect_by_key_prefix="",
+        detect_by_base_keyword="aihubmix",
+        default_api_base="https://aihubmix.com/v1",
+        strip_model_prefix=True,             # anthropic/claude-3 → claude-3 → openai/claude-3
+        model_overrides=(),
+    ),
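The AiHubMix entry is the one place where `strip_model_prefix` matters. A sketch of the gateway branch of `_resolve_model` applied to it, with an assumed input model name:

```python
# Sketch of gateway-mode resolution for AiHubMix (input model is illustrative).
from nanobot.providers.registry import find_by_name

spec = find_by_name("aihubmix")
model = "anthropic/claude-3"
if spec.strip_model_prefix:
    model = model.split("/")[-1]                      # anthropic/claude-3 → claude-3
if spec.litellm_prefix and not model.startswith(f"{spec.litellm_prefix}/"):
    model = f"{spec.litellm_prefix}/{model}"          # claude-3 → openai/claude-3
assert model == "openai/claude-3"
```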
+
+    # === Standard providers (matched by model-name keywords) ===============
+
+    # Anthropic: LiteLLM recognizes "claude-*" natively, no prefix needed.
+    ProviderSpec(
+        name="anthropic",
+        keywords=("anthropic", "claude"),
+        env_key="ANTHROPIC_API_KEY",
+        display_name="Anthropic",
+        litellm_prefix="",
+        skip_prefixes=(),
+        env_extras=(),
+        is_gateway=False,
+        is_local=False,
+        detect_by_key_prefix="",
+        detect_by_base_keyword="",
+        default_api_base="",
+        strip_model_prefix=False,
+        model_overrides=(),
+    ),
+
+    # OpenAI: LiteLLM recognizes "gpt-*" natively, no prefix needed.
+    ProviderSpec(
+        name="openai",
+        keywords=("openai", "gpt"),
+        env_key="OPENAI_API_KEY",
+        display_name="OpenAI",
+        litellm_prefix="",
+        skip_prefixes=(),
+        env_extras=(),
+        is_gateway=False,
+        is_local=False,
+        detect_by_key_prefix="",
+        detect_by_base_keyword="",
+        default_api_base="",
+        strip_model_prefix=False,
+        model_overrides=(),
+    ),
+
+    # DeepSeek: needs "deepseek/" prefix for LiteLLM routing.
+    ProviderSpec(
+        name="deepseek",
+        keywords=("deepseek",),
+        env_key="DEEPSEEK_API_KEY",
+        display_name="DeepSeek",
+        litellm_prefix="deepseek",           # deepseek-chat → deepseek/deepseek-chat
+        skip_prefixes=("deepseek/",),        # avoid double-prefix
+        env_extras=(),
+        is_gateway=False,
+        is_local=False,
+        detect_by_key_prefix="",
+        detect_by_base_keyword="",
+        default_api_base="",
+        strip_model_prefix=False,
+        model_overrides=(),
+    ),
+
+    # Gemini: needs "gemini/" prefix for LiteLLM.
+    ProviderSpec(
+        name="gemini",
+        keywords=("gemini",),
+        env_key="GEMINI_API_KEY",
+        display_name="Gemini",
+        litellm_prefix="gemini",             # gemini-pro → gemini/gemini-pro
+        skip_prefixes=("gemini/",),          # avoid double-prefix
+        env_extras=(),
+        is_gateway=False,
+        is_local=False,
+        detect_by_key_prefix="",
+        detect_by_base_keyword="",
+        default_api_base="",
+        strip_model_prefix=False,
+        model_overrides=(),
+    ),
+
+    # Zhipu: LiteLLM uses "zai/" prefix.
+    # Also mirrors key to ZHIPUAI_API_KEY (some LiteLLM paths check that).
+    # skip_prefixes: don't add "zai/" when already routed via gateway.
+    ProviderSpec(
+        name="zhipu",
+        keywords=("zhipu", "glm", "zai"),
+        env_key="ZAI_API_KEY",
+        display_name="Zhipu AI",
+        litellm_prefix="zai",                # glm-4 → zai/glm-4
+        skip_prefixes=("zhipu/", "zai/", "openrouter/", "hosted_vllm/"),
+        env_extras=(
+            ("ZHIPUAI_API_KEY", "{api_key}"),
+        ),
+        is_gateway=False,
+        is_local=False,
+        detect_by_key_prefix="",
+        detect_by_base_keyword="",
+        default_api_base="",
+        strip_model_prefix=False,
+        model_overrides=(),
+    ),
+
+    # DashScope: Qwen models, needs "dashscope/" prefix.
+    ProviderSpec(
+        name="dashscope",
+        keywords=("qwen", "dashscope"),
+        env_key="DASHSCOPE_API_KEY",
+        display_name="DashScope",
+        litellm_prefix="dashscope",          # qwen-max → dashscope/qwen-max
+        skip_prefixes=("dashscope/", "openrouter/"),
+        env_extras=(),
+        is_gateway=False,
+        is_local=False,
+        detect_by_key_prefix="",
+        detect_by_base_keyword="",
+        default_api_base="",
+        strip_model_prefix=False,
+        model_overrides=(),
+    ),
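For the Zhipu entry above, `env_extras` mirrors the key because some LiteLLM code paths read `ZHIPUAI_API_KEY` rather than `ZAI_API_KEY`. A simplified sketch of the resolution loop in `_setup_env` (the key value is a placeholder, and the `{api_base}` substitution is omitted since Zhipu's extras don't use it):

```python
# Simplified sketch of _setup_env's env_extras handling for Zhipu.
import os
from nanobot.providers.registry import find_by_name

spec = find_by_name("zhipu")
api_key = "example-key"                               # illustrative value
os.environ.setdefault(spec.env_key, api_key)          # ZAI_API_KEY
for env_name, template in spec.env_extras:
    os.environ.setdefault(env_name, template.replace("{api_key}", api_key))
# After this, ZHIPUAI_API_KEY mirrors ZAI_API_KEY (assuming neither was set).
```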
+
+    # Moonshot: Kimi models, needs "moonshot/" prefix.
+    # LiteLLM requires MOONSHOT_API_BASE env var to find the endpoint.
+    # Kimi K2.5 API enforces temperature >= 1.0.
+    ProviderSpec(
+        name="moonshot",
+        keywords=("moonshot", "kimi"),
+        env_key="MOONSHOT_API_KEY",
+        display_name="Moonshot",
+        litellm_prefix="moonshot",           # kimi-k2.5 → moonshot/kimi-k2.5
+        skip_prefixes=("moonshot/", "openrouter/"),
+        env_extras=(
+            ("MOONSHOT_API_BASE", "{api_base}"),
+        ),
+        is_gateway=False,
+        is_local=False,
+        detect_by_key_prefix="",
+        detect_by_base_keyword="",
+        default_api_base="https://api.moonshot.ai/v1",  # intl; use api.moonshot.cn for China
+        strip_model_prefix=False,
+        model_overrides=(
+            ("kimi-k2.5", {"temperature": 1.0}),
+        ),
+    ),
+
+    # === Local deployment (matched by config key, NOT by api_base) =========
+
+    # vLLM / any OpenAI-compatible local server.
+    # Detected when config key is "vllm" (provider_name="vllm").
+    ProviderSpec(
+        name="vllm",
+        keywords=("vllm",),
+        env_key="HOSTED_VLLM_API_KEY",
+        display_name="vLLM/Local",
+        litellm_prefix="hosted_vllm",        # Llama-3-8B → hosted_vllm/Llama-3-8B
+        skip_prefixes=(),
+        env_extras=(),
+        is_gateway=False,
+        is_local=True,
+        detect_by_key_prefix="",
+        detect_by_base_keyword="",
+        default_api_base="",                 # user must provide in config
+        strip_model_prefix=False,
+        model_overrides=(),
+    ),
+
+    # === Auxiliary (not a primary LLM provider) ============================
+
+    # Groq: mainly used for Whisper voice transcription, also usable for LLM.
+    # Needs "groq/" prefix for LiteLLM routing. Placed last — it rarely wins fallback.
+    ProviderSpec(
+        name="groq",
+        keywords=("groq",),
+        env_key="GROQ_API_KEY",
+        display_name="Groq",
+        litellm_prefix="groq",               # llama3-8b-8192 → groq/llama3-8b-8192
+        skip_prefixes=("groq/",),            # avoid double-prefix
+        env_extras=(),
+        is_gateway=False,
+        is_local=False,
+        detect_by_key_prefix="",
+        detect_by_base_keyword="",
+        default_api_base="",
+        strip_model_prefix=False,
+        model_overrides=(),
+    ),
+)
+
+
+# ---------------------------------------------------------------------------
+# Lookup helpers
+# ---------------------------------------------------------------------------
+
+def find_by_model(model: str) -> ProviderSpec | None:
+    """Match a standard provider by model-name keyword (case-insensitive).
+    Skips gateways/local — those are matched by api_key/api_base instead."""
+    model_lower = model.lower()
+    for spec in PROVIDERS:
+        if spec.is_gateway or spec.is_local:
+            continue
+        if any(kw in model_lower for kw in spec.keywords):
+            return spec
+    return None
+
+
+def find_gateway(
+    provider_name: str | None = None,
+    api_key: str | None = None,
+    api_base: str | None = None,
+) -> ProviderSpec | None:
+    """Detect gateway/local provider.
+
+    Priority:
+      1. provider_name — if it maps to a gateway/local spec, use it directly.
+      2. api_key prefix — e.g. "sk-or-" → OpenRouter.
+      3. api_base keyword — e.g. "aihubmix" in URL → AiHubMix.
+
+    A standard provider with a custom api_base (e.g. DeepSeek behind a proxy)
+    will NOT be mistaken for vLLM — the old fallback is gone.
+    """
+    # 1. Direct match by config key
+    if provider_name:
+        spec = find_by_name(provider_name)
+        if spec and (spec.is_gateway or spec.is_local):
+            return spec
+
+    # 2. Auto-detect by api_key prefix / api_base keyword
+    for spec in PROVIDERS:
+        if spec.detect_by_key_prefix and api_key and api_key.startswith(spec.detect_by_key_prefix):
+            return spec
+        if spec.detect_by_base_keyword and api_base and spec.detect_by_base_keyword in api_base:
+            return spec
+
+    return None
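A few illustrative calls against `find_gateway`'s documented priority; the key and base values here are assumptions, not real credentials:

```python
# Illustrative: find_gateway's three detection paths, in priority order.
from nanobot.providers.registry import find_gateway

assert find_gateway(provider_name="vllm").name == "vllm"                    # 1. config key
assert find_gateway(api_key="sk-or-v1-abc").name == "openrouter"            # 2. key prefix
assert find_gateway(api_base="https://aihubmix.com/v1").name == "aihubmix"  # 3. base keyword
assert find_gateway(api_base="https://api.deepseek.com") is None            # no vLLM fallback
```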
"dashscope".""" + for spec in PROVIDERS: + if spec.name == name: + return spec + return None diff --git a/pyproject.toml b/pyproject.toml index 2a952a1..6fda084 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "nanobot-ai" -version = "0.1.3.post4" +version = "0.1.3.post5" description = "A lightweight personal AI assistant framework" requires-python = ">=3.11" license = {text = "MIT"} @@ -23,13 +23,15 @@ dependencies = [ "pydantic-settings>=2.0.0", "websockets>=12.0", "websocket-client>=1.6.0", - "httpx>=0.25.0", + "httpx[socks]>=0.25.0", "loguru>=0.7.0", "readability-lxml>=0.8.0", "rich>=13.0.0", "croniter>=2.0.0", - "python-telegram-bot>=21.0", + "dingtalk-stream>=0.4.0", + "python-telegram-bot[socks]>=21.0", "lark-oapi>=1.0.0", + "socksio>=1.0.0", ] [project.optional-dependencies]