From 2dd284661d04328d95c7225d6c51c50280fe107e Mon Sep 17 00:00:00 2001 From: Manus AI Date: Sun, 1 Feb 2026 14:36:15 -0500 Subject: [PATCH 01/24] feat: add Zhipu API support and set glm-4.7-flash as default model --- nanobot/cli/commands.py | 4 +++- nanobot/config/schema.py | 10 +++++++--- nanobot/providers/litellm_provider.py | 13 ++++++++++++- 3 files changed, 22 insertions(+), 5 deletions(-) diff --git a/nanobot/cli/commands.py b/nanobot/cli/commands.py index 05513f0..79837ab 100644 --- a/nanobot/cli/commands.py +++ b/nanobot/cli/commands.py @@ -68,7 +68,7 @@ def onboard(): console.print(f"\n{__logo__} nanobot is ready!") console.print("\nNext steps:") console.print(" 1. Add your API key to [cyan]~/.nanobot/config.json[/cyan]") - console.print(" Get one at: https://openrouter.ai/keys") + console.print(" Get one at: https://openrouter.ai/keys or https://bigmodel.cn/ (Zhipu AI)") console.print(" 2. Chat: [cyan]nanobot agent -m \"Hello!\"[/cyan]") console.print("\n[dim]Want Telegram/WhatsApp? See: https://github.com/HKUDS/nanobot#-chat-apps[/dim]") @@ -624,10 +624,12 @@ def status(): has_openrouter = bool(config.providers.openrouter.api_key) has_anthropic = bool(config.providers.anthropic.api_key) has_openai = bool(config.providers.openai.api_key) + has_zhipu = bool(config.providers.zhipu.api_key) console.print(f"OpenRouter API: {'[green]✓[/green]' if has_openrouter else '[dim]not set[/dim]'}") console.print(f"Anthropic API: {'[green]✓[/green]' if has_anthropic else '[dim]not set[/dim]'}") console.print(f"OpenAI API: {'[green]✓[/green]' if has_openai else '[dim]not set[/dim]'}") + console.print(f"Zhipu AI API: {'[green]✓[/green]' if has_zhipu else '[dim]not set[/dim]'}") if __name__ == "__main__": diff --git a/nanobot/config/schema.py b/nanobot/config/schema.py index 06c36e6..58089a4 100644 --- a/nanobot/config/schema.py +++ b/nanobot/config/schema.py @@ -28,7 +28,7 @@ class ChannelsConfig(BaseModel): class AgentDefaults(BaseModel): """Default agent configuration.""" workspace: str = "~/.nanobot/workspace" - model: str = "anthropic/claude-opus-4-5" + model: str = "glm-4.7-flash" max_tokens: int = 8192 temperature: float = 0.7 max_tool_iterations: int = 20 @@ -50,6 +50,7 @@ class ProvidersConfig(BaseModel): anthropic: ProviderConfig = Field(default_factory=ProviderConfig) openai: ProviderConfig = Field(default_factory=ProviderConfig) openrouter: ProviderConfig = Field(default_factory=ProviderConfig) + zhipu: ProviderConfig = Field(default_factory=ProviderConfig) class GatewayConfig(BaseModel): @@ -88,18 +89,21 @@ class Config(BaseSettings): return Path(self.agents.defaults.workspace).expanduser() def get_api_key(self) -> str | None: - """Get API key in priority order: OpenRouter > Anthropic > OpenAI.""" + """Get API key in priority order: OpenRouter > Anthropic > OpenAI > Zhipu.""" return ( self.providers.openrouter.api_key or self.providers.anthropic.api_key or self.providers.openai.api_key or + self.providers.zhipu.api_key or None ) def get_api_base(self) -> str | None: - """Get API base URL if using OpenRouter.""" + """Get API base URL if using OpenRouter or Zhipu.""" if self.providers.openrouter.api_key: return self.providers.openrouter.api_base or "https://openrouter.ai/api/v1" + if self.providers.zhipu.api_key: + return self.providers.zhipu.api_base return None class Config: diff --git a/nanobot/providers/litellm_provider.py b/nanobot/providers/litellm_provider.py index c84aa74..c71e776 100644 --- a/nanobot/providers/litellm_provider.py +++ b/nanobot/providers/litellm_provider.py @@ -21,7 +21,7 @@ class LiteLLMProvider(LLMProvider): self, api_key: str | None = None, api_base: str | None = None, - default_model: str = "anthropic/claude-opus-4-5" + default_model: str = "glm-4.7-flash" ): super().__init__(api_key, api_base) self.default_model = default_model @@ -41,6 +41,8 @@ class LiteLLMProvider(LLMProvider): os.environ.setdefault("ANTHROPIC_API_KEY", api_key) elif "openai" in default_model or "gpt" in default_model: os.environ.setdefault("OPENAI_API_KEY", api_key) + elif "zhipu" in default_model or "glm" in default_model or "zai" in default_model: + os.environ.setdefault("ZHIPUAI_API_KEY", api_key) if api_base: litellm.api_base = api_base @@ -75,6 +77,15 @@ class LiteLLMProvider(LLMProvider): if self.is_openrouter and not model.startswith("openrouter/"): model = f"openrouter/{model}" + # For Zhipu/Z.ai, ensure prefix is present + # Handle cases like "glm-4.7-flash" -> "zhipu/glm-4.7-flash" + if ("glm" in model.lower() or "zhipu" in model.lower()) and not ( + model.startswith("zhipu/") or + model.startswith("zai/") or + model.startswith("openrouter/") + ): + model = f"zhipu/{model}" + kwargs: dict[str, Any] = { "model": model, "messages": messages, From ab45185ed893d483605bc79f2c8143a388161855 Mon Sep 17 00:00:00 2001 From: Anunay Aatipamula Date: Mon, 2 Feb 2026 11:21:41 +0530 Subject: [PATCH 02/24] feat: add Gemini provider support - Update configuration schema to include Gemini provider - Modify API key retrieval priority to include Gemini - Enhance CLI status command to display Gemini API status - Update LiteLLMProvider to support Gemini integration --- nanobot/cli/commands.py | 2 ++ nanobot/config/schema.py | 4 +++- nanobot/providers/litellm_provider.py | 8 +++++++- 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/nanobot/cli/commands.py b/nanobot/cli/commands.py index 1c04d17..6e37aec 100644 --- a/nanobot/cli/commands.py +++ b/nanobot/cli/commands.py @@ -624,11 +624,13 @@ def status(): has_openrouter = bool(config.providers.openrouter.api_key) has_anthropic = bool(config.providers.anthropic.api_key) has_openai = bool(config.providers.openai.api_key) + has_gemini = bool(config.providers.gemini.api_key) has_vllm = bool(config.providers.vllm.api_base) console.print(f"OpenRouter API: {'[green]✓[/green]' if has_openrouter else '[dim]not set[/dim]'}") console.print(f"Anthropic API: {'[green]✓[/green]' if has_anthropic else '[dim]not set[/dim]'}") console.print(f"OpenAI API: {'[green]✓[/green]' if has_openai else '[dim]not set[/dim]'}") + console.print(f"Gemini API: {'[green]✓[/green]' if has_gemini else '[dim]not set[/dim]'}") vllm_status = f"[green]✓ {config.providers.vllm.api_base}[/green]" if has_vllm else "[dim]not set[/dim]" console.print(f"vLLM/Local: {vllm_status}") diff --git a/nanobot/config/schema.py b/nanobot/config/schema.py index 6414cf9..0db887e 100644 --- a/nanobot/config/schema.py +++ b/nanobot/config/schema.py @@ -51,6 +51,7 @@ class ProvidersConfig(BaseModel): openai: ProviderConfig = Field(default_factory=ProviderConfig) openrouter: ProviderConfig = Field(default_factory=ProviderConfig) vllm: ProviderConfig = Field(default_factory=ProviderConfig) + gemini: ProviderConfig = Field(default_factory=ProviderConfig) class GatewayConfig(BaseModel): @@ -89,11 +90,12 @@ class Config(BaseSettings): return Path(self.agents.defaults.workspace).expanduser() def get_api_key(self) -> str | None: - """Get API key in priority order: OpenRouter > Anthropic > OpenAI > vLLM.""" + """Get API key in priority order: OpenRouter > Anthropic > OpenAI > Gemini > vLLM.""" return ( self.providers.openrouter.api_key or self.providers.anthropic.api_key or self.providers.openai.api_key or + self.providers.gemini.api_key or self.providers.vllm.api_key or None ) diff --git a/nanobot/providers/litellm_provider.py b/nanobot/providers/litellm_provider.py index 4e7305b..4502c8f 100644 --- a/nanobot/providers/litellm_provider.py +++ b/nanobot/providers/litellm_provider.py @@ -13,7 +13,7 @@ class LiteLLMProvider(LLMProvider): """ LLM provider using LiteLLM for multi-provider support. - Supports OpenRouter, Anthropic, OpenAI, and many other providers through + Supports OpenRouter, Anthropic, OpenAI, Gemini, and many other providers through a unified interface. """ @@ -47,6 +47,8 @@ class LiteLLMProvider(LLMProvider): os.environ.setdefault("ANTHROPIC_API_KEY", api_key) elif "openai" in default_model or "gpt" in default_model: os.environ.setdefault("OPENAI_API_KEY", api_key) + elif "gemini" in default_model.lower(): + os.environ.setdefault("GEMINI_API_KEY", api_key) if api_base: litellm.api_base = api_base @@ -86,6 +88,10 @@ class LiteLLMProvider(LLMProvider): if self.is_vllm: model = f"hosted_vllm/{model}" + # For Gemini, ensure gemini/ prefix if not already present + if "gemini" in model.lower() and not model.startswith("gemini/"): + model = f"gemini/{model}" + kwargs: dict[str, Any] = { "model": model, "messages": messages, From f4b081b83f28014c07a63b3a1b39e5d3f75398fc Mon Sep 17 00:00:00 2001 From: Yitong Li Date: Mon, 2 Feb 2026 15:32:12 +0800 Subject: [PATCH 03/24] feat: add vision support for image recognition in Telegram --- nanobot/agent/context.py | 70 ++++++++++++++++++++++++++++++++++------ nanobot/agent/loop.py | 3 +- 2 files changed, 63 insertions(+), 10 deletions(-) diff --git a/nanobot/agent/context.py b/nanobot/agent/context.py index aaba890..32585f5 100644 --- a/nanobot/agent/context.py +++ b/nanobot/agent/context.py @@ -1,8 +1,12 @@ """Context builder for assembling agent prompts.""" +import base64 +import mimetypes from pathlib import Path from typing import Any +from loguru import logger + from nanobot.agent.memory import MemoryStore from nanobot.agent.skills import SkillsLoader @@ -114,32 +118,80 @@ When remembering something, write to {workspace_path}/memory/MEMORY.md""" self, history: list[dict[str, Any]], current_message: str, - skill_names: list[str] | None = None + skill_names: list[str] | None = None, + media: list[str] | None = None, ) -> list[dict[str, Any]]: """ Build the complete message list for an LLM call. - + Args: history: Previous conversation messages. current_message: The new user message. skill_names: Optional skills to include. - + media: Optional list of local file paths for images/media. + Returns: List of messages including system prompt. """ messages = [] - + # System prompt system_prompt = self.build_system_prompt(skill_names) messages.append({"role": "system", "content": system_prompt}) - + # History messages.extend(history) - - # Current message - messages.append({"role": "user", "content": current_message}) - + + # Current message (with optional image attachments) + user_content = self._build_user_content(current_message, media) + messages.append({"role": "user", "content": user_content}) + return messages + + def _build_user_content( + self, text: str, media: list[str] | None + ) -> str | list[dict[str, Any]]: + """ + Build user message content, optionally with images. + + Returns a plain string if no media, or a multimodal content list + with base64-encoded images. + """ + if not media: + return text + + content: list[dict[str, Any]] = [] + + for path in media: + encoded = self._encode_image(path) + if encoded: + content.append(encoded) + + if not content: + return text + + content.append({"type": "text", "text": text}) + return content + + @staticmethod + def _encode_image(file_path: str) -> dict[str, Any] | None: + """Encode a local image file to a base64 data URL for the LLM.""" + path = Path(file_path) + if not path.is_file(): + logger.warning(f"Media file not found: {file_path}") + return None + + mime, _ = mimetypes.guess_type(file_path) + if not mime or not mime.startswith("image/"): + logger.warning(f"Unsupported media type for {file_path}: {mime}") + return None + + data = path.read_bytes() + b64 = base64.b64encode(data).decode("utf-8") + return { + "type": "image_url", + "image_url": {"url": f"data:{mime};base64,{b64}"}, + } def add_tool_result( self, diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py index 6fe2cfd..3925a44 100644 --- a/nanobot/agent/loop.py +++ b/nanobot/agent/loop.py @@ -152,7 +152,8 @@ class AgentLoop: # Build initial messages (use get_history for LLM-formatted messages) messages = self.context.build_messages( history=session.get_history(), - current_message=msg.content + current_message=msg.content, + media=msg.media if msg.media else None, ) # Agent loop From b7be3a6f9a016f3741261fd14bcdad2b7a7c1640 Mon Sep 17 00:00:00 2001 From: Peter van Eijk Date: Mon, 2 Feb 2026 15:26:17 +0700 Subject: [PATCH 04/24] Add uv as install method --- README.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/README.md b/README.md index 1d69635..cd55335 100644 --- a/README.md +++ b/README.md @@ -74,6 +74,14 @@ cd nanobot pip install -e . ``` +**Install with uv** + +```bash +uv venv +source .venv/bin/activate +uv pip install nanobot-ai +``` + ## 🚀 Quick Start > [!TIP] From 20227f1f04ea195ec681477f03905f4954cc9f9a Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 2 Feb 2026 08:55:21 +0000 Subject: [PATCH 05/24] feat: add Dockerfile with uv-based installation Add a Dockerfile using the official uv Python image (python3.12-bookworm-slim) for fast dependency installation. Includes Node.js 20 for the WhatsApp bridge, dependency layer caching, and exposes the gateway port (18790). https://claude.ai/code/session_011C1h1NERqqZp4ht3Pqpwkc --- .dockerignore | 13 +++++++++++++ Dockerfile | 40 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+) create mode 100644 .dockerignore create mode 100644 Dockerfile diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..020b9ec --- /dev/null +++ b/.dockerignore @@ -0,0 +1,13 @@ +__pycache__ +*.pyc +*.pyo +*.pyd +*.egg-info +dist/ +build/ +.git +.env +.assets +node_modules/ +bridge/dist/ +workspace/ diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..21a502a --- /dev/null +++ b/Dockerfile @@ -0,0 +1,40 @@ +FROM ghcr.io/astral-sh/uv:python3.12-bookworm-slim + +# Install Node.js 20 for the WhatsApp bridge +RUN apt-get update && \ + apt-get install -y --no-install-recommends curl ca-certificates gnupg && \ + mkdir -p /etc/apt/keyrings && \ + curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg && \ + echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_20.x nodistro main" > /etc/apt/sources.list.d/nodesource.list && \ + apt-get update && \ + apt-get install -y --no-install-recommends nodejs && \ + apt-get purge -y gnupg && \ + apt-get autoremove -y && \ + rm -rf /var/lib/apt/lists/* + +WORKDIR /app + +# Install Python dependencies first (cached layer) +COPY pyproject.toml README.md LICENSE ./ +RUN mkdir -p nanobot && touch nanobot/__init__.py && \ + uv pip install --system --no-cache . && \ + rm -rf nanobot + +# Copy the full source and install +COPY nanobot/ nanobot/ +COPY bridge/ bridge/ +RUN uv pip install --system --no-cache . + +# Build the WhatsApp bridge +WORKDIR /app/bridge +RUN npm install && npm run build +WORKDIR /app + +# Create config directory +RUN mkdir -p /root/.nanobot + +# Gateway default port +EXPOSE 18790 + +ENTRYPOINT ["nanobot"] +CMD ["gateway"] From 6df4a56586d5411389f985d63b1b4b17b6d5960b Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 2 Feb 2026 08:59:57 +0000 Subject: [PATCH 06/24] test: add script to verify Dockerfile builds and nanobot status works Builds the image, runs onboard + status inside the container, and validates that the expected output fields (Config, Workspace, Model, API keys) are present. https://claude.ai/code/session_011C1h1NERqqZp4ht3Pqpwkc --- test_docker.sh | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100755 test_docker.sh diff --git a/test_docker.sh b/test_docker.sh new file mode 100755 index 0000000..a90e080 --- /dev/null +++ b/test_docker.sh @@ -0,0 +1,55 @@ +#!/usr/bin/env bash +set -euo pipefail + +IMAGE_NAME="nanobot-test" + +echo "=== Building Docker image ===" +docker build -t "$IMAGE_NAME" . + +echo "" +echo "=== Running 'nanobot onboard' ===" +docker run --name nanobot-test-run "$IMAGE_NAME" onboard + +echo "" +echo "=== Running 'nanobot status' ===" +STATUS_OUTPUT=$(docker commit nanobot-test-run nanobot-test-onboarded > /dev/null && \ + docker run --rm nanobot-test-onboarded status 2>&1) || true + +echo "$STATUS_OUTPUT" + +echo "" +echo "=== Validating output ===" +PASS=true + +check() { + if echo "$STATUS_OUTPUT" | grep -q "$1"; then + echo " PASS: found '$1'" + else + echo " FAIL: missing '$1'" + PASS=false + fi +} + +check "nanobot Status" +check "Config:" +check "Workspace:" +check "Model:" +check "OpenRouter API:" +check "Anthropic API:" +check "OpenAI API:" + +echo "" +if $PASS; then + echo "=== All checks passed ===" +else + echo "=== Some checks FAILED ===" + exit 1 +fi + +# Cleanup +echo "" +echo "=== Cleanup ===" +docker rm -f nanobot-test-run 2>/dev/null || true +docker rmi -f nanobot-test-onboarded 2>/dev/null || true +docker rmi -f "$IMAGE_NAME" 2>/dev/null || true +echo "Done." From fa25856d8cefda9f0e8559d71006f72bdcb614ed Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 2 Feb 2026 09:07:23 +0000 Subject: [PATCH 07/24] fix: create stub bridge/ dir in dependency caching layer Hatchling's force-include requires bridge/ to exist at build time. The dependency caching step now stubs both nanobot/ and bridge/. https://claude.ai/code/session_011C1h1NERqqZp4ht3Pqpwkc --- Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 21a502a..5244f1a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -16,9 +16,9 @@ WORKDIR /app # Install Python dependencies first (cached layer) COPY pyproject.toml README.md LICENSE ./ -RUN mkdir -p nanobot && touch nanobot/__init__.py && \ +RUN mkdir -p nanobot bridge && touch nanobot/__init__.py && \ uv pip install --system --no-cache . && \ - rm -rf nanobot + rm -rf nanobot bridge # Copy the full source and install COPY nanobot/ nanobot/ From f7e8e73c5414c1e1bdb600da56ec1bc168b3b65b Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 2 Feb 2026 09:17:24 +0000 Subject: [PATCH 08/24] fix: add git to Dockerfile for npm bridge dependency install A bridge npm dependency requires git to be present at install time. https://claude.ai/code/session_011C1h1NERqqZp4ht3Pqpwkc --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 5244f1a..4287944 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,7 +2,7 @@ FROM ghcr.io/astral-sh/uv:python3.12-bookworm-slim # Install Node.js 20 for the WhatsApp bridge RUN apt-get update && \ - apt-get install -y --no-install-recommends curl ca-certificates gnupg && \ + apt-get install -y --no-install-recommends curl ca-certificates gnupg git && \ mkdir -p /etc/apt/keyrings && \ curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg && \ echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_20.x nodistro main" > /etc/apt/sources.list.d/nodesource.list && \ From 7fced16e4c73b08e2cb632542735de9ec63fd6d0 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 2 Feb 2026 09:22:13 +0000 Subject: [PATCH 09/24] docs: add Docker build/run instructions to README https://claude.ai/code/session_011C1h1NERqqZp4ht3Pqpwkc --- README.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/README.md b/README.md index 167ae22..71d425b 100644 --- a/README.md +++ b/README.md @@ -116,6 +116,16 @@ nanobot agent -m "What is 2+2?" That's it! You have a working AI assistant in 2 minutes. +## 🐳 Docker + +```bash +docker build -t nanobot . +docker run --rm nanobot onboard +docker run -v ~/.nanobot:/root/.nanobot -p 18790:18790 nanobot +``` + +Mount `~/.nanobot` so your config and workspace persist across runs. Edit `~/.nanobot/config.json` on the host to add API keys, then restart the container. + ## 🖥️ Local Models (vLLM) Run nanobot with your own local models using vLLM or any OpenAI-compatible server. From f61e7a596870a131a6a9a9721cd8849af876ef9b Mon Sep 17 00:00:00 2001 From: Manus Date: Mon, 2 Feb 2026 04:24:14 -0500 Subject: [PATCH 10/24] feat: improve tool execution logging (fix #10) --- nanobot/agent/loop.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py index 6fe2cfd..1d2b070 100644 --- a/nanobot/agent/loop.py +++ b/nanobot/agent/loop.py @@ -189,7 +189,8 @@ class AgentLoop: # Execute tools for tool_call in response.tool_calls: - logger.debug(f"Executing tool: {tool_call.name}") + args_str = json.dumps(tool_call.arguments) + logger.debug(f"Executing tool: {tool_call.name} with arguments: {args_str}") result = await self.tools.execute(tool_call.name, tool_call.arguments) messages = self.context.add_tool_result( messages, tool_call.id, tool_call.name, result @@ -281,7 +282,8 @@ class AgentLoop: ) for tool_call in response.tool_calls: - logger.debug(f"Executing tool: {tool_call.name}") + args_str = json.dumps(tool_call.arguments) + logger.debug(f"Executing tool: {tool_call.name} with arguments: {args_str}") result = await self.tools.execute(tool_call.name, tool_call.arguments) messages = self.context.add_tool_result( messages, tool_call.id, tool_call.name, result From eaf494ea31fc84b860efb3e2f0d96c5d6a1e940e Mon Sep 17 00:00:00 2001 From: Manus AI Date: Mon, 2 Feb 2026 04:30:15 -0500 Subject: [PATCH 11/24] docs: add uv installation instructions (fixes #5) --- README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README.md b/README.md index 167ae22..ab1f947 100644 --- a/README.md +++ b/README.md @@ -60,6 +60,12 @@ ## 📦 Install +**Install with [uv](https://github.com/astral-sh/uv)** (recommended for speed) + +```bash +uv tool install nanobot-ai +``` + **Install from PyPi** ```bash From 42f62c0c1aeb4bae98949b670bd698bc40e62c11 Mon Sep 17 00:00:00 2001 From: Manus AI Date: Mon, 2 Feb 2026 04:33:26 -0500 Subject: [PATCH 12/24] feat: add voice transcription support with groq (fixes #13) --- README.md | 1 + bridge/src/whatsapp.ts | 5 +++ nanobot/channels/manager.py | 4 ++ nanobot/channels/telegram.py | 20 ++++++++- nanobot/channels/whatsapp.py | 5 +++ nanobot/config/schema.py | 4 +- nanobot/providers/litellm_provider.py | 2 + nanobot/providers/transcription.py | 65 +++++++++++++++++++++++++++ 8 files changed, 104 insertions(+), 2 deletions(-) create mode 100644 nanobot/providers/transcription.py diff --git a/README.md b/README.md index ab1f947..ec73b51 100644 --- a/README.md +++ b/README.md @@ -329,6 +329,7 @@ nanobot/ ## 🗺️ Roadmap +- [x] **Voice Transcription** — Support for Groq Whisper (Issue #13) - [ ] **Multi-modal** — See and hear (images, voice, video) - [ ] **Long-term memory** — Never forget important context - [ ] **Better reasoning** — Multi-step planning and reflection diff --git a/bridge/src/whatsapp.ts b/bridge/src/whatsapp.ts index 4185632..a3a82fc 100644 --- a/bridge/src/whatsapp.ts +++ b/bridge/src/whatsapp.ts @@ -160,6 +160,11 @@ export class WhatsAppClient { return `[Document] ${message.documentMessage.caption}`; } + // Voice/Audio message + if (message.audioMessage) { + return `[Voice Message]`; + } + return null; } diff --git a/nanobot/channels/manager.py b/nanobot/channels/manager.py index 04abf5f..c32aa3d 100644 --- a/nanobot/channels/manager.py +++ b/nanobot/channels/manager.py @@ -36,6 +36,8 @@ class ChannelManager: if self.config.channels.telegram.enabled: try: from nanobot.channels.telegram import TelegramChannel + # Inject parent config for access to providers + self.config.channels.telegram.parent = self.config self.channels["telegram"] = TelegramChannel( self.config.channels.telegram, self.bus ) @@ -47,6 +49,8 @@ class ChannelManager: if self.config.channels.whatsapp.enabled: try: from nanobot.channels.whatsapp import WhatsAppChannel + # Inject parent config for access to providers + self.config.channels.whatsapp.parent = self.config self.channels["whatsapp"] = WhatsAppChannel( self.config.channels.whatsapp, self.bus ) diff --git a/nanobot/channels/telegram.py b/nanobot/channels/telegram.py index 840c250..dc2f77c 100644 --- a/nanobot/channels/telegram.py +++ b/nanobot/channels/telegram.py @@ -247,7 +247,25 @@ class TelegramChannel(BaseChannel): await file.download_to_drive(str(file_path)) media_paths.append(str(file_path)) - content_parts.append(f"[{media_type}: {file_path}]") + + # Handle voice transcription + if media_type == "voice" or media_type == "audio": + from nanobot.providers.transcription import GroqTranscriptionProvider + # Try to get Groq API key from config + groq_key = None + if hasattr(self.config, 'parent') and hasattr(self.config.parent, 'providers'): + groq_key = self.config.parent.providers.groq.api_key + + transcriber = GroqTranscriptionProvider(api_key=groq_key) + transcription = await transcriber.transcribe(file_path) + if transcription: + logger.info(f"Transcribed {media_type}: {transcription[:50]}...") + content_parts.append(f"[transcription: {transcription}]") + else: + content_parts.append(f"[{media_type}: {file_path}]") + else: + content_parts.append(f"[{media_type}: {file_path}]") + logger.debug(f"Downloaded {media_type} to {file_path}") except Exception as e: logger.error(f"Failed to download media: {e}") diff --git a/nanobot/channels/whatsapp.py b/nanobot/channels/whatsapp.py index efbd3e1..c14a6c3 100644 --- a/nanobot/channels/whatsapp.py +++ b/nanobot/channels/whatsapp.py @@ -107,6 +107,11 @@ class WhatsAppChannel(BaseChannel): # Extract just the phone number as chat_id chat_id = sender.split("@")[0] if "@" in sender else sender + # Handle voice transcription if it's a voice message + if content == "[Voice Message]": + logger.info(f"Voice message received from {chat_id}, but direct download from bridge is not yet supported.") + content = "[Voice Message: Transcription not available for WhatsApp yet]" + await self._handle_message( sender_id=chat_id, chat_id=sender, # Use full JID for replies diff --git a/nanobot/config/schema.py b/nanobot/config/schema.py index e30fbb2..ee245f1 100644 --- a/nanobot/config/schema.py +++ b/nanobot/config/schema.py @@ -50,6 +50,7 @@ class ProvidersConfig(BaseModel): anthropic: ProviderConfig = Field(default_factory=ProviderConfig) openai: ProviderConfig = Field(default_factory=ProviderConfig) openrouter: ProviderConfig = Field(default_factory=ProviderConfig) + groq: ProviderConfig = Field(default_factory=ProviderConfig) vllm: ProviderConfig = Field(default_factory=ProviderConfig) @@ -89,11 +90,12 @@ class Config(BaseSettings): return Path(self.agents.defaults.workspace).expanduser() def get_api_key(self) -> str | None: - """Get API key in priority order: OpenRouter > Anthropic > OpenAI > vLLM.""" + """Get API key in priority order: OpenRouter > Anthropic > OpenAI > Groq > vLLM.""" return ( self.providers.openrouter.api_key or self.providers.anthropic.api_key or self.providers.openai.api_key or + self.providers.groq.api_key or self.providers.vllm.api_key or None ) diff --git a/nanobot/providers/litellm_provider.py b/nanobot/providers/litellm_provider.py index 4e7305b..f8e8456 100644 --- a/nanobot/providers/litellm_provider.py +++ b/nanobot/providers/litellm_provider.py @@ -47,6 +47,8 @@ class LiteLLMProvider(LLMProvider): os.environ.setdefault("ANTHROPIC_API_KEY", api_key) elif "openai" in default_model or "gpt" in default_model: os.environ.setdefault("OPENAI_API_KEY", api_key) + elif "groq" in default_model: + os.environ.setdefault("GROQ_API_KEY", api_key) if api_base: litellm.api_base = api_base diff --git a/nanobot/providers/transcription.py b/nanobot/providers/transcription.py new file mode 100644 index 0000000..8ce909b --- /dev/null +++ b/nanobot/providers/transcription.py @@ -0,0 +1,65 @@ +"""Voice transcription provider using Groq.""" + +import os +from pathlib import Path +from typing import Any + +import httpx +from loguru import logger + + +class GroqTranscriptionProvider: + """ + Voice transcription provider using Groq's Whisper API. + + Groq offers extremely fast transcription with a generous free tier. + """ + + def __init__(self, api_key: str | None = None): + self.api_key = api_key or os.environ.get("GROQ_API_KEY") + self.api_url = "https://api.groq.com/openai/v1/audio/transcriptions" + + async def transcribe(self, file_path: str | Path) -> str: + """ + Transcribe an audio file using Groq. + + Args: + file_path: Path to the audio file. + + Returns: + Transcribed text. + """ + if not self.api_key: + logger.warning("Groq API key not configured for transcription") + return "" + + path = Path(file_path) + if not path.exists(): + logger.error(f"Audio file not found: {file_path}") + return "" + + try: + async with httpx.AsyncClient() as client: + with open(path, "rb") as f: + files = { + "file": (path.name, f), + "model": (None, "whisper-large-v3"), + } + headers = { + "Authorization": f"Bearer {self.api_key}", + } + + response = await client.post( + self.api_url, + headers=headers, + files=files, + timeout=60.0 + ) + + response.raise_for_status() + data = response.json() + return data.get("text", "") + + except Exception as e: + logger.error(f"Groq transcription error: {e}") + return "" From ae1830acddf6efe997bfac48af5b227caf80d0cd Mon Sep 17 00:00:00 2001 From: Peter van Eijk Date: Mon, 2 Feb 2026 16:36:22 +0700 Subject: [PATCH 13/24] feat: change default command to status --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 4287944..8132747 100644 --- a/Dockerfile +++ b/Dockerfile @@ -37,4 +37,4 @@ RUN mkdir -p /root/.nanobot EXPOSE 18790 ENTRYPOINT ["nanobot"] -CMD ["gateway"] +CMD ["status"] From 8d834d9b10876f1fb02376ae9e1d7677fc741d5a Mon Sep 17 00:00:00 2001 From: Re-bin Date: Mon, 2 Feb 2026 12:30:56 +0000 Subject: [PATCH 14/24] restore CLI commands, keep default model --- nanobot/cli/commands.py | 315 ++++++++++++++++++++++---- nanobot/config/schema.py | 2 +- nanobot/providers/litellm_provider.py | 2 +- 3 files changed, 276 insertions(+), 43 deletions(-) diff --git a/nanobot/cli/commands.py b/nanobot/cli/commands.py index a4053e7..8dcc460 100644 --- a/nanobot/cli/commands.py +++ b/nanobot/cli/commands.py @@ -68,7 +68,7 @@ def onboard(): console.print(f"\n{__logo__} nanobot is ready!") console.print("\nNext steps:") console.print(" 1. Add your API key to [cyan]~/.nanobot/config.json[/cyan]") - console.print(" Get one at: https://openrouter.ai/keys or https://bigmodel.cn/ (Zhipu AI)") + console.print(" Get one at: https://openrouter.ai/keys") console.print(" 2. Chat: [cyan]nanobot agent -m \"Hello!\"[/cyan]") console.print("\n[dim]Want Telegram/WhatsApp? See: https://github.com/HKUDS/nanobot#-chat-apps[/dim]") @@ -328,74 +328,307 @@ def agent( response = await agent_loop.process_direct(user_input, session_id) console.print(f"\n{__logo__} {response}\n") - except (KeyboardInterrupt, EOFError): - console.print("\nExiting...") + except KeyboardInterrupt: + console.print("\nGoodbye!") break asyncio.run(run_interactive()) # ============================================================================ -# System Commands +# Channel Commands +# ============================================================================ + + +channels_app = typer.Typer(help="Manage channels") +app.add_typer(channels_app, name="channels") + + +@channels_app.command("status") +def channels_status(): + """Show channel status.""" + from nanobot.config.loader import load_config + + config = load_config() + + table = Table(title="Channel Status") + table.add_column("Channel", style="cyan") + table.add_column("Enabled", style="green") + table.add_column("Bridge URL", style="yellow") + + wa = config.channels.whatsapp + table.add_row( + "WhatsApp", + "✓" if wa.enabled else "✗", + wa.bridge_url + ) + + console.print(table) + + +def _get_bridge_dir() -> Path: + """Get the bridge directory, setting it up if needed.""" + import shutil + import subprocess + + # User's bridge location + user_bridge = Path.home() / ".nanobot" / "bridge" + + # Check if already built + if (user_bridge / "dist" / "index.js").exists(): + return user_bridge + + # Check for npm + if not shutil.which("npm"): + console.print("[red]npm not found. Please install Node.js >= 18.[/red]") + raise typer.Exit(1) + + # Find source bridge: first check package data, then source dir + pkg_bridge = Path(__file__).parent / "bridge" # nanobot/bridge (installed) + src_bridge = Path(__file__).parent.parent.parent / "bridge" # repo root/bridge (dev) + + source = None + if (pkg_bridge / "package.json").exists(): + source = pkg_bridge + elif (src_bridge / "package.json").exists(): + source = src_bridge + + if not source: + console.print("[red]Bridge source not found.[/red]") + console.print("Try reinstalling: pip install --force-reinstall nanobot") + raise typer.Exit(1) + + console.print(f"{__logo__} Setting up bridge...") + + # Copy to user directory + user_bridge.parent.mkdir(parents=True, exist_ok=True) + if user_bridge.exists(): + shutil.rmtree(user_bridge) + shutil.copytree(source, user_bridge, ignore=shutil.ignore_patterns("node_modules", "dist")) + + # Install and build + try: + console.print(" Installing dependencies...") + subprocess.run(["npm", "install"], cwd=user_bridge, check=True, capture_output=True) + + console.print(" Building...") + subprocess.run(["npm", "run", "build"], cwd=user_bridge, check=True, capture_output=True) + + console.print("[green]✓[/green] Bridge ready\n") + except subprocess.CalledProcessError as e: + console.print(f"[red]Build failed: {e}[/red]") + if e.stderr: + console.print(f"[dim]{e.stderr.decode()[:500]}[/dim]") + raise typer.Exit(1) + + return user_bridge + + +@channels_app.command("login") +def channels_login(): + """Link device via QR code.""" + import subprocess + + bridge_dir = _get_bridge_dir() + + console.print(f"{__logo__} Starting bridge...") + console.print("Scan the QR code to connect.\n") + + try: + subprocess.run(["npm", "start"], cwd=bridge_dir, check=True) + except subprocess.CalledProcessError as e: + console.print(f"[red]Bridge failed: {e}[/red]") + except FileNotFoundError: + console.print("[red]npm not found. Please install Node.js.[/red]") + + +# ============================================================================ +# Cron Commands +# ============================================================================ + +cron_app = typer.Typer(help="Manage scheduled tasks") +app.add_typer(cron_app, name="cron") + + +@cron_app.command("list") +def cron_list( + all: bool = typer.Option(False, "--all", "-a", help="Include disabled jobs"), +): + """List scheduled jobs.""" + from nanobot.config.loader import get_data_dir + from nanobot.cron.service import CronService + + store_path = get_data_dir() / "cron" / "jobs.json" + service = CronService(store_path) + + jobs = service.list_jobs(include_disabled=all) + + if not jobs: + console.print("No scheduled jobs.") + return + + table = Table(title="Scheduled Jobs") + table.add_column("ID", style="cyan") + table.add_column("Name") + table.add_column("Schedule") + table.add_column("Status") + table.add_column("Next Run") + + import time + for job in jobs: + # Format schedule + if job.schedule.kind == "every": + sched = f"every {(job.schedule.every_ms or 0) // 1000}s" + elif job.schedule.kind == "cron": + sched = job.schedule.expr or "" + else: + sched = "one-time" + + # Format next run + next_run = "" + if job.state.next_run_at_ms: + next_time = time.strftime("%Y-%m-%d %H:%M", time.localtime(job.state.next_run_at_ms / 1000)) + next_run = next_time + + status = "[green]enabled[/green]" if job.enabled else "[dim]disabled[/dim]" + + table.add_row(job.id, job.name, sched, status, next_run) + + console.print(table) + + +@cron_app.command("add") +def cron_add( + name: str = typer.Option(..., "--name", "-n", help="Job name"), + message: str = typer.Option(..., "--message", "-m", help="Message for agent"), + every: int = typer.Option(None, "--every", "-e", help="Run every N seconds"), + cron_expr: str = typer.Option(None, "--cron", "-c", help="Cron expression (e.g. '0 9 * * *')"), + at: str = typer.Option(None, "--at", help="Run once at time (ISO format)"), + deliver: bool = typer.Option(False, "--deliver", "-d", help="Deliver response to channel"), + to: str = typer.Option(None, "--to", help="Recipient for delivery"), +): + """Add a scheduled job.""" + from nanobot.config.loader import get_data_dir + from nanobot.cron.service import CronService + from nanobot.cron.types import CronSchedule + + # Determine schedule type + if every: + schedule = CronSchedule(kind="every", every_ms=every * 1000) + elif cron_expr: + schedule = CronSchedule(kind="cron", expr=cron_expr) + elif at: + import datetime + dt = datetime.datetime.fromisoformat(at) + schedule = CronSchedule(kind="at", at_ms=int(dt.timestamp() * 1000)) + else: + console.print("[red]Error: Must specify --every, --cron, or --at[/red]") + raise typer.Exit(1) + + store_path = get_data_dir() / "cron" / "jobs.json" + service = CronService(store_path) + + job = service.add_job( + name=name, + schedule=schedule, + message=message, + deliver=deliver, + to=to, + ) + + console.print(f"[green]✓[/green] Added job '{job.name}' ({job.id})") + + +@cron_app.command("remove") +def cron_remove( + job_id: str = typer.Argument(..., help="Job ID to remove"), +): + """Remove a scheduled job.""" + from nanobot.config.loader import get_data_dir + from nanobot.cron.service import CronService + + store_path = get_data_dir() / "cron" / "jobs.json" + service = CronService(store_path) + + if service.remove_job(job_id): + console.print(f"[green]✓[/green] Removed job {job_id}") + else: + console.print(f"[red]Job {job_id} not found[/red]") + + +@cron_app.command("enable") +def cron_enable( + job_id: str = typer.Argument(..., help="Job ID"), + disable: bool = typer.Option(False, "--disable", help="Disable instead of enable"), +): + """Enable or disable a job.""" + from nanobot.config.loader import get_data_dir + from nanobot.cron.service import CronService + + store_path = get_data_dir() / "cron" / "jobs.json" + service = CronService(store_path) + + job = service.enable_job(job_id, enabled=not disable) + if job: + status = "disabled" if disable else "enabled" + console.print(f"[green]✓[/green] Job '{job.name}' {status}") + else: + console.print(f"[red]Job {job_id} not found[/red]") + + +@cron_app.command("run") +def cron_run( + job_id: str = typer.Argument(..., help="Job ID to run"), + force: bool = typer.Option(False, "--force", "-f", help="Run even if disabled"), +): + """Manually run a job.""" + from nanobot.config.loader import get_data_dir + from nanobot.cron.service import CronService + + store_path = get_data_dir() / "cron" / "jobs.json" + service = CronService(store_path) + + async def run(): + return await service.run_job(job_id, force=force) + + if asyncio.run(run()): + console.print(f"[green]✓[/green] Job executed") + else: + console.print(f"[red]Failed to run job {job_id}[/red]") + + +# ============================================================================ +# Status Commands # ============================================================================ @app.command() def status(): - """Check nanobot status and configuration.""" + """Show nanobot status.""" from nanobot.config.loader import load_config, get_config_path + from nanobot.utils.helpers import get_workspace_path config_path = get_config_path() - if not config_path.exists(): - console.print("[red]Error: nanobot is not initialized.[/red]") - console.print("Run [cyan]nanobot onboard[/cyan] first.") - raise typer.Exit(1) + workspace = get_workspace_path() - config = load_config() + console.print(f"{__logo__} nanobot Status\n") - console.print(f"{__logo__} [bold]nanobot status[/bold]") - console.print(f"Version: {__version__}") - console.print(f"Config: {config_path}") - console.print(f"Workspace: {config.workspace_path}") + console.print(f"Config: {config_path} {'[green]✓[/green]' if config_path.exists() else '[red]✗[/red]'}") + console.print(f"Workspace: {workspace} {'[green]✓[/green]' if workspace.exists() else '[red]✗[/red]'}") - table = Table(title="Configuration Summary") - table.add_column("Category", style="cyan") - table.add_column("Status", style="green") - - # Channels - enabled_channels = [] - if config.channels.whatsapp.enabled: - enabled_channels.append("WhatsApp") - if config.channels.telegram.enabled: - enabled_channels.append("Telegram") - - table.add_row("Channels", ", ".join(enabled_channels) if enabled_channels else "[dim]none[/dim]") - - # Agent - table.add_row("Default Model", config.agents.defaults.model) - - # Tools - has_brave = bool(config.tools.web.search.api_key) - table.add_row("Web Search", "[green]enabled[/green]" if has_brave else "[dim]disabled[/dim]") - - console.print(table) - - # Detailed API check - with console.status("[bold blue]Checking API providers..."): - console.print(f"\n[bold]API Providers:[/bold]") + if config_path.exists(): + config = load_config() console.print(f"Model: {config.agents.defaults.model}") # Check API keys has_openrouter = bool(config.providers.openrouter.api_key) has_anthropic = bool(config.providers.anthropic.api_key) has_openai = bool(config.providers.openai.api_key) - has_zhipu = bool(config.providers.zhipu.api_key) has_vllm = bool(config.providers.vllm.api_base) console.print(f"OpenRouter API: {'[green]✓[/green]' if has_openrouter else '[dim]not set[/dim]'}") console.print(f"Anthropic API: {'[green]✓[/green]' if has_anthropic else '[dim]not set[/dim]'}") console.print(f"OpenAI API: {'[green]✓[/green]' if has_openai else '[dim]not set[/dim]'}") - console.print(f"Zhipu AI API: {'[green]✓[/green]' if has_zhipu else '[dim]not set[/dim]'}") vllm_status = f"[green]✓ {config.providers.vllm.api_base}[/green]" if has_vllm else "[dim]not set[/dim]" console.print(f"vLLM/Local: {vllm_status}") diff --git a/nanobot/config/schema.py b/nanobot/config/schema.py index df0fc5e..5b4ef67 100644 --- a/nanobot/config/schema.py +++ b/nanobot/config/schema.py @@ -28,7 +28,7 @@ class ChannelsConfig(BaseModel): class AgentDefaults(BaseModel): """Default agent configuration.""" workspace: str = "~/.nanobot/workspace" - model: str = "glm-4.7-flash" + model: str = "anthropic/claude-opus-4-5" max_tokens: int = 8192 temperature: float = 0.7 max_tool_iterations: int = 20 diff --git a/nanobot/providers/litellm_provider.py b/nanobot/providers/litellm_provider.py index 07e84cd..3621a7e 100644 --- a/nanobot/providers/litellm_provider.py +++ b/nanobot/providers/litellm_provider.py @@ -21,7 +21,7 @@ class LiteLLMProvider(LLMProvider): self, api_key: str | None = None, api_base: str | None = None, - default_model: str = "glm-4.7-flash" + default_model: str = "anthropic/claude-opus-4-5" ): super().__init__(api_key, api_base) self.default_model = default_model From 10f7cf07d9ffb7dcf601134498b7b61310a4588f Mon Sep 17 00:00:00 2001 From: Re-bin Date: Mon, 2 Feb 2026 12:52:05 +0000 Subject: [PATCH 15/24] add contributor list --- README.md | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 167ae22..5cb7049 100644 --- a/README.md +++ b/README.md @@ -321,7 +321,11 @@ nanobot/ └── cli/ # 🖥️ Commands ``` -## 🗺️ Roadmap +## 🤝 Contribute & Roadmap + +PRs welcome! The codebase is intentionally small and readable. 🤗 + +**Roadmap** — Pick an item and [open a PR](https://github.com/HKUDS/nanobot/pulls)! - [ ] **Multi-modal** — See and hear (images, voice, video) - [ ] **Long-term memory** — Never forget important context @@ -329,14 +333,16 @@ nanobot/ - [ ] **More integrations** — Discord, Slack, email, calendar - [ ] **Self-improvement** — Learn from feedback and mistakes -**Want to help?** Pick an item and [open a PR](https://github.com/HKUDS/nanobot/pulls)! +### Contributors + + + + --- ## ⭐ Star History -*Community Growth Trajectory* - ---- - -## 🤝 Contribute - -PRs welcome! The codebase is intentionally small and readable. 🤗 -

Thanks for visiting ✨ nanobot!

Views From 1663acd1a1d3fc3b03be2943ded0c8ff5f1c4598 Mon Sep 17 00:00:00 2001 From: codeLzq <1293680370@qq.com> Date: Mon, 2 Feb 2026 14:36:24 +0800 Subject: [PATCH 16/24] feat: enhance sender ID handling in Telegram channel - Update sender ID construction to prioritize user ID while maintaining username for allowlist compatibility. - Improve allowlist checking in BaseChannel to support sender IDs with multiple parts separated by '|'. --- nanobot/channels/base.py | 9 ++++++++- nanobot/channels/telegram.py | 6 ++++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/nanobot/channels/base.py b/nanobot/channels/base.py index d83367c..8f16399 100644 --- a/nanobot/channels/base.py +++ b/nanobot/channels/base.py @@ -72,7 +72,14 @@ class BaseChannel(ABC): if not allow_list: return True - return str(sender_id) in allow_list + sender_str = str(sender_id) + if sender_str in allow_list: + return True + if "|" in sender_str: + for part in sender_str.split("|"): + if part and part in allow_list: + return True + return False async def _handle_message( self, diff --git a/nanobot/channels/telegram.py b/nanobot/channels/telegram.py index 840c250..ac2dba4 100644 --- a/nanobot/channels/telegram.py +++ b/nanobot/channels/telegram.py @@ -199,8 +199,10 @@ class TelegramChannel(BaseChannel): user = update.effective_user chat_id = message.chat_id - # Get sender identifier (prefer username, fallback to user_id) - sender_id = str(user.username or user.id) + # Use stable numeric ID, but keep username for allowlist compatibility + sender_id = str(user.id) + if user.username: + sender_id = f"{sender_id}|{user.username}" # Store chat_id for replies self._chat_ids[sender_id] = chat_id From ac390253c3b3f75ba0425bb86074837228d6be69 Mon Sep 17 00:00:00 2001 From: Re-bin Date: Mon, 2 Feb 2026 13:29:38 +0000 Subject: [PATCH 17/24] simplify vision support code --- nanobot/agent/context.py | 55 ++++++++++------------------------------ 1 file changed, 13 insertions(+), 42 deletions(-) diff --git a/nanobot/agent/context.py b/nanobot/agent/context.py index 32585f5..f70103d 100644 --- a/nanobot/agent/context.py +++ b/nanobot/agent/context.py @@ -5,8 +5,6 @@ import mimetypes from pathlib import Path from typing import Any -from loguru import logger - from nanobot.agent.memory import MemoryStore from nanobot.agent.skills import SkillsLoader @@ -148,50 +146,23 @@ When remembering something, write to {workspace_path}/memory/MEMORY.md""" return messages - def _build_user_content( - self, text: str, media: list[str] | None - ) -> str | list[dict[str, Any]]: - """ - Build user message content, optionally with images. - - Returns a plain string if no media, or a multimodal content list - with base64-encoded images. - """ + def _build_user_content(self, text: str, media: list[str] | None) -> str | list[dict[str, Any]]: + """Build user message content with optional base64-encoded images.""" if not media: return text - - content: list[dict[str, Any]] = [] - + + images = [] for path in media: - encoded = self._encode_image(path) - if encoded: - content.append(encoded) - - if not content: + p = Path(path) + mime, _ = mimetypes.guess_type(path) + if not p.is_file() or not mime or not mime.startswith("image/"): + continue + b64 = base64.b64encode(p.read_bytes()).decode() + images.append({"type": "image_url", "image_url": {"url": f"data:{mime};base64,{b64}"}}) + + if not images: return text - - content.append({"type": "text", "text": text}) - return content - - @staticmethod - def _encode_image(file_path: str) -> dict[str, Any] | None: - """Encode a local image file to a base64 data URL for the LLM.""" - path = Path(file_path) - if not path.is_file(): - logger.warning(f"Media file not found: {file_path}") - return None - - mime, _ = mimetypes.guess_type(file_path) - if not mime or not mime.startswith("image/"): - logger.warning(f"Unsupported media type for {file_path}: {mime}") - return None - - data = path.read_bytes() - b64 = base64.b64encode(data).decode("utf-8") - return { - "type": "image_url", - "image_url": {"url": f"data:{mime};base64,{b64}"}, - } + return images + [{"type": "text", "text": text}] def add_tool_result( self, From 30d6e4b4b6462838d8dbcc76a8f6b04967cfd189 Mon Sep 17 00:00:00 2001 From: Re-bin Date: Mon, 2 Feb 2026 16:18:04 +0000 Subject: [PATCH 18/24] feat: enhance scheduled reminders --- nanobot/cli/commands.py | 2 ++ workspace/AGENTS.md | 10 ++++++++++ 2 files changed, 12 insertions(+) diff --git a/nanobot/cli/commands.py b/nanobot/cli/commands.py index d847710..d293564 100644 --- a/nanobot/cli/commands.py +++ b/nanobot/cli/commands.py @@ -506,6 +506,7 @@ def cron_add( at: str = typer.Option(None, "--at", help="Run once at time (ISO format)"), deliver: bool = typer.Option(False, "--deliver", "-d", help="Deliver response to channel"), to: str = typer.Option(None, "--to", help="Recipient for delivery"), + channel: str = typer.Option(None, "--channel", help="Channel for delivery (e.g. 'telegram', 'whatsapp')"), ): """Add a scheduled job.""" from nanobot.config.loader import get_data_dir @@ -534,6 +535,7 @@ def cron_add( message=message, deliver=deliver, to=to, + channel=channel, ) console.print(f"[green]✓[/green] Added job '{job.name}' ({job.id})") diff --git a/workspace/AGENTS.md b/workspace/AGENTS.md index 0e5a055..a99a7b4 100644 --- a/workspace/AGENTS.md +++ b/workspace/AGENTS.md @@ -22,6 +22,16 @@ You have access to: - Use `memory/` directory for daily notes - Use `MEMORY.md` for long-term information +## Scheduled Reminders + +When user asks for a reminder at a specific time, use `exec` to run: +``` +nanobot cron add --name "reminder" --message "Your message" --at "YYYY-MM-DDTHH:MM:SS" --deliver --to "USER_ID" --channel "CHANNEL" +``` +Get USER_ID and CHANNEL from the current session (e.g., `8281248569` and `telegram` from `telegram:8281248569`). + +**Do NOT just write reminders to MEMORY.md** — that won't trigger actual notifications. + ## Heartbeat Tasks `HEARTBEAT.md` is checked every 30 minutes. You can manage periodic tasks by editing this file: From 1af404c4d90501708338efb6f741ba45c93822be Mon Sep 17 00:00:00 2001 From: tlguszz1010 Date: Tue, 3 Feb 2026 14:08:36 +0900 Subject: [PATCH 19/24] docs: update news date from 2025 to 2026 --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 358d23e..f7706d7 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,7 @@ ## 📢 News -- **2025-02-01** 🎉 nanobot launched! Welcome to try 🐈 nanobot! +- **2026-02-01** 🎉 nanobot launched! Welcome to try 🐈 nanobot! ## Key Features of nanobot: From 8989adc9aecd409309c6f472b1022d1eada9d58d Mon Sep 17 00:00:00 2001 From: Re-bin Date: Tue, 3 Feb 2026 06:36:58 +0000 Subject: [PATCH 20/24] refactor: use explicit dependency injection for groq_api_key --- nanobot/channels/manager.py | 8 +++----- nanobot/channels/telegram.py | 10 +++------- 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/nanobot/channels/manager.py b/nanobot/channels/manager.py index c32aa3d..73c3334 100644 --- a/nanobot/channels/manager.py +++ b/nanobot/channels/manager.py @@ -36,10 +36,10 @@ class ChannelManager: if self.config.channels.telegram.enabled: try: from nanobot.channels.telegram import TelegramChannel - # Inject parent config for access to providers - self.config.channels.telegram.parent = self.config self.channels["telegram"] = TelegramChannel( - self.config.channels.telegram, self.bus + self.config.channels.telegram, + self.bus, + groq_api_key=self.config.providers.groq.api_key, ) logger.info("Telegram channel enabled") except ImportError as e: @@ -49,8 +49,6 @@ class ChannelManager: if self.config.channels.whatsapp.enabled: try: from nanobot.channels.whatsapp import WhatsAppChannel - # Inject parent config for access to providers - self.config.channels.whatsapp.parent = self.config self.channels["whatsapp"] = WhatsAppChannel( self.config.channels.whatsapp, self.bus ) diff --git a/nanobot/channels/telegram.py b/nanobot/channels/telegram.py index 75b9299..23e1de0 100644 --- a/nanobot/channels/telegram.py +++ b/nanobot/channels/telegram.py @@ -85,9 +85,10 @@ class TelegramChannel(BaseChannel): name = "telegram" - def __init__(self, config: TelegramConfig, bus: MessageBus): + def __init__(self, config: TelegramConfig, bus: MessageBus, groq_api_key: str = ""): super().__init__(config, bus) self.config: TelegramConfig = config + self.groq_api_key = groq_api_key self._app: Application | None = None self._chat_ids: dict[str, int] = {} # Map sender_id to chat_id for replies @@ -253,12 +254,7 @@ class TelegramChannel(BaseChannel): # Handle voice transcription if media_type == "voice" or media_type == "audio": from nanobot.providers.transcription import GroqTranscriptionProvider - # Try to get Groq API key from config - groq_key = None - if hasattr(self.config, 'parent') and hasattr(self.config.parent, 'providers'): - groq_key = self.config.parent.providers.groq.api_key - - transcriber = GroqTranscriptionProvider(api_key=groq_key) + transcriber = GroqTranscriptionProvider(api_key=self.groq_api_key) transcription = await transcriber.transcribe(file_path) if transcription: logger.info(f"Transcribed {media_type}: {transcription[:50]}...") From 99339c7be93cdcc3cdee5dd0bdf48645bbcb12c7 Mon Sep 17 00:00:00 2001 From: Re-bin Date: Tue, 3 Feb 2026 07:17:47 +0000 Subject: [PATCH 21/24] docs: improve README with provider info and Docker examples --- README.md | 51 +++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 41 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 3440fdc..55c6091 100644 --- a/README.md +++ b/README.md @@ -130,16 +130,6 @@ nanobot agent -m "What is 2+2?" That's it! You have a working AI assistant in 2 minutes. -## 🐳 Docker - -```bash -docker build -t nanobot . -docker run --rm nanobot onboard -docker run -v ~/.nanobot:/root/.nanobot -p 18790:18790 nanobot -``` - -Mount `~/.nanobot` so your config and workspace persist across runs. Edit `~/.nanobot/config.json` on the host to add API keys, then restart the container. - ## 🖥️ Local Models (vLLM) Run nanobot with your own local models using vLLM or any OpenAI-compatible server. @@ -257,6 +247,20 @@ nanobot gateway ## ⚙️ Configuration +Config file: `~/.nanobot/config.json` + +### Providers + +| Provider | Purpose | Get API Key | +|----------|---------|-------------| +| `openrouter` | LLM (recommended, access to all models) | [openrouter.ai](https://openrouter.ai) | +| `anthropic` | LLM (Claude direct) | [console.anthropic.com](https://console.anthropic.com) | +| `openai` | LLM (GPT direct) | [platform.openai.com](https://platform.openai.com) | +| `groq` | LLM + **Voice transcription** (Whisper) | [console.groq.com](https://console.groq.com) | +| `gemini` | LLM (Gemini direct) | [aistudio.google.com](https://aistudio.google.com) | + +> **Note**: Groq provides free voice transcription via Whisper. If configured, Telegram voice messages will be automatically transcribed. +

Full config example @@ -270,6 +274,9 @@ nanobot gateway "providers": { "openrouter": { "apiKey": "sk-or-v1-xxx" + }, + "groq": { + "apiKey": "gsk_xxx" } }, "channels": { @@ -323,6 +330,30 @@ nanobot cron remove
+## 🐳 Docker + +Build and run nanobot in a container: + +```bash +# Build the image +docker build -t nanobot . + +# Initialize config (first time only) +docker run -v ~/.nanobot:/root/.nanobot --rm nanobot onboard + +# Edit config on host to add API keys +vim ~/.nanobot/config.json + +# Run gateway (connects to Telegram/WhatsApp) +docker run -v ~/.nanobot:/root/.nanobot -p 18790:18790 nanobot gateway + +# Or run a single command +docker run -v ~/.nanobot:/root/.nanobot --rm nanobot agent -m "Hello!" +docker run -v ~/.nanobot:/root/.nanobot --rm nanobot status +``` + +> **Tip**: Mount `~/.nanobot` so your config and workspace persist across container restarts. + ## 📁 Project Structure ``` From 73a3934cc59de9a616271f6a121855709635406a Mon Sep 17 00:00:00 2001 From: Re-bin Date: Tue, 3 Feb 2026 07:21:46 +0000 Subject: [PATCH 22/24] docs: unify note/tip format to GitHub Alerts --- README.md | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 55c6091..cd6fc94 100644 --- a/README.md +++ b/README.md @@ -251,6 +251,9 @@ Config file: `~/.nanobot/config.json` ### Providers +> [!NOTE] +> Groq provides free voice transcription via Whisper. If configured, Telegram voice messages will be automatically transcribed. + | Provider | Purpose | Get API Key | |----------|---------|-------------| | `openrouter` | LLM (recommended, access to all models) | [openrouter.ai](https://openrouter.ai) | @@ -259,7 +262,6 @@ Config file: `~/.nanobot/config.json` | `groq` | LLM + **Voice transcription** (Whisper) | [console.groq.com](https://console.groq.com) | | `gemini` | LLM (Gemini direct) | [aistudio.google.com](https://aistudio.google.com) | -> **Note**: Groq provides free voice transcription via Whisper. If configured, Telegram voice messages will be automatically transcribed.
Full config example @@ -332,6 +334,9 @@ nanobot cron remove ## 🐳 Docker +> [!TIP] +> The `-v ~/.nanobot:/root/.nanobot` flag mounts your local config directory into the container, so your config and workspace persist across container restarts. + Build and run nanobot in a container: ```bash @@ -352,8 +357,6 @@ docker run -v ~/.nanobot:/root/.nanobot --rm nanobot agent -m "Hello!" docker run -v ~/.nanobot:/root/.nanobot --rm nanobot status ``` -> **Tip**: Mount `~/.nanobot` so your config and workspace persist across container restarts. - ## 📁 Project Structure ``` From a4269593fc1e5f11cc94e541bde17cafeadbd248 Mon Sep 17 00:00:00 2001 From: Re-bin Date: Tue, 3 Feb 2026 07:24:59 +0000 Subject: [PATCH 23/24] docs: improve install methods --- README.md | 26 +++++++++----------------- 1 file changed, 9 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index cd6fc94..046860d 100644 --- a/README.md +++ b/README.md @@ -60,19 +60,7 @@ ## 📦 Install -**Install with [uv](https://github.com/astral-sh/uv)** (recommended for speed) - -```bash -uv tool install nanobot-ai -``` - -**Install from PyPi** - -```bash -pip install nanobot-ai -``` - -**Install from source** (recommended for development) +**Install from source** (latest features, recommended for development) ```bash git clone https://github.com/HKUDS/nanobot.git @@ -80,12 +68,16 @@ cd nanobot pip install -e . ``` -**Install with uv** +**Install with [uv](https://github.com/astral-sh/uv)** (stable, fast) ```bash -uv venv -source .venv/bin/activate -uv pip install nanobot-ai +uv tool install nanobot-ai +``` + +**Install from PyPI** (stable) + +```bash +pip install nanobot-ai ``` ## 🚀 Quick Start From c3b32afbbbab07a1f77ae7a76368606545d916bb Mon Sep 17 00:00:00 2001 From: Re-bin Date: Tue, 3 Feb 2026 11:53:21 +0000 Subject: [PATCH 24/24] docs: improve README with disclaimer --- README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README.md b/README.md index f87e702..ddc5ccd 100644 --- a/README.md +++ b/README.md @@ -408,3 +408,9 @@ PRs welcome! The codebase is intentionally small and readable. 🤗 Thanks for visiting ✨ nanobot!

Views

+ +--- + +

+ nanobot is for educational, research, and technical exchange purposes only +