Merge branch 'main' into pr-22

This commit is contained in:
Re-bin
2026-02-03 17:09:37 +00:00
17 changed files with 397 additions and 51 deletions

13
.dockerignore Normal file
View File

@@ -0,0 +1,13 @@
__pycache__
*.pyc
*.pyo
*.pyd
*.egg-info
dist/
build/
.git
.env
.assets
node_modules/
bridge/dist/
workspace/

1
.gitignore vendored
View File

@@ -3,6 +3,7 @@
*.pyc
dist/
build/
docs/
*.egg-info/
*.egg
*.pyc

40
Dockerfile Normal file
View File

@@ -0,0 +1,40 @@
FROM ghcr.io/astral-sh/uv:python3.12-bookworm-slim
# Install Node.js 20 for the WhatsApp bridge
RUN apt-get update && \
apt-get install -y --no-install-recommends curl ca-certificates gnupg git && \
mkdir -p /etc/apt/keyrings && \
curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg && \
echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_20.x nodistro main" > /etc/apt/sources.list.d/nodesource.list && \
apt-get update && \
apt-get install -y --no-install-recommends nodejs && \
apt-get purge -y gnupg && \
apt-get autoremove -y && \
rm -rf /var/lib/apt/lists/*
WORKDIR /app
# Install Python dependencies first (cached layer)
COPY pyproject.toml README.md LICENSE ./
RUN mkdir -p nanobot bridge && touch nanobot/__init__.py && \
uv pip install --system --no-cache . && \
rm -rf nanobot bridge
# Copy the full source and install
COPY nanobot/ nanobot/
COPY bridge/ bridge/
RUN uv pip install --system --no-cache .
# Build the WhatsApp bridge
WORKDIR /app/bridge
RUN npm install && npm run build
WORKDIR /app
# Create config directory
RUN mkdir -p /root/.nanobot
# Gateway default port
EXPOSE 18790
ENTRYPOINT ["nanobot"]
CMD ["status"]

View File

@@ -8,6 +8,7 @@
<img src="https://img.shields.io/badge/license-MIT-green" alt="License">
<a href="./COMMUNICATION.md"><img src="https://img.shields.io/badge/Feishu-Group-E9DBFC?style=flat&logo=feishu&logoColor=white" alt="Feishu"></a>
<a href="./COMMUNICATION.md"><img src="https://img.shields.io/badge/WeChat-Group-C5EAB4?style=flat&logo=wechat&logoColor=white" alt="WeChat"></a>
<a href="https://discord.gg/MnCvHqpUGB"><img src="https://img.shields.io/badge/Discord-Community-5865F2?style=flat&logo=discord&logoColor=white" alt="Discord"></a>
</p>
</div>
@@ -17,7 +18,7 @@
## 📢 News
- **2025-02-01** 🎉 nanobot launched! Welcome to try 🐈 nanobot!
- **2026-02-01** 🎉 nanobot launched! Welcome to try 🐈 nanobot!
## Key Features of nanobot:
@@ -60,13 +61,7 @@
## 📦 Install
**Install from PyPi**
```bash
pip install nanobot-ai
```
**Install from source** (recommended for development)
**Install from source** (latest features, recommended for development)
```bash
git clone https://github.com/HKUDS/nanobot.git
@@ -74,6 +69,18 @@ cd nanobot
pip install -e .
```
**Install with [uv](https://github.com/astral-sh/uv)** (stable, fast)
```bash
uv tool install nanobot-ai
```
**Install from PyPI** (stable)
```bash
pip install nanobot-ai
```
## 🚀 Quick Start
> [!TIP]
@@ -233,6 +240,22 @@ nanobot gateway
## ⚙️ Configuration
Config file: `~/.nanobot/config.json`
### Providers
> [!NOTE]
> Groq provides free voice transcription via Whisper. If configured, Telegram voice messages will be automatically transcribed.
| Provider | Purpose | Get API Key |
|----------|---------|-------------|
| `openrouter` | LLM (recommended, access to all models) | [openrouter.ai](https://openrouter.ai) |
| `anthropic` | LLM (Claude direct) | [console.anthropic.com](https://console.anthropic.com) |
| `openai` | LLM (GPT direct) | [platform.openai.com](https://platform.openai.com) |
| `groq` | LLM + **Voice transcription** (Whisper) | [console.groq.com](https://console.groq.com) |
| `gemini` | LLM (Gemini direct) | [aistudio.google.com](https://aistudio.google.com) |
<details>
<summary><b>Full config example</b></summary>
@@ -246,6 +269,9 @@ nanobot gateway
"providers": {
"openrouter": {
"apiKey": "sk-or-v1-xxx"
},
"groq": {
"apiKey": "gsk_xxx"
}
},
"channels": {
@@ -299,6 +325,31 @@ nanobot cron remove <job_id>
</details>
## 🐳 Docker
> [!TIP]
> The `-v ~/.nanobot:/root/.nanobot` flag mounts your local config directory into the container, so your config and workspace persist across container restarts.
Build and run nanobot in a container:
```bash
# Build the image
docker build -t nanobot .
# Initialize config (first time only)
docker run -v ~/.nanobot:/root/.nanobot --rm nanobot onboard
# Edit config on host to add API keys
vim ~/.nanobot/config.json
# Run gateway (connects to Telegram/WhatsApp)
docker run -v ~/.nanobot:/root/.nanobot -p 18790:18790 nanobot gateway
# Or run a single command
docker run -v ~/.nanobot:/root/.nanobot --rm nanobot agent -m "Hello!"
docker run -v ~/.nanobot:/root/.nanobot --rm nanobot status
```
## 📁 Project Structure
```
@@ -321,22 +372,28 @@ nanobot/
└── cli/ # 🖥️ Commands
```
## 🗺️ Roadmap
## 🤝 Contribute & Roadmap
PRs welcome! The codebase is intentionally small and readable. 🤗
**Roadmap** — Pick an item and [open a PR](https://github.com/HKUDS/nanobot/pulls)!
- [x] **Voice Transcription** — Support for Groq Whisper (Issue #13)
- [ ] **Multi-modal** — See and hear (images, voice, video)
- [ ] **Long-term memory** — Never forget important context
- [ ] **Better reasoning** — Multi-step planning and reflection
- [ ] **More integrations** — Discord, Slack, email, calendar
- [ ] **Self-improvement** — Learn from feedback and mistakes
**Want to help?** Pick an item and [open a PR](https://github.com/HKUDS/nanobot/pulls)!
### Contributors
<a href="https://github.com/HKUDS/nanobot/graphs/contributors">
<img src="https://contrib.rocks/image?repo=HKUDS/nanobot" />
</a>
---
## ⭐ Star History
*Community Growth Trajectory*
<div align="center">
<a href="https://star-history.com/#HKUDS/nanobot&Date">
<picture>
@@ -347,13 +404,12 @@ nanobot/
</a>
</div>
---
## 🤝 Contribute
PRs welcome! The codebase is intentionally small and readable. 🤗
<p align="center">
<em> Thanks for visiting ✨ nanobot!</em><br><br>
<img src="https://visitor-badge.laobi.icu/badge?page_id=HKUDS.nanobot&style=for-the-badge&color=00d4ff" alt="Views">
</p>
<p align="center">
<sub>nanobot is for educational, research, and technical exchange purposes only</sub>
</p>

View File

@@ -160,6 +160,11 @@ export class WhatsAppClient {
return `[Document] ${message.documentMessage.caption}`;
}
// Voice/Audio message
if (message.audioMessage) {
return `[Voice Message]`;
}
return null;
}

View File

@@ -1,5 +1,7 @@
"""Context builder for assembling agent prompts."""
import base64
import mimetypes
from pathlib import Path
from typing import Any
@@ -114,32 +116,53 @@ When remembering something, write to {workspace_path}/memory/MEMORY.md"""
self,
history: list[dict[str, Any]],
current_message: str,
skill_names: list[str] | None = None
skill_names: list[str] | None = None,
media: list[str] | None = None,
) -> list[dict[str, Any]]:
"""
Build the complete message list for an LLM call.
Args:
history: Previous conversation messages.
current_message: The new user message.
skill_names: Optional skills to include.
media: Optional list of local file paths for images/media.
Returns:
List of messages including system prompt.
"""
messages = []
# System prompt
system_prompt = self.build_system_prompt(skill_names)
messages.append({"role": "system", "content": system_prompt})
# History
messages.extend(history)
# Current message
messages.append({"role": "user", "content": current_message})
# Current message (with optional image attachments)
user_content = self._build_user_content(current_message, media)
messages.append({"role": "user", "content": user_content})
return messages
def _build_user_content(self, text: str, media: list[str] | None) -> str | list[dict[str, Any]]:
"""Build user message content with optional base64-encoded images."""
if not media:
return text
images = []
for path in media:
p = Path(path)
mime, _ = mimetypes.guess_type(path)
if not p.is_file() or not mime or not mime.startswith("image/"):
continue
b64 = base64.b64encode(p.read_bytes()).decode()
images.append({"type": "image_url", "image_url": {"url": f"data:{mime};base64,{b64}"}})
if not images:
return text
return images + [{"type": "text", "text": text}]
def add_tool_result(
self,

View File

@@ -152,7 +152,8 @@ class AgentLoop:
# Build initial messages (use get_history for LLM-formatted messages)
messages = self.context.build_messages(
history=session.get_history(),
current_message=msg.content
current_message=msg.content,
media=msg.media if msg.media else None,
)
# Agent loop
@@ -189,7 +190,8 @@ class AgentLoop:
# Execute tools
for tool_call in response.tool_calls:
logger.debug(f"Executing tool: {tool_call.name}")
args_str = json.dumps(tool_call.arguments)
logger.debug(f"Executing tool: {tool_call.name} with arguments: {args_str}")
result = await self.tools.execute(tool_call.name, tool_call.arguments)
messages = self.context.add_tool_result(
messages, tool_call.id, tool_call.name, result
@@ -281,7 +283,8 @@ class AgentLoop:
)
for tool_call in response.tool_calls:
logger.debug(f"Executing tool: {tool_call.name}")
args_str = json.dumps(tool_call.arguments)
logger.debug(f"Executing tool: {tool_call.name} with arguments: {args_str}")
result = await self.tools.execute(tool_call.name, tool_call.arguments)
messages = self.context.add_tool_result(
messages, tool_call.id, tool_call.name, result

View File

@@ -72,7 +72,14 @@ class BaseChannel(ABC):
if not allow_list:
return True
return str(sender_id) in allow_list
sender_str = str(sender_id)
if sender_str in allow_list:
return True
if "|" in sender_str:
for part in sender_str.split("|"):
if part and part in allow_list:
return True
return False
async def _handle_message(
self,

View File

@@ -37,7 +37,9 @@ class ChannelManager:
try:
from nanobot.channels.telegram import TelegramChannel
self.channels["telegram"] = TelegramChannel(
self.config.channels.telegram, self.bus
self.config.channels.telegram,
self.bus,
groq_api_key=self.config.providers.groq.api_key,
)
logger.info("Telegram channel enabled")
except ImportError as e:

View File

@@ -85,9 +85,10 @@ class TelegramChannel(BaseChannel):
name = "telegram"
def __init__(self, config: TelegramConfig, bus: MessageBus):
def __init__(self, config: TelegramConfig, bus: MessageBus, groq_api_key: str = ""):
super().__init__(config, bus)
self.config: TelegramConfig = config
self.groq_api_key = groq_api_key
self._app: Application | None = None
self._chat_ids: dict[str, int] = {} # Map sender_id to chat_id for replies
@@ -199,8 +200,10 @@ class TelegramChannel(BaseChannel):
user = update.effective_user
chat_id = message.chat_id
# Get sender identifier (prefer username, fallback to user_id)
sender_id = str(user.username or user.id)
# Use stable numeric ID, but keep username for allowlist compatibility
sender_id = str(user.id)
if user.username:
sender_id = f"{sender_id}|{user.username}"
# Store chat_id for replies
self._chat_ids[sender_id] = chat_id
@@ -247,7 +250,20 @@ class TelegramChannel(BaseChannel):
await file.download_to_drive(str(file_path))
media_paths.append(str(file_path))
content_parts.append(f"[{media_type}: {file_path}]")
# Handle voice transcription
if media_type == "voice" or media_type == "audio":
from nanobot.providers.transcription import GroqTranscriptionProvider
transcriber = GroqTranscriptionProvider(api_key=self.groq_api_key)
transcription = await transcriber.transcribe(file_path)
if transcription:
logger.info(f"Transcribed {media_type}: {transcription[:50]}...")
content_parts.append(f"[transcription: {transcription}]")
else:
content_parts.append(f"[{media_type}: {file_path}]")
else:
content_parts.append(f"[{media_type}: {file_path}]")
logger.debug(f"Downloaded {media_type} to {file_path}")
except Exception as e:
logger.error(f"Failed to download media: {e}")

View File

@@ -107,6 +107,11 @@ class WhatsAppChannel(BaseChannel):
# Extract just the phone number as chat_id
chat_id = sender.split("@")[0] if "@" in sender else sender
# Handle voice transcription if it's a voice message
if content == "[Voice Message]":
logger.info(f"Voice message received from {chat_id}, but direct download from bridge is not yet supported.")
content = "[Voice Message: Transcription not available for WhatsApp yet]"
await self._handle_message(
sender_id=chat_id,
chat_id=sender, # Use full JID for replies

View File

@@ -178,11 +178,13 @@ def gateway(
# Create components
bus = MessageBus()
# Create provider (supports OpenRouter, Anthropic, OpenAI)
# Create provider (supports OpenRouter, Anthropic, OpenAI, Bedrock)
api_key = config.get_api_key()
api_base = config.get_api_base()
if not api_key:
model = config.agents.defaults.model
is_bedrock = model.startswith("bedrock/")
if not api_key and not is_bedrock:
console.print("[red]Error: No API key configured.[/red]")
console.print("Set one in ~/.nanobot/config.json under providers.openrouter.apiKey")
raise typer.Exit(1)
@@ -289,11 +291,13 @@ def agent(
api_key = config.get_api_key()
api_base = config.get_api_base()
if not api_key:
model = config.agents.defaults.model
is_bedrock = model.startswith("bedrock/")
if not api_key and not is_bedrock:
console.print("[red]Error: No API key configured.[/red]")
raise typer.Exit(1)
bus = MessageBus()
provider = LiteLLMProvider(
api_key=api_key,
@@ -348,21 +352,31 @@ app.add_typer(channels_app, name="channels")
def channels_status():
"""Show channel status."""
from nanobot.config.loader import load_config
config = load_config()
table = Table(title="Channel Status")
table.add_column("Channel", style="cyan")
table.add_column("Enabled", style="green")
table.add_column("Bridge URL", style="yellow")
table.add_column("Configuration", style="yellow")
# WhatsApp
wa = config.channels.whatsapp
table.add_row(
"WhatsApp",
"" if wa.enabled else "",
wa.bridge_url
)
# Telegram
tg = config.channels.telegram
tg_config = f"token: {tg.token[:10]}..." if tg.token else "[dim]not configured[/dim]"
table.add_row(
"Telegram",
"" if tg.enabled else "",
tg_config
)
console.print(table)
@@ -506,6 +520,7 @@ def cron_add(
at: str = typer.Option(None, "--at", help="Run once at time (ISO format)"),
deliver: bool = typer.Option(False, "--deliver", "-d", help="Deliver response to channel"),
to: str = typer.Option(None, "--to", help="Recipient for delivery"),
channel: str = typer.Option(None, "--channel", help="Channel for delivery (e.g. 'telegram', 'whatsapp')"),
):
"""Add a scheduled job."""
from nanobot.config.loader import get_data_dir
@@ -534,6 +549,7 @@ def cron_add(
message=message,
deliver=deliver,
to=to,
channel=channel,
)
console.print(f"[green]✓[/green] Added job '{job.name}' ({job.id})")
@@ -624,11 +640,13 @@ def status():
has_openrouter = bool(config.providers.openrouter.api_key)
has_anthropic = bool(config.providers.anthropic.api_key)
has_openai = bool(config.providers.openai.api_key)
has_gemini = bool(config.providers.gemini.api_key)
has_vllm = bool(config.providers.vllm.api_base)
console.print(f"OpenRouter API: {'[green]✓[/green]' if has_openrouter else '[dim]not set[/dim]'}")
console.print(f"Anthropic API: {'[green]✓[/green]' if has_anthropic else '[dim]not set[/dim]'}")
console.print(f"OpenAI API: {'[green]✓[/green]' if has_openai else '[dim]not set[/dim]'}")
console.print(f"Gemini API: {'[green]✓[/green]' if has_gemini else '[dim]not set[/dim]'}")
vllm_status = f"[green]✓ {config.providers.vllm.api_base}[/green]" if has_vllm else "[dim]not set[/dim]"
console.print(f"vLLM/Local: {vllm_status}")

View File

@@ -50,7 +50,10 @@ class ProvidersConfig(BaseModel):
anthropic: ProviderConfig = Field(default_factory=ProviderConfig)
openai: ProviderConfig = Field(default_factory=ProviderConfig)
openrouter: ProviderConfig = Field(default_factory=ProviderConfig)
groq: ProviderConfig = Field(default_factory=ProviderConfig)
zhipu: ProviderConfig = Field(default_factory=ProviderConfig)
vllm: ProviderConfig = Field(default_factory=ProviderConfig)
gemini: ProviderConfig = Field(default_factory=ProviderConfig)
class GatewayConfig(BaseModel):
@@ -89,19 +92,24 @@ class Config(BaseSettings):
return Path(self.agents.defaults.workspace).expanduser()
def get_api_key(self) -> str | None:
"""Get API key in priority order: OpenRouter > Anthropic > OpenAI > vLLM."""
"""Get API key in priority order: OpenRouter > Anthropic > OpenAI > Gemini > Zhipu > Groq > vLLM."""
return (
self.providers.openrouter.api_key or
self.providers.anthropic.api_key or
self.providers.openai.api_key or
self.providers.gemini.api_key or
self.providers.zhipu.api_key or
self.providers.groq.api_key or
self.providers.vllm.api_key or
None
)
def get_api_base(self) -> str | None:
"""Get API base URL if using OpenRouter or vLLM."""
"""Get API base URL if using OpenRouter, Zhipu or vLLM."""
if self.providers.openrouter.api_key:
return self.providers.openrouter.api_base or "https://openrouter.ai/api/v1"
if self.providers.zhipu.api_key:
return self.providers.zhipu.api_base
if self.providers.vllm.api_base:
return self.providers.vllm.api_base
return None

View File

@@ -13,7 +13,7 @@ class LiteLLMProvider(LLMProvider):
"""
LLM provider using LiteLLM for multi-provider support.
Supports OpenRouter, Anthropic, OpenAI, and many other providers through
Supports OpenRouter, Anthropic, OpenAI, Gemini, and many other providers through
a unified interface.
"""
@@ -47,6 +47,12 @@ class LiteLLMProvider(LLMProvider):
os.environ.setdefault("ANTHROPIC_API_KEY", api_key)
elif "openai" in default_model or "gpt" in default_model:
os.environ.setdefault("OPENAI_API_KEY", api_key)
elif "gemini" in default_model.lower():
os.environ.setdefault("GEMINI_API_KEY", api_key)
elif "zhipu" in default_model or "glm" in default_model or "zai" in default_model:
os.environ.setdefault("ZHIPUAI_API_KEY", api_key)
elif "groq" in default_model:
os.environ.setdefault("GROQ_API_KEY", api_key)
if api_base:
litellm.api_base = api_base
@@ -81,11 +87,24 @@ class LiteLLMProvider(LLMProvider):
if self.is_openrouter and not model.startswith("openrouter/"):
model = f"openrouter/{model}"
# For Zhipu/Z.ai, ensure prefix is present
# Handle cases like "glm-4.7-flash" -> "zhipu/glm-4.7-flash"
if ("glm" in model.lower() or "zhipu" in model.lower()) and not (
model.startswith("zhipu/") or
model.startswith("zai/") or
model.startswith("openrouter/")
):
model = f"zhipu/{model}"
# For vLLM, use hosted_vllm/ prefix per LiteLLM docs
# Convert openai/ prefix to hosted_vllm/ if user specified it
if self.is_vllm:
model = f"hosted_vllm/{model}"
# For Gemini, ensure gemini/ prefix if not already present
if "gemini" in model.lower() and not model.startswith("gemini/"):
model = f"gemini/{model}"
kwargs: dict[str, Any] = {
"model": model,
"messages": messages,

View File

@@ -0,0 +1,65 @@
"""Voice transcription provider using Groq."""
import os
from pathlib import Path
from typing import Any
import httpx
from loguru import logger
class GroqTranscriptionProvider:
"""
Voice transcription provider using Groq's Whisper API.
Groq offers extremely fast transcription with a generous free tier.
"""
def __init__(self, api_key: str | None = None):
self.api_key = api_key or os.environ.get("GROQ_API_KEY")
self.api_url = "https://api.groq.com/openai/v1/audio/transcriptions"
async def transcribe(self, file_path: str | Path) -> str:
"""
Transcribe an audio file using Groq.
Args:
file_path: Path to the audio file.
Returns:
Transcribed text.
"""
if not self.api_key:
logger.warning("Groq API key not configured for transcription")
return ""
path = Path(file_path)
if not path.exists():
logger.error(f"Audio file not found: {file_path}")
return ""
try:
async with httpx.AsyncClient() as client:
with open(path, "rb") as f:
files = {
"file": (path.name, f),
"model": (None, "whisper-large-v3"),
}
headers = {
"Authorization": f"Bearer {self.api_key}",
}
response = await client.post(
self.api_url,
headers=headers,
files=files,
timeout=60.0
)
response.raise_for_status()
data = response.json()
return data.get("text", "")
except Exception as e:
logger.error(f"Groq transcription error: {e}")
return ""

55
test_docker.sh Executable file
View File

@@ -0,0 +1,55 @@
#!/usr/bin/env bash
set -euo pipefail
IMAGE_NAME="nanobot-test"
echo "=== Building Docker image ==="
docker build -t "$IMAGE_NAME" .
echo ""
echo "=== Running 'nanobot onboard' ==="
docker run --name nanobot-test-run "$IMAGE_NAME" onboard
echo ""
echo "=== Running 'nanobot status' ==="
STATUS_OUTPUT=$(docker commit nanobot-test-run nanobot-test-onboarded > /dev/null && \
docker run --rm nanobot-test-onboarded status 2>&1) || true
echo "$STATUS_OUTPUT"
echo ""
echo "=== Validating output ==="
PASS=true
check() {
if echo "$STATUS_OUTPUT" | grep -q "$1"; then
echo " PASS: found '$1'"
else
echo " FAIL: missing '$1'"
PASS=false
fi
}
check "nanobot Status"
check "Config:"
check "Workspace:"
check "Model:"
check "OpenRouter API:"
check "Anthropic API:"
check "OpenAI API:"
echo ""
if $PASS; then
echo "=== All checks passed ==="
else
echo "=== Some checks FAILED ==="
exit 1
fi
# Cleanup
echo ""
echo "=== Cleanup ==="
docker rm -f nanobot-test-run 2>/dev/null || true
docker rmi -f nanobot-test-onboarded 2>/dev/null || true
docker rmi -f "$IMAGE_NAME" 2>/dev/null || true
echo "Done."

View File

@@ -22,6 +22,16 @@ You have access to:
- Use `memory/` directory for daily notes
- Use `MEMORY.md` for long-term information
## Scheduled Reminders
When user asks for a reminder at a specific time, use `exec` to run:
```
nanobot cron add --name "reminder" --message "Your message" --at "YYYY-MM-DDTHH:MM:SS" --deliver --to "USER_ID" --channel "CHANNEL"
```
Get USER_ID and CHANNEL from the current session (e.g., `8281248569` and `telegram` from `telegram:8281248569`).
**Do NOT just write reminders to MEMORY.md** — that won't trigger actual notifications.
## Heartbeat Tasks
`HEARTBEAT.md` is checked every 30 minutes. You can manage periodic tasks by editing this file: