diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..020b9ec --- /dev/null +++ b/.dockerignore @@ -0,0 +1,13 @@ +__pycache__ +*.pyc +*.pyo +*.pyd +*.egg-info +dist/ +build/ +.git +.env +.assets +node_modules/ +bridge/dist/ +workspace/ diff --git a/.gitignore b/.gitignore index 9720f3b..316e214 100644 --- a/.gitignore +++ b/.gitignore @@ -12,4 +12,8 @@ docs/ *.pyw *.pyz *.pywz -*.pyzz \ No newline at end of file +*.pyzz +.venv/ +__pycache__/ +poetry.lock +.pytest_cache/ diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..8132747 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,40 @@ +FROM ghcr.io/astral-sh/uv:python3.12-bookworm-slim + +# Install Node.js 20 for the WhatsApp bridge +RUN apt-get update && \ + apt-get install -y --no-install-recommends curl ca-certificates gnupg git && \ + mkdir -p /etc/apt/keyrings && \ + curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg && \ + echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_20.x nodistro main" > /etc/apt/sources.list.d/nodesource.list && \ + apt-get update && \ + apt-get install -y --no-install-recommends nodejs && \ + apt-get purge -y gnupg && \ + apt-get autoremove -y && \ + rm -rf /var/lib/apt/lists/* + +WORKDIR /app + +# Install Python dependencies first (cached layer) +COPY pyproject.toml README.md LICENSE ./ +RUN mkdir -p nanobot bridge && touch nanobot/__init__.py && \ + uv pip install --system --no-cache . && \ + rm -rf nanobot bridge + +# Copy the full source and install +COPY nanobot/ nanobot/ +COPY bridge/ bridge/ +RUN uv pip install --system --no-cache . 
+ +# Build the WhatsApp bridge +WORKDIR /app/bridge +RUN npm install && npm run build +WORKDIR /app + +# Create config directory +RUN mkdir -p /root/.nanobot + +# Gateway default port +EXPOSE 18790 + +ENTRYPOINT ["nanobot"] +CMD ["status"] diff --git a/README.md b/README.md index 358d23e..3408003 100644 --- a/README.md +++ b/README.md @@ -8,6 +8,7 @@ License Feishu WeChat + Discord

@@ -15,19 +16,24 @@ ⚑️ Delivers core agent functionality in just **~4,000** lines of code β€” **99% smaller** than Clawdbot's 430k+ lines. +πŸ“ Real-time line count: **3,428 lines** (run `bash core_agent_lines.sh` to verify anytime) + ## πŸ“’ News -- **2025-02-01** πŸŽ‰ nanobot launched! Welcome to try 🐈 nanobot! +- **2026-02-05** ✨ Added Feishu channel, DeepSeek provider, and enhanced scheduled tasks support! +- **2026-02-04** πŸš€ Released v0.1.3.post4 with multi-provider & Docker support! Check [release notes](https://github.com/HKUDS/nanobot/releases/tag/v0.1.3.post4) for details. +- **2026-02-03** ⚑ Integrated vLLM for local LLM support and improved natural language task scheduling! +- **2026-02-02** πŸŽ‰ nanobot officially launched! Welcome to try 🐈 nanobot! ## Key Features of nanobot: -πŸͺΆ **Ultra-Lightweight**: Just ~4,000 lines of code β€” 99% smaller than Clawdbot - core functionality. +πŸͺΆ **Ultra-Lightweight**: Just ~3,400 lines of core agent code β€” 99% smaller than Clawdbot. πŸ”¬ **Research-Ready**: Clean, readable code that's easy to understand, modify, and extend for research. ⚑️ **Lightning Fast**: Minimal footprint means faster startup, lower resource usage, and quicker iterations. -πŸ’Ž **Easy-to-Use**: One-click to depoly and you're ready to go. +πŸ’Ž **Easy-to-Use**: One-click to deploy and you're ready to go. ## πŸ—οΈ Architecture @@ -60,13 +66,7 @@ ## πŸ“¦ Install -**Install from PyPi** - -```bash -pip install nanobot-ai -``` - -**Install from source** (recommended for development) +**Install from source** (latest features, recommended for development) ```bash git clone https://github.com/HKUDS/nanobot.git @@ -74,12 +74,16 @@ cd nanobot pip install -e . 
``` -**Install with uv** +**Install with [uv](https://github.com/astral-sh/uv)** (stable, fast) ```bash -uv venv -source .venv/bin/activate -uv pip install nanobot-ai +uv tool install nanobot-ai +``` + +**Install from PyPI** (stable) + +```bash +pip install nanobot-ai ``` ## πŸš€ Quick Start @@ -109,8 +113,12 @@ nanobot onboard "model": "anthropic/claude-opus-4-5" } }, - "webSearch": { - "apiKey": "BSA-xxx" + "tools": { + "web": { + "search": { + "apiKey": "BSA-xxx" + } + } } } ``` @@ -163,12 +171,14 @@ nanobot agent -m "Hello from my local LLM!" ## πŸ’¬ Chat Apps -Talk to your nanobot through Telegram or WhatsApp β€” anytime, anywhere. +Talk to your nanobot through Telegram, Discord, WhatsApp, or Feishu β€” anytime, anywhere. | Channel | Setup | |---------|-------| | **Telegram** | Easy (just a token) | +| **Discord** | Easy (bot token + intents) | | **WhatsApp** | Medium (scan QR) | +| **Feishu** | Medium (app credentials) |
Telegram (Recommended) @@ -202,6 +212,50 @@ nanobot gateway
+
+Discord + +**1. Create a bot** +- Go to https://discord.com/developers/applications +- Create an application → Bot → Add Bot +- Copy the bot token + +**2. Enable intents** +- In the Bot settings, enable **MESSAGE CONTENT INTENT** +- (Optional) Enable **SERVER MEMBERS INTENT** if you plan to use allow lists based on member data + +**3. Get your User ID** +- Discord Settings → Advanced → enable **Developer Mode** +- Right-click your avatar → **Copy User ID** + +**4. Configure** + +```json +{ + "channels": { + "discord": { + "enabled": true, + "token": "YOUR_BOT_TOKEN", + "allowFrom": ["YOUR_USER_ID"] + } + } +} +``` + +**5. Invite the bot** +- OAuth2 → URL Generator +- Scopes: `bot` +- Bot Permissions: `Send Messages`, `Read Message History` +- Open the generated invite URL and add the bot to your server + +**6. Run** + +```bash +nanobot gateway +``` + +
+
WhatsApp @@ -239,45 +293,85 @@ nanobot gateway
-## βš™οΈ Configuration -
-Full config example +Feishu (飞书) + +Uses **WebSocket** long connection — no public IP required. + +```bash +pip install nanobot-ai[feishu] +``` + +**1. Create a Feishu bot** +- Visit [Feishu Open Platform](https://open.feishu.cn/app) +- Create a new app → Enable **Bot** capability +- **Permissions**: Add `im:message` (send messages) +- **Events**: Add `im.message.receive_v1` (receive messages) + - Select **Long Connection** mode (requires running nanobot first to establish connection) +- Get **App ID** and **App Secret** from "Credentials & Basic Info" +- Publish the app + +**2. Configure** ```json { - "agents": { - "defaults": { - "model": "anthropic/claude-opus-4-5" - } - }, - "providers": { - "openrouter": { - "apiKey": "sk-or-v1-xxx" - } - }, "channels": { - "telegram": { + "feishu": { "enabled": true, - "token": "123456:ABC...", - "allowFrom": ["123456789"] - }, - "whatsapp": { - "enabled": false - } - }, - "tools": { - "web": { - "search": { - "apiKey": "BSA..." - } + "appId": "cli_xxx", + "appSecret": "xxx", + "encryptKey": "", + "verificationToken": "", + "allowFrom": [] } } } ``` +> `encryptKey` and `verificationToken` are optional for Long Connection mode. +> `allowFrom`: Leave empty to allow all users, or add `["ou_xxx"]` to restrict access. + +**3. Run** + +```bash +nanobot gateway +``` + +> [!TIP] +> Feishu uses WebSocket to receive messages — no webhook or public IP needed! +
+## βš™οΈ Configuration + +Config file: `~/.nanobot/config.json` + +### Providers + +> [!NOTE] +> Groq provides free voice transcription via Whisper. If configured, Telegram voice messages will be automatically transcribed. + +| Provider | Purpose | Get API Key | +|----------|---------|-------------| +| `openrouter` | LLM (recommended, access to all models) | [openrouter.ai](https://openrouter.ai) | +| `anthropic` | LLM (Claude direct) | [console.anthropic.com](https://console.anthropic.com) | +| `openai` | LLM (GPT direct) | [platform.openai.com](https://platform.openai.com) | +| `deepseek` | LLM (DeepSeek direct) | [platform.deepseek.com](https://platform.deepseek.com) | +| `groq` | LLM + **Voice transcription** (Whisper) | [console.groq.com](https://console.groq.com) | +| `gemini` | LLM (Gemini direct) | [aistudio.google.com](https://aistudio.google.com) | + + +### Security + +> [!TIP] +> For production deployments, set `"restrictToWorkspace": true` in your config to sandbox the agent. + +| Option | Default | Description | +|--------|---------|-------------| +| `tools.restrictToWorkspace` | `false` | When `true`, restricts **all** agent tools (shell, file read/write/edit, list) to the workspace directory. Prevents path traversal and out-of-scope access. | +| `channels.*.allowFrom` | `[]` (allow all) | Whitelist of user IDs. Empty = allow everyone; non-empty = only listed users can interact. | + + ## CLI Reference | Command | Description | @@ -307,6 +401,31 @@ nanobot cron remove +## 🐳 Docker + +> [!TIP] +> The `-v ~/.nanobot:/root/.nanobot` flag mounts your local config directory into the container, so your config and workspace persist across container restarts. + +Build and run nanobot in a container: + +```bash +# Build the image +docker build -t nanobot . 
+ +# Initialize config (first time only) +docker run -v ~/.nanobot:/root/.nanobot --rm nanobot onboard + +# Edit config on host to add API keys +vim ~/.nanobot/config.json + +# Run gateway (connects to the chat channels enabled in your config, e.g. Telegram/Discord/WhatsApp/Feishu) +docker run -v ~/.nanobot:/root/.nanobot -p 18790:18790 nanobot gateway + +# Or run a single command +docker run -v ~/.nanobot:/root/.nanobot --rm nanobot agent -m "Hello!" +docker run -v ~/.nanobot:/root/.nanobot --rm nanobot status +``` + ## 📁 Project Structure ``` @@ -335,6 +454,7 @@ PRs welcome! The codebase is intentionally small and readable. 🤗 **Roadmap** — Pick an item and [open a PR](https://github.com/HKUDS/nanobot/pulls)! +- [x] **Voice Transcription** — Support for Groq Whisper (Issue #13) - [ ] **Multi-modal** — See and hear (images, voice, video) - [ ] **Long-term memory** — Never forget important context - [ ] **Better reasoning** — Multi-step planning and reflection @@ -344,10 +464,9 @@ PRs welcome! The codebase is intentionally small and readable. 🤗 ### Contributors - + ---- ## ⭐ Star History @@ -365,3 +484,8 @@ PRs welcome! The codebase is intentionally small and readable. 🤗 Thanks for visiting ✨ nanobot!

Views

+ + +

+ nanobot is for educational, research, and technical exchange purposes only +

diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 0000000..ac15ba4 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,264 @@ +# Security Policy + +## Reporting a Vulnerability + +If you discover a security vulnerability in nanobot, please report it by: + +1. **DO NOT** open a public GitHub issue +2. Create a private security advisory on GitHub or contact the repository maintainers +3. Include: + - Description of the vulnerability + - Steps to reproduce + - Potential impact + - Suggested fix (if any) + +We aim to respond to security reports within 48 hours. + +## Security Best Practices + +### 1. API Key Management + +**CRITICAL**: Never commit API keys to version control. + +```bash +# βœ… Good: Store in config file with restricted permissions +chmod 600 ~/.nanobot/config.json + +# ❌ Bad: Hardcoding keys in code or committing them +``` + +**Recommendations:** +- Store API keys in `~/.nanobot/config.json` with file permissions set to `0600` +- Consider using environment variables for sensitive keys +- Use OS keyring/credential manager for production deployments +- Rotate API keys regularly +- Use separate API keys for development and production + +### 2. Channel Access Control + +**IMPORTANT**: Always configure `allowFrom` lists for production use. + +```json +{ + "channels": { + "telegram": { + "enabled": true, + "token": "YOUR_BOT_TOKEN", + "allowFrom": ["123456789", "987654321"] + }, + "whatsapp": { + "enabled": true, + "allowFrom": ["+1234567890"] + } + } +} +``` + +**Security Notes:** +- Empty `allowFrom` list will **ALLOW ALL** users (open by default for personal use) +- Get your Telegram user ID from `@userinfobot` +- Use full phone numbers with country code for WhatsApp +- Review access logs regularly for unauthorized access attempts + +### 3. Shell Command Execution + +The `exec` tool can execute shell commands. 
While dangerous command patterns are blocked, you should: + +- ✅ Review all tool usage in agent logs +- ✅ Understand what commands the agent is running +- ✅ Use a dedicated user account with limited privileges +- ✅ Never run nanobot as root +- ❌ Don't disable security checks +- ❌ Don't run on systems with sensitive data without careful review + +**Blocked patterns:** +- `rm -rf /` - Root filesystem deletion +- Fork bombs +- Filesystem formatting (`mkfs.*`) +- Raw disk writes +- Other destructive operations + +### 4. File System Access + +File operations have path traversal protection when `tools.restrictToWorkspace` is enabled (it is `false` by default), but: + +- ✅ Run nanobot with a dedicated user account +- ✅ Use filesystem permissions to protect sensitive directories +- ✅ Regularly audit file operations in logs +- ❌ Don't give unrestricted access to sensitive files + +### 5. Network Security + +**API Calls:** +- All external API calls use HTTPS by default +- Timeouts are configured to prevent hanging requests +- Consider using a firewall to restrict outbound connections if needed + +**WhatsApp Bridge:** +- The bridge runs on `localhost:3001` by default +- If exposing to network, use proper authentication and TLS +- Keep authentication data in `~/.nanobot/whatsapp-auth` secure (mode 0700) + +### 6. Dependency Security + +**Critical**: Keep dependencies updated! + +```bash +# Check for vulnerable dependencies +pip install pip-audit +pip-audit + +# Update to latest secure versions +pip install --upgrade nanobot-ai +``` + +For Node.js dependencies (WhatsApp bridge): +```bash +cd bridge +npm audit +npm audit fix +``` + +**Important Notes:** +- Keep `litellm` updated to the latest version for security fixes +- We've updated `ws` to `>=8.17.1` to fix a DoS vulnerability +- Run `pip-audit` or `npm audit` regularly +- Subscribe to security advisories for nanobot and its dependencies + +### 7. Production Deployment + +For production use: + +1. 
**Isolate the Environment** + ```bash + # Run in a container or VM + docker run --rm -it python:3.11 + pip install nanobot-ai + ``` + +2. **Use a Dedicated User** + ```bash + sudo useradd -m -s /bin/bash nanobot + sudo -u nanobot nanobot gateway + ``` + +3. **Set Proper Permissions** + ```bash + chmod 700 ~/.nanobot + chmod 600 ~/.nanobot/config.json + chmod 700 ~/.nanobot/whatsapp-auth + ``` + +4. **Enable Logging** + ```bash + # Configure log monitoring + tail -f ~/.nanobot/logs/nanobot.log + ``` + +5. **Use Rate Limiting** + - Configure rate limits on your API providers + - Monitor usage for anomalies + - Set spending limits on LLM APIs + +6. **Regular Updates** + ```bash + # Check for updates weekly + pip install --upgrade nanobot-ai + ``` + +### 8. Development vs Production + +**Development:** +- Use separate API keys +- Test with non-sensitive data +- Enable verbose logging +- Use a test Telegram bot + +**Production:** +- Use dedicated API keys with spending limits +- Restrict file system access +- Enable audit logging +- Regular security reviews +- Monitor for unusual activity + +### 9. Data Privacy + +- **Logs may contain sensitive information** - secure log files appropriately +- **LLM providers see your prompts** - review their privacy policies +- **Chat history is stored locally** - protect the `~/.nanobot` directory +- **API keys are in plain text** - use OS keyring for production + +### 10. Incident Response + +If you suspect a security breach: + +1. **Immediately revoke compromised API keys** +2. **Review logs for unauthorized access** + ```bash + grep "Access denied" ~/.nanobot/logs/nanobot.log + ``` +3. **Check for unexpected file modifications** +4. **Rotate all credentials** +5. **Update to latest version** +6. 
**Report the incident** to maintainers + +## Security Features + +### Built-in Security Controls + +✅ **Input Validation** +- Path traversal protection on file operations (enforced when `tools.restrictToWorkspace` is enabled) +- Dangerous command pattern detection +- Input length limits on HTTP requests + +✅ **Authentication** +- Allow-list based access control +- Failed authentication attempt logging +- Note: access is open by default (configure `allowFrom` for production use) + +✅ **Resource Protection** +- Command execution timeouts (60s default) +- Output truncation (10KB limit) +- HTTP request timeouts (10-30s) + +✅ **Secure Communication** +- HTTPS for all external API calls +- TLS for Telegram API +- WebSocket security for WhatsApp bridge + +## Known Limitations + +⚠️ **Current Security Limitations:** + +1. **No Rate Limiting** - Users can send unlimited messages (add your own if needed) +2. **Plain Text Config** - API keys stored in plain text (use keyring for production) +3. **No Session Management** - No automatic session expiry +4. **Limited Command Filtering** - Only blocks obvious dangerous patterns +5. **No Audit Trail** - Limited security event logging (enhance as needed) + +## Security Checklist + +Before deploying nanobot: + +- [ ] API keys stored securely (not in code) +- [ ] Config file permissions set to 0600 +- [ ] `allowFrom` lists configured for all channels +- [ ] Running as non-root user +- [ ] File system permissions properly restricted +- [ ] Dependencies updated to latest secure versions +- [ ] Logs monitored for security events +- [ ] Rate limits configured on API providers +- [ ] Backup and disaster recovery plan in place +- [ ] Security review of custom skills/tools + +## Updates + +**Last Updated**: 2026-02-03 + +For the latest security updates and announcements, check: +- GitHub Security Advisories: https://github.com/HKUDS/nanobot/security/advisories +- Release Notes: https://github.com/HKUDS/nanobot/releases + +## License + +See LICENSE file for details.
diff --git a/bridge/package.json b/bridge/package.json index e29fed8..e91517c 100644 --- a/bridge/package.json +++ b/bridge/package.json @@ -11,7 +11,7 @@ }, "dependencies": { "@whiskeysockets/baileys": "7.0.0-rc.9", - "ws": "^8.17.0", + "ws": "^8.17.1", "qrcode-terminal": "^0.12.0", "pino": "^9.0.0" }, diff --git a/bridge/src/whatsapp.ts b/bridge/src/whatsapp.ts index 4185632..a3a82fc 100644 --- a/bridge/src/whatsapp.ts +++ b/bridge/src/whatsapp.ts @@ -160,6 +160,11 @@ export class WhatsAppClient { return `[Document] ${message.documentMessage.caption}`; } + // Voice/Audio message + if (message.audioMessage) { + return `[Voice Message]`; + } + return null; } diff --git a/core_agent_lines.sh b/core_agent_lines.sh new file mode 100755 index 0000000..3f5301a --- /dev/null +++ b/core_agent_lines.sh @@ -0,0 +1,21 @@ +#!/bin/bash +# Count core agent lines (excluding channels/, cli/, providers/ adapters) +cd "$(dirname "$0")" || exit 1 + +echo "nanobot core agent line count" +echo "================================" +echo "" + +for dir in agent agent/tools bus config cron heartbeat session utils; do + count=$(find "nanobot/$dir" -maxdepth 1 -name "*.py" -exec cat {} + | wc -l) + printf " %-16s %5s lines\n" "$dir/" "$count" +done + +root=$(cat nanobot/__init__.py nanobot/__main__.py | wc -l) +printf " %-16s %5s lines\n" "(root)" "$root" + +echo "" +total=$(find nanobot -name "*.py" ! -path "*/channels/*" ! -path "*/cli/*" ! 
-path "*/providers/*" | xargs cat | wc -l) +echo " Core total: $total lines" +echo "" +echo " (excludes: channels/, cli/, providers/)" diff --git a/nanobot/agent/context.py b/nanobot/agent/context.py index f70103d..3ea6c04 100644 --- a/nanobot/agent/context.py +++ b/nanobot/agent/context.py @@ -2,6 +2,7 @@ import base64 import mimetypes +import platform from pathlib import Path from typing import Any @@ -74,6 +75,8 @@ Skills with available="false" need dependencies installed first - you can try in from datetime import datetime now = datetime.now().strftime("%Y-%m-%d %H:%M (%A)") workspace_path = str(self.workspace.expanduser().resolve()) + system = platform.system() + runtime = f"{'macOS' if system == 'Darwin' else system} {platform.machine()}, Python {platform.python_version()}" return f"""# nanobot 🐈 @@ -87,6 +90,9 @@ You are nanobot, a helpful AI assistant. You have access to tools that allow you ## Current Time {now} +## Runtime +{runtime} + ## Workspace Your workspace is at: {workspace_path} - Memory files: {workspace_path}/memory/MEMORY.md @@ -118,6 +124,8 @@ When remembering something, write to {workspace_path}/memory/MEMORY.md""" current_message: str, skill_names: list[str] | None = None, media: list[str] | None = None, + channel: str | None = None, + chat_id: str | None = None, ) -> list[dict[str, Any]]: """ Build the complete message list for an LLM call. @@ -127,6 +135,8 @@ When remembering something, write to {workspace_path}/memory/MEMORY.md""" current_message: The new user message. skill_names: Optional skills to include. media: Optional list of local file paths for images/media. + channel: Current channel (telegram, feishu, etc.). + chat_id: Current chat/user ID. Returns: List of messages including system prompt. 
@@ -135,6 +145,8 @@ When remembering something, write to {workspace_path}/memory/MEMORY.md""" # System prompt system_prompt = self.build_system_prompt(skill_names) + if channel and chat_id: + system_prompt += f"\n\n## Current Session\nChannel: {channel}\nChat ID: {chat_id}" messages.append({"role": "system", "content": system_prompt}) # History diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py index 4a96b84..e4193ec 100644 --- a/nanobot/agent/loop.py +++ b/nanobot/agent/loop.py @@ -17,6 +17,7 @@ from nanobot.agent.tools.shell import ExecTool from nanobot.agent.tools.web import WebSearchTool, WebFetchTool from nanobot.agent.tools.message import MessageTool from nanobot.agent.tools.spawn import SpawnTool +from nanobot.agent.tools.cron import CronTool from nanobot.agent.subagent import SubagentManager from nanobot.session.manager import SessionManager @@ -40,14 +41,22 @@ class AgentLoop: workspace: Path, model: str | None = None, max_iterations: int = 20, - brave_api_key: str | None = None + brave_api_key: str | None = None, + exec_config: "ExecToolConfig | None" = None, + cron_service: "CronService | None" = None, + restrict_to_workspace: bool = False, ): + from nanobot.config.schema import ExecToolConfig + from nanobot.cron.service import CronService self.bus = bus self.provider = provider self.workspace = workspace self.model = model or provider.get_default_model() self.max_iterations = max_iterations self.brave_api_key = brave_api_key + self.exec_config = exec_config or ExecToolConfig() + self.cron_service = cron_service + self.restrict_to_workspace = restrict_to_workspace self.context = ContextBuilder(workspace) self.sessions = SessionManager(workspace) @@ -58,6 +67,8 @@ class AgentLoop: bus=bus, model=self.model, brave_api_key=brave_api_key, + exec_config=self.exec_config, + restrict_to_workspace=restrict_to_workspace, ) self._running = False @@ -65,14 +76,19 @@ class AgentLoop: def _register_default_tools(self) -> None: """Register the default set of 
tools.""" - # File tools - self.tools.register(ReadFileTool()) - self.tools.register(WriteFileTool()) - self.tools.register(EditFileTool()) - self.tools.register(ListDirTool()) + # File tools (restrict to workspace if configured) + allowed_dir = self.workspace if self.restrict_to_workspace else None + self.tools.register(ReadFileTool(allowed_dir=allowed_dir)) + self.tools.register(WriteFileTool(allowed_dir=allowed_dir)) + self.tools.register(EditFileTool(allowed_dir=allowed_dir)) + self.tools.register(ListDirTool(allowed_dir=allowed_dir)) # Shell tool - self.tools.register(ExecTool(working_dir=str(self.workspace))) + self.tools.register(ExecTool( + working_dir=str(self.workspace), + timeout=self.exec_config.timeout, + restrict_to_workspace=self.restrict_to_workspace, + )) # Web tools self.tools.register(WebSearchTool(api_key=self.brave_api_key)) @@ -85,6 +101,10 @@ class AgentLoop: # Spawn tool (for subagents) spawn_tool = SpawnTool(manager=self.subagents) self.tools.register(spawn_tool) + + # Cron tool (for scheduling) + if self.cron_service: + self.tools.register(CronTool(self.cron_service)) async def run(self) -> None: """Run the agent loop, processing messages from the bus.""" @@ -149,11 +169,17 @@ class AgentLoop: if isinstance(spawn_tool, SpawnTool): spawn_tool.set_context(msg.channel, msg.chat_id) + cron_tool = self.tools.get("cron") + if isinstance(cron_tool, CronTool): + cron_tool.set_context(msg.channel, msg.chat_id) + # Build initial messages (use get_history for LLM-formatted messages) messages = self.context.build_messages( history=session.get_history(), current_message=msg.content, media=msg.media if msg.media else None, + channel=msg.channel, + chat_id=msg.chat_id, ) # Agent loop @@ -247,10 +273,16 @@ class AgentLoop: if isinstance(spawn_tool, SpawnTool): spawn_tool.set_context(origin_channel, origin_chat_id) + cron_tool = self.tools.get("cron") + if isinstance(cron_tool, CronTool): + cron_tool.set_context(origin_channel, origin_chat_id) + # Build 
messages with the announce content messages = self.context.build_messages( history=session.get_history(), - current_message=msg.content + current_message=msg.content, + channel=origin_channel, + chat_id=origin_chat_id, ) # Agent loop (limited for announce handling) @@ -307,21 +339,29 @@ class AgentLoop: content=final_content ) - async def process_direct(self, content: str, session_key: str = "cli:direct") -> str: + async def process_direct( + self, + content: str, + session_key: str = "cli:direct", + channel: str = "cli", + chat_id: str = "direct", + ) -> str: """ - Process a message directly (for CLI usage). + Process a message directly (for CLI or cron usage). Args: content: The message content. session_key: Session identifier. + channel: Source channel (for context). + chat_id: Source chat ID (for context). Returns: The agent's response. """ msg = InboundMessage( - channel="cli", + channel=channel, sender_id="user", - chat_id="direct", + chat_id=chat_id, content=content ) diff --git a/nanobot/agent/subagent.py b/nanobot/agent/subagent.py index d3b320c..6113efb 100644 --- a/nanobot/agent/subagent.py +++ b/nanobot/agent/subagent.py @@ -33,12 +33,17 @@ class SubagentManager: bus: MessageBus, model: str | None = None, brave_api_key: str | None = None, + exec_config: "ExecToolConfig | None" = None, + restrict_to_workspace: bool = False, ): + from nanobot.config.schema import ExecToolConfig self.provider = provider self.workspace = workspace self.bus = bus self.model = model or provider.get_default_model() self.brave_api_key = brave_api_key + self.exec_config = exec_config or ExecToolConfig() + self.restrict_to_workspace = restrict_to_workspace self._running_tasks: dict[str, asyncio.Task[None]] = {} async def spawn( @@ -93,10 +98,15 @@ class SubagentManager: try: # Build subagent tools (no message tool, no spawn tool) tools = ToolRegistry() - tools.register(ReadFileTool()) - tools.register(WriteFileTool()) - tools.register(ListDirTool()) - 
tools.register(ExecTool(working_dir=str(self.workspace))) + allowed_dir = self.workspace if self.restrict_to_workspace else None + tools.register(ReadFileTool(allowed_dir=allowed_dir)) + tools.register(WriteFileTool(allowed_dir=allowed_dir)) + tools.register(ListDirTool(allowed_dir=allowed_dir)) + tools.register(ExecTool( + working_dir=str(self.workspace), + timeout=self.exec_config.timeout, + restrict_to_workspace=self.restrict_to_workspace, + )) tools.register(WebSearchTool(api_key=self.brave_api_key)) tools.register(WebFetchTool()) @@ -142,7 +152,8 @@ class SubagentManager: # Execute tools for tool_call in response.tool_calls: - logger.debug(f"Subagent [{task_id}] executing: {tool_call.name}") + args_str = json.dumps(tool_call.arguments) + logger.debug(f"Subagent [{task_id}] executing: {tool_call.name} with arguments: {args_str}") result = await tools.execute(tool_call.name, tool_call.arguments) messages.append({ "role": "tool", diff --git a/nanobot/agent/tools/base.py b/nanobot/agent/tools/base.py index 6fcfec6..ca9bcc2 100644 --- a/nanobot/agent/tools/base.py +++ b/nanobot/agent/tools/base.py @@ -12,6 +12,15 @@ class Tool(ABC): the environment, such as reading files, executing commands, etc. """ + _TYPE_MAP = { + "string": str, + "integer": int, + "number": (int, float), + "boolean": bool, + "array": list, + "object": dict, + } + @property @abstractmethod def name(self) -> str: @@ -42,6 +51,44 @@ class Tool(ABC): String result of the tool execution. """ pass + + def validate_params(self, params: dict[str, Any]) -> list[str]: + """Validate tool parameters against JSON schema. 
def validate_params(self, params: dict[str, Any]) -> list[str]:
    """Validate tool parameters against this tool's JSON schema.

    Args:
        params: The arguments supplied for a tool call.

    Returns:
        A list of human-readable error messages; empty when ``params`` is valid.

    Raises:
        ValueError: If the tool's top-level schema declares a type other
            than ``"object"``.
    """
    schema = self.parameters or {}
    if schema.get("type", "object") != "object":
        raise ValueError(f"Schema must be object type, got {schema.get('type')!r}")
    # Force the root type so a schema that omits "type" is still checked as an object.
    return self._validate(params, {**schema, "type": "object"}, "")


def _validate(self, val: Any, schema: dict[str, Any], path: str) -> list[str]:
    """Recursively check ``val`` against ``schema`` and collect error messages.

    Args:
        val: The value to check.
        schema: The JSON-schema fragment describing ``val``.
        path: Dotted/indexed location of ``val`` within the parameters
            ("" for the root); used only to label messages.

    Returns:
        A list of error messages. A type mismatch short-circuits: no further
        constraints are evaluated for that value.
    """
    t, label = schema.get("type"), path or "parameter"

    # Only plain string type names are looked up. A missing type or a list of
    # types skips the type check instead of raising on an unhashable dict key.
    expected = self._TYPE_MAP.get(t) if isinstance(t, str) else None
    if expected is not None:
        # bool is a subclass of int in Python, but JSON Schema does not treat
        # true/false as numbers, so reject it explicitly for numeric types.
        bool_as_number = t in ("integer", "number") and isinstance(val, bool)
        if bool_as_number or not isinstance(val, expected):
            return [f"{label} should be {t}"]

    errors = []
    if "enum" in schema and val not in schema["enum"]:
        errors.append(f"{label} must be one of {schema['enum']}")
    if t in ("integer", "number"):
        if "minimum" in schema and val < schema["minimum"]:
            errors.append(f"{label} must be >= {schema['minimum']}")
        if "maximum" in schema and val > schema["maximum"]:
            errors.append(f"{label} must be <= {schema['maximum']}")
    if t == "string":
        if "minLength" in schema and len(val) < schema["minLength"]:
            errors.append(f"{label} must be at least {schema['minLength']} chars")
        if "maxLength" in schema and len(val) > schema["maxLength"]:
            errors.append(f"{label} must be at most {schema['maxLength']} chars")
    if t == "object":
        props = schema.get("properties", {})
        for k in schema.get("required", []):
            if k not in val:
                errors.append(f"missing required {path + '.' + k if path else k}")
        # Keys without a declared property schema are accepted unchecked.
        for k, v in val.items():
            if k in props:
                errors.extend(self._validate(v, props[k], path + '.' + k if path else k))
    if t == "array" and "items" in schema:
        for i, item in enumerate(val):
            errors.extend(self._validate(item, schema["items"], f"{path}[{i}]" if path else f"[{i}]"))
    return errors
class CronTool(Tool):
    """Agent tool for scheduling reminders and recurring tasks.

    Jobs are created through the injected cron service and are addressed to
    the channel/chat recorded by the most recent ``set_context`` call.
    """

    def __init__(self, cron_service: CronService):
        self._cron = cron_service
        # Delivery target; stays empty until set_context() is called.
        self._channel = ""
        self._chat_id = ""

    def set_context(self, channel: str, chat_id: str) -> None:
        """Set the current session context used as the delivery target."""
        self._channel = channel
        self._chat_id = chat_id

    @property
    def name(self) -> str:
        return "cron"

    @property
    def description(self) -> str:
        return "Schedule reminders and recurring tasks. Actions: add, list, remove."

    @property
    def parameters(self) -> dict[str, Any]:
        return {
            "type": "object",
            "properties": {
                "action": {
                    "type": "string",
                    "enum": ["add", "list", "remove"],
                    "description": "Action to perform"
                },
                "message": {
                    "type": "string",
                    "description": "Reminder message (for add)"
                },
                "every_seconds": {
                    "type": "integer",
                    "description": "Interval in seconds (for recurring tasks)"
                },
                "cron_expr": {
                    "type": "string",
                    "description": "Cron expression like '0 9 * * *' (for scheduled tasks)"
                },
                "job_id": {
                    "type": "string",
                    "description": "Job ID (for remove)"
                }
            },
            "required": ["action"]
        }

    async def execute(
        self,
        action: str,
        message: str = "",
        every_seconds: int | None = None,
        cron_expr: str | None = None,
        job_id: str | None = None,
        **kwargs: Any
    ) -> str:
        """Dispatch ``action`` ("add", "list" or "remove") and return a status string.

        Errors are reported in the returned text rather than raised.
        """
        if action == "add":
            return self._add_job(message, every_seconds, cron_expr)
        elif action == "list":
            return self._list_jobs()
        elif action == "remove":
            return self._remove_job(job_id)
        return f"Unknown action: {action}"

    def _add_job(self, message: str, every_seconds: int | None, cron_expr: str | None) -> str:
        """Create a job delivering ``message`` to the current session.

        ``every_seconds`` takes precedence over ``cron_expr`` when both are
        given. A value of 0 is treated as "not provided", so a cron expression
        supplied alongside it is still honoured.
        """
        if not message:
            return "Error: message is required for add"
        if not self._channel or not self._chat_id:
            return "Error: no session context (channel/chat_id)"

        # A negative interval can never be a valid schedule; reject it here
        # instead of handing a negative every_ms to the cron service.
        if every_seconds is not None and every_seconds < 0:
            return "Error: every_seconds must be a positive integer"

        # Build schedule
        if every_seconds:
            schedule = CronSchedule(kind="every", every_ms=every_seconds * 1000)
        elif cron_expr:
            schedule = CronSchedule(kind="cron", expr=cron_expr)
        else:
            return "Error: either every_seconds or cron_expr is required"

        job = self._cron.add_job(
            name=message[:30],  # job name is the first 30 characters of the message
            schedule=schedule,
            message=message,
            deliver=True,
            channel=self._channel,
            to=self._chat_id,
        )
        return f"Created job '{job.name}' (id: {job.id})"

    def _list_jobs(self) -> str:
        """Return a bullet list of the jobs reported by the cron service."""
        jobs = self._cron.list_jobs()
        if not jobs:
            return "No scheduled jobs."
        lines = [f"- {j.name} (id: {j.id}, {j.schedule.kind})" for j in jobs]
        return "Scheduled jobs:\n" + "\n".join(lines)

    def _remove_job(self, job_id: str | None) -> str:
        """Remove the job with ``job_id`` and report whether it existed."""
        if not job_id:
            return "Error: job_id is required for remove"
        if self._cron.remove_job(job_id):
            return f"Removed job {job_id}"
        return f"Job {job_id} not found"
+ lines = [f"- {j.name} (id: {j.id}, {j.schedule.kind})" for j in jobs] + return "Scheduled jobs:\n" + "\n".join(lines) + + def _remove_job(self, job_id: str | None) -> str: + if not job_id: + return "Error: job_id is required for remove" + if self._cron.remove_job(job_id): + return f"Removed job {job_id}" + return f"Job {job_id} not found" diff --git a/nanobot/agent/tools/filesystem.py b/nanobot/agent/tools/filesystem.py index e141fab..6b3254a 100644 --- a/nanobot/agent/tools/filesystem.py +++ b/nanobot/agent/tools/filesystem.py @@ -6,9 +6,20 @@ from typing import Any from nanobot.agent.tools.base import Tool +def _resolve_path(path: str, allowed_dir: Path | None = None) -> Path: + """Resolve path and optionally enforce directory restriction.""" + resolved = Path(path).expanduser().resolve() + if allowed_dir and not str(resolved).startswith(str(allowed_dir.resolve())): + raise PermissionError(f"Path {path} is outside allowed directory {allowed_dir}") + return resolved + + class ReadFileTool(Tool): """Tool to read file contents.""" + def __init__(self, allowed_dir: Path | None = None): + self._allowed_dir = allowed_dir + @property def name(self) -> str: return "read_file" @@ -32,7 +43,7 @@ class ReadFileTool(Tool): async def execute(self, path: str, **kwargs: Any) -> str: try: - file_path = Path(path).expanduser() + file_path = _resolve_path(path, self._allowed_dir) if not file_path.exists(): return f"Error: File not found: {path}" if not file_path.is_file(): @@ -40,8 +51,8 @@ class ReadFileTool(Tool): content = file_path.read_text(encoding="utf-8") return content - except PermissionError: - return f"Error: Permission denied: {path}" + except PermissionError as e: + return f"Error: {e}" except Exception as e: return f"Error reading file: {str(e)}" @@ -49,6 +60,9 @@ class ReadFileTool(Tool): class WriteFileTool(Tool): """Tool to write content to a file.""" + def __init__(self, allowed_dir: Path | None = None): + self._allowed_dir = allowed_dir + @property def 
async def execute(self, path: str, content: str, **kwargs: Any) -> str:
    """Write ``content`` to ``path`` as UTF-8, creating parent directories.

    Args:
        path: Destination file; resolved and checked against the tool's
            allowed directory by ``_resolve_path``.
        content: Text to write. Any existing file is overwritten.
        **kwargs: Ignored.

    Returns:
        A success message with the number of bytes written, or an
        ``"Error..."`` string; exceptions are reported, not raised.
    """
    try:
        file_path = _resolve_path(path, self._allowed_dir)
        file_path.parent.mkdir(parents=True, exist_ok=True)
        file_path.write_text(content, encoding="utf-8")
        # The message reports bytes, so measure the encoded size;
        # len(content) counts characters and under-reports non-ASCII text.
        byte_count = len(content.encode("utf-8"))
        return f"Successfully wrote {byte_count} bytes to {path}"
    except PermissionError as e:
        return f"Error: {e}"
    except Exception as e:
        return f"Error writing file: {str(e)}"
if not dir_path.is_dir(): @@ -185,7 +205,7 @@ class ListDirTool(Tool): return f"Directory {path} is empty" return "\n".join(items) - except PermissionError: - return f"Error: Permission denied: {path}" + except PermissionError as e: + return f"Error: {e}" except Exception as e: return f"Error listing directory: {str(e)}" diff --git a/nanobot/agent/tools/registry.py b/nanobot/agent/tools/registry.py index 1e8f56d..d9b33ff 100644 --- a/nanobot/agent/tools/registry.py +++ b/nanobot/agent/tools/registry.py @@ -52,8 +52,11 @@ class ToolRegistry: tool = self._tools.get(name) if not tool: return f"Error: Tool '{name}' not found" - + try: + errors = tool.validate_params(params) + if errors: + return f"Error: Invalid parameters for tool '{name}': " + "; ".join(errors) return await tool.execute(**params) except Exception as e: return f"Error executing {name}: {str(e)}" diff --git a/nanobot/agent/tools/shell.py b/nanobot/agent/tools/shell.py index bf7f064..143d187 100644 --- a/nanobot/agent/tools/shell.py +++ b/nanobot/agent/tools/shell.py @@ -2,6 +2,8 @@ import asyncio import os +import re +from pathlib import Path from typing import Any from nanobot.agent.tools.base import Tool @@ -10,9 +12,28 @@ from nanobot.agent.tools.base import Tool class ExecTool(Tool): """Tool to execute shell commands.""" - def __init__(self, timeout: int = 60, working_dir: str | None = None): + def __init__( + self, + timeout: int = 60, + working_dir: str | None = None, + deny_patterns: list[str] | None = None, + allow_patterns: list[str] | None = None, + restrict_to_workspace: bool = False, + ): self.timeout = timeout self.working_dir = working_dir + self.deny_patterns = deny_patterns or [ + r"\brm\s+-[rf]{1,2}\b", # rm -r, rm -rf, rm -fr + r"\bdel\s+/[fq]\b", # del /f, del /q + r"\brmdir\s+/s\b", # rmdir /s + r"\b(format|mkfs|diskpart)\b", # disk operations + r"\bdd\s+if=", # dd + r">\s*/dev/sd", # write to disk + r"\b(shutdown|reboot|poweroff)\b", # system power + r":\(\)\s*\{.*\};\s*:", # fork 
bomb + ] + self.allow_patterns = allow_patterns or [] + self.restrict_to_workspace = restrict_to_workspace @property def name(self) -> str: @@ -41,6 +62,9 @@ class ExecTool(Tool): async def execute(self, command: str, working_dir: str | None = None, **kwargs: Any) -> str: cwd = working_dir or self.working_dir or os.getcwd() + guard_error = self._guard_command(command, cwd) + if guard_error: + return guard_error try: process = await asyncio.create_subprocess_shell( @@ -83,3 +107,35 @@ class ExecTool(Tool): except Exception as e: return f"Error executing command: {str(e)}" + + def _guard_command(self, command: str, cwd: str) -> str | None: + """Best-effort safety guard for potentially destructive commands.""" + cmd = command.strip() + lower = cmd.lower() + + for pattern in self.deny_patterns: + if re.search(pattern, lower): + return "Error: Command blocked by safety guard (dangerous pattern detected)" + + if self.allow_patterns: + if not any(re.search(p, lower) for p in self.allow_patterns): + return "Error: Command blocked by safety guard (not in allowlist)" + + if self.restrict_to_workspace: + if "..\\" in cmd or "../" in cmd: + return "Error: Command blocked by safety guard (path traversal detected)" + + cwd_path = Path(cwd).resolve() + + win_paths = re.findall(r"[A-Za-z]:\\[^\\\"']+", cmd) + posix_paths = re.findall(r"/[^\s\"']+", cmd) + + for raw in win_paths + posix_paths: + try: + p = Path(raw).resolve() + except Exception: + continue + if cwd_path not in p.parents and p != cwd_path: + return "Error: Command blocked by safety guard (path outside working dir)" + + return None diff --git a/nanobot/agent/tools/web.py b/nanobot/agent/tools/web.py index c9d989c..9de1d3c 100644 --- a/nanobot/agent/tools/web.py +++ b/nanobot/agent/tools/web.py @@ -5,6 +5,7 @@ import json import os import re from typing import Any +from urllib.parse import urlparse import httpx @@ -12,6 +13,7 @@ from nanobot.agent.tools.base import Tool # Shared constants USER_AGENT = "Mozilla/5.0 
(Macintosh; Intel Mac OS X 14_7_2) AppleWebKit/537.36" +MAX_REDIRECTS = 5 # Limit redirects to prevent DoS attacks def _strip_tags(text: str) -> str: @@ -28,6 +30,19 @@ def _normalize(text: str) -> str: return re.sub(r'\n{3,}', '\n\n', text).strip() +def _validate_url(url: str) -> tuple[bool, str]: + """Validate URL: must be http(s) with valid domain.""" + try: + p = urlparse(url) + if p.scheme not in ('http', 'https'): + return False, f"Only http/https allowed, got '{p.scheme or 'none'}'" + if not p.netloc: + return False, "Missing domain" + return True, "" + except Exception as e: + return False, str(e) + + class WebSearchTool(Tool): """Search the web using Brave Search API.""" @@ -95,12 +110,21 @@ class WebFetchTool(Tool): async def execute(self, url: str, extractMode: str = "markdown", maxChars: int | None = None, **kwargs: Any) -> str: from readability import Document - + max_chars = maxChars or self.max_chars - + + # Validate URL before fetching + is_valid, error_msg = _validate_url(url) + if not is_valid: + return json.dumps({"error": f"URL validation failed: {error_msg}", "url": url}) + try: - async with httpx.AsyncClient() as client: - r = await client.get(url, headers={"User-Agent": USER_AGENT}, follow_redirects=True, timeout=30.0) + async with httpx.AsyncClient( + follow_redirects=True, + max_redirects=MAX_REDIRECTS, + timeout=30.0 + ) as client: + r = await client.get(url, headers={"User-Agent": USER_AGENT}) r.raise_for_status() ctype = r.headers.get("content-type", "") diff --git a/nanobot/channels/base.py b/nanobot/channels/base.py index 8f16399..30fcd1a 100644 --- a/nanobot/channels/base.py +++ b/nanobot/channels/base.py @@ -3,6 +3,8 @@ from abc import ABC, abstractmethod from typing import Any +from loguru import logger + from nanobot.bus.events import InboundMessage, OutboundMessage from nanobot.bus.queue import MessageBus @@ -102,6 +104,10 @@ class BaseChannel(ABC): metadata: Optional channel-specific metadata. 
""" if not self.is_allowed(sender_id): + logger.warning( + f"Access denied for sender {sender_id} on channel {self.name}. " + f"Add them to allowFrom list in config to grant access." + ) return msg = InboundMessage( diff --git a/nanobot/channels/discord.py b/nanobot/channels/discord.py new file mode 100644 index 0000000..a76d6ac --- /dev/null +++ b/nanobot/channels/discord.py @@ -0,0 +1,261 @@ +"""Discord channel implementation using Discord Gateway websocket.""" + +import asyncio +import json +from pathlib import Path +from typing import Any + +import httpx +import websockets +from loguru import logger + +from nanobot.bus.events import OutboundMessage +from nanobot.bus.queue import MessageBus +from nanobot.channels.base import BaseChannel +from nanobot.config.schema import DiscordConfig + + +DISCORD_API_BASE = "https://discord.com/api/v10" +MAX_ATTACHMENT_BYTES = 20 * 1024 * 1024 # 20MB + + +class DiscordChannel(BaseChannel): + """Discord channel using Gateway websocket.""" + + name = "discord" + + def __init__(self, config: DiscordConfig, bus: MessageBus): + super().__init__(config, bus) + self.config: DiscordConfig = config + self._ws: websockets.WebSocketClientProtocol | None = None + self._seq: int | None = None + self._heartbeat_task: asyncio.Task | None = None + self._typing_tasks: dict[str, asyncio.Task] = {} + self._http: httpx.AsyncClient | None = None + + async def start(self) -> None: + """Start the Discord gateway connection.""" + if not self.config.token: + logger.error("Discord bot token not configured") + return + + self._running = True + self._http = httpx.AsyncClient(timeout=30.0) + + while self._running: + try: + logger.info("Connecting to Discord gateway...") + async with websockets.connect(self.config.gateway_url) as ws: + self._ws = ws + await self._gateway_loop() + except asyncio.CancelledError: + break + except Exception as e: + logger.warning(f"Discord gateway error: {e}") + if self._running: + logger.info("Reconnecting to Discord gateway 
async def send(self, msg: OutboundMessage) -> None:
    """Send a message through the Discord REST API.

    Posts ``msg.content`` to the channel ``msg.chat_id``, as a reply when
    ``msg.reply_to`` is set. Up to three attempts are made; a 429 response
    waits for the server-provided ``retry_after`` before the next attempt.
    Failures are logged, never raised, and the typing indicator for the
    channel is always stopped afterwards.
    """
    if not self._http:
        logger.warning("Discord HTTP client not initialized")
        return

    url = f"{DISCORD_API_BASE}/channels/{msg.chat_id}/messages"
    payload: dict[str, Any] = {"content": msg.content}

    if msg.reply_to:
        payload["message_reference"] = {"message_id": msg.reply_to}
        payload["allowed_mentions"] = {"replied_user": False}

    headers = {"Authorization": f"Bot {self.config.token}"}
    max_attempts = 3
    # True when the most recent attempt ended in a 429, so exhaustion by
    # rate limiting can be reported instead of dropping the message silently.
    rate_limited = False

    try:
        for attempt in range(1, max_attempts + 1):
            try:
                response = await self._http.post(url, headers=headers, json=payload)
                if response.status_code == 429:
                    rate_limited = True
                    data = response.json()
                    retry_after = float(data.get("retry_after", 1.0))
                    if attempt < max_attempts:
                        logger.warning(f"Discord rate limited, retrying in {retry_after}s")
                        await asyncio.sleep(retry_after)
                    continue
                response.raise_for_status()
                return
            except Exception as e:
                rate_limited = False
                if attempt == max_attempts:
                    logger.error(f"Error sending Discord message: {e}")
                else:
                    await asyncio.sleep(1)
        if rate_limited:
            logger.error(
                f"Error sending Discord message: still rate limited after {max_attempts} attempts"
            )
    finally:
        await self._stop_typing(msg.chat_id)
and identify + interval_ms = payload.get("heartbeat_interval", 45000) + await self._start_heartbeat(interval_ms / 1000) + await self._identify() + elif op == 0 and event_type == "READY": + logger.info("Discord gateway READY") + elif op == 0 and event_type == "MESSAGE_CREATE": + await self._handle_message_create(payload) + elif op == 7: + # RECONNECT: exit loop to reconnect + logger.info("Discord gateway requested reconnect") + break + elif op == 9: + # INVALID_SESSION: reconnect + logger.warning("Discord gateway invalid session") + break + + async def _identify(self) -> None: + """Send IDENTIFY payload.""" + if not self._ws: + return + + identify = { + "op": 2, + "d": { + "token": self.config.token, + "intents": self.config.intents, + "properties": { + "os": "nanobot", + "browser": "nanobot", + "device": "nanobot", + }, + }, + } + await self._ws.send(json.dumps(identify)) + + async def _start_heartbeat(self, interval_s: float) -> None: + """Start or restart the heartbeat loop.""" + if self._heartbeat_task: + self._heartbeat_task.cancel() + + async def heartbeat_loop() -> None: + while self._running and self._ws: + payload = {"op": 1, "d": self._seq} + try: + await self._ws.send(json.dumps(payload)) + except Exception as e: + logger.warning(f"Discord heartbeat failed: {e}") + break + await asyncio.sleep(interval_s) + + self._heartbeat_task = asyncio.create_task(heartbeat_loop()) + + async def _handle_message_create(self, payload: dict[str, Any]) -> None: + """Handle incoming Discord messages.""" + author = payload.get("author") or {} + if author.get("bot"): + return + + sender_id = str(author.get("id", "")) + channel_id = str(payload.get("channel_id", "")) + content = payload.get("content") or "" + + if not sender_id or not channel_id: + return + + if not self.is_allowed(sender_id): + return + + content_parts = [content] if content else [] + media_paths: list[str] = [] + media_dir = Path.home() / ".nanobot" / "media" + + for attachment in payload.get("attachments") 
or []: + url = attachment.get("url") + filename = attachment.get("filename") or "attachment" + size = attachment.get("size") or 0 + if not url or not self._http: + continue + if size and size > MAX_ATTACHMENT_BYTES: + content_parts.append(f"[attachment: {filename} - too large]") + continue + try: + media_dir.mkdir(parents=True, exist_ok=True) + file_path = media_dir / f"{attachment.get('id', 'file')}_{filename.replace('/', '_')}" + resp = await self._http.get(url) + resp.raise_for_status() + file_path.write_bytes(resp.content) + media_paths.append(str(file_path)) + content_parts.append(f"[attachment: {file_path}]") + except Exception as e: + logger.warning(f"Failed to download Discord attachment: {e}") + content_parts.append(f"[attachment: {filename} - download failed]") + + reply_to = (payload.get("referenced_message") or {}).get("id") + + await self._start_typing(channel_id) + + await self._handle_message( + sender_id=sender_id, + chat_id=channel_id, + content="\n".join(p for p in content_parts if p) or "[empty message]", + media=media_paths, + metadata={ + "message_id": str(payload.get("id", "")), + "guild_id": payload.get("guild_id"), + "reply_to": reply_to, + }, + ) + + async def _start_typing(self, channel_id: str) -> None: + """Start periodic typing indicator for a channel.""" + await self._stop_typing(channel_id) + + async def typing_loop() -> None: + url = f"{DISCORD_API_BASE}/channels/{channel_id}/typing" + headers = {"Authorization": f"Bot {self.config.token}"} + while self._running: + try: + await self._http.post(url, headers=headers) + except Exception: + pass + await asyncio.sleep(8) + + self._typing_tasks[channel_id] = asyncio.create_task(typing_loop()) + + async def _stop_typing(self, channel_id: str) -> None: + """Stop typing indicator for a channel.""" + task = self._typing_tasks.pop(channel_id, None) + if task: + task.cancel() diff --git a/nanobot/channels/feishu.py b/nanobot/channels/feishu.py new file mode 100644 index 0000000..01b808e --- 
/dev/null +++ b/nanobot/channels/feishu.py @@ -0,0 +1,263 @@ +"""Feishu/Lark channel implementation using lark-oapi SDK with WebSocket long connection.""" + +import asyncio +import json +import threading +from collections import OrderedDict +from typing import Any + +from loguru import logger + +from nanobot.bus.events import OutboundMessage +from nanobot.bus.queue import MessageBus +from nanobot.channels.base import BaseChannel +from nanobot.config.schema import FeishuConfig + +try: + import lark_oapi as lark + from lark_oapi.api.im.v1 import ( + CreateMessageRequest, + CreateMessageRequestBody, + CreateMessageReactionRequest, + CreateMessageReactionRequestBody, + Emoji, + P2ImMessageReceiveV1, + ) + FEISHU_AVAILABLE = True +except ImportError: + FEISHU_AVAILABLE = False + lark = None + Emoji = None + +# Message type display mapping +MSG_TYPE_MAP = { + "image": "[image]", + "audio": "[audio]", + "file": "[file]", + "sticker": "[sticker]", +} + + +class FeishuChannel(BaseChannel): + """ + Feishu/Lark channel using WebSocket long connection. + + Uses WebSocket to receive events - no public IP or webhook required. + + Requires: + - App ID and App Secret from Feishu Open Platform + - Bot capability enabled + - Event subscription enabled (im.message.receive_v1) + """ + + name = "feishu" + + def __init__(self, config: FeishuConfig, bus: MessageBus): + super().__init__(config, bus) + self.config: FeishuConfig = config + self._client: Any = None + self._ws_client: Any = None + self._ws_thread: threading.Thread | None = None + self._processed_message_ids: OrderedDict[str, None] = OrderedDict() # Ordered dedup cache + self._loop: asyncio.AbstractEventLoop | None = None + + async def start(self) -> None: + """Start the Feishu bot with WebSocket long connection.""" + if not FEISHU_AVAILABLE: + logger.error("Feishu SDK not installed. 
async def stop(self) -> None:
    """Stop the Feishu bot.

    Clears the running flag and asks the WebSocket client, if one was
    created, to stop. A failure while stopping is logged and not propagated.
    """
    self._running = False

    ws_client = self._ws_client
    if ws_client:
        try:
            ws_client.stop()
        except Exception as exc:
            logger.warning(f"Error stopping WebSocket client: {exc}")

    logger.info("Feishu bot stopped")
async def send(self, msg: OutboundMessage) -> None:
    """Send a text message through Feishu.

    The receive-id type is derived from ``msg.chat_id``: ids starting with
    ``"oc_"`` are treated as chat ids, everything else as an open_id. The
    SDK call is synchronous, so it is dispatched to the default executor
    (as ``_add_reaction`` does) rather than blocking the event loop.
    Failures are logged, never raised.
    """
    if not self._client:
        logger.warning("Feishu client not initialized")
        return

    try:
        # open_id starts with "ou_", chat_id starts with "oc_"
        receive_id_type = "chat_id" if msg.chat_id.startswith("oc_") else "open_id"

        # Build text message content
        content = json.dumps({"text": msg.content})

        request = CreateMessageRequest.builder() \
            .receive_id_type(receive_id_type) \
            .request_body(
                CreateMessageRequestBody.builder()
                .receive_id(msg.chat_id)
                .msg_type("text")
                .content(content)
                .build()
            ).build()

        # Blocking network call: run it off the event loop thread.
        loop = asyncio.get_running_loop()
        response = await loop.run_in_executor(
            None, self._client.im.v1.message.create, request
        )

        if not response.success():
            logger.error(
                f"Failed to send Feishu message: code={response.code}, "
                f"msg={response.msg}, log_id={response.get_log_id()}"
            )
        else:
            logger.debug(f"Feishu message sent to {msg.chat_id}")

    except Exception as e:
        logger.error(f"Error sending Feishu message: {e}")
(called from WebSocket thread). + Schedules async handling in the main event loop. + """ + if self._loop and self._loop.is_running(): + asyncio.run_coroutine_threadsafe(self._on_message(data), self._loop) + + async def _on_message(self, data: "P2ImMessageReceiveV1") -> None: + """Handle incoming message from Feishu.""" + try: + event = data.event + message = event.message + sender = event.sender + + # Deduplication check + message_id = message.message_id + if message_id in self._processed_message_ids: + return + self._processed_message_ids[message_id] = None + + # Trim cache: keep most recent 500 when exceeds 1000 + while len(self._processed_message_ids) > 1000: + self._processed_message_ids.popitem(last=False) + + # Skip bot messages + sender_type = sender.sender_type + if sender_type == "bot": + return + + sender_id = sender.sender_id.open_id if sender.sender_id else "unknown" + chat_id = message.chat_id + chat_type = message.chat_type # "p2p" or "group" + msg_type = message.message_type + + # Add reaction to indicate "seen" + await self._add_reaction(message_id, "THUMBSUP") + + # Parse message content + if msg_type == "text": + try: + content = json.loads(message.content).get("text", "") + except json.JSONDecodeError: + content = message.content or "" + else: + content = MSG_TYPE_MAP.get(msg_type, f"[{msg_type}]") + + if not content: + return + + # Forward to message bus + reply_to = chat_id if chat_type == "group" else sender_id + await self._handle_message( + sender_id=sender_id, + chat_id=reply_to, + content=content, + metadata={ + "message_id": message_id, + "chat_type": chat_type, + "msg_type": msg_type, + } + ) + + except Exception as e: + logger.error(f"Error processing Feishu message: {e}") diff --git a/nanobot/channels/manager.py b/nanobot/channels/manager.py index 04abf5f..64ced48 100644 --- a/nanobot/channels/manager.py +++ b/nanobot/channels/manager.py @@ -37,7 +37,9 @@ class ChannelManager: try: from nanobot.channels.telegram import TelegramChannel 
self.channels["telegram"] = TelegramChannel( - self.config.channels.telegram, self.bus + self.config.channels.telegram, + self.bus, + groq_api_key=self.config.providers.groq.api_key, ) logger.info("Telegram channel enabled") except ImportError as e: @@ -53,6 +55,28 @@ class ChannelManager: logger.info("WhatsApp channel enabled") except ImportError as e: logger.warning(f"WhatsApp channel not available: {e}") + + # Discord channel + if self.config.channels.discord.enabled: + try: + from nanobot.channels.discord import DiscordChannel + self.channels["discord"] = DiscordChannel( + self.config.channels.discord, self.bus + ) + logger.info("Discord channel enabled") + except ImportError as e: + logger.warning(f"Discord channel not available: {e}") + + # Feishu channel + if self.config.channels.feishu.enabled: + try: + from nanobot.channels.feishu import FeishuChannel + self.channels["feishu"] = FeishuChannel( + self.config.channels.feishu, self.bus + ) + logger.info("Feishu channel enabled") + except ImportError as e: + logger.warning(f"Feishu channel not available: {e}") async def start_all(self) -> None: """Start WhatsApp channel and the outbound dispatcher.""" diff --git a/nanobot/channels/telegram.py b/nanobot/channels/telegram.py index ac2dba4..23e1de0 100644 --- a/nanobot/channels/telegram.py +++ b/nanobot/channels/telegram.py @@ -85,9 +85,10 @@ class TelegramChannel(BaseChannel): name = "telegram" - def __init__(self, config: TelegramConfig, bus: MessageBus): + def __init__(self, config: TelegramConfig, bus: MessageBus, groq_api_key: str = ""): super().__init__(config, bus) self.config: TelegramConfig = config + self.groq_api_key = groq_api_key self._app: Application | None = None self._chat_ids: dict[str, int] = {} # Map sender_id to chat_id for replies @@ -249,7 +250,20 @@ class TelegramChannel(BaseChannel): await file.download_to_drive(str(file_path)) media_paths.append(str(file_path)) - content_parts.append(f"[{media_type}: {file_path}]") + + # Handle voice 
transcription + if media_type == "voice" or media_type == "audio": + from nanobot.providers.transcription import GroqTranscriptionProvider + transcriber = GroqTranscriptionProvider(api_key=self.groq_api_key) + transcription = await transcriber.transcribe(file_path) + if transcription: + logger.info(f"Transcribed {media_type}: {transcription[:50]}...") + content_parts.append(f"[transcription: {transcription}]") + else: + content_parts.append(f"[{media_type}: {file_path}]") + else: + content_parts.append(f"[{media_type}: {file_path}]") + logger.debug(f"Downloaded {media_type} to {file_path}") except Exception as e: logger.error(f"Failed to download media: {e}") diff --git a/nanobot/channels/whatsapp.py b/nanobot/channels/whatsapp.py index efbd3e1..c14a6c3 100644 --- a/nanobot/channels/whatsapp.py +++ b/nanobot/channels/whatsapp.py @@ -107,6 +107,11 @@ class WhatsAppChannel(BaseChannel): # Extract just the phone number as chat_id chat_id = sender.split("@")[0] if "@" in sender else sender + # Handle voice transcription if it's a voice message + if content == "[Voice Message]": + logger.info(f"Voice message received from {chat_id}, but direct download from bridge is not yet supported.") + content = "[Voice Message: Transcription not available for WhatsApp yet]" + await self._handle_message( sender_id=chat_id, chat_id=sender, # Use full JID for replies diff --git a/nanobot/cli/commands.py b/nanobot/cli/commands.py index d293564..bc2ea74 100644 --- a/nanobot/cli/commands.py +++ b/nanobot/cli/commands.py @@ -178,11 +178,13 @@ def gateway( # Create components bus = MessageBus() - # Create provider (supports OpenRouter, Anthropic, OpenAI) + # Create provider (supports OpenRouter, Anthropic, OpenAI, Bedrock) api_key = config.get_api_key() api_base = config.get_api_base() - - if not api_key: + model = config.agents.defaults.model + is_bedrock = model.startswith("bedrock/") + + if not api_key and not is_bedrock: console.print("[red]Error: No API key configured.[/red]") 
console.print("Set one in ~/.nanobot/config.json under providers.openrouter.apiKey") raise typer.Exit(1) @@ -193,35 +195,41 @@ def gateway( default_model=config.agents.defaults.model ) - # Create agent + # Create cron service first (callback set after agent creation) + cron_store_path = get_data_dir() / "cron" / "jobs.json" + cron = CronService(cron_store_path) + + # Create agent with cron service agent = AgentLoop( bus=bus, provider=provider, workspace=config.workspace_path, model=config.agents.defaults.model, max_iterations=config.agents.defaults.max_tool_iterations, - brave_api_key=config.tools.web.search.api_key or None + brave_api_key=config.tools.web.search.api_key or None, + exec_config=config.tools.exec, + cron_service=cron, + restrict_to_workspace=config.tools.restrict_to_workspace, ) - # Create cron service + # Set cron callback (needs agent) async def on_cron_job(job: CronJob) -> str | None: """Execute a cron job through the agent.""" response = await agent.process_direct( job.payload.message, - session_key=f"cron:{job.id}" + session_key=f"cron:{job.id}", + channel=job.payload.channel or "cli", + chat_id=job.payload.to or "direct", ) - # Optionally deliver to channel if job.payload.deliver and job.payload.to: from nanobot.bus.events import OutboundMessage await bus.publish_outbound(OutboundMessage( - channel=job.payload.channel or "whatsapp", + channel=job.payload.channel or "cli", chat_id=job.payload.to, content=response or "" )) return response - - cron_store_path = get_data_dir() / "cron" / "jobs.json" - cron = CronService(cron_store_path, on_job=on_cron_job) + cron.on_job = on_cron_job # Create heartbeat service async def on_heartbeat(prompt: str) -> str: @@ -289,11 +297,13 @@ def agent( api_key = config.get_api_key() api_base = config.get_api_base() - - if not api_key: + model = config.agents.defaults.model + is_bedrock = model.startswith("bedrock/") + + if not api_key and not is_bedrock: console.print("[red]Error: No API key configured.[/red]") 
raise typer.Exit(1) - + bus = MessageBus() provider = LiteLLMProvider( api_key=api_key, @@ -305,7 +315,9 @@ def agent( bus=bus, provider=provider, workspace=config.workspace_path, - brave_api_key=config.tools.web.search.api_key or None + brave_api_key=config.tools.web.search.api_key or None, + exec_config=config.tools.exec, + restrict_to_workspace=config.tools.restrict_to_workspace, ) if message: @@ -348,21 +360,38 @@ app.add_typer(channels_app, name="channels") def channels_status(): """Show channel status.""" from nanobot.config.loader import load_config - + config = load_config() - + table = Table(title="Channel Status") table.add_column("Channel", style="cyan") table.add_column("Enabled", style="green") - table.add_column("Bridge URL", style="yellow") - + table.add_column("Configuration", style="yellow") + + # WhatsApp wa = config.channels.whatsapp table.add_row( "WhatsApp", "βœ“" if wa.enabled else "βœ—", wa.bridge_url ) + + dc = config.channels.discord + table.add_row( + "Discord", + "βœ“" if dc.enabled else "βœ—", + dc.gateway_url + ) + # Telegram + tg = config.channels.telegram + tg_config = f"token: {tg.token[:10]}..." 
if tg.token else "[dim]not configured[/dim]" + table.add_row( + "Telegram", + "βœ“" if tg.enabled else "βœ—", + tg_config + ) + console.print(table) @@ -384,7 +413,7 @@ def _get_bridge_dir() -> Path: raise typer.Exit(1) # Find source bridge: first check package data, then source dir - pkg_bridge = Path(__file__).parent / "bridge" # nanobot/bridge (installed) + pkg_bridge = Path(__file__).parent.parent / "bridge" # nanobot/bridge (installed) src_bridge = Path(__file__).parent.parent.parent / "bridge" # repo root/bridge (dev) source = None @@ -608,18 +637,17 @@ def cron_run( def status(): """Show nanobot status.""" from nanobot.config.loader import load_config, get_config_path - from nanobot.utils.helpers import get_workspace_path - + config_path = get_config_path() - workspace = get_workspace_path() - + config = load_config() + workspace = config.workspace_path + console.print(f"{__logo__} nanobot Status\n") - + console.print(f"Config: {config_path} {'[green]βœ“[/green]' if config_path.exists() else '[red]βœ—[/red]'}") console.print(f"Workspace: {workspace} {'[green]βœ“[/green]' if workspace.exists() else '[red]βœ—[/red]'}") - + if config_path.exists(): - config = load_config() console.print(f"Model: {config.agents.defaults.model}") # Check API keys diff --git a/nanobot/config/loader.py b/nanobot/config/loader.py index f8de881..fd7d1e8 100644 --- a/nanobot/config/loader.py +++ b/nanobot/config/loader.py @@ -34,6 +34,7 @@ def load_config(config_path: Path | None = None) -> Config: try: with open(path) as f: data = json.load(f) + data = _migrate_config(data) return Config.model_validate(convert_keys(data)) except (json.JSONDecodeError, ValueError) as e: print(f"Warning: Failed to load config from {path}: {e}") @@ -61,6 +62,16 @@ def save_config(config: Config, config_path: Path | None = None) -> None: json.dump(data, f, indent=2) +def _migrate_config(data: dict) -> dict: + """Migrate old config formats to current.""" + # Move tools.exec.restrictToWorkspace β†’ 
tools.restrictToWorkspace + tools = data.get("tools", {}) + exec_cfg = tools.get("exec", {}) + if "restrictToWorkspace" in exec_cfg and "restrictToWorkspace" not in tools: + tools["restrictToWorkspace"] = exec_cfg.pop("restrictToWorkspace") + return data + + def convert_keys(data: Any) -> Any: """Convert camelCase keys to snake_case for Pydantic.""" if isinstance(data, dict): diff --git a/nanobot/config/schema.py b/nanobot/config/schema.py index c2109a1..590fd19 100644 --- a/nanobot/config/schema.py +++ b/nanobot/config/schema.py @@ -17,12 +17,34 @@ class TelegramConfig(BaseModel): enabled: bool = False token: str = "" # Bot token from @BotFather allow_from: list[str] = Field(default_factory=list) # Allowed user IDs or usernames + proxy: str | None = None # HTTP/SOCKS5 proxy URL, e.g. "http://127.0.0.1:7890" or "socks5://127.0.0.1:1080" + + +class FeishuConfig(BaseModel): + """Feishu/Lark channel configuration using WebSocket long connection.""" + enabled: bool = False + app_id: str = "" # App ID from Feishu Open Platform + app_secret: str = "" # App Secret from Feishu Open Platform + encrypt_key: str = "" # Encrypt Key for event subscription (optional) + verification_token: str = "" # Verification Token for event subscription (optional) + allow_from: list[str] = Field(default_factory=list) # Allowed user open_ids + + +class DiscordConfig(BaseModel): + """Discord channel configuration.""" + enabled: bool = False + token: str = "" # Bot token from Discord Developer Portal + allow_from: list[str] = Field(default_factory=list) # Allowed user IDs + gateway_url: str = "wss://gateway.discord.gg/?v=10&encoding=json" + intents: int = 37377 # GUILDS + GUILD_MESSAGES + DIRECT_MESSAGES + MESSAGE_CONTENT class ChannelsConfig(BaseModel): """Configuration for chat channels.""" whatsapp: WhatsAppConfig = Field(default_factory=WhatsAppConfig) telegram: TelegramConfig = Field(default_factory=TelegramConfig) + discord: DiscordConfig = Field(default_factory=DiscordConfig) + feishu: 
FeishuConfig = Field(default_factory=FeishuConfig) class AgentDefaults(BaseModel): @@ -50,9 +72,12 @@ class ProvidersConfig(BaseModel): anthropic: ProviderConfig = Field(default_factory=ProviderConfig) openai: ProviderConfig = Field(default_factory=ProviderConfig) openrouter: ProviderConfig = Field(default_factory=ProviderConfig) + deepseek: ProviderConfig = Field(default_factory=ProviderConfig) + groq: ProviderConfig = Field(default_factory=ProviderConfig) zhipu: ProviderConfig = Field(default_factory=ProviderConfig) vllm: ProviderConfig = Field(default_factory=ProviderConfig) gemini: ProviderConfig = Field(default_factory=ProviderConfig) + moonshot: ProviderConfig = Field(default_factory=ProviderConfig) class GatewayConfig(BaseModel): @@ -72,9 +97,16 @@ class WebToolsConfig(BaseModel): search: WebSearchConfig = Field(default_factory=WebSearchConfig) +class ExecToolConfig(BaseModel): + """Shell exec tool configuration.""" + timeout: int = 60 + + class ToolsConfig(BaseModel): """Tools configuration.""" web: WebToolsConfig = Field(default_factory=WebToolsConfig) + exec: ExecToolConfig = Field(default_factory=ExecToolConfig) + restrict_to_workspace: bool = False # If true, restrict all tool access to workspace directory class Config(BaseSettings): @@ -90,25 +122,57 @@ class Config(BaseSettings): """Get expanded workspace path.""" return Path(self.agents.defaults.workspace).expanduser() - def get_api_key(self) -> str | None: - """Get API key in priority order: OpenRouter > Anthropic > OpenAI > Gemini > Zhipu > vLLM.""" - return ( - self.providers.openrouter.api_key or - self.providers.anthropic.api_key or - self.providers.openai.api_key or - self.providers.gemini.api_key or - self.providers.zhipu.api_key or - self.providers.vllm.api_key or - None - ) + def _match_provider(self, model: str | None = None) -> ProviderConfig | None: + """Match a provider based on model name.""" + model = (model or self.agents.defaults.model).lower() + # Map of keywords to provider configs 
+ providers = { + "openrouter": self.providers.openrouter, + "deepseek": self.providers.deepseek, + "anthropic": self.providers.anthropic, + "claude": self.providers.anthropic, + "openai": self.providers.openai, + "gpt": self.providers.openai, + "gemini": self.providers.gemini, + "zhipu": self.providers.zhipu, + "glm": self.providers.zhipu, + "zai": self.providers.zhipu, + "groq": self.providers.groq, + "moonshot": self.providers.moonshot, + "kimi": self.providers.moonshot, + "vllm": self.providers.vllm, + } + for keyword, provider in providers.items(): + if keyword in model and provider.api_key: + return provider + return None + + def get_api_key(self, model: str | None = None) -> str | None: + """Get API key for the given model (or default model). Falls back to first available key.""" + # Try matching by model name first + matched = self._match_provider(model) + if matched: + return matched.api_key + # Fallback: return first available key + for provider in [ + self.providers.openrouter, self.providers.deepseek, + self.providers.anthropic, self.providers.openai, + self.providers.gemini, self.providers.zhipu, + self.providers.moonshot, self.providers.vllm, + self.providers.groq, + ]: + if provider.api_key: + return provider.api_key + return None - def get_api_base(self) -> str | None: - """Get API base URL if using OpenRouter, Zhipu or vLLM.""" - if self.providers.openrouter.api_key: + def get_api_base(self, model: str | None = None) -> str | None: + """Get API base URL based on model name.""" + model = (model or self.agents.defaults.model).lower() + if "openrouter" in model: return self.providers.openrouter.api_base or "https://openrouter.ai/api/v1" - if self.providers.zhipu.api_key: + if any(k in model for k in ("zhipu", "glm", "zai")): return self.providers.zhipu.api_base - if self.providers.vllm.api_base: + if "vllm" in model: return self.providers.vllm.api_base return None diff --git a/nanobot/heartbeat/service.py b/nanobot/heartbeat/service.py index 
4cb469e..221ed27 100644 --- a/nanobot/heartbeat/service.py +++ b/nanobot/heartbeat/service.py @@ -115,7 +115,7 @@ class HeartbeatService: response = await self.on_heartbeat(HEARTBEAT_PROMPT) # Check if agent said "nothing to do" - if HEARTBEAT_OK_TOKEN in response.upper().replace("_", ""): + if HEARTBEAT_OK_TOKEN.replace("_", "") in response.upper().replace("_", ""): logger.info("Heartbeat: OK (no action needed)") else: logger.info(f"Heartbeat: completed task") diff --git a/nanobot/providers/litellm_provider.py b/nanobot/providers/litellm_provider.py index 332fec0..085bbef 100644 --- a/nanobot/providers/litellm_provider.py +++ b/nanobot/providers/litellm_provider.py @@ -43,6 +43,8 @@ class LiteLLMProvider(LLMProvider): elif self.is_vllm: # vLLM/custom endpoint - uses OpenAI-compatible API os.environ["HOSTED_VLLM_API_KEY"] = api_key + elif "deepseek" in default_model: + os.environ.setdefault("DEEPSEEK_API_KEY", api_key) elif "anthropic" in default_model: os.environ.setdefault("ANTHROPIC_API_KEY", api_key) elif "openai" in default_model or "gpt" in default_model: @@ -51,6 +53,11 @@ class LiteLLMProvider(LLMProvider): os.environ.setdefault("GEMINI_API_KEY", api_key) elif "zhipu" in default_model or "glm" in default_model or "zai" in default_model: os.environ.setdefault("ZHIPUAI_API_KEY", api_key) + elif "groq" in default_model: + os.environ.setdefault("GROQ_API_KEY", api_key) + elif "moonshot" in default_model or "kimi" in default_model: + os.environ.setdefault("MOONSHOT_API_KEY", api_key) + os.environ.setdefault("MOONSHOT_API_BASE", api_base or "https://api.moonshot.cn/v1") if api_base: litellm.api_base = api_base @@ -86,23 +93,33 @@ class LiteLLMProvider(LLMProvider): model = f"openrouter/{model}" # For Zhipu/Z.ai, ensure prefix is present - # Handle cases like "glm-4.7-flash" -> "zhipu/glm-4.7-flash" + # Handle cases like "glm-4.7-flash" -> "zai/glm-4.7-flash" if ("glm" in model.lower() or "zhipu" in model.lower()) and not ( model.startswith("zhipu/") or 
model.startswith("zai/") or model.startswith("openrouter/") ): - model = f"zhipu/{model}" - + model = f"zai/{model}" + + # For Moonshot/Kimi, ensure moonshot/ prefix (before vLLM check) + if ("moonshot" in model.lower() or "kimi" in model.lower()) and not ( + model.startswith("moonshot/") or model.startswith("openrouter/") + ): + model = f"moonshot/{model}" + + # For Gemini, ensure gemini/ prefix if not already present + if "gemini" in model.lower() and not model.startswith("gemini/"): + model = f"gemini/{model}" + # For vLLM, use hosted_vllm/ prefix per LiteLLM docs # Convert openai/ prefix to hosted_vllm/ if user specified it if self.is_vllm: model = f"hosted_vllm/{model}" - # For Gemini, ensure gemini/ prefix if not already present - if "gemini" in model.lower() and not model.startswith("gemini/"): - model = f"gemini/{model}" - + # kimi-k2.5 only supports temperature=1.0 + if "kimi-k2.5" in model.lower(): + temperature = 1.0 + kwargs: dict[str, Any] = { "model": model, "messages": messages, diff --git a/nanobot/providers/transcription.py b/nanobot/providers/transcription.py new file mode 100644 index 0000000..8ce909b --- /dev/null +++ b/nanobot/providers/transcription.py @@ -0,0 +1,65 @@ +"""Voice transcription provider using Groq.""" + +import os +from pathlib import Path +from typing import Any + +import httpx +from loguru import logger + + +class GroqTranscriptionProvider: + """ + Voice transcription provider using Groq's Whisper API. + + Groq offers extremely fast transcription with a generous free tier. + """ + + def __init__(self, api_key: str | None = None): + self.api_key = api_key or os.environ.get("GROQ_API_KEY") + self.api_url = "https://api.groq.com/openai/v1/audio/transcriptions" + + async def transcribe(self, file_path: str | Path) -> str: + """ + Transcribe an audio file using Groq. + + Args: + file_path: Path to the audio file. + + Returns: + Transcribed text. 
+ """ + if not self.api_key: + logger.warning("Groq API key not configured for transcription") + return "" + + path = Path(file_path) + if not path.exists(): + logger.error(f"Audio file not found: {file_path}") + return "" + + try: + async with httpx.AsyncClient() as client: + with open(path, "rb") as f: + files = { + "file": (path.name, f), + "model": (None, "whisper-large-v3"), + } + headers = { + "Authorization": f"Bearer {self.api_key}", + } + + response = await client.post( + self.api_url, + headers=headers, + files=files, + timeout=60.0 + ) + + response.raise_for_status() + data = response.json() + return data.get("text", "") + + except Exception as e: + logger.error(f"Groq transcription error: {e}") + return "" diff --git a/nanobot/skills/cron/SKILL.md b/nanobot/skills/cron/SKILL.md new file mode 100644 index 0000000..c8beecb --- /dev/null +++ b/nanobot/skills/cron/SKILL.md @@ -0,0 +1,40 @@ +--- +name: cron +description: Schedule reminders and recurring tasks. +--- + +# Cron + +Use the `cron` tool to schedule reminders or recurring tasks. + +## Two Modes + +1. **Reminder** - message is sent directly to user +2. 
**Task** - message is a task description, agent executes and sends result + +## Examples + +Fixed reminder: +``` +cron(action="add", message="Time to take a break!", every_seconds=1200) +``` + +Dynamic task (agent executes each time): +``` +cron(action="add", message="Check HKUDS/nanobot GitHub stars and report", every_seconds=600) +``` + +List/remove: +``` +cron(action="list") +cron(action="remove", job_id="abc123") +``` + +## Time Expressions + +| User says | Parameters | +|-----------|------------| +| every 20 minutes | every_seconds: 1200 | +| every hour | every_seconds: 3600 | +| every day at 8am | cron_expr: "0 8 * * *" | +| weekdays at 5pm | cron_expr: "0 17 * * 1-5" | diff --git a/nanobot/skills/skill-creator/SKILL.md b/nanobot/skills/skill-creator/SKILL.md index 4680d5e..9b5eb6f 100644 --- a/nanobot/skills/skill-creator/SKILL.md +++ b/nanobot/skills/skill-creator/SKILL.md @@ -9,9 +9,9 @@ This skill provides guidance for creating effective skills. ## About Skills -Skills are modular, self-contained packages that extend Codex's capabilities by providing +Skills are modular, self-contained packages that extend the agent's capabilities by providing specialized knowledge, workflows, and tools. Think of them as "onboarding guides" for specific -domains or tasksβ€”they transform Codex from a general-purpose agent into a specialized agent +domains or tasksβ€”they transform the agent from a general-purpose agent into a specialized agent equipped with procedural knowledge that no model can fully possess. ### What Skills Provide @@ -25,9 +25,9 @@ equipped with procedural knowledge that no model can fully possess. ### Concise is Key -The context window is a public good. Skills share the context window with everything else Codex needs: system prompt, conversation history, other Skills' metadata, and the actual user request. +The context window is a public good. 
Skills share the context window with everything else the agent needs: system prompt, conversation history, other Skills' metadata, and the actual user request. -**Default assumption: Codex is already very smart.** Only add context Codex doesn't already have. Challenge each piece of information: "Does Codex really need this explanation?" and "Does this paragraph justify its token cost?" +**Default assumption: the agent is already very smart.** Only add context the agent doesn't already have. Challenge each piece of information: "Does the agent really need this explanation?" and "Does this paragraph justify its token cost?" Prefer concise examples over verbose explanations. @@ -41,7 +41,7 @@ Match the level of specificity to the task's fragility and variability: **Low freedom (specific scripts, few parameters)**: Use when operations are fragile and error-prone, consistency is critical, or a specific sequence must be followed. -Think of Codex as exploring a path: a narrow bridge with cliffs needs specific guardrails (low freedom), while an open field allows many routes (high freedom). +Think of the agent as exploring a path: a narrow bridge with cliffs needs specific guardrails (low freedom), while an open field allows many routes (high freedom). ### Anatomy of a Skill @@ -64,7 +64,7 @@ skill-name/ Every SKILL.md consists of: -- **Frontmatter** (YAML): Contains `name` and `description` fields. These are the only fields that Codex reads to determine when the skill gets used, thus it is very important to be clear and comprehensive in describing what the skill is, and when it should be used. +- **Frontmatter** (YAML): Contains `name` and `description` fields. These are the only fields that the agent reads to determine when the skill gets used, thus it is very important to be clear and comprehensive in describing what the skill is, and when it should be used. - **Body** (Markdown): Instructions and guidance for using the skill. 
Only loaded AFTER the skill triggers (if at all). #### Bundled Resources (optional) @@ -76,27 +76,27 @@ Executable code (Python/Bash/etc.) for tasks that require deterministic reliabil - **When to include**: When the same code is being rewritten repeatedly or deterministic reliability is needed - **Example**: `scripts/rotate_pdf.py` for PDF rotation tasks - **Benefits**: Token efficient, deterministic, may be executed without loading into context -- **Note**: Scripts may still need to be read by Codex for patching or environment-specific adjustments +- **Note**: Scripts may still need to be read by the agent for patching or environment-specific adjustments ##### References (`references/`) -Documentation and reference material intended to be loaded as needed into context to inform Codex's process and thinking. +Documentation and reference material intended to be loaded as needed into context to inform the agent's process and thinking. -- **When to include**: For documentation that Codex should reference while working +- **When to include**: For documentation that the agent should reference while working - **Examples**: `references/finance.md` for financial schemas, `references/mnda.md` for company NDA template, `references/policies.md` for company policies, `references/api_docs.md` for API specifications - **Use cases**: Database schemas, API documentation, domain knowledge, company policies, detailed workflow guides -- **Benefits**: Keeps SKILL.md lean, loaded only when Codex determines it's needed +- **Benefits**: Keeps SKILL.md lean, loaded only when the agent determines it's needed - **Best practice**: If files are large (>10k words), include grep search patterns in SKILL.md - **Avoid duplication**: Information should live in either SKILL.md or references files, not both. Prefer references files for detailed information unless it's truly core to the skillβ€”this keeps SKILL.md lean while making information discoverable without hogging the context window. 
Keep only essential procedural instructions and workflow guidance in SKILL.md; move detailed reference material, schemas, and examples to references files. ##### Assets (`assets/`) -Files not intended to be loaded into context, but rather used within the output Codex produces. +Files not intended to be loaded into context, but rather used within the output the agent produces. - **When to include**: When the skill needs files that will be used in the final output - **Examples**: `assets/logo.png` for brand assets, `assets/slides.pptx` for PowerPoint templates, `assets/frontend-template/` for HTML/React boilerplate, `assets/font.ttf` for typography - **Use cases**: Templates, images, icons, boilerplate code, fonts, sample documents that get copied or modified -- **Benefits**: Separates output resources from documentation, enables Codex to use files without loading them into context +- **Benefits**: Separates output resources from documentation, enables the agent to use files without loading them into context #### What to Not Include in a Skill @@ -116,7 +116,7 @@ Skills use a three-level loading system to manage context efficiently: 1. **Metadata (name + description)** - Always in context (~100 words) 2. **SKILL.md body** - When skill triggers (<5k words) -3. **Bundled resources** - As needed by Codex (Unlimited because scripts can be executed without reading into context window) +3. **Bundled resources** - As needed by the agent (Unlimited because scripts can be executed without reading into context window) #### Progressive Disclosure Patterns @@ -141,7 +141,7 @@ Extract text with pdfplumber: - **Examples**: See [EXAMPLES.md](EXAMPLES.md) for common patterns ``` -Codex loads FORMS.md, REFERENCE.md, or EXAMPLES.md only when needed. +the agent loads FORMS.md, REFERENCE.md, or EXAMPLES.md only when needed. 
**Pattern 2: Domain-specific organization** @@ -157,7 +157,7 @@ bigquery-skill/ └── marketing.md (campaigns, attribution) ``` -When a user asks about sales metrics, Codex only reads sales.md. +When a user asks about sales metrics, the agent only reads sales.md. Similarly, for skills supporting multiple frameworks or variants, organize by variant: @@ -170,7 +170,7 @@ cloud-deploy/ └── azure.md (Azure deployment patterns) ``` -When the user chooses AWS, Codex only reads aws.md. +When the user chooses AWS, the agent only reads aws.md. **Pattern 3: Conditional details** @@ -191,12 +191,12 @@ For simple edits, modify the XML directly. **For OOXML details**: See [OOXML.md](OOXML.md) ``` -Codex reads REDLINING.md or OOXML.md only when the user needs those features. +the agent reads REDLINING.md or OOXML.md only when the user needs those features. **Important guidelines:** - **Avoid deeply nested references** - Keep references one level deep from SKILL.md. All reference files should link directly from SKILL.md. -- **Structure longer reference files** - For files longer than 100 lines, include a table of contents at the top so Codex can see the full scope when previewing. +- **Structure longer reference files** - For files longer than 100 lines, include a table of contents at the top so the agent can see the full scope when previewing. ## Skill Creation Process @@ -293,7 +293,7 @@ After initialization, customize the SKILL.md and add resources as needed. If you ### Step 4: Edit the Skill -When editing the (newly-generated or existing) skill, remember that the skill is being created for another instance of Codex to use. Include information that would be beneficial and non-obvious to Codex. Consider what procedural knowledge, domain-specific details, or reusable assets would help another Codex instance execute these tasks more effectively. +When editing the (newly-generated or existing) skill, remember that the skill is being created for another instance of the agent to use. 
Include information that would be beneficial and non-obvious to the agent. Consider what procedural knowledge, domain-specific details, or reusable assets would help another the agent instance execute these tasks more effectively. #### Learn Proven Design Patterns @@ -321,10 +321,10 @@ If you used `--examples`, delete any placeholder files that are not needed for t Write the YAML frontmatter with `name` and `description`: - `name`: The skill name -- `description`: This is the primary triggering mechanism for your skill, and helps Codex understand when to use the skill. +- `description`: This is the primary triggering mechanism for your skill, and helps the agent understand when to use the skill. - Include both what the Skill does and specific triggers/contexts for when to use it. - - Include all "when to use" information here - Not in the body. The body is only loaded after triggering, so "When to Use This Skill" sections in the body are not helpful to Codex. - - Example description for a `docx` skill: "Comprehensive document creation, editing, and analysis with support for tracked changes, comments, formatting preservation, and text extraction. Use when Codex needs to work with professional documents (.docx files) for: (1) Creating new documents, (2) Modifying or editing content, (3) Working with tracked changes, (4) Adding comments, or any other document tasks" + - Include all "when to use" information here - Not in the body. The body is only loaded after triggering, so "When to Use This Skill" sections in the body are not helpful to the agent. + - Example description for a `docx` skill: "Comprehensive document creation, editing, and analysis with support for tracked changes, comments, formatting preservation, and text extraction. 
Use when the agent needs to work with professional documents (.docx files) for: (1) Creating new documents, (2) Modifying or editing content, (3) Working with tracked changes, (4) Adding comments, or any other document tasks" Do not include any other fields in YAML frontmatter. diff --git a/pyproject.toml b/pyproject.toml index d081dd7..2a952a1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "nanobot-ai" -version = "0.1.3.post3" +version = "0.1.3.post4" description = "A lightweight personal AI assistant framework" requires-python = ">=3.11" license = {text = "MIT"} @@ -29,6 +29,7 @@ dependencies = [ "rich>=13.0.0", "croniter>=2.0.0", "python-telegram-bot>=21.0", + "lark-oapi>=1.0.0", ] [project.optional-dependencies] diff --git a/tests/test_docker.sh b/tests/test_docker.sh new file mode 100644 index 0000000..1e55133 --- /dev/null +++ b/tests/test_docker.sh @@ -0,0 +1,56 @@ +#!/usr/bin/env bash +set -euo pipefail +cd "$(dirname "$0")/.." || exit 1 + +IMAGE_NAME="nanobot-test" + +echo "=== Building Docker image ===" +docker build -t "$IMAGE_NAME" . 
+ +echo "" +echo "=== Running 'nanobot onboard' ===" +docker run --name nanobot-test-run "$IMAGE_NAME" onboard + +echo "" +echo "=== Running 'nanobot status' ===" +STATUS_OUTPUT=$(docker commit nanobot-test-run nanobot-test-onboarded > /dev/null && \ + docker run --rm nanobot-test-onboarded status 2>&1) || true + +echo "$STATUS_OUTPUT" + +echo "" +echo "=== Validating output ===" +PASS=true + +check() { + if echo "$STATUS_OUTPUT" | grep -q "$1"; then + echo " PASS: found '$1'" + else + echo " FAIL: missing '$1'" + PASS=false + fi +} + +check "nanobot Status" +check "Config:" +check "Workspace:" +check "Model:" +check "OpenRouter API:" +check "Anthropic API:" +check "OpenAI API:" + +echo "" +if $PASS; then + echo "=== All checks passed ===" +else + echo "=== Some checks FAILED ===" + exit 1 +fi + +# Cleanup +echo "" +echo "=== Cleanup ===" +docker rm -f nanobot-test-run 2>/dev/null || true +docker rmi -f nanobot-test-onboarded 2>/dev/null || true +docker rmi -f "$IMAGE_NAME" 2>/dev/null || true +echo "Done." 
diff --git a/tests/test_tool_validation.py b/tests/test_tool_validation.py new file mode 100644 index 0000000..f11c667 --- /dev/null +++ b/tests/test_tool_validation.py @@ -0,0 +1,88 @@ +from typing import Any + +from nanobot.agent.tools.base import Tool +from nanobot.agent.tools.registry import ToolRegistry + + +class SampleTool(Tool): + @property + def name(self) -> str: + return "sample" + + @property + def description(self) -> str: + return "sample tool" + + @property + def parameters(self) -> dict[str, Any]: + return { + "type": "object", + "properties": { + "query": {"type": "string", "minLength": 2}, + "count": {"type": "integer", "minimum": 1, "maximum": 10}, + "mode": {"type": "string", "enum": ["fast", "full"]}, + "meta": { + "type": "object", + "properties": { + "tag": {"type": "string"}, + "flags": { + "type": "array", + "items": {"type": "string"}, + }, + }, + "required": ["tag"], + }, + }, + "required": ["query", "count"], + } + + async def execute(self, **kwargs: Any) -> str: + return "ok" + + +def test_validate_params_missing_required() -> None: + tool = SampleTool() + errors = tool.validate_params({"query": "hi"}) + assert "missing required count" in "; ".join(errors) + + +def test_validate_params_type_and_range() -> None: + tool = SampleTool() + errors = tool.validate_params({"query": "hi", "count": 0}) + assert any("count must be >= 1" in e for e in errors) + + errors = tool.validate_params({"query": "hi", "count": "2"}) + assert any("count should be integer" in e for e in errors) + + +def test_validate_params_enum_and_min_length() -> None: + tool = SampleTool() + errors = tool.validate_params({"query": "h", "count": 2, "mode": "slow"}) + assert any("query must be at least 2 chars" in e for e in errors) + assert any("mode must be one of" in e for e in errors) + + +def test_validate_params_nested_object_and_array() -> None: + tool = SampleTool() + errors = tool.validate_params( + { + "query": "hi", + "count": 2, + "meta": {"flags": [1, "ok"]}, + } 
+ ) + assert any("missing required meta.tag" in e for e in errors) + assert any("meta.flags[0] should be string" in e for e in errors) + + +def test_validate_params_ignores_unknown_fields() -> None: + tool = SampleTool() + errors = tool.validate_params({"query": "hi", "count": 2, "extra": "x"}) + assert errors == [] + + +async def test_registry_returns_validation_error() -> None: + reg = ToolRegistry() + reg.register(SampleTool()) + result = await reg.execute("sample", {"query": "hi"}) + assert "Invalid parameters" in result diff --git a/workspace/AGENTS.md b/workspace/AGENTS.md index a99a7b4..b4e5b5f 100644 --- a/workspace/AGENTS.md +++ b/workspace/AGENTS.md @@ -16,6 +16,7 @@ You have access to: - Shell commands (exec) - Web access (search, fetch) - Messaging (message) +- Background tasks (spawn) ## Memory diff --git a/workspace/TOOLS.md b/workspace/TOOLS.md index 9915561..0134a64 100644 --- a/workspace/TOOLS.md +++ b/workspace/TOOLS.md @@ -37,29 +37,31 @@ exec(command: str, working_dir: str = None) -> str ``` **Safety Notes:** -- Commands have a 60-second timeout +- Commands have a configurable timeout (default 60s) +- Dangerous commands are blocked (rm -rf, format, dd, shutdown, etc.) - Output is truncated at 10,000 characters -- Use with caution for destructive operations +- Optional `restrictToWorkspace` config to limit paths ## Web Access ### web_search -Search the web using DuckDuckGo. +Search the web using Brave Search API. ``` -web_search(query: str) -> str +web_search(query: str, count: int = 5) -> str ``` -Returns top 5 search results with titles, URLs, and snippets. +Returns search results with titles, URLs, and snippets. Requires `tools.web.search.apiKey` in config. ### web_fetch Fetch and extract main content from a URL. 
``` -web_fetch(url: str) -> str +web_fetch(url: str, extractMode: str = "markdown", maxChars: int = 50000) -> str ``` **Notes:** -- Content is extracted using trafilatura -- Output is truncated at 8,000 characters +- Content is extracted using readability +- Supports markdown or plain text extraction +- Output is truncated at 50,000 characters by default ## Communication @@ -69,6 +71,16 @@ Send a message to the user (used internally). message(content: str, channel: str = None, chat_id: str = None) -> str ``` +## Background Tasks + +### spawn +Spawn a subagent to handle a task in the background. +``` +spawn(task: str, label: str = None) -> str +``` + +Use for complex or time-consuming tasks that can run independently. The subagent will complete the task and report back when done. + ## Scheduled Reminders (Cron) Use the `exec` tool to create scheduled reminders with `nanobot cron add`: