Compare commits

5 Commits: subagent-i ... feat/quota

| Author | SHA1 | Date |
|---|---|---|
|  | ece660ae69 |  |
|  | 84268edf01 |  |
|  | 9136cca1ff |  |
|  | 6035b70ae5 |  |
|  | e4c300bcfd |  |
```diff
@@ -56,7 +56,7 @@ ENV PATH="/root/.local/bin:${PATH}"
 COPY pyproject.toml README.md LICENSE /app/
 COPY nanobot/ /app/nanobot/
-RUN uv pip install --system --no-cache --reinstall /app
+RUN uv pip install --system --no-cache --reinstall /app psycopg2-binary
 
 ENTRYPOINT ["nanobot"]
 CMD ["gateway"]
 
```
```diff
@@ -2,6 +2,7 @@
 import asyncio
 import json
+import time
 from pathlib import Path
 from typing import Any
 
 
@@ -74,8 +75,10 @@ class AgentLoop:
             exec_config=self.exec_config,
             restrict_to_workspace=restrict_to_workspace,
         )
 
         self._running = False
+        self._quota_cache: dict[str, Any] = {}  # {model: str, cached_at: float}
+        self._quota_cache_ttl: float = 300.0  # 5 minutes
         self._register_default_tools()
 
     def _register_default_tools(self) -> None:
@@ -143,7 +146,95 @@ class AgentLoop:
         """Stop the agent loop."""
         self._running = False
         logger.info("Agent loop stopping")
 
+    def _select_model_based_on_quota(self) -> str:
+        """Select Opus or Sonnet based on rolling weekly quota burn rate."""
+        # Check cache
+        now = time.time()
+        if self._quota_cache and (now - self._quota_cache.get("cached_at", 0)) < self._quota_cache_ttl:
+            return self._quota_cache["model"]
+
+        # Default models
+        OPUS = "claude-opus-4-6"
+        SONNET = "claude-sonnet-4-5"
+        TOLERANCE = 1.17  # 17% overage triggers downgrade
+
+        # Read rate limits
+        rate_limits_path = self.workspace / "memory" / "rate_limits.json"
+        if not rate_limits_path.exists():
+            logger.warning("rate_limits.json not found, defaulting to Sonnet")
+            return SONNET
+
+        try:
+            with open(rate_limits_path) as f:
+                limits = json.load(f)
+
+            actual_usage = limits.get("weekly_all_models")
+            weekly_reset = limits.get("weekly_reset")
+
+            if actual_usage is None or weekly_reset is None:
+                logger.warning("Rate limit data incomplete, defaulting to Sonnet")
+                return SONNET
+
+            # Calculate expected usage
+            actual_pct = actual_usage * 100
+            week_start = weekly_reset - (168 * 3600)
+            hours_elapsed = max(0, min((now - week_start) / 3600, 168))
+            expected_pct = (hours_elapsed / 168) * 100
+            threshold = expected_pct * TOLERANCE
+
+            # Decision logic
+            if actual_pct > threshold:
+                model = SONNET
+                logger.info(
+                    f"Quota: {actual_pct:.1f}% used, expected {expected_pct:.1f}%, "
+                    f"threshold {threshold:.1f}% → Sonnet"
+                )
+            else:
+                model = OPUS
+                logger.info(
+                    f"Quota: {actual_pct:.1f}% used, expected {expected_pct:.1f}%, "
+                    f"threshold {threshold:.1f}% → Opus"
+                )
+
+            # Cache decision
+            self._quota_cache = {"model": model, "cached_at": now}
+            return model
+
+        except Exception as e:
+            logger.error(f"Error checking quota: {e}, defaulting to Sonnet")
+            return SONNET
+
+    def _get_quota_status(self) -> str:
+        """Return human-readable quota status."""
+        rate_limits_path = self.workspace / "memory" / "rate_limits.json"
+        if not rate_limits_path.exists():
+            return "⚠️ No quota data available yet."
+
+        try:
+            with open(rate_limits_path) as f:
+                limits = json.load(f)
+
+            actual_pct = limits.get("weekly_all_models", 0) * 100
+            reset_ts = limits.get("weekly_reset", 0)
+            now = time.time()
+
+            hours_until_reset = (reset_ts - now) / 3600
+            week_start = reset_ts - (168 * 3600)
+            hours_elapsed = max(0, (now - week_start) / 3600)
+            expected_pct = (hours_elapsed / 168) * 100
+
+            model = self._select_model_based_on_quota()
+
+            return f"""📊 Quota Status:
+• Used: {actual_pct:.1f}% (expected {expected_pct:.1f}%)
+• Resets in: {hours_until_reset:.1f}h
+• Current model: {model}
+• Burn rate: {actual_pct / max(expected_pct, 0.01):.2f}x target"""
+
+        except Exception as e:
+            return f"⚠️ Error reading quota: {e}"
+
     async def _process_message(self, msg: InboundMessage, session_key: str | None = None) -> OutboundMessage | None:
         """
         Process a single inbound message.
```
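The selection logic above assumes `rate_limits.json` stores the consumed fraction of the weekly allowance (`weekly_all_models`, 0.0–1.0) and the end of the rolling 7-day window (`weekly_reset`, Unix seconds). A minimal standalone sketch of that file shape and the burn-rate arithmetic, with hypothetical sample values:

```python
# Standalone sketch of the assumed rate_limits.json shape and the burn-rate
# math from _select_model_based_on_quota; sample values are hypothetical.
import json
import time
from pathlib import Path

now = time.time()
sample = {
    "weekly_all_models": 0.42,        # fraction of weekly allowance used (0.0-1.0)
    "weekly_reset": now + 48 * 3600,  # Unix timestamp when the 7-day window resets
}
path = Path("memory/rate_limits.json")
path.parent.mkdir(parents=True, exist_ok=True)
path.write_text(json.dumps(sample))

# 48h until reset means hour 120 of 168: expected usage ~71.4%,
# threshold 71.4% * 1.17 ≈ 83.6%. Actual 42% is under it, so Opus is kept.
week_start = sample["weekly_reset"] - 168 * 3600
hours_elapsed = max(0.0, min((now - week_start) / 3600, 168))
expected_pct = hours_elapsed / 168 * 100
threshold = expected_pct * 1.17
print(f"used 42.0%, expected {expected_pct:.1f}%, threshold {threshold:.1f}%")
```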
```diff
@@ -177,8 +268,11 @@ class AgentLoop:
                                    content="🐈 New session started. Memory consolidated.")
         if cmd == "/help":
             return OutboundMessage(channel=msg.channel, chat_id=msg.chat_id,
-                                   content="🐈 nanobot commands:\n/new — Start a new conversation\n/help — Show available commands")
+                                   content="🐈 nanobot commands:\n/new — Start a new conversation\n/help — Show available commands\n/quota — Show quota status")
+        if cmd == "/quota":
+            status = self._get_quota_status()
+            return OutboundMessage(channel=msg.channel, chat_id=msg.chat_id, content=status)
 
         # Consolidate memory before processing if session is too large
         if len(session.messages) > self.memory_window:
             await self._consolidate_memory(session)
```
```diff
@@ -204,20 +298,23 @@ class AgentLoop:
                 channel=msg.channel,
                 chat_id=msg.chat_id,
             )
 
+        # Select model based on quota
+        selected_model = self._select_model_based_on_quota()
+
         # Agent loop
         iteration = 0
         final_content = None
         tools_used: list[str] = []
 
         while iteration < self.max_iterations:
             iteration += 1
 
             # Call LLM
             response = await self.provider.chat(
                 messages=messages,
                 tools=self.tools.get_definitions(),
-                model=self.model
+                model=selected_model
             )
 
             # Handle tool calls
```
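Because `_select_model_based_on_quota()` now runs on every processed message, the 5-minute cache added earlier bounds disk reads to one per TTL window. A self-contained sketch of that cache pattern (names are local to this example, not from the codebase):

```python
# Minimal sketch of the TTL-cache pattern used above; repeated calls within
# the 300s window return the cached model without re-reading rate limits.
import time
from typing import Any, Callable

_cache: dict[str, Any] = {}
TTL = 300.0

def select_cached(compute: Callable[[], str]) -> str:
    now = time.time()
    if _cache and (now - _cache.get("cached_at", 0)) < TTL:
        return _cache["model"]
    model = compute()
    _cache.update(model=model, cached_at=now)
    return model

print(select_cached(lambda: "claude-opus-4-6"))    # computed on first call
print(select_cached(lambda: "claude-sonnet-4-5"))  # cache hit: still opus
```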
```diff
@@ -424,7 +521,9 @@ Respond with ONLY valid JSON, no markdown fences."""
                 {"role": "system", "content": "You are a memory consolidation agent. Respond only with valid JSON."},
                 {"role": "user", "content": prompt},
             ],
-            model=self.model,
+            model="claude-haiku-4-5",
+            thinking_budget=0,
+            max_tokens=16384,
         )
         text = (response.content or "").strip()
         if text.startswith("```"):
```
```diff
@@ -121,7 +121,7 @@ class SubagentManager:
         ]
 
         # Run agent loop (limited iterations)
-        max_iterations = 15
+        max_iterations = 50
         iteration = 0
         final_result: str | None = None
 
```
```diff
@@ -92,6 +92,7 @@ class TelegramChannel(BaseChannel):
         BotCommand("start", "Start the bot"),
         BotCommand("new", "Start a new conversation"),
         BotCommand("help", "Show available commands"),
+        BotCommand("quota", "Show current quota status"),
     ]
 
     def __init__(
@@ -127,6 +128,7 @@ class TelegramChannel(BaseChannel):
         self._app.add_handler(CommandHandler("start", self._on_start))
         self._app.add_handler(CommandHandler("new", self._forward_command))
         self._app.add_handler(CommandHandler("help", self._forward_command))
+        self._app.add_handler(CommandHandler("quota", self._forward_command))
 
         # Add message handler for text, photos, voice, documents
         self._app.add_handler(
```
```diff
@@ -225,6 +225,7 @@ class AnthropicOAuthProvider(LLMProvider):
         max_tokens: int = 4096,
         temperature: float = 0.7,
         tools: list[dict[str, Any]] | None = None,
+        thinking_budget_override: int | None = None,
     ) -> dict[str, Any]:
         """Make request to Anthropic API."""
         client = await self._get_client()
@@ -236,14 +237,15 @@ class AnthropicOAuthProvider(LLMProvider):
         }
 
         # Extended thinking: temperature must be 1 when enabled
-        if self.thinking_budget > 0:
+        effective_thinking = thinking_budget_override if thinking_budget_override is not None else self.thinking_budget
+        if effective_thinking > 0:
             payload["temperature"] = 1
             # max_tokens must exceed budget_tokens
-            if max_tokens <= self.thinking_budget:
-                payload["max_tokens"] = self.thinking_budget + 4096
+            if max_tokens <= effective_thinking:
+                payload["max_tokens"] = effective_thinking + 4096
             payload["thinking"] = {
                 "type": "enabled",
-                "budget_tokens": self.thinking_budget,
+                "budget_tokens": effective_thinking,
             }
         else:
             payload["temperature"] = temperature
@@ -279,6 +281,7 @@ class AnthropicOAuthProvider(LLMProvider):
         model: str | None = None,
         max_tokens: int = 4096,
         temperature: float = 0.7,
+        thinking_budget: int | None = None,
     ) -> LLMResponse:
         """Send chat completion request to Anthropic API."""
         model = model or self.default_model
@@ -293,6 +296,9 @@ class AnthropicOAuthProvider(LLMProvider):
         system, prepared_messages = self._prepare_messages(messages)
         anthropic_tools = self._convert_tools_to_anthropic(tools)
 
+        # Per-call thinking override (None = use instance default)
+        effective_thinking = self.thinking_budget if thinking_budget is None else thinking_budget
+
         try:
             response = await self._make_request(
                 messages=prepared_messages,
@@ -301,6 +307,7 @@ class AnthropicOAuthProvider(LLMProvider):
                 max_tokens=max_tokens,
                 temperature=temperature,
                 tools=anthropic_tools,
+                thinking_budget_override=effective_thinking,
             )
             return self._parse_response(response)
         except Exception as e:
```
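The plumbing above resolves a per-call `thinking_budget` against the instance default before building the request payload. A standalone sketch of that resolution and the payload rules, mirroring the diff (the instance default value used here is hypothetical):

```python
# Standalone mirror of the payload rules above: a per-call override of 0
# disables extended thinking; None falls back to the instance default.
from typing import Any

def build_payload(instance_budget: int, max_tokens: int, temperature: float,
                  thinking_budget_override: int | None = None) -> dict[str, Any]:
    payload: dict[str, Any] = {"max_tokens": max_tokens}
    effective = (thinking_budget_override
                 if thinking_budget_override is not None else instance_budget)
    if effective > 0:
        payload["temperature"] = 1  # extended thinking requires temperature 1
        if max_tokens <= effective:
            payload["max_tokens"] = effective + 4096  # must exceed budget_tokens
        payload["thinking"] = {"type": "enabled", "budget_tokens": effective}
    else:
        payload["temperature"] = temperature
    return payload

assert "thinking" in build_payload(8192, 16384, 0.7)         # default: enabled
assert "thinking" not in build_payload(8192, 16384, 0.7, 0)  # override 0: disabled
```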
```diff
@@ -48,6 +48,7 @@ class LLMProvider(ABC):
         model: str | None = None,
         max_tokens: int = 4096,
         temperature: float = 0.7,
+        thinking_budget: int | None = None,
     ) -> LLMResponse:
         """
         Send a chat completion request.
```
```diff
@@ -106,6 +106,7 @@ class LiteLLMProvider(LLMProvider):
         model: str | None = None,
         max_tokens: int = 4096,
         temperature: float = 0.7,
+        thinking_budget: int | None = None,
     ) -> LLMResponse:
         """
         Send a chat completion request via LiteLLM.
```