feat: dynamic Opus/Sonnet model switching based on rolling quota
All checks were successful
Build Nanobot OAuth / build (pull_request) Successful in 5m34s
Build Nanobot OAuth / cleanup (pull_request) Has been skipped

Implement intelligent model selection to manage 7-day Opus quota burn rate:

- Add _select_model_based_on_quota() method to AgentLoop
  - Reads rate limit data from memory/rate_limits.json
  - Calculates expected vs actual quota usage (100%/168h = 0.595% per hour)
  - If actual > expected × 1.17 (17% overage), downgrades to Sonnet
  - Otherwise (actual ≤ expected × 1.17), uses Opus
  - Caches decision for 5 minutes to minimize file I/O

- Add /quota slash command to display real-time quota status
  - Shows current usage vs expected usage
  - Shows hours until weekly reset
  - Shows selected model and burn rate multiplier

- Main agent now calls _select_model_based_on_quota() before each conversation
  - Heartbeat subagent unaffected (explicitly uses claude-sonnet-4-20250514)

This replaces the wrong approach from PR #5 which throttled heartbeat
frequency instead of switching the main agent's model.

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
wylab
2026-02-14 23:51:19 +01:00
parent 84268edf01
commit ece660ae69
2 changed files with 107 additions and 8 deletions

View File

@@ -2,6 +2,7 @@
import asyncio
import json
import time
from pathlib import Path
from typing import Any
@@ -74,8 +75,10 @@ class AgentLoop:
exec_config=self.exec_config,
restrict_to_workspace=restrict_to_workspace,
)
self._running = False
self._quota_cache: dict[str, Any] = {} # {model: str, cached_at: float}
self._quota_cache_ttl: float = 300.0 # 5 minutes
self._register_default_tools()
def _register_default_tools(self) -> None:
@@ -143,7 +146,95 @@ class AgentLoop:
"""Stop the agent loop."""
self._running = False
logger.info("Agent loop stopping")
def _select_model_based_on_quota(self) -> str:
    """Pick the model to use from the weekly quota burn rate.

    The usage value stored in ``memory/rate_limits.json`` is scaled to a
    percentage (it is assumed to be a 0-1 fraction -- confirm against the
    writer of that file) and compared with the share of the 168-hour window
    that has elapsed. Usage above ``expected * 1.17`` selects Sonnet; any
    other value selects Opus.

    A computed decision is stored in ``self._quota_cache`` and reused for
    ``self._quota_cache_ttl`` seconds. The Sonnet fallbacks (file missing,
    fields missing, or any error while reading/computing) are returned
    without being cached, so the file is re-checked on the next call.

    Returns:
        The identifier of the model to use.
    """
    current_time = time.time()

    # Serve the previous decision while it is still fresh.
    cache = self._quota_cache
    if cache and (current_time - cache.get("cached_at", 0)) < self._quota_cache_ttl:
        return cache["model"]

    opus_model = "claude-opus-4-6"
    sonnet_model = "claude-sonnet-4-5"
    overage_factor = 1.17  # 17% over the expected usage triggers the downgrade

    limits_file = self.workspace / "memory" / "rate_limits.json"
    if not limits_file.exists():
        logger.warning("rate_limits.json not found, defaulting to Sonnet")
        return sonnet_model

    try:
        with open(limits_file) as fh:
            data = json.load(fh)

        used = data.get("weekly_all_models")
        reset_at = data.get("weekly_reset")
        if used is None or reset_at is None:
            logger.warning("Rate limit data incomplete, defaulting to Sonnet")
            return sonnet_model

        # Expected usage grows linearly over the 168-hour window that ends
        # at the reset timestamp; elapsed time is clamped to that window.
        actual_pct = used * 100
        window_start = reset_at - (168 * 3600)
        hours_elapsed = max(0, min((current_time - window_start) / 3600, 168))
        expected_pct = (hours_elapsed / 168) * 100
        threshold = expected_pct * overage_factor

        if actual_pct > threshold:
            chosen, label = sonnet_model, "Sonnet"
        else:
            chosen, label = opus_model, "Opus"
        logger.info(
            f"Quota: {actual_pct:.1f}% used, expected {expected_pct:.1f}%, "
            f"threshold {threshold:.1f}% → {label}"
        )

        # Only a successfully computed decision is cached.
        self._quota_cache = {"model": chosen, "cached_at": current_time}
        return chosen
    except Exception as exc:
        logger.error(f"Error checking quota: {exc}, defaulting to Sonnet")
        return sonnet_model
def _get_quota_status(self) -> str:
"""Return human-readable quota status."""
rate_limits_path = self.workspace / "memory" / "rate_limits.json"
if not rate_limits_path.exists():
return "⚠️ No quota data available yet."
try:
with open(rate_limits_path) as f:
limits = json.load(f)
actual_pct = limits.get("weekly_all_models", 0) * 100
reset_ts = limits.get("weekly_reset", 0)
now = time.time()
hours_until_reset = (reset_ts - now) / 3600
week_start = reset_ts - (168 * 3600)
hours_elapsed = max(0, (now - week_start) / 3600)
expected_pct = (hours_elapsed / 168) * 100
model = self._select_model_based_on_quota()
return f"""📊 Quota Status:
• Used: {actual_pct:.1f}% (expected {expected_pct:.1f}%)
• Resets in: {hours_until_reset:.1f}h
• Current model: {model}
• Burn rate: {actual_pct / max(expected_pct, 0.01):.2f}x target"""
except Exception as e:
return f"⚠️ Error reading quota: {e}"
async def _process_message(self, msg: InboundMessage, session_key: str | None = None) -> OutboundMessage | None:
"""
Process a single inbound message.
@@ -177,8 +268,11 @@ class AgentLoop:
content="🐈 New session started. Memory consolidated.")
if cmd == "/help":
return OutboundMessage(channel=msg.channel, chat_id=msg.chat_id,
content="🐈 nanobot commands:\n/new — Start a new conversation\n/help — Show available commands")
content="🐈 nanobot commands:\n/new — Start a new conversation\n/help — Show available commands\n/quota — Show quota status")
if cmd == "/quota":
status = self._get_quota_status()
return OutboundMessage(channel=msg.channel, chat_id=msg.chat_id, content=status)
# Consolidate memory before processing if session is too large
if len(session.messages) > self.memory_window:
await self._consolidate_memory(session)
@@ -204,20 +298,23 @@ class AgentLoop:
channel=msg.channel,
chat_id=msg.chat_id,
)
# Select model based on quota
selected_model = self._select_model_based_on_quota()
# Agent loop
iteration = 0
final_content = None
tools_used: list[str] = []
while iteration < self.max_iterations:
iteration += 1
# Call LLM
response = await self.provider.chat(
messages=messages,
tools=self.tools.get_definitions(),
model=self.model
model=selected_model
)
# Handle tool calls

View File

@@ -92,6 +92,7 @@ class TelegramChannel(BaseChannel):
BotCommand("start", "Start the bot"),
BotCommand("new", "Start a new conversation"),
BotCommand("help", "Show available commands"),
BotCommand("quota", "Show current quota status"),
]
def __init__(
@@ -127,6 +128,7 @@ class TelegramChannel(BaseChannel):
self._app.add_handler(CommandHandler("start", self._on_start))
self._app.add_handler(CommandHandler("new", self._forward_command))
self._app.add_handler(CommandHandler("help", self._forward_command))
self._app.add_handler(CommandHandler("quota", self._forward_command))
# Add message handler for text, photos, voice, documents
self._app.add_handler(