Compare commits
5 Commits
subagent-i...feat/rate-

| Author | SHA1 | Date |
|---|---|---|
| | 1d940cb4f2 | |
| | 84268edf01 | |
| | 9136cca1ff | |
| | 6035b70ae5 | |
| | e4c300bcfd | |
```diff
@@ -56,7 +56,7 @@ ENV PATH="/root/.local/bin:${PATH}"
 
 COPY pyproject.toml README.md LICENSE /app/
 COPY nanobot/ /app/nanobot/
-RUN uv pip install --system --no-cache --reinstall /app
+RUN uv pip install --system --no-cache --reinstall /app psycopg2-binary
 
 ENTRYPOINT ["nanobot"]
 CMD ["gateway"]
```
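The image now bakes `psycopg2-binary` in alongside the application install, presumably so nanobot can talk to PostgreSQL at runtime. A minimal smoke test of the new dependency inside the built image might look like this; the DSN and service name are hypothetical, not from this diff:

```python
# Hypothetical smoke test: confirm psycopg2 imports and can reach a
# PostgreSQL instance. The DSN below is an assumption, not from this diff.
import psycopg2

conn = psycopg2.connect("postgresql://nanobot:secret@db:5432/nanobot")
with conn, conn.cursor() as cur:
    cur.execute("SELECT version();")
    print(cur.fetchone()[0])  # e.g. "PostgreSQL 16.x ..."
conn.close()
```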
```diff
@@ -424,7 +424,9 @@ Respond with ONLY valid JSON, no markdown fences."""
                 {"role": "system", "content": "You are a memory consolidation agent. Respond only with valid JSON."},
                 {"role": "user", "content": prompt},
             ],
-            model=self.model,
+            model="claude-haiku-4-5",
+            thinking_budget=0,
+            max_tokens=16384,
         )
         text = (response.content or "").strip()
         if text.startswith("```"):
```
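The trailing context shows the response text still being checked for markdown fences even though the prompt forbids them; the stripping logic itself falls outside this hunk. A standalone sketch of that kind of defensive cleanup (my own helper, not the repo's code):

```python
def strip_md_fences(text: str) -> str:
    """Remove a surrounding ```json ... ``` fence if the model added one.

    Hypothetical helper mirroring the defensive cleanup hinted at by the
    hunk's trailing context; the repo's actual implementation is not shown.
    """
    text = text.strip()
    if text.startswith("```"):
        # Drop the opening fence line (``` or ```json)...
        lines = text.splitlines()[1:]
        # ...and a closing ``` line, if present.
        if lines and lines[-1].strip() == "```":
            lines = lines[:-1]
        text = "\n".join(lines).strip()
    return text
```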
```diff
@@ -121,7 +121,7 @@ class SubagentManager:
         ]
 
         # Run agent loop (limited iterations)
-        max_iterations = 15
+        max_iterations = 50
         iteration = 0
         final_result: str | None = None
 
```
```diff
@@ -225,6 +225,7 @@ class AnthropicOAuthProvider(LLMProvider):
         max_tokens: int = 4096,
         temperature: float = 0.7,
         tools: list[dict[str, Any]] | None = None,
+        thinking_budget_override: int | None = None,
     ) -> dict[str, Any]:
         """Make request to Anthropic API."""
         client = await self._get_client()
```
```diff
@@ -236,14 +237,15 @@ class AnthropicOAuthProvider(LLMProvider):
         }
 
         # Extended thinking: temperature must be 1 when enabled
-        if self.thinking_budget > 0:
+        effective_thinking = thinking_budget_override if thinking_budget_override is not None else self.thinking_budget
+        if effective_thinking > 0:
             payload["temperature"] = 1
             # max_tokens must exceed budget_tokens
-            if max_tokens <= self.thinking_budget:
-                payload["max_tokens"] = self.thinking_budget + 4096
+            if max_tokens <= effective_thinking:
+                payload["max_tokens"] = effective_thinking + 4096
             payload["thinking"] = {
                 "type": "enabled",
-                "budget_tokens": self.thinking_budget,
+                "budget_tokens": effective_thinking,
             }
         else:
             payload["temperature"] = temperature
```
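Isolated from the class, the override precedence and the extended-thinking constraints (temperature forced to 1, max_tokens strictly greater than budget_tokens) read as a small pure function. This is a sketch of the logic as it stands after the patch, not the repo's actual code:

```python
from typing import Any

def apply_thinking(
    payload: dict[str, Any],
    max_tokens: int,
    temperature: float,
    instance_budget: int,
    override: int | None = None,
) -> dict[str, Any]:
    """Sketch of the patched logic: a per-call override (0 disables thinking)
    takes precedence over the instance-level budget."""
    effective = override if override is not None else instance_budget
    if effective > 0:
        payload["temperature"] = 1  # required when extended thinking is on
        if max_tokens <= effective:
            payload["max_tokens"] = effective + 4096  # must exceed the budget
        payload["thinking"] = {"type": "enabled", "budget_tokens": effective}
    else:
        payload["temperature"] = temperature
    return payload
```

With `override=0` (as the consolidation call above passes `thinking_budget=0`), the function falls through to plain temperature sampling even when the instance default enables thinking.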
```diff
@@ -266,6 +268,43 @@ class AnthropicOAuthProvider(LLMProvider):
             json=payload,
         )
 
+        # Dump rate limit headers for analysis
+        try:
+            import datetime, os
+            header_dump = {
+                "timestamp": datetime.datetime.utcnow().isoformat(),
+                "status_code": response.status_code,
+                "model": payload.get("model"),
+                "headers": dict(response.headers),
+            }
+            dump_path = "/root/.nanobot/workspace/api_headers.jsonl"
+            with open(dump_path, "a") as f:
+                f.write(json.dumps(header_dump) + "\n")
+        except Exception:
+            pass
+
+        # Capture rate limit state for heartbeat throttling
+        try:
+            import datetime, os
+            headers = response.headers
+            rate_limit_state = {
+                "updated_at": datetime.datetime.utcnow().isoformat(),
+                "model": payload.get("model"),
+                "weekly_all_models": float(headers.get("anthropic-ratelimit-unified-7d-utilization", 0)) if headers.get("anthropic-ratelimit-unified-7d-utilization") else None,
+                "weekly_sonnet": float(headers.get("anthropic-ratelimit-unified-7d_sonnet-utilization", 0)) if headers.get("anthropic-ratelimit-unified-7d_sonnet-utilization") else None,
+                "session_5h": float(headers.get("anthropic-ratelimit-unified-5h-utilization", 0)) if headers.get("anthropic-ratelimit-unified-5h-utilization") else None,
+                "weekly_reset": int(headers.get("anthropic-ratelimit-unified-7d-reset", 0)) if headers.get("anthropic-ratelimit-unified-7d-reset") else None,
+                "session_reset": int(headers.get("anthropic-ratelimit-unified-5h-reset", 0)) if headers.get("anthropic-ratelimit-unified-5h-reset") else None,
+                "binding_limit": headers.get("anthropic-ratelimit-unified-representative-claim"),
+                "sonnet_fallback": headers.get("anthropic-ratelimit-unified-fallback"),
+            }
+            state_path = "/root/.nanobot/workspace/memory/rate_limits.json"
+            os.makedirs(os.path.dirname(state_path), exist_ok=True)
+            with open(state_path, "w") as f:
+                json.dump(rate_limit_state, f, indent=2)
+        except Exception:
+            pass
+
         if response.status_code != 200:
             error_text = response.text
             raise Exception(f"Anthropic API error {response.status_code}: {error_text}")
```
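The second block persists a compact snapshot that a heartbeat loop can poll to throttle itself. The header names and file path come from the diff; the consumer below is a hypothetical sketch, since the throttling side is not part of this compare:

```python
import json
import time
from pathlib import Path

STATE_PATH = Path("/root/.nanobot/workspace/memory/rate_limits.json")

def heartbeat_delay(default: float = 60.0) -> float:
    """Hypothetical consumer: stretch the heartbeat interval as the 5h
    session utilization climbs, and back off hard near the cap."""
    try:
        state = json.loads(STATE_PATH.read_text())
    except (FileNotFoundError, json.JSONDecodeError):
        return default
    util = state.get("session_5h")
    if util is None:
        return default
    if util >= 0.95:
        # Nearly exhausted: sleep until the advertised reset, assuming the
        # reset header is a Unix epoch timestamp (not confirmed by this diff).
        reset = state.get("session_reset")
        if reset:
            return max(default, reset - time.time())
        return default * 10
    return default * (1 + 4 * util)  # linear stretch up to 5x
```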
```diff
@@ -279,6 +318,7 @@ class AnthropicOAuthProvider(LLMProvider):
         model: str | None = None,
         max_tokens: int = 4096,
         temperature: float = 0.7,
+        thinking_budget: int | None = None,
     ) -> LLMResponse:
         """Send chat completion request to Anthropic API."""
         model = model or self.default_model
```
```diff
@@ -293,6 +333,9 @@ class AnthropicOAuthProvider(LLMProvider):
         system, prepared_messages = self._prepare_messages(messages)
         anthropic_tools = self._convert_tools_to_anthropic(tools)
 
+        # Per-call thinking override (None = use instance default)
+        effective_thinking = self.thinking_budget if thinking_budget is None else thinking_budget
+
         try:
             response = await self._make_request(
                 messages=prepared_messages,
```
```diff
@@ -301,6 +344,7 @@ class AnthropicOAuthProvider(LLMProvider):
                 max_tokens=max_tokens,
                 temperature=temperature,
                 tools=anthropic_tools,
+                thinking_budget_override=effective_thinking,
             )
             return self._parse_response(response)
         except Exception as e:
```
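Taken together, the provider changes thread a per-call knob from `chat()` down into the request payload. Usage as implied by the consolidation hunk earlier; `provider` and `consolidate` are stand-in names for illustration:

```python
async def consolidate(provider, messages):
    # Per-call override, as in the consolidation hunk above:
    # thinking_budget=0 disables extended thinking for this request only,
    # regardless of the provider's instance-level default. Omitting the
    # argument (None) keeps the instance default.
    return await provider.chat(
        messages=messages,
        model="claude-haiku-4-5",
        thinking_budget=0,
        max_tokens=16384,
    )
```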
```diff
@@ -48,6 +48,7 @@ class LLMProvider(ABC):
         model: str | None = None,
         max_tokens: int = 4096,
         temperature: float = 0.7,
+        thinking_budget: int | None = None,
     ) -> LLMResponse:
         """
         Send a chat completion request.
```
```diff
@@ -106,6 +106,7 @@ class LiteLLMProvider(LLMProvider):
         model: str | None = None,
         max_tokens: int = 4096,
         temperature: float = 0.7,
+        thinking_budget: int | None = None,
     ) -> LLMResponse:
         """
         Send a chat completion request via LiteLLM.
```