4 Commits

Author SHA1 Message Date
wylab
84268edf01 Fix memory consolidation truncation: set max_tokens=16384
All checks were successful
Build Nanobot OAuth / build (push) Successful in 5m40s
Build Nanobot OAuth / cleanup (push) Successful in 2s
Consolidation was failing because max_tokens defaulted to 4096,
causing Haiku's response to be truncated mid-JSON (finish_reason=max_tokens).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-14 18:45:35 +01:00
wylab
9136cca1ff Fix memory consolidation timeout: use Haiku without thinking
All checks were successful
Build Nanobot OAuth / build (push) Successful in 5m51s
Build Nanobot OAuth / cleanup (push) Successful in 3s
Root cause: consolidation was calling Opus 4.6 with 10k thinking budget
on 50-80 message prompts. The 300s httpx timeout killed every request
(all failures were exactly 5 minutes after start). Consolidation is just
summarization — Haiku with no thinking handles it in seconds.

Also adds per-call thinking_budget override to the provider interface
so callers can disable thinking for lightweight tasks.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-14 17:30:48 +01:00
wylab
6035b70ae5 Add psycopg2-binary to Docker image for PostgreSQL access
All checks were successful
Build Nanobot OAuth / build (push) Successful in 5m29s
Build Nanobot OAuth / cleanup (push) Successful in 1s
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-14 13:25:02 +01:00
e4c300bcfd Increase subagent max_iterations from 15 to 50 (#3)
All checks were successful
Build Nanobot OAuth / build (push) Successful in 47s
Build Nanobot OAuth / cleanup (push) Successful in 1s
Co-authored-by: nanobot <nanobot@wylab.me>
Co-committed-by: nanobot <nanobot@wylab.me>
2026-02-14 11:40:50 +01:00
6 changed files with 18 additions and 7 deletions

View File

@@ -56,7 +56,7 @@ ENV PATH="/root/.local/bin:${PATH}"
COPY pyproject.toml README.md LICENSE /app/
COPY nanobot/ /app/nanobot/
-RUN uv pip install --system --no-cache --reinstall /app
+RUN uv pip install --system --no-cache --reinstall /app psycopg2-binary
ENTRYPOINT ["nanobot"]
CMD ["gateway"]

View File

@@ -424,7 +424,9 @@ Respond with ONLY valid JSON, no markdown fences."""
{"role": "system", "content": "You are a memory consolidation agent. Respond only with valid JSON."},
{"role": "user", "content": prompt},
],
-model=self.model,
+model="claude-haiku-4-5",
+thinking_budget=0,
+max_tokens=16384,
)
text = (response.content or "").strip()
if text.startswith("```"):

View File

@@ -121,7 +121,7 @@ class SubagentManager:
]
# Run agent loop (limited iterations)
-max_iterations = 15
+max_iterations = 50
iteration = 0
final_result: str | None = None

View File

@@ -225,6 +225,7 @@ class AnthropicOAuthProvider(LLMProvider):
max_tokens: int = 4096,
temperature: float = 0.7,
tools: list[dict[str, Any]] | None = None,
+thinking_budget_override: int | None = None,
) -> dict[str, Any]:
"""Make request to Anthropic API."""
client = await self._get_client()
@@ -236,14 +237,15 @@ class AnthropicOAuthProvider(LLMProvider):
}
# Extended thinking: temperature must be 1 when enabled
-if self.thinking_budget > 0:
+effective_thinking = thinking_budget_override if thinking_budget_override is not None else self.thinking_budget
+if effective_thinking > 0:
payload["temperature"] = 1
# max_tokens must exceed budget_tokens
-if max_tokens <= self.thinking_budget:
-payload["max_tokens"] = self.thinking_budget + 4096
+if max_tokens <= effective_thinking:
+payload["max_tokens"] = effective_thinking + 4096
payload["thinking"] = {
"type": "enabled",
-"budget_tokens": self.thinking_budget,
+"budget_tokens": effective_thinking,
}
else:
payload["temperature"] = temperature
@@ -279,6 +281,7 @@ class AnthropicOAuthProvider(LLMProvider):
model: str | None = None,
max_tokens: int = 4096,
temperature: float = 0.7,
+thinking_budget: int | None = None,
) -> LLMResponse:
"""Send chat completion request to Anthropic API."""
model = model or self.default_model
@@ -293,6 +296,9 @@ class AnthropicOAuthProvider(LLMProvider):
system, prepared_messages = self._prepare_messages(messages)
anthropic_tools = self._convert_tools_to_anthropic(tools)
+# Per-call thinking override (None = use instance default)
+effective_thinking = self.thinking_budget if thinking_budget is None else thinking_budget
try:
response = await self._make_request(
messages=prepared_messages,
@@ -301,6 +307,7 @@ class AnthropicOAuthProvider(LLMProvider):
max_tokens=max_tokens,
temperature=temperature,
tools=anthropic_tools,
+thinking_budget_override=effective_thinking,
)
return self._parse_response(response)
except Exception as e:

View File

@@ -48,6 +48,7 @@ class LLMProvider(ABC):
model: str | None = None,
max_tokens: int = 4096,
temperature: float = 0.7,
+thinking_budget: int | None = None,
) -> LLMResponse:
"""
Send a chat completion request.

View File

@@ -106,6 +106,7 @@ class LiteLLMProvider(LLMProvider):
model: str | None = None,
max_tokens: int = 4096,
temperature: float = 0.7,
+thinking_budget: int | None = None,
) -> LLMResponse:
"""
Send a chat completion request via LiteLLM.