diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py
index 5f230b7..6cecd46 100644
--- a/nanobot/agent/loop.py
+++ b/nanobot/agent/loop.py
@@ -424,7 +424,8 @@ Respond with ONLY valid JSON, no markdown fences."""
                 {"role": "system", "content": "You are a memory consolidation agent. Respond only with valid JSON."},
                 {"role": "user", "content": prompt},
             ],
-            model=self.model,
+            model="claude-haiku-4-5",
+            thinking_budget=0,
         )
         text = (response.content or "").strip()
         if text.startswith("```"):
diff --git a/nanobot/providers/anthropic_oauth.py b/nanobot/providers/anthropic_oauth.py
index c14eb03..8023d32 100644
--- a/nanobot/providers/anthropic_oauth.py
+++ b/nanobot/providers/anthropic_oauth.py
@@ -225,6 +225,7 @@ class AnthropicOAuthProvider(LLMProvider):
         max_tokens: int = 4096,
         temperature: float = 0.7,
         tools: list[dict[str, Any]] | None = None,
+        thinking_budget_override: int | None = None,
     ) -> dict[str, Any]:
         """Make request to Anthropic API."""
         client = await self._get_client()
@@ -236,14 +237,15 @@ class AnthropicOAuthProvider(LLMProvider):
         }
 
         # Extended thinking: temperature must be 1 when enabled
-        if self.thinking_budget > 0:
+        effective_thinking = thinking_budget_override if thinking_budget_override is not None else self.thinking_budget
+        if effective_thinking > 0:
             payload["temperature"] = 1
             # max_tokens must exceed budget_tokens
-            if max_tokens <= self.thinking_budget:
-                payload["max_tokens"] = self.thinking_budget + 4096
+            if max_tokens <= effective_thinking:
+                payload["max_tokens"] = effective_thinking + 4096
             payload["thinking"] = {
                 "type": "enabled",
-                "budget_tokens": self.thinking_budget,
+                "budget_tokens": effective_thinking,
             }
         else:
             payload["temperature"] = temperature
@@ -279,6 +281,7 @@ class AnthropicOAuthProvider(LLMProvider):
         model: str | None = None,
         max_tokens: int = 4096,
         temperature: float = 0.7,
+        thinking_budget: int | None = None,
     ) -> LLMResponse:
         """Send chat completion request to Anthropic API."""
         model = model or self.default_model
@@ -293,6 +296,9 @@
         system, prepared_messages = self._prepare_messages(messages)
         anthropic_tools = self._convert_tools_to_anthropic(tools)
 
+        # Per-call thinking override (None = use instance default)
+        effective_thinking = self.thinking_budget if thinking_budget is None else thinking_budget
+
         try:
             response = await self._make_request(
                 messages=prepared_messages,
@@ -301,6 +307,7 @@
                 max_tokens=max_tokens,
                 temperature=temperature,
                 tools=anthropic_tools,
+                thinking_budget_override=effective_thinking,
             )
             return self._parse_response(response)
         except Exception as e:
diff --git a/nanobot/providers/base.py b/nanobot/providers/base.py
index 5085292..b0184e9 100644
--- a/nanobot/providers/base.py
+++ b/nanobot/providers/base.py
@@ -48,6 +48,7 @@ class LLMProvider(ABC):
         model: str | None = None,
         max_tokens: int = 4096,
         temperature: float = 0.7,
+        thinking_budget: int | None = None,
     ) -> LLMResponse:
         """
         Send a chat completion request.
diff --git a/nanobot/providers/litellm_provider.py b/nanobot/providers/litellm_provider.py
index 7865139..ca001b0 100644
--- a/nanobot/providers/litellm_provider.py
+++ b/nanobot/providers/litellm_provider.py
@@ -106,6 +106,7 @@ class LiteLLMProvider(LLMProvider):
         model: str | None = None,
         max_tokens: int = 4096,
         temperature: float = 0.7,
+        thinking_budget: int | None = None,
     ) -> LLMResponse:
         """
         Send a chat completion request via LiteLLM.