Add extended thinking support for Anthropic API
All checks were successful
Build Nanobot OAuth / build (push) Successful in 1m57s
Adds a configurable thinking_budget to agent defaults. When >0, the thinking parameter is sent to the API with the specified token budget. Handles API constraints: forces temperature=1, auto-bumps max_tokens if it is below the thinking budget, and preserves thinking blocks in message history for multi-turn conversations. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -284,6 +284,7 @@ def _make_provider(config):
|
||||
api_base=config.get_api_base(),
|
||||
extra_headers=p.extra_headers if p else None,
|
||||
provider_name=config.get_provider_name(),
|
||||
thinking_budget=config.agents.defaults.thinking_budget,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -163,6 +163,7 @@ class AgentDefaults(BaseModel):
|
||||
temperature: float = 0.7
|
||||
max_tool_iterations: int = 20
|
||||
memory_window: int = 50
|
||||
thinking_budget: int = 0 # 0 = disabled; >0 = token budget for extended thinking
|
||||
|
||||
|
||||
class AgentsConfig(BaseModel):
|
||||
|
||||
@@ -21,6 +21,7 @@ def create_provider(
|
||||
api_base: str | None = None,
|
||||
extra_headers: dict[str, str] | None = None,
|
||||
provider_name: str | None = None,
|
||||
thinking_budget: int = 0,
|
||||
) -> LLMProvider:
|
||||
"""Factory function to create appropriate provider.
|
||||
|
||||
@@ -32,6 +33,7 @@ def create_provider(
|
||||
oauth_token=api_key,
|
||||
default_model=model,
|
||||
api_base=api_base,
|
||||
thinking_budget=thinking_budget,
|
||||
)
|
||||
|
||||
return LiteLLMProvider(
|
||||
|
||||
@@ -28,10 +28,12 @@ class AnthropicOAuthProvider(LLMProvider):
|
||||
oauth_token: str,
|
||||
default_model: str = "claude-opus-4-5",
|
||||
api_base: str | None = None,
|
||||
thinking_budget: int = 0,
|
||||
):
|
||||
super().__init__(api_key=None, api_base=api_base)
|
||||
self.oauth_token = oauth_token
|
||||
self.default_model = default_model
|
||||
self.thinking_budget = thinking_budget
|
||||
self._client: httpx.AsyncClient | None = None
|
||||
|
||||
def _get_headers(self) -> dict[str, str]:
|
||||
@@ -93,6 +95,12 @@ class AnthropicOAuthProvider(LLMProvider):
|
||||
if role == "assistant" and msg.get("tool_calls"):
|
||||
# Convert OpenAI tool_calls to Anthropic content blocks
|
||||
content_blocks: list[dict[str, Any]] = []
|
||||
# Preserve thinking block if present
|
||||
if msg.get("reasoning_content"):
|
||||
content_blocks.append({
|
||||
"type": "thinking",
|
||||
"thinking": msg["reasoning_content"],
|
||||
})
|
||||
text = msg.get("content")
|
||||
if text:
|
||||
content_blocks.append({"type": "text", "text": text})
|
||||
@@ -113,6 +121,17 @@ class AnthropicOAuthProvider(LLMProvider):
|
||||
converted.append({"role": "assistant", "content": content_blocks})
|
||||
continue
|
||||
|
||||
if role == "assistant" and msg.get("reasoning_content"):
|
||||
# Plain assistant message with thinking (no tool calls)
|
||||
content_blocks = [
|
||||
{"type": "thinking", "thinking": msg["reasoning_content"]},
|
||||
]
|
||||
text = msg.get("content")
|
||||
if text:
|
||||
content_blocks.append({"type": "text", "text": text})
|
||||
converted.append({"role": "assistant", "content": content_blocks})
|
||||
continue
|
||||
|
||||
if role == "tool":
|
||||
# Convert tool result to Anthropic user message with tool_result block
|
||||
tool_result_block = {
|
||||
@@ -186,9 +205,21 @@ class AnthropicOAuthProvider(LLMProvider):
|
||||
"model": model,
|
||||
"messages": messages,
|
||||
"max_tokens": max_tokens,
|
||||
"temperature": temperature,
|
||||
}
|
||||
|
||||
# Extended thinking: temperature must be 1 when enabled
|
||||
if self.thinking_budget > 0:
|
||||
payload["temperature"] = 1
|
||||
# max_tokens must exceed budget_tokens
|
||||
if max_tokens <= self.thinking_budget:
|
||||
payload["max_tokens"] = self.thinking_budget + 4096
|
||||
payload["thinking"] = {
|
||||
"type": "enabled",
|
||||
"budget_tokens": self.thinking_budget,
|
||||
}
|
||||
else:
|
||||
payload["temperature"] = temperature
|
||||
|
||||
if system:
|
||||
payload["system"] = system
|
||||
|
||||
@@ -249,10 +280,13 @@ class AnthropicOAuthProvider(LLMProvider):
|
||||
content_blocks = response.get("content", [])
|
||||
|
||||
text_content = ""
|
||||
thinking_content = ""
|
||||
tool_calls = []
|
||||
|
||||
for block in content_blocks:
|
||||
if block.get("type") == "thinking":
|
||||
thinking_content += block.get("thinking", "")
|
||||
elif block.get("type") == "text":
|
||||
text_content += block.get("text", "")
|
||||
elif block.get("type") == "tool_use":
|
||||
tool_calls.append(ToolCallRequest(
|
||||
@@ -277,6 +311,7 @@ class AnthropicOAuthProvider(LLMProvider):
|
||||
tool_calls=tool_calls,
|
||||
finish_reason=response.get("stop_reason", "end_turn"),
|
||||
usage=usage,
|
||||
reasoning_content=thinking_content or None,
|
||||
)
|
||||
|
||||
def get_default_model(self) -> str:
|
||||
|
||||
Reference in New Issue
Block a user