diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py
index 5f230b7..6cecd46 100644
--- a/nanobot/agent/loop.py
+++ b/nanobot/agent/loop.py
@@ -424,7 +424,8 @@ Respond with ONLY valid JSON, no markdown fences."""
                 {"role": "system", "content": "You are a memory consolidation agent. Respond only with valid JSON."},
                 {"role": "user", "content": prompt},
             ],
-            model=self.model,
+            model="claude-haiku-4-5",
+            thinking_budget=0,
         )
         text = (response.content or "").strip()
         if text.startswith("```"):
diff --git a/nanobot/providers/anthropic_oauth.py b/nanobot/providers/anthropic_oauth.py
index c14eb03..8023d32 100644
--- a/nanobot/providers/anthropic_oauth.py
+++ b/nanobot/providers/anthropic_oauth.py
@@ -225,6 +225,7 @@ class AnthropicOAuthProvider(LLMProvider):
         max_tokens: int = 4096,
         temperature: float = 0.7,
         tools: list[dict[str, Any]] | None = None,
+        thinking_budget_override: int | None = None,
     ) -> dict[str, Any]:
         """Make request to Anthropic API."""
         client = await self._get_client()
@@ -236,14 +237,15 @@ class AnthropicOAuthProvider(LLMProvider):
         }
 
         # Extended thinking: temperature must be 1 when enabled
-        if self.thinking_budget > 0:
+        effective_thinking = thinking_budget_override if thinking_budget_override is not None else self.thinking_budget
+        if effective_thinking > 0:
             payload["temperature"] = 1
             # max_tokens must exceed budget_tokens
-            if max_tokens <= self.thinking_budget:
-                payload["max_tokens"] = self.thinking_budget + 4096
+            if max_tokens <= effective_thinking:
+                payload["max_tokens"] = effective_thinking + 4096
             payload["thinking"] = {
                 "type": "enabled",
-                "budget_tokens": self.thinking_budget,
+                "budget_tokens": effective_thinking,
             }
         else:
             payload["temperature"] = temperature
@@ -279,6 +281,7 @@ class AnthropicOAuthProvider(LLMProvider):
         model: str | None = None,
         max_tokens: int = 4096,
         temperature: float = 0.7,
+        thinking_budget: int | None = None,
     ) -> LLMResponse:
         """Send chat completion request to Anthropic API."""
         model = model or self.default_model
@@ -293,6 +296,9 @@
         system, prepared_messages = self._prepare_messages(messages)
         anthropic_tools = self._convert_tools_to_anthropic(tools)
 
+        # Per-call thinking override (None = use instance default)
+        effective_thinking = self.thinking_budget if thinking_budget is None else thinking_budget
+
         try:
             response = await self._make_request(
                 messages=prepared_messages,
@@ -301,6 +307,7 @@
                 max_tokens=max_tokens,
                 temperature=temperature,
                 tools=anthropic_tools,
+                thinking_budget_override=effective_thinking,
             )
             return self._parse_response(response)
         except Exception as e:
diff --git a/nanobot/providers/base.py b/nanobot/providers/base.py
index 5085292..b0184e9 100644
--- a/nanobot/providers/base.py
+++ b/nanobot/providers/base.py
@@ -48,6 +48,7 @@ class LLMProvider(ABC):
         model: str | None = None,
         max_tokens: int = 4096,
         temperature: float = 0.7,
+        thinking_budget: int | None = None,
     ) -> LLMResponse:
         """
         Send a chat completion request.
diff --git a/nanobot/providers/litellm_provider.py b/nanobot/providers/litellm_provider.py
index 7865139..ca001b0 100644
--- a/nanobot/providers/litellm_provider.py
+++ b/nanobot/providers/litellm_provider.py
@@ -106,6 +106,7 @@ class LiteLLMProvider(LLMProvider):
         model: str | None = None,
         max_tokens: int = 4096,
         temperature: float = 0.7,
+        thinking_budget: int | None = None,
     ) -> LLMResponse:
         """
         Send a chat completion request via LiteLLM.