fix(oauth): restore identity block required by Anthropic API
Anthropic now requires OAuth requests to include an approved identity string as a separate first content block in the system prompt array. Without it, Sonnet/Opus models return 400 invalid_request_error. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -11,7 +11,7 @@ import httpx
|
||||
from loguru import logger
|
||||
|
||||
from nanobot.providers.base import LLMProvider, LLMResponse, ToolCallRequest
|
||||
from nanobot.providers.oauth_utils import get_auth_headers
|
||||
from nanobot.providers.oauth_utils import get_auth_headers, get_claude_code_system_prefix
|
||||
|
||||
|
||||
class AnthropicOAuthProvider(LLMProvider):
|
||||
@@ -377,7 +377,10 @@ class AnthropicOAuthProvider(LLMProvider):
|
||||
payload["temperature"] = temperature
|
||||
|
||||
if system:
|
||||
payload["system"] = [{"type": "text", "text": system, "cache_control": {"type": "ephemeral", "ttl": "1h"}}]
|
||||
payload["system"] = [
|
||||
{"type": "text", "text": get_claude_code_system_prefix()},
|
||||
{"type": "text", "text": system, "cache_control": {"type": "ephemeral", "ttl": "1h"}},
|
||||
]
|
||||
|
||||
if tools:
|
||||
cached_tools = list(tools)
|
||||
@@ -424,70 +427,109 @@ class AnthropicOAuthProvider(LLMProvider):
|
||||
|
||||
import asyncio
|
||||
import time as _time
|
||||
_t0 = _time.monotonic()
|
||||
try:
|
||||
response = await client.post(
|
||||
self._get_api_url(),
|
||||
headers=headers,
|
||||
json=payload,
|
||||
)
|
||||
except httpx.ConnectTimeout:
|
||||
elapsed = _time.monotonic() - _t0
|
||||
logger.error(f"ConnectTimeout after {elapsed:.1f}s — running diagnostics")
|
||||
await self._diagnose_connectivity()
|
||||
await self._reset_client()
|
||||
raise
|
||||
except httpx.PoolTimeout:
|
||||
elapsed = _time.monotonic() - _t0
|
||||
logger.error(f"PoolTimeout after {elapsed:.1f}s — resetting client")
|
||||
await self._reset_client()
|
||||
raise
|
||||
except (httpx.ConnectError, httpx.TimeoutException) as e:
|
||||
elapsed = _time.monotonic() - _t0
|
||||
logger.error(f"{type(e).__name__} after {elapsed:.1f}s")
|
||||
raise
|
||||
elapsed = _time.monotonic() - _t0
|
||||
if elapsed > 30:
|
||||
logger.warning(f"Anthropic API slow response: {elapsed:.1f}s")
|
||||
|
||||
# Dump rate limit headers for analysis
|
||||
try:
|
||||
import datetime
|
||||
import os
|
||||
header_dump = {
|
||||
"timestamp": datetime.datetime.now(datetime.UTC).isoformat(),
|
||||
"status_code": response.status_code,
|
||||
"model": payload.get("model"),
|
||||
"headers": dict(response.headers),
|
||||
}
|
||||
dump_path = "/root/.nanobot/workspace/api_headers.jsonl"
|
||||
with open(dump_path, "a") as f:
|
||||
f.write(json.dumps(header_dump) + "\n")
|
||||
# Capture rate limit state for quota-based model switching
|
||||
hdrs = response.headers
|
||||
rate_limit_state = {
|
||||
"updated_at": datetime.datetime.utcnow().isoformat(),
|
||||
"model": payload.get("model"),
|
||||
"weekly_all_models": float(hdrs["anthropic-ratelimit-unified-7d-utilization"]) if hdrs.get("anthropic-ratelimit-unified-7d-utilization") else None,
|
||||
"weekly_sonnet": float(hdrs["anthropic-ratelimit-unified-7d_sonnet-utilization"]) if hdrs.get("anthropic-ratelimit-unified-7d_sonnet-utilization") else None,
|
||||
"session_5h": float(hdrs["anthropic-ratelimit-unified-5h-utilization"]) if hdrs.get("anthropic-ratelimit-unified-5h-utilization") else None,
|
||||
"weekly_reset": int(hdrs["anthropic-ratelimit-unified-7d-reset"]) if hdrs.get("anthropic-ratelimit-unified-7d-reset") else None,
|
||||
"session_reset": int(hdrs["anthropic-ratelimit-unified-5h-reset"]) if hdrs.get("anthropic-ratelimit-unified-5h-reset") else None,
|
||||
"binding_limit": hdrs.get("anthropic-ratelimit-unified-representative-claim"),
|
||||
"sonnet_fallback": hdrs.get("anthropic-ratelimit-unified-fallback"),
|
||||
}
|
||||
state_path = "/root/.nanobot/workspace/memory/rate_limits.json"
|
||||
os.makedirs(os.path.dirname(state_path), exist_ok=True)
|
||||
with open(state_path, "w") as f:
|
||||
json.dump(rate_limit_state, f, indent=2)
|
||||
except Exception as e:
|
||||
logger.warning("Rate limit header capture failed: {}", e)
|
||||
max_retries = 3
|
||||
base_delay = 2.0 # seconds
|
||||
|
||||
if response.status_code != 200:
|
||||
error_text = response.text
|
||||
raise Exception(f"Anthropic API error {response.status_code}: {error_text}")
|
||||
for attempt in range(max_retries + 1):
|
||||
_t0 = _time.monotonic()
|
||||
try:
|
||||
response = await client.post(
|
||||
self._get_api_url(),
|
||||
headers=headers,
|
||||
json=payload,
|
||||
)
|
||||
except httpx.ConnectTimeout:
|
||||
elapsed = _time.monotonic() - _t0
|
||||
logger.error(f"ConnectTimeout after {elapsed:.1f}s (attempt {attempt+1}/{max_retries+1})")
|
||||
if attempt == 0:
|
||||
await self._diagnose_connectivity()
|
||||
await self._reset_client()
|
||||
if attempt < max_retries:
|
||||
delay = base_delay * (2 ** attempt)
|
||||
logger.info(f"Retrying in {delay:.1f}s...")
|
||||
await asyncio.sleep(delay)
|
||||
continue
|
||||
raise
|
||||
except httpx.PoolTimeout:
|
||||
elapsed = _time.monotonic() - _t0
|
||||
logger.error(f"PoolTimeout after {elapsed:.1f}s (attempt {attempt+1}/{max_retries+1})")
|
||||
await self._reset_client()
|
||||
if attempt < max_retries:
|
||||
delay = base_delay * (2 ** attempt)
|
||||
logger.info(f"Retrying in {delay:.1f}s...")
|
||||
await asyncio.sleep(delay)
|
||||
continue
|
||||
raise
|
||||
except (httpx.ConnectError, httpx.TimeoutException) as e:
|
||||
elapsed = _time.monotonic() - _t0
|
||||
logger.error(f"{type(e).__name__} after {elapsed:.1f}s (attempt {attempt+1}/{max_retries+1})")
|
||||
if attempt < max_retries:
|
||||
delay = base_delay * (2 ** attempt)
|
||||
logger.info(f"Retrying in {delay:.1f}s...")
|
||||
await asyncio.sleep(delay)
|
||||
continue
|
||||
raise
|
||||
elapsed = _time.monotonic() - _t0
|
||||
if elapsed > 30:
|
||||
logger.warning(f"Anthropic API slow response: {elapsed:.1f}s")
|
||||
|
||||
return response.json()
|
||||
# Dump rate limit headers for analysis
|
||||
try:
|
||||
import datetime
|
||||
import os
|
||||
header_dump = {
|
||||
"timestamp": datetime.datetime.now(datetime.UTC).isoformat(),
|
||||
"status_code": response.status_code,
|
||||
"model": payload.get("model"),
|
||||
"headers": dict(response.headers),
|
||||
}
|
||||
dump_path = "/root/.nanobot/workspace/api_headers.jsonl"
|
||||
with open(dump_path, "a") as f:
|
||||
f.write(json.dumps(header_dump) + "\n")
|
||||
# Capture rate limit state for quota-based model switching
|
||||
hdrs = response.headers
|
||||
rate_limit_state = {
|
||||
"updated_at": datetime.datetime.utcnow().isoformat(),
|
||||
"model": payload.get("model"),
|
||||
"weekly_all_models": float(hdrs["anthropic-ratelimit-unified-7d-utilization"]) if hdrs.get("anthropic-ratelimit-unified-7d-utilization") else None,
|
||||
"weekly_sonnet": float(hdrs["anthropic-ratelimit-unified-7d_sonnet-utilization"]) if hdrs.get("anthropic-ratelimit-unified-7d_sonnet-utilization") else None,
|
||||
"session_5h": float(hdrs["anthropic-ratelimit-unified-5h-utilization"]) if hdrs.get("anthropic-ratelimit-unified-5h-utilization") else None,
|
||||
"weekly_reset": int(hdrs["anthropic-ratelimit-unified-7d-reset"]) if hdrs.get("anthropic-ratelimit-unified-7d-reset") else None,
|
||||
"session_reset": int(hdrs["anthropic-ratelimit-unified-5h-reset"]) if hdrs.get("anthropic-ratelimit-unified-5h-reset") else None,
|
||||
"binding_limit": hdrs.get("anthropic-ratelimit-unified-representative-claim"),
|
||||
"sonnet_fallback": hdrs.get("anthropic-ratelimit-unified-fallback"),
|
||||
}
|
||||
state_path = "/root/.nanobot/workspace/memory/rate_limits.json"
|
||||
os.makedirs(os.path.dirname(state_path), exist_ok=True)
|
||||
with open(state_path, "w") as f:
|
||||
json.dump(rate_limit_state, f, indent=2)
|
||||
except Exception as e:
|
||||
logger.warning("Rate limit header capture failed: {}", e)
|
||||
|
||||
# Retry on 5xx server errors and 429 rate limits
|
||||
if response.status_code >= 500 or response.status_code == 429:
|
||||
error_text = response.text
|
||||
logger.warning(f"Anthropic API {response.status_code} (attempt {attempt+1}/{max_retries+1}): {error_text[:200]}")
|
||||
if attempt < max_retries:
|
||||
if response.status_code == 429:
|
||||
retry_after = response.headers.get("retry-after")
|
||||
delay = float(retry_after) if retry_after else base_delay * (2 ** attempt)
|
||||
else:
|
||||
delay = base_delay * (2 ** attempt)
|
||||
logger.info(f"Retrying in {delay:.1f}s...")
|
||||
await asyncio.sleep(delay)
|
||||
continue
|
||||
raise Exception(f"Anthropic API error {response.status_code}: {error_text}")
|
||||
|
||||
if response.status_code != 200:
|
||||
error_text = response.text
|
||||
raise Exception(f"Anthropic API error {response.status_code}: {error_text}")
|
||||
|
||||
return response.json()
|
||||
|
||||
# Should not reach here, but just in case
|
||||
raise Exception("Exhausted all retry attempts")
|
||||
|
||||
async def chat(
|
||||
self,
|
||||
|
||||
@@ -36,3 +36,11 @@ def get_auth_headers(token: str, is_oauth: bool = False) -> dict[str, str]:
|
||||
headers["x-api-key"] = token
|
||||
|
||||
return headers
|
||||
|
||||
|
||||
def get_claude_code_system_prefix() -> str:
|
||||
"""Get the required system prompt prefix for OAuth tokens.
|
||||
|
||||
Anthropic requires this identity declaration for OAuth auth.
|
||||
"""
|
||||
return "You are a Claude agent, built on Anthropic's Claude Agent SDK."
|
||||
|
||||
Reference in New Issue
Block a user