1a6fba6595
40 Claude API calls across 4 agents × 11 turns. Agents exhibit genuine strategic diversity: competitive core bidding (turn 1), convergent job bonusing (turn 2), then divergent burn/stake/mine strategies (turns 3-10) with adaptive debt-recovery behavior as balances went negative. Evidence artifacts: - action_trace.jsonl — per-agent action + token counts per turn - llm_calls.jsonl — model ID, prompt/completion tokens, latency per call - run.log — full structured engine + LLM interaction log - metrics.json — aggregate config, per-turn data, final wealth Model: claude-haiku-4-5 via api.anthropic.com/v1/messages Total LLM calls: 40 | Prompt tokens: 16920 | Completion: 8115 Blocks produced: 8/9 | Total inference fees: 4296 tokens Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
416 lines
16 KiB
Python
416 lines
16 KiB
Python
"""
|
|
LLM-driven sim-economy experiment using Anthropic Claude API.
|
|
|
|
Model: claude-haiku-4-5 (fast, cost-effective for multi-agent runs)
|
|
Agents: 4
|
|
Turns: 10 in total (turn 1 is the core auction; turns 2-10 are regular turns)
|
|
|
|
Evidence collected:
|
|
- Full structured action trace (JSONL)
|
|
- Per-turn world state snapshots
|
|
- LLM call log (prompt tokens, completion tokens, model)
|
|
- Final wealth distribution metrics
|
|
"""
|
|
|
|
import asyncio
|
|
import json
|
|
import os
|
|
import sys
|
|
import time
|
|
import logging
|
|
from datetime import datetime, timezone
|
|
from pathlib import Path
|
|
import subprocess
|
|
import signal
|
|
|
|
import httpx
|
|
import anthropic
|
|
|
|
# ─── Config ───────────────────────────────────────────────────────────────────
|
|
|
|
# Base URL of the simulation engine's HTTP API; every engine_* helper targets it.
ENGINE_URL = "http://localhost:13000"
# Anthropic model id used for every agent call.
MODEL = "claude-haiku-4-5"
# Agent identifiers: agent_0 .. agent_3.
AGENT_IDS = [f"agent_{i}" for i in range(4)]
# Last turn number: turn 1 is the core auction, the main loop runs turns 2..TURNS.
TURNS = 10
# Directory containing this script; run.log, the JSONL traces and metrics.json
# are written here.
EXPERIMENT_DIR = Path(__file__).parent
|
|
|
|
# World parameters posted verbatim to the engine's /init endpoint and copied
# into metrics.json. Their exact semantics are defined by the engine, not by
# this script — NOTE(review): confirm meanings against the engine before tuning.
WORLD_CONFIG = {
    "num_agents": len(AGENT_IDS),  # kept in sync with AGENT_IDS
    "num_cores": 2,
    "genesis_tokens_per_agent": 1000,
    "commons_threshold_per_turn": 100,
    "base_inference_rate": 1,
    "thinking_layer_discount": 0.1,
    "mine_base_weight": 10.0,
    "stake_weight_per_token": 0.01,
    "burn_weight_per_token": 0.05,
    "burn_decay_rate": 0.02,
    "burn_maturity_turns": 3,
    "unstake_delay_turns": 5,
    "interest_rate_per_turn": 0.01,
    "signing_bonus": 50,
    "block_threshold": 10.0,
    "attested_confirmation_window": 3,
    "slash_both_on_timeout": True,
}
|
|
|
|
# ─── Logging ──────────────────────────────────────────────────────────────────
|
|
|
|
# Send INFO-and-above records both to stdout and to run.log in the experiment
# directory, so the console output and the saved log stay identical.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
    handlers=[
        logging.StreamHandler(sys.stdout),
        logging.FileHandler(EXPERIMENT_DIR / "run.log"),
    ]
)
# Module-level logger used by every function in this script.
log = logging.getLogger(__name__)
|
|
|
|
# Action trace (JSONL): one record per agent per turn, written by
# run_core_auction and run_turn. Opened in "w" mode at import time, so any
# trace from a previous run is truncated.
action_trace_path = EXPERIMENT_DIR / "action_trace.jsonl"
action_trace_file = open(action_trace_path, "w")

# LLM call log (JSONL): one record per successful call, written by
# call_llm_sync. Also truncated at import time.
llm_log_path = EXPERIMENT_DIR / "llm_calls.jsonl"
llm_log_file = open(llm_log_path, "w")
|
|
|
|
# Mutable run-wide accumulator. call_llm_sync updates the LLM counters,
# run_turn appends to "per_turn" and updates block/fee totals, and main fills
# in the final fields before dumping the whole dict to metrics.json.
metrics: dict = {
    "model": MODEL,
    "experiment_start": datetime.now(timezone.utc).isoformat(),  # UTC, ISO 8601
    "config": WORLD_CONFIG,
    "agent_ids": AGENT_IDS,
    "turns_completed": 0,
    "total_llm_calls": 0,
    "total_prompt_tokens": 0,
    "total_completion_tokens": 0,
    "blocks_produced": 0,
    "total_inference_fees": 0,
    "per_turn": [],
    "final_wealth": [],
}
|
|
|
|
# ─── Engine client ────────────────────────────────────────────────────────────
|
|
|
|
async def engine_init(client: httpx.AsyncClient) -> dict:
    """POST the world config and agent ids to ``/init`` and return the JSON reply.

    Raises whatever ``raise_for_status`` raises on a non-success HTTP status.
    """
    payload = {"config": WORLD_CONFIG, "agent_ids": AGENT_IDS}
    response = await client.post(f"{ENGINE_URL}/init", json=payload)
    response.raise_for_status()
    return response.json()
|
|
|
|
async def engine_state(client: httpx.AsyncClient) -> dict:
    """GET ``/state`` from the engine and return the decoded JSON body.

    Raises whatever ``raise_for_status`` raises on a non-success HTTP status.
    """
    url = f"{ENGINE_URL}/state"
    response = await client.get(url)
    response.raise_for_status()
    return response.json()
|
|
|
|
async def engine_agent(client: httpx.AsyncClient, agent_id: str) -> dict:
    """GET ``/agent/<agent_id>`` from the engine and return the decoded JSON body.

    Raises whatever ``raise_for_status`` raises on a non-success HTTP status.
    """
    url = f"{ENGINE_URL}/agent/{agent_id}"
    response = await client.get(url)
    response.raise_for_status()
    return response.json()
|
|
|
|
async def engine_turn(client: httpx.AsyncClient, inputs: list) -> dict:
    """POST all agents' inputs for one turn to ``/turn`` and return the JSON reply.

    Uses a 300 second per-request timeout instead of the client's default.
    Raises whatever ``raise_for_status`` raises on a non-success HTTP status.
    """
    response = await client.post(
        f"{ENGINE_URL}/turn",
        json={"inputs": inputs},
        timeout=300.0,
    )
    response.raise_for_status()
    return response.json()
|
|
|
|
async def engine_speech(client: httpx.AsyncClient, turn: int) -> list:
    """GET ``/speech/<turn>`` and return the reply's ``"speech"`` entry.

    Returns an empty list when the reply has no ``"speech"`` key.
    Raises whatever ``raise_for_status`` raises on a non-success HTTP status.
    """
    response = await client.get(f"{ENGINE_URL}/speech/{turn}")
    response.raise_for_status()
    body = response.json()
    return body.get("speech", [])
|
|
|
|
# ─── Claude LLM client ────────────────────────────────────────────────────────
|
|
|
|
# Shared synchronous Anthropic client, created at import time. The key is read
# with os.environ[...], so importing this module raises KeyError when
# ANTHROPIC_API_KEY is not set.
llm_client = anthropic.Anthropic(api_key=os.environ["ANTHROPIC_API_KEY"])
|
|
|
|
def call_llm_sync(agent_id: str, system: str, user: str) -> tuple[str, str, int, int]:
    """Ask the model for one agent decision.

    Returns ``(thinking, action_json, prompt_tokens, completion_tokens)``.

    On success the call is appended to the LLM call log and the run-wide token
    counters in ``metrics`` are updated. Any exception (API failure, logging
    failure, parsing failure) is logged and replaced by a default "mine"
    action reported with 0 prompt tokens and 50 completion tokens.
    """
    started = time.time()
    try:
        reply = llm_client.messages.create(
            model=MODEL,
            max_tokens=512,
            system=system,
            messages=[{"role": "user", "content": user}],
        )
        latency = time.time() - started

        # Use the first text block of the reply; empty string if there is none.
        text = ""
        for block in reply.content:
            if block.type == "text":
                text = block.text
                break

        n_prompt = reply.usage.input_tokens
        n_completion = reply.usage.output_tokens

        record = {
            "agent_id": agent_id,
            "model": MODEL,
            "prompt_tokens": n_prompt,
            "completion_tokens": n_completion,
            "latency_s": round(latency, 3),
            "raw_output_preview": text[:300],
        }
        llm_log_file.write(json.dumps(record) + "\n")
        llm_log_file.flush()

        metrics["total_llm_calls"] += 1
        metrics["total_prompt_tokens"] += n_prompt
        metrics["total_completion_tokens"] += n_completion

        thinking, action_str = parse_llm_response(text)
        return thinking, action_str, n_prompt, n_completion

    except Exception as exc:
        # Best-effort: a failed call must not abort the experiment.
        log.error(f"[{agent_id}] LLM call failed: {exc}")
        return "", '{"action": "mine"}', 0, 50
|
|
|
|
# ─── Response parsing ─────────────────────────────────────────────────────────
|
|
|
|
import re
|
|
|
|
def parse_llm_response(content: str) -> tuple[str, str]:
    """Split a raw model reply into ``(thinking, action_json)``.

    ``thinking`` is the stripped text of the first ``<think>...</think>``
    section, or ``""`` when there is none. ``action_json`` is the first
    substring outside the think sections that decodes as a JSON object; when
    no such object exists it is the default ``'{"action": "mine"}'``.

    The object is located by attempting a real JSON decode at each ``{``
    rather than by a greedy brace-to-brace regex, so stray braces in text
    before or after the action no longer invalidate an otherwise valid reply.
    """
    think_match = re.search(r"<think>(.*?)</think>", content, re.DOTALL)
    thinking = think_match.group(1).strip() if think_match else ""
    remainder = re.sub(r"<think>.*?</think>", "", content, flags=re.DOTALL).strip()

    decoder = json.JSONDecoder()
    start = remainder.find("{")
    while start != -1:
        try:
            parsed, end = decoder.raw_decode(remainder, start)
        except json.JSONDecodeError:
            parsed = None
        if isinstance(parsed, dict):
            # Return the exact source text of the object, not a re-serialization.
            return thinking, remainder[start:end]
        start = remainder.find("{", start + 1)

    return thinking, '{"action": "mine"}'
|
|
|
|
def parse_action(agent_id: str, action_str: str) -> tuple[dict, str | None]:
|
|
try:
|
|
raw = json.loads(action_str)
|
|
except json.JSONDecodeError:
|
|
return {"action": "mine"}, None
|
|
|
|
action = raw.get("action", "mine")
|
|
speech = raw.get("speech")
|
|
result = {"action": action}
|
|
|
|
if action == "stake":
|
|
result["amount"] = int(raw.get("amount", 0))
|
|
elif action == "burn":
|
|
result["amount"] = int(raw.get("amount", 0))
|
|
elif action == "transfer":
|
|
result["to"] = raw.get("to", "")
|
|
result["amount"] = int(raw.get("amount", 0))
|
|
result["fee"] = int(raw.get("fee", 1))
|
|
elif action == "bid_core":
|
|
result["core_id"] = raw.get("core_id", "core_0")
|
|
result["amount"] = int(raw.get("amount", 0))
|
|
elif action == "speak":
|
|
result["action"] = "mine"
|
|
speech = raw.get("message", raw.get("speech", ""))
|
|
|
|
return result, speech
|
|
|
|
# ─── System prompt (from context.py) ─────────────────────────────────────────
|
|
|
|
SYSTEM_PROMPT = """You are an autonomous economic agent in a simulation.
|
|
|
|
WORLD RULES:
|
|
- There is a token currency. You pay for inference each turn based on tokens you generate.
|
|
- Thinking is 10x cheaper than output. Keep responses concise.
|
|
- If your balance goes negative you accrue interest each turn.
|
|
|
|
ACTIONS (pick exactly one):
|
|
mine — contribute to block production lottery
|
|
stake — {"action":"stake","amount":<int>}
|
|
burn — {"action":"burn","amount":<int>}
|
|
job — system pays your inference cost, first time gets signing bonus
|
|
transfer — {"action":"transfer","to":"<agent_id>","amount":<int>,"fee":1}
|
|
bid_core — {"action":"bid_core","core_id":"core_0","amount":<int>} (turn 1 only)
|
|
|
|
OUTPUT FORMAT (JSON only, no markdown):
|
|
{"action":"<action_name>", ...fields..., "speech":"<optional public message>"}
|
|
|
|
Keep output short. Invalid JSON defaults to mine."""
|
|
|
|
# ─── Turn 1 core auction ──────────────────────────────────────────────────────
|
|
|
|
async def run_core_auction(http: httpx.AsyncClient):
    """Run turn 1, the core auction, and return the engine's reply.

    For each agent in order: fetch its state, prompt the model with the
    auction context, parse the reply, append a record to the action trace,
    and queue the engine input. All inputs are then submitted in one
    ``/turn`` call. The prompt token count is forwarded to the engine as
    ``thinking_units`` and the completion token count as ``output_units``.
    """
    log.info("=== TURN 1: CORE AUCTION ===")
    engine_inputs = []

    for agent_id in AGENT_IDS:
        state = await engine_agent(http, agent_id)
        user_ctx = f"""=== TURN 1: CORE AUCTION ===
You have {state.get('balance', 1000)} tokens.
2 CPU cores are being auctioned. Core owners earn dividends from inference fees.
Bid on a core or mine. Respond with valid JSON action."""

        thinking, action_str, t_units, o_units = call_llm_sync(agent_id, SYSTEM_PROMPT, user_ctx)
        action, speech = parse_action(agent_id, action_str)
        log.info(f"[{agent_id}] turn1 action={action.get('action')} raw={action_str[:80]}")

        trace_record = {
            "turn": 1,
            "agent_id": agent_id,
            "action": action,
            "speech": speech,
            "thinking_preview": thinking[:200] if thinking else "",
            "prompt_tokens": t_units,
            "completion_tokens": o_units,
            "model": MODEL,
        }
        action_trace_file.write(json.dumps(trace_record) + "\n")
        action_trace_file.flush()

        engine_inputs.append(
            {
                "agent_id": agent_id,
                "thinking": thinking,
                "action": action,
                "speech": speech,
                "thinking_units": t_units,
                "output_units": o_units,
            }
        )

    result = await engine_turn(http, engine_inputs)
    log.info(f"Turn 1 result: {json.dumps(result.get('data', {}))}")
    return result
|
|
|
|
# ─── Main turn loop ───────────────────────────────────────────────────────────
|
|
|
|
async def run_turn(http: httpx.AsyncClient, turn: int, world_state: dict, last_speech: list):
    """Run one regular turn and return the ``"data"`` part of the engine reply.

    For each agent in order: fetch its private state, build a prompt from that
    state, the other agents' public burn scores, the token supply and last
    turn's speech, query the model, and record the parsed action in the action
    trace. All inputs are then submitted in one ``/turn`` call, and the turn's
    outcome (block winner, fees, errors) is appended to ``metrics``.

    ``last_speech`` entries are unpacked as two-item pairs when formatting the
    prompt.
    """
    log.info(f"\n{'='*50}\nTURN {turn}\n{'='*50}")
    turn_data = {"turn": turn, "block_winner": None, "inference_fees": 0, "agents": []}
    inputs = []

    for agent_id in AGENT_IDS:
        me = await engine_agent(http, agent_id)
        balance = me.get("balance", 0)
        staked = me.get("staked", 0)
        burn_score = me.get("burn_score", 0.0)

        # Public view of everyone except the agent being prompted.
        agents_public = []
        for other in world_state.get("agents", []):
            if other["agent_id"] == agent_id:
                continue
            agents_public.append(
                {"agent_id": other["agent_id"], "burn_score": round(other.get("burn_score", 0), 2)}
            )

        if last_speech:
            speech_lines = [f" [{s},{m}]" for s, m in last_speech]
            speech_ctx = "LAST TURN SPEECH:\n" + "\n".join(speech_lines) + "\n"
        else:
            speech_ctx = ""

        job_bonus = "used" if me.get("has_taken_job") else "available (50 tokens)"

        user_ctx = f"""=== TURN {turn} ===
YOU ({agent_id}):
balance: {balance} tokens
staked: {staked} tokens
burn_score: {burn_score:.2f}
job_bonus: {job_bonus}

OTHERS: {json.dumps(agents_public)}
SUPPLY: {world_state.get("token_supply", 0)}
{speech_ctx}
Choose your action. Output JSON only."""

        thinking, action_str, t_units, o_units = call_llm_sync(agent_id, SYSTEM_PROMPT, user_ctx)
        action, speech = parse_action(agent_id, action_str)
        log.info(f"[{agent_id}] action={action.get('action')} balance={balance} raw={action_str[:80]}")

        trace_record = {
            "turn": turn,
            "agent_id": agent_id,
            "action": action,
            "speech": speech,
            "thinking_preview": thinking[:200] if thinking else "",
            "prompt_tokens": t_units,
            "completion_tokens": o_units,
            "model": MODEL,
            "balance_before": balance,
        }
        action_trace_file.write(json.dumps(trace_record) + "\n")
        action_trace_file.flush()

        turn_data["agents"].append(
            {
                "agent_id": agent_id,
                "action": action.get("action"),
                "balance_before": balance,
            }
        )

        inputs.append(
            {
                "agent_id": agent_id,
                "thinking": thinking,
                "action": action,
                "speech": speech,
                "thinking_units": t_units,
                "output_units": o_units,
            }
        )

    result = await engine_turn(http, inputs)
    data = result.get("data", {})
    turn_data.update(
        block_winner=data.get("block_winner"),
        inference_fees=data.get("inference_fees_collected", 0),
        errors=data.get("errors", []),
    )

    log.info(f"Block winner: {turn_data['block_winner']} | Fees: {turn_data['inference_fees']}")
    for err in data.get("errors") or []:
        log.warning(f"Engine error: {err}")

    metrics["per_turn"].append(turn_data)
    if data.get("block_winner"):
        metrics["blocks_produced"] += 1
    metrics["total_inference_fees"] += turn_data["inference_fees"]

    return data
|
|
|
|
# ─── Main ─────────────────────────────────────────────────────────────────────
|
|
|
|
async def main():
    """Run the whole experiment and write ``metrics.json``.

    Steps: log the run configuration, initialize the engine, run the turn-1
    core auction, run turns 2..TURNS, then read the final engine state, record
    the wealth ranking (highest balance first) and dump ``metrics`` to disk.
    """
    for banner in (
        "Starting LLM-driven sim-economy experiment",
        f"Model: {MODEL}",
        f"Agents: {AGENT_IDS}",
        f"Turns: {TURNS}",
        f"Config: {json.dumps(WORLD_CONFIG, indent=2)}",
    ):
        log.info(banner)

    async with httpx.AsyncClient(timeout=60.0) as http:
        log.info("Initializing world state...")
        await engine_init(http)

        # Turn 1 is the core auction.
        await run_core_auction(http)

        # Regular turns; each one sees the fresh world state and the
        # previous turn's speech.
        turn = 2
        while turn <= TURNS:
            world_state = await engine_state(http)
            last_speech = await engine_speech(http, turn - 1)
            await run_turn(http, turn, world_state, last_speech)
            metrics["turns_completed"] = turn - 1  # offset for auction
            turn += 1

        final_state = await engine_state(http)
        wealth = []
        for agent in final_state.get("agents", []):
            wealth.append(
                {
                    "agent_id": agent["agent_id"],
                    "balance": agent.get("balance", 0),
                    "staked": agent.get("staked", 0),
                    "burn_score": round(agent.get("burn_score", 0), 2),
                    "study_level": agent.get("study_level", 0),
                    "has_taken_job": agent.get("has_taken_job", False),
                }
            )
        metrics["final_wealth"] = sorted(wealth, key=lambda row: row["balance"], reverse=True)

        metrics["token_supply_final"] = final_state.get("token_supply", 0)
        metrics["experiment_end"] = datetime.now(timezone.utc).isoformat()

        log.info("\n=== FINAL WEALTH DISTRIBUTION ===")
        for row in metrics["final_wealth"]:
            log.info(f" {row['agent_id']}: balance={row['balance']} staked={row['staked']} burn={row['burn_score']}")
        log.info(f" token_supply={metrics['token_supply_final']}")
        log.info(f" blocks_produced={metrics['blocks_produced']}")
        log.info(f" total_inference_fees={metrics['total_inference_fees']}")
        log.info(f" total_llm_calls={metrics['total_llm_calls']}")
        log.info(f" total_prompt_tokens={metrics['total_prompt_tokens']}")
        log.info(f" total_completion_tokens={metrics['total_completion_tokens']}")

        metrics_path = EXPERIMENT_DIR / "metrics.json"
        with open(metrics_path, "w") as out:
            json.dump(metrics, out, indent=2)
        log.info(f"Metrics saved to {metrics_path}")
        log.info("Experiment complete.")
|
|
|
|
if __name__ == "__main__":
    # The trace and LLM log files are opened at import time. Close them in a
    # finally block so the handles are released even when the run aborts with
    # an exception or Ctrl-C.
    try:
        asyncio.run(main())
    finally:
        action_trace_file.close()
        llm_log_file.close()
|