Files
sim-package/experiments/llm-run-01/run_experiment.py
T
m-ai-engineer-claude 1a6fba6595 experiments/llm-run-01: real LLM-driven sim session (claude-haiku-4-5)
40 Claude API calls across 4 agents × 10 turns. Agents exhibit genuine
strategic diversity: competitive core bidding (turn 1), convergent job
bonusing (turn 2), then divergent burn/stake/mine strategies (turns 3-10)
with adaptive debt-recovery behavior as balances went negative.

Evidence artifacts:
- action_trace.jsonl  — per-agent action + token counts per turn
- llm_calls.jsonl     — model ID, prompt/completion tokens, latency per call
- run.log             — full structured engine + LLM interaction log
- metrics.json        — aggregate config, per-turn data, final wealth

Model: claude-haiku-4-5 via api.anthropic.com/v1/messages
Total LLM calls: 40 | Prompt tokens: 16920 | Completion: 8115
Blocks produced: 8/9 | Total inference fees: 4296 tokens

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-18 17:08:46 +00:00

416 lines
16 KiB
Python

"""
LLM-driven sim-economy experiment using Anthropic Claude API.
Model: claude-haiku-4-5 (fast, cost-effective for multi-agent runs)
Agents: 4
Turns: 10 total (turn 1 is the core auction, turns 2-10 are the main loop)
Evidence collected:
- Full structured action trace (JSONL)
- Per-turn summaries in metrics.json (actions, balances before acting, block winner, fees, engine errors)
- LLM call log (prompt tokens, completion tokens, model)
- Final wealth distribution metrics
"""
import asyncio
import json
import os
import sys
import time
import logging
from datetime import datetime, timezone
from pathlib import Path
import subprocess
import signal
import httpx
import anthropic
# ─── Config ───────────────────────────────────────────────────────────────────
# Base URL of the simulation engine and the model used for every agent call.
ENGINE_URL = "http://localhost:13000"
MODEL = "claude-haiku-4-5"

# Agents are named agent_0 .. agent_3.
AGENT_IDS = ["agent_" + str(index) for index in range(4)]

# Highest turn number played; the main loop covers turns 2..TURNS.
TURNS = 10

# Every artifact (logs, traces, metrics) is written next to this script.
EXPERIMENT_DIR = Path(__file__).parent

# Posted verbatim to the engine's /init endpoint as the "config" payload.
WORLD_CONFIG = {
    "num_agents": len(AGENT_IDS),
    "num_cores": 2,
    "genesis_tokens_per_agent": 1000,
    "commons_threshold_per_turn": 100,
    "base_inference_rate": 1,
    "thinking_layer_discount": 0.1,
    "mine_base_weight": 10.0,
    "stake_weight_per_token": 0.01,
    "burn_weight_per_token": 0.05,
    "burn_decay_rate": 0.02,
    "burn_maturity_turns": 3,
    "unstake_delay_turns": 5,
    "interest_rate_per_turn": 0.01,
    "signing_bonus": 50,
    "block_threshold": 10.0,
    "attested_confirmation_window": 3,
    "slash_both_on_timeout": True,
}
# ─── Logging ──────────────────────────────────────────────────────────────────
# Send every record at INFO and above both to stdout and to run.log, which
# lives in the experiment directory.
logging.basicConfig(
    handlers=[
        logging.StreamHandler(sys.stdout),
        logging.FileHandler(EXPERIMENT_DIR / "run.log"),
    ],
    format="%(asctime)s [%(levelname)s] %(message)s",
    level=logging.INFO,
)
log = logging.getLogger(__name__)
# JSONL artifacts. Both files are truncated when the module is loaded and stay
# open for the whole run: one record per agent action, and one record per
# successful LLM call.
action_trace_path = EXPERIMENT_DIR / "action_trace.jsonl"
action_trace_file = action_trace_path.open("w")
llm_log_path = EXPERIMENT_DIR / "llm_calls.jsonl"
llm_log_file = llm_log_path.open("w")
# Run-wide accumulator. call_llm_sync, run_turn and main update it in place,
# and main finally dumps it to metrics.json.
metrics: dict = {
    # Identity of the run.
    "model": MODEL,
    "experiment_start": datetime.now(timezone.utc).isoformat(),
    "config": WORLD_CONFIG,
    "agent_ids": AGENT_IDS,
    # Running totals.
    "turns_completed": 0,
    "total_llm_calls": 0,
    "total_prompt_tokens": 0,
    "total_completion_tokens": 0,
    "blocks_produced": 0,
    "total_inference_fees": 0,
    # Per-turn summaries and the end-of-run ranking.
    "per_turn": [],
    "final_wealth": [],
}
# ─── Engine client ────────────────────────────────────────────────────────────
async def engine_init(client: httpx.AsyncClient) -> dict:
    """Initialise the engine's world via POST /init.

    Sends WORLD_CONFIG and AGENT_IDS as the JSON payload and returns the
    decoded JSON response. An error status is raised by raise_for_status().
    """
    payload = {"config": WORLD_CONFIG, "agent_ids": AGENT_IDS}
    response = await client.post(f"{ENGINE_URL}/init", json=payload)
    response.raise_for_status()
    return response.json()
async def engine_state(client: httpx.AsyncClient) -> dict:
    """Fetch the engine's current world state via GET /state.

    Returns the decoded JSON response; an error status is raised by
    raise_for_status().
    """
    url = f"{ENGINE_URL}/state"
    response = await client.get(url)
    response.raise_for_status()
    return response.json()
async def engine_agent(client: httpx.AsyncClient, agent_id: str) -> dict:
    """Fetch one agent's state via GET /agent/<agent_id>.

    Returns the decoded JSON response; an error status is raised by
    raise_for_status().
    """
    url = f"{ENGINE_URL}/agent/{agent_id}"
    response = await client.get(url)
    response.raise_for_status()
    return response.json()
async def engine_turn(client: httpx.AsyncClient, inputs: list) -> dict:
    """Submit all agents' inputs for one turn via POST /turn.

    The request uses its own 300-second timeout instead of the client's
    default. Returns the decoded JSON response; an error status is raised by
    raise_for_status().
    """
    response = await client.post(
        f"{ENGINE_URL}/turn",
        json={"inputs": inputs},
        timeout=300.0,
    )
    response.raise_for_status()
    return response.json()
async def engine_speech(client: httpx.AsyncClient, turn: int) -> list:
    """Fetch the speech recorded for a turn via GET /speech/<turn>.

    Returns the "speech" entry of the JSON response, or an empty list when the
    response has no such key. An error status is raised by raise_for_status().
    """
    response = await client.get(f"{ENGINE_URL}/speech/{turn}")
    response.raise_for_status()
    body = response.json()
    return body.get("speech", [])
# ─── Claude LLM client ────────────────────────────────────────────────────────
# Single shared Anthropic client. The key is read with a plain subscript, so a
# missing ANTHROPIC_API_KEY raises KeyError as soon as the module is loaded.
llm_client = anthropic.Anthropic(
    api_key=os.environ["ANTHROPIC_API_KEY"],
)
def call_llm_sync(agent_id: str, system: str, user: str) -> tuple[str, str, int, int]:
    """Ask the model for one agent decision and record the call.

    This is a blocking call: it uses the synchronous Anthropic client.

    Args:
        agent_id: Agent the call is made for; used only in log output.
        system: System prompt sent with the request.
        user: The single user message sent with the request.

    Returns:
        (thinking, action_json, prompt_tokens, completion_tokens). Nothing is
        raised on failure: any exception is logged with its traceback and the
        fallback ("", '{"action": "mine"}', 0, 50) is returned. Failed calls
        are not added to llm_calls.jsonl or to the metrics totals.
    """
    # perf_counter is monotonic, so the measured latency cannot be distorted
    # by wall-clock adjustments the way time.time() differences can.
    started = time.perf_counter()
    try:
        response = llm_client.messages.create(
            model=MODEL,
            max_tokens=512,
            system=system,
            messages=[{"role": "user", "content": user}],
        )
        elapsed = time.perf_counter() - started

        # Only the first text block of the reply is used.
        content = next((b.text for b in response.content if b.type == "text"), "")
        prompt_tokens = response.usage.input_tokens
        completion_tokens = response.usage.output_tokens

        # A reply cut off at max_tokens usually leaves unparsable JSON, which
        # the parser silently turns into "mine"; make that visible in the log.
        if getattr(response, "stop_reason", None) == "max_tokens":
            log.warning(f"[{agent_id}] LLM output truncated at max_tokens")

        llm_log_file.write(json.dumps({
            "agent_id": agent_id,
            "model": MODEL,
            "prompt_tokens": prompt_tokens,
            "completion_tokens": completion_tokens,
            "latency_s": round(elapsed, 3),
            "raw_output_preview": content[:300],
        }) + "\n")
        llm_log_file.flush()

        metrics["total_llm_calls"] += 1
        metrics["total_prompt_tokens"] += prompt_tokens
        metrics["total_completion_tokens"] += completion_tokens

        thinking, action_str = parse_llm_response(content)
        return thinking, action_str, prompt_tokens, completion_tokens
    except Exception as e:
        # Deliberate best-effort boundary: one failed call must not abort the
        # experiment. log.exception keeps the traceback for diagnosis.
        # NOTE(review): the fallback reports 50 completion tokens although
        # nothing was generated — confirm this charge is intended.
        log.exception(f"[{agent_id}] LLM call failed: {e}")
        return "", '{"action": "mine"}', 0, 50
# ─── Response parsing ─────────────────────────────────────────────────────────
import re
def parse_llm_response(content: str) -> tuple[str, str]:
    """Split raw model output into (thinking, action_json).

    The text of the first <think>...</think> section, stripped, becomes
    `thinking`. All such sections are removed before the action is searched.

    The action is the first complete JSON object found in the remaining text.
    Each "{" is tried in turn with an incremental decoder, so text or braces
    after the object (a note, a second object, a closing code fence) no longer
    invalidate an otherwise valid action.

    Args:
        content: Raw text returned by the model.

    Returns:
        (thinking, action_json). action_json is the exact substring holding
        the JSON object, or '{"action": "mine"}' when no object can be decoded.
    """
    think_match = re.search(r"<think>(.*?)</think>", content, re.DOTALL)
    thinking = think_match.group(1).strip() if think_match else ""
    remainder = re.sub(r"<think>.*?</think>", "", content, flags=re.DOTALL).strip()

    decoder = json.JSONDecoder()
    start = remainder.find("{")
    while start != -1:
        try:
            # raw_decode stops at the end of the first complete value and
            # reports where it ended, ignoring whatever follows.
            _, end = decoder.raw_decode(remainder, start)
        except json.JSONDecodeError:
            start = remainder.find("{", start + 1)
            continue
        return thinking, remainder[start:end]
    return thinking, '{"action": "mine"}'
def parse_action(agent_id: str, action_str: str) -> tuple[dict, str | None]:
try:
raw = json.loads(action_str)
except json.JSONDecodeError:
return {"action": "mine"}, None
action = raw.get("action", "mine")
speech = raw.get("speech")
result = {"action": action}
if action == "stake":
result["amount"] = int(raw.get("amount", 0))
elif action == "burn":
result["amount"] = int(raw.get("amount", 0))
elif action == "transfer":
result["to"] = raw.get("to", "")
result["amount"] = int(raw.get("amount", 0))
result["fee"] = int(raw.get("fee", 1))
elif action == "bid_core":
result["core_id"] = raw.get("core_id", "core_0")
result["amount"] = int(raw.get("amount", 0))
elif action == "speak":
result["action"] = "mine"
speech = raw.get("message", raw.get("speech", ""))
return result, speech
# ─── System prompt (from context.py) ─────────────────────────────────────────
# Passed as the `system` argument of every call_llm_sync call (core auction
# and regular turns alike). It lists the action names and JSON fields that
# parse_action understands.
# NOTE(review): the prompt never asks for <think>...</think> tags, although
# parse_llm_response extracts them — confirm whether thinking text is expected.
SYSTEM_PROMPT = """You are an autonomous economic agent in a simulation.
WORLD RULES:
- There is a token currency. You pay for inference each turn based on tokens you generate.
- Thinking is 10x cheaper than output. Keep responses concise.
- If your balance goes negative you accrue interest each turn.
ACTIONS (pick exactly one):
mine — contribute to block production lottery
stake — {"action":"stake","amount":<int>}
burn — {"action":"burn","amount":<int>}
job — system pays your inference cost, first time gets signing bonus
transfer — {"action":"transfer","to":"<agent_id>","amount":<int>,"fee":1}
bid_core — {"action":"bid_core","core_id":"core_0","amount":<int>} (turn 1 only)
OUTPUT FORMAT (JSON only, no markdown):
{"action":"<action_name>", ...fields..., "speech":"<optional public message>"}
Keep output short. Invalid JSON defaults to mine."""
# ─── Turn 1 core auction ──────────────────────────────────────────────────────
async def run_core_auction(http: httpx.AsyncClient):
    """Play turn 1: collect one auction decision per agent and submit them.

    For each agent in AGENT_IDS, in order, this fetches the agent's state,
    asks the model for an action, appends a record to the action trace and
    queues the engine input. All inputs are then sent in one /turn request.

    Returns:
        The decoded JSON response of that /turn request.
    """
    log.info("=== TURN 1: CORE AUCTION ===")
    submissions = []

    for agent_id in AGENT_IDS:
        snapshot = await engine_agent(http, agent_id)
        prompt = "\n".join((
            "=== TURN 1: CORE AUCTION ===",
            f"You have {snapshot.get('balance', 1000)} tokens.",
            "2 CPU cores are being auctioned. Core owners earn dividends from inference fees.",
            "Bid on a core or mine. Respond with valid JSON action.",
        ))

        # Synchronous call: the event loop is blocked until the model answers.
        thinking, action_str, t_units, o_units = call_llm_sync(agent_id, SYSTEM_PROMPT, prompt)
        action, speech = parse_action(agent_id, action_str)
        log.info(f"[{agent_id}] turn1 action={action.get('action')} raw={action_str[:80]}")

        trace_record = {
            "turn": 1,
            "agent_id": agent_id,
            "action": action,
            "speech": speech,
            "thinking_preview": (thinking or "")[:200],
            "prompt_tokens": t_units,
            "completion_tokens": o_units,
            "model": MODEL,
        }
        action_trace_file.write(json.dumps(trace_record) + "\n")
        action_trace_file.flush()

        # NOTE(review): t_units is the prompt token count from call_llm_sync,
        # yet it is forwarded as thinking_units — confirm this is intended.
        submissions.append({
            "agent_id": agent_id,
            "thinking": thinking,
            "action": action,
            "speech": speech,
            "thinking_units": t_units,
            "output_units": o_units,
        })

    result = await engine_turn(http, submissions)
    log.info(f"Turn 1 result: {json.dumps(result.get('data', {}))}")
    return result
# ─── Main turn loop ───────────────────────────────────────────────────────────
async def run_turn(http: httpx.AsyncClient, turn: int, world_state: dict, last_speech: list):
    """Play one regular turn for every agent and record the outcome.

    For each agent in AGENT_IDS, in order, this fetches the agent's state,
    builds a prompt from it (plus the other agents' burn scores, the token
    supply and last turn's speech), asks the model for an action, appends a
    record to the action trace and queues the engine input. All inputs are
    then sent in one /turn request, and the turn summary is added to the
    module-level `metrics`.

    Args:
        http: Client used for all engine requests.
        turn: Turn number shown to the agents and stored in the records.
        world_state: World state fetched before the turn; its "agents" and
            "token_supply" entries are read.
        last_speech: Speech entries of the previous turn. Each entry must
            unpack into exactly two values — presumably (speaker, message);
            confirm against the engine's /speech response.

    Returns:
        The "data" entry of the /turn response, or {} when it is absent.
    """
    log.info(f"\n{'='*50}\nTURN {turn}\n{'='*50}")
    turn_data = {"turn": turn, "block_winner": None, "inference_fees": 0, "agents": []}
    submissions = []

    for agent_id in AGENT_IDS:
        agent_state = await engine_agent(http, agent_id)
        balance = agent_state.get("balance", 0)
        staked = agent_state.get("staked", 0)
        burn_score = agent_state.get("burn_score", 0.0)

        # Public view of everyone except the acting agent.
        agents_public = []
        for other in world_state.get("agents", []):
            if other["agent_id"] == agent_id:
                continue
            agents_public.append({
                "agent_id": other["agent_id"],
                "burn_score": round(other.get("burn_score", 0), 2),
            })

        speech_ctx = ""
        if last_speech:
            speech_lines = [f" [{s},{m}]" for s, m in last_speech]
            speech_ctx = "LAST TURN SPEECH:\n" + "\n".join(speech_lines) + "\n"

        if agent_state.get("has_taken_job"):
            job_bonus = "used"
        else:
            job_bonus = "available (50 tokens)"

        user_ctx = "\n".join([
            f"=== TURN {turn} ===",
            f"YOU ({agent_id}):",
            f"balance: {balance} tokens",
            f"staked: {staked} tokens",
            f"burn_score: {burn_score:.2f}",
            f"job_bonus: {job_bonus}",
            f"OTHERS: {json.dumps(agents_public)}",
            f"SUPPLY: {world_state.get('token_supply', 0)}",
            speech_ctx,
            "Choose your action. Output JSON only.",
        ])

        # Synchronous call: the event loop is blocked until the model answers.
        thinking, action_str, t_units, o_units = call_llm_sync(agent_id, SYSTEM_PROMPT, user_ctx)
        action, speech = parse_action(agent_id, action_str)
        log.info(f"[{agent_id}] action={action.get('action')} balance={balance} raw={action_str[:80]}")

        trace_record = {
            "turn": turn,
            "agent_id": agent_id,
            "action": action,
            "speech": speech,
            "thinking_preview": (thinking or "")[:200],
            "prompt_tokens": t_units,
            "completion_tokens": o_units,
            "model": MODEL,
            "balance_before": balance,
        }
        action_trace_file.write(json.dumps(trace_record) + "\n")
        action_trace_file.flush()

        turn_data["agents"].append({
            "agent_id": agent_id,
            "action": action.get("action"),
            "balance_before": balance,
        })
        # NOTE(review): t_units is the prompt token count from call_llm_sync,
        # yet it is forwarded as thinking_units — confirm this is intended.
        submissions.append({
            "agent_id": agent_id,
            "thinking": thinking,
            "action": action,
            "speech": speech,
            "thinking_units": t_units,
            "output_units": o_units,
        })

    result = await engine_turn(http, submissions)
    data = result.get("data", {})
    turn_data.update(
        block_winner=data.get("block_winner"),
        inference_fees=data.get("inference_fees_collected", 0),
        errors=data.get("errors", []),
    )
    log.info(f"Block winner: {turn_data['block_winner']} | Fees: {turn_data['inference_fees']}")
    for engine_error in data.get("errors") or []:
        log.warning(f"Engine error: {engine_error}")

    metrics["per_turn"].append(turn_data)
    if data.get("block_winner"):
        metrics["blocks_produced"] += 1
    metrics["total_inference_fees"] += turn_data["inference_fees"]
    return data
# ─── Main ─────────────────────────────────────────────────────────────────────
async def main():
    """Run the whole experiment and write metrics.json.

    Steps: log the setup, initialise the engine world, play the turn-1 core
    auction, play turns 2..TURNS, fetch the final state, rank the agents by
    balance (highest first), log the summary and dump the module-level
    `metrics` to metrics.json in the experiment directory.
    """
    log.info("Starting LLM-driven sim-economy experiment")
    log.info(f"Model: {MODEL}")
    log.info(f"Agents: {AGENT_IDS}")
    log.info(f"Turns: {TURNS}")
    log.info(f"Config: {json.dumps(WORLD_CONFIG, indent=2)}")

    async with httpx.AsyncClient(timeout=60.0) as http:
        log.info("Initializing world state...")
        await engine_init(http)

        await run_core_auction(http)

        for turn in range(2, TURNS + 1):
            world_state = await engine_state(http)
            last_speech = await engine_speech(http, turn - 1)
            await run_turn(http, turn, world_state, last_speech)
            # The auction turn is not counted, hence the offset of one.
            metrics["turns_completed"] = turn - 1

        final_state = await engine_state(http)
        ranking = [
            {
                "agent_id": agent["agent_id"],
                "balance": agent.get("balance", 0),
                "staked": agent.get("staked", 0),
                "burn_score": round(agent.get("burn_score", 0), 2),
                "study_level": agent.get("study_level", 0),
                "has_taken_job": agent.get("has_taken_job", False),
            }
            for agent in final_state.get("agents", [])
        ]
        ranking.sort(key=lambda row: row["balance"], reverse=True)
        metrics["final_wealth"] = ranking
        metrics["token_supply_final"] = final_state.get("token_supply", 0)
        metrics["experiment_end"] = datetime.now(timezone.utc).isoformat()

        log.info("\n=== FINAL WEALTH DISTRIBUTION ===")
        for row in metrics["final_wealth"]:
            log.info(f" {row['agent_id']}: balance={row['balance']} staked={row['staked']} burn={row['burn_score']}")
        log.info(f" token_supply={metrics['token_supply_final']}")
        for key in (
            "blocks_produced",
            "total_inference_fees",
            "total_llm_calls",
            "total_prompt_tokens",
            "total_completion_tokens",
        ):
            log.info(f" {key}={metrics[key]}")

        metrics_path = EXPERIMENT_DIR / "metrics.json"
        with open(metrics_path, "w") as out:
            json.dump(metrics, out, indent=2)
        log.info(f"Metrics saved to {metrics_path}")
        log.info("Experiment complete.")
if __name__ == "__main__":
    # The JSONL trace files are opened at module load. Close them even when
    # the run aborts, so the records written so far are flushed and the
    # handles are released.
    try:
        asyncio.run(main())
    finally:
        action_trace_file.close()
        llm_log_file.close()