Add simulation data artifacts and experiment runner to branch

Brings paper/results/run_metrics.csv, paper/results/summary.json, and paper/run_sim_experiments.py into the branch tree so all cited artifacts are co-present with the paper in a single commit-accessible tree. All paper numbers are verified against summary.json. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
Expand paper to ~5,000 words per QC revision request
2026-04-15 23:36:20 +00:00 · 2026-04-15 20:47:10 +00:00
4 changed files with 980 additions and 0 deletions
@@ -0,0 +1,25 @@
+run_id,turns,token_supply_final,fees_total,turns_with_block,block_count_db,settled_contracts,defaulted_contracts,mean_balance_final,median_balance_final,min_balance_final,max_balance_final,negative_balance_agents,gini_wealth_final,top1_wealth_share_final,wealth_shift_used,gini_wealth_shifted,top1_wealth_shifted_share_final,validator_mine,validator_stake,validator_burn,tx_transfer,tx_stake,tx_unstake,tx_burn,tx_study,tx_job,actions_mine,actions_stake,actions_unstake,actions_burn,actions_study,actions_job,actions_transfer
+1,60,-546,24753,55,55,0,0,-158.25,-151.5,-278,-85,8,-0.5677655677655677,0.0,199.0,0.2963671128107075,0.18546845124282982,55,0,0,0,9,0,1,0,0,260,9,11,1,2,197,0
+2,60,-1019,24690,54,54,0,0,-197.375,-203.0,-231,-132,8,-0.13800294406280667,0.0,183.0,0.3160112359550562,0.2943820224719101,54,0,0,0,7,0,3,0,0,262,7,13,3,3,192,0
+3,60,-1244,24382,53,53,0,0,-225.5,-228.5,-319,-99,8,-0.2723070739549839,0.0,240.0,0.5011094674556213,0.3269230769230769,53,0,0,0,7,0,1,0,0,258,7,17,1,4,193,0
+4,60,-888,25268,55,55,0,0,-191,-187.5,-251,-131,8,-0.2038288288288288,0.0,172.0,0.37090163934426235,0.24795081967213115,55,0,0,0,8,0,2,0,0,263,8,19,2,2,186,0
+5,60,-711,24946,55,55,0,0,-178.875,-166.5,-307,-60,8,-0.5209212376933896,0.0,213.0,0.3729859013091641,0.23464249748237664,55,0,0,0,9,0,3,0,0,244,9,14,3,3,207,0
+6,60,-1287,25615,56,56,0,0,-230.875,-216.0,-328,-102,8,-0.22367909867909863,0.0,249.0,0.40833333333333344,0.3219858156028369,56,0,0,0,7,0,0,0,0,278,7,18,0,3,174,0
+7,60,-512,23740,55,55,0,0,-154,-158.5,-242,-63,8,-0.59521484375,0.0,163.0,0.3847853535353536,0.2335858585858586,55,0,0,0,9,0,2,0,0,256,9,16,2,4,193,0
+8,60,-980,22700,55,55,0,0,-212.5,-198.5,-287,-131,8,-0.26632653061224487,0.0,208.0,0.381578947368421,0.22953216374269006,55,0,0,0,9,0,2,0,0,271,9,15,2,3,180,0
+9,60,-909,24198,55,55,0,0,-203.625,-192.0,-380,-81,8,-0.386001100110011,0.0,221.0,0.4084691501746216,0.3259604190919674,55,0,0,0,9,0,3,0,0,260,9,15,3,2,191,0
+10,60,-870,24931,56,56,0,0,-188.75,-187.0,-342,-56,8,-0.5724137931034483,0.0,343.0,0.2657417289220918,0.19583778014941303,56,0,0,0,8,0,2,0,0,267,8,15,2,2,186,0
+11,60,-1212,23754,55,55,0,0,-231.5,-235.0,-333,-132,8,-0.2460808580858086,0.0,254.0,0.363719512195122,0.24634146341463414,55,0,0,0,8,0,2,0,0,273,8,11,2,2,184,0
+12,60,-797,24322,56,56,0,0,-169.625,-189.0,-271,-37,8,-0.4396173149309912,0.0,272.0,0.25407904278462645,0.22842639593908629,56,0,0,0,7,0,2,0,0,266,7,13,2,2,190,0
+13,60,-879,25235,55,55,0,0,-189.875,-165.0,-339,-98,8,-0.38694539249146753,0.0,260.0,0.2832014987510407,0.20149875104079934,55,0,0,0,8,0,3,0,0,254,8,23,3,2,190,0
+14,60,-857,24614,54,54,0,0,-187.125,-159.0,-357,-44,8,-0.5917444574095683,0.0,278.0,0.37097659107534753,0.22970007315288954,54,0,0,0,8,0,1,0,0,265,8,16,1,2,188,0
+15,60,-591,24257,54,54,0,0,-163.875,-171.0,-299,-27,8,-0.6254230118443316,0.0,220.0,0.3161890504704876,0.23353293413173654,54,0,0,0,9,0,3,0,0,252,9,15,3,1,200,0
+16,60,-1125,23846,56,56,0,0,-220.625,-230.0,-313,-106,8,-0.26788888888888884,0.0,220.0,0.47460629921259834,0.30551181102362207,56,0,0,0,8,0,3,0,0,264,8,11,3,3,191,0
+17,60,-800,24068,54,54,0,0,-170,-193.5,-246,-25,8,-0.406875,0.0,167.0,0.607276119402985,0.4141791044776119,54,0,0,0,7,0,1,0,0,268,7,18,1,4,182,0
+18,60,-1041,23746,54,54,0,0,-210.125,-198.0,-361,-123,8,-0.3257684918347743,0.0,282.0,0.27911522633744856,0.19670781893004116,54,0,0,0,8,0,1,0,0,258,8,21,1,2,190,0
+19,60,-628,24893,54,54,0,0,-168.5,-161.0,-310,-35,8,-0.6325636942675159,0.0,231.0,0.3256147540983607,0.2262295081967213,54,0,0,0,9,0,2,0,0,275,9,14,2,2,178,0
+20,60,-902,24180,55,55,0,0,-202.75,-220.5,-286,-74,8,-0.42793791574279383,0.0,207.0,0.5119363395225465,0.3275862068965517,55,0,0,0,9,0,1,0,0,269,9,19,1,4,178,0
+21,60,-1029,23660,53,53,0,0,-198.625,-214.0,-296,-97,8,-0.42261904761904767,0.0,282.0,0.3544213528932356,0.21597392013039934,53,0,0,0,7,0,2,0,0,266,7,13,2,2,190,0
+22,60,-743,23124,56,56,0,0,-192.875,-184.0,-295,-81,8,-0.5348250336473755,0.0,216.0,0.40342639593908625,0.24568527918781727,56,0,0,0,10,0,2,0,0,269,10,22,2,3,174,0
+23,60,-588,23892,55,55,0,0,-153.5,-157.0,-281,11,7,-0.7329931972789115,0.0,202.0,0.4192607003891051,0.2850194552529183,55,0,0,0,8,0,3,0,0,264,8,17,3,3,185,0
+24,60,-777,24723,54,54,0,0,-167.125,-158.5,-308,-37,8,-0.6217824967824968,0.0,309.0,0.28502949852507364,0.20766961651917404,54,0,0,0,7,0,0,0,0,264,7,18,0,2,189,0
@@ -0,0 +1,256 @@
+{
+  "runs": 24,
+  "turns": 60,
+  "world_config": {
+    "num_agents": 8,
+    "num_cores": 4,
+    "genesis_tokens_per_agent": 1000,
+    "commons_threshold_per_turn": 100,
+    "base_inference_rate": 1,
+    "thinking_layer_discount": 0.1,
+    "mine_base_weight": 10.0,
+    "stake_weight_per_token": 0.01,
+    "burn_weight_per_token": 0.05,
+    "burn_decay_rate": 0.02,
+    "burn_maturity_turns": 3,
+    "unstake_delay_turns": 5,
+    "interest_rate_per_turn": 0.01,
+    "signing_bonus": 50,
+    "block_threshold": 20.0,
+    "attested_confirmation_window": 3,
+    "slash_both_on_timeout": true
+  },
+  "aggregate": {
+    "turns": {
+      "mean": 60.0,
+      "std": 0.0,
+      "ci95": 0.0,
+      "min": 60.0,
+      "max": 60.0
+    },
+    "token_supply_final": {
+      "mean": -872.2916666666666,
+      "std": 218.38607201301863,
+      "ci95": 87.37262574922428,
+      "min": -1287.0,
+      "max": -512.0
+    },
+    "fees_total": {
+      "mean": 24314.041666666668,
+      "std": 693.735163048029,
+      "ci95": 277.5518704620466,
+      "min": 22700.0,
+      "max": 25615.0
+    },
+    "turns_with_block": {
+      "mean": 54.75,
+      "std": 0.8968544062928813,
+      "ci95": 0.3588164926007706,
+      "min": 53.0,
+      "max": 56.0
+    },
+    "block_count_db": {
+      "mean": 54.75,
+      "std": 0.8968544062928813,
+      "ci95": 0.3588164926007706,
+      "min": 53.0,
+      "max": 56.0
+    },
+    "settled_contracts": {
+      "mean": 0.0,
+      "std": 0.0,
+      "ci95": 0.0,
+      "min": 0.0,
+      "max": 0.0
+    },
+    "defaulted_contracts": {
+      "mean": 0.0,
+      "std": 0.0,
+      "ci95": 0.0,
+      "min": 0.0,
+      "max": 0.0
+    },
+    "mean_balance_final": {
+      "mean": -190.28645833333334,
+      "std": 23.992567892685955,
+      "ci95": 9.599026329506666,
+      "min": -231.5,
+      "max": -153.5
+    },
+    "median_balance_final": {
+      "mean": -188.52083333333334,
+      "std": 25.96547840577138,
+      "ci95": 10.388354926827866,
+      "min": -235.0,
+      "max": -151.5
+    },
+    "min_balance_final": {
+      "mean": -302.5,
+      "std": 38.81729780259894,
+      "ci95": 15.530153558970929,
+      "min": -380.0,
+      "max": -231.0
+    },
+    "max_balance_final": {
+      "mean": -76.875,
+      "std": 40.01120766898239,
+      "ci95": 16.007816987651534,
+      "min": -132.0,
+      "max": 11.0
+    },
+    "negative_balance_agents": {
+      "mean": 7.958333333333333,
+      "std": 0.2041241452319315,
+      "ci95": 0.08166666666666668,
+      "min": 7.0,
+      "max": 8.0
+    },
+    "gini_wealth_final": {
+      "mean": -0.43373024247434794,
+      "std": 0.16491539159236795,
+      "ci95": 0.06597989815498749,
+      "min": -0.7329931972789115,
+      "max": -0.13800294406280667
+    },
+    "top1_wealth_share_final": {
+      "mean": 0.0,
+      "std": 0.0,
+      "ci95": 0.0,
+      "min": 0.0,
+      "max": 0.0
+    },
+    "wealth_shift_used": {
+      "mean": 232.95833333333334,
+      "std": 45.573141562592085,
+      "ci95": 18.233053991315924,
+      "min": 163.0,
+      "max": 343.0
+    },
+    "gini_wealth_shifted": {
+      "mean": 0.37313067715857073,
+      "std": 0.08647827197019448,
+      "ci95": 0.03459851456021888,
+      "min": 0.25407904278462645,
+      "max": 0.607276119402985
+    },
+    "top1_wealth_shifted_share_final": {
+      "mean": 0.2566804684691289,
+      "std": 0.056323656364838104,
+      "ci95": 0.02253415569514685,
+      "min": 0.18546845124282982,
+      "max": 0.4141791044776119
+    },
+    "validator_mine": {
+      "mean": 54.75,
+      "std": 0.8968544062928813,
+      "ci95": 0.3588164926007706,
+      "min": 53.0,
+      "max": 56.0
+    },
+    "validator_stake": {
+      "mean": 0.0,
+      "std": 0.0,
+      "ci95": 0.0,
+      "min": 0.0,
+      "max": 0.0
+    },
+    "validator_burn": {
+      "mean": 0.0,
+      "std": 0.0,
+      "ci95": 0.0,
+      "min": 0.0,
+      "max": 0.0
+    },
+    "tx_transfer": {
+      "mean": 0.0,
+      "std": 0.0,
+      "ci95": 0.0,
+      "min": 0.0,
+      "max": 0.0
+    },
+    "tx_stake": {
+      "mean": 8.125,
+      "std": 0.899879218948661,
+      "ci95": 0.3600266697045522,
+      "min": 7.0,
+      "max": 10.0
+    },
+    "tx_unstake": {
+      "mean": 0.0,
+      "std": 0.0,
+      "ci95": 0.0,
+      "min": 0.0,
+      "max": 0.0
+    },
+    "tx_burn": {
+      "mean": 1.875,
+      "std": 0.9469631093315001,
+      "ci95": 0.37886414910659055,
+      "min": 0.0,
+      "max": 3.0
+    },
+    "tx_study": {
+      "mean": 0.0,
+      "std": 0.0,
+      "ci95": 0.0,
+      "min": 0.0,
+      "max": 0.0
+    },
+    "tx_job": {
+      "mean": 0.0,
+      "std": 0.0,
+      "ci95": 0.0,
+      "min": 0.0,
+      "max": 0.0
+    },
+    "actions_mine": {
+      "mean": 263.5833333333333,
+      "std": 7.643790697807211,
+      "ci95": 3.058153195342505,
+      "min": 244.0,
+      "max": 278.0
+    },
+    "actions_stake": {
+      "mean": 8.125,
+      "std": 0.899879218948661,
+      "ci95": 0.3600266697045522,
+      "min": 7.0,
+      "max": 10.0
+    },
+    "actions_unstake": {
+      "mean": 16.0,
+      "std": 3.3362306249131963,
+      "ci95": 1.334770240229718,
+      "min": 11.0,
+      "max": 23.0
+    },
+    "actions_burn": {
+      "mean": 1.875,
+      "std": 0.9469631093315001,
+      "ci95": 0.37886414910659055,
+      "min": 0.0,
+      "max": 3.0
+    },
+    "actions_study": {
+      "mean": 2.5833333333333335,
+      "std": 0.8297022339981068,
+      "ci95": 0.33195002825129966,
+      "min": 1.0,
+      "max": 4.0
+    },
+    "actions_job": {
+      "mean": 187.83333333333334,
+      "std": 7.833410422069031,
+      "ci95": 3.13401688504526,
+      "min": 174.0,
+      "max": 207.0
+    },
+    "actions_transfer": {
+      "mean": 0.0,
+      "std": 0.0,
+      "ci95": 0.0,
+      "min": 0.0,
+      "max": 0.0
+    }
+  }
+}
@@ -0,0 +1,336 @@
+#!/usr/bin/env python3
+import argparse
+import csv
+import json
+import math
+import os
+import random
+import signal
+import sqlite3
+import statistics
+import subprocess
+import tempfile
+import time
+import urllib.error
+import urllib.request
+
+
+# Engine world parameters. run_one sends this dict as the "config" field of the
+# POST /init payload, and main copies it into summary.json as "world_config".
+WORLD_CONFIG = {
+    "num_agents": 8,
+    "num_cores": 4,
+    "genesis_tokens_per_agent": 1000,
+    "commons_threshold_per_turn": 100,
+    "base_inference_rate": 1,
+    "thinking_layer_discount": 0.1,
+    "mine_base_weight": 10.0,
+    "stake_weight_per_token": 0.01,
+    "burn_weight_per_token": 0.05,
+    "burn_decay_rate": 0.02,
+    "burn_maturity_turns": 3,
+    "unstake_delay_turns": 5,
+    "interest_rate_per_turn": 0.01,
+    "signing_bonus": 50,
+    "block_threshold": 20.0,
+    "attested_confirmation_window": 3,
+    "slash_both_on_timeout": True,
+}
+
+# Agent identifiers registered with the engine ("agent_0" .. "agent_7").
+# Derived from num_agents so the roster cannot drift from the config;
+# choose_actions parses the numeric suffix after the underscore.
+AGENTS = [f"agent_{i}" for i in range(WORLD_CONFIG["num_agents"])]
+
+
+def http_json(url: str, method: str = "GET", payload=None, timeout=30):
+    """Issue one HTTP request and decode the response body as JSON.
+
+    Args:
+        url: Full URL to request.
+        method: HTTP verb handed to ``urllib.request.Request``.
+        payload: Optional JSON-serializable object. When it is not ``None``
+            it is sent as a UTF-8 encoded JSON request body.
+        timeout: Timeout in seconds handed to ``urlopen``.
+
+    Returns:
+        The parsed JSON value of the response body.
+
+    Raises:
+        Whatever ``urlopen`` raises on failure, or ``json.JSONDecodeError``
+        when the response body is not valid JSON.
+    """
+    body = None if payload is None else json.dumps(payload).encode("utf-8")
+    request = urllib.request.Request(
+        url,
+        data=body,
+        # The header is attached to every request, including body-less GETs.
+        headers={"Content-Type": "application/json"},
+        method=method,
+    )
+    with urllib.request.urlopen(request, timeout=timeout) as response:
+        return json.loads(response.read().decode("utf-8"))
+
+
+def wait_engine(base_url: str, timeout_s: float = 30.0):
+    """Block until the engine answers ``GET {base_url}/config``.
+
+    The endpoint is probed repeatedly, sleeping 0.2 s after each failed
+    attempt. The deadline is measured on the monotonic clock so that
+    wall-clock adjustments cannot shorten or extend the wait.
+
+    Args:
+        base_url: Engine base URL without a trailing slash.
+        timeout_s: Time budget in seconds for starting new probes. A single
+            probe uses ``http_json``'s own timeout, so the total wait can
+            exceed this value if a probe hangs.
+
+    Raises:
+        RuntimeError: If no probe succeeded before the deadline. The last
+            probe failure, if any, is attached as ``__cause__``.
+    """
+    deadline = time.monotonic() + timeout_s
+    last_error = None
+    while time.monotonic() < deadline:
+        try:
+            http_json(f"{base_url}/config")
+            return
+        except Exception as exc:
+            # Any failure just means "not ready yet"; remember it for diagnostics.
+            last_error = exc
+            time.sleep(0.2)
+    raise RuntimeError("engine did not become ready") from last_error
+
+
+def gini(values):
+    """Return the Gini coefficient of ``values`` from the rank-weighted sum.
+
+    The values are sorted ascending and combined as
+    ``2 * sum(rank * x) / (n * sum(x)) - (n + 1) / n`` with 1-based ranks.
+    An empty input or a zero total yields ``0.0``. No sign check is made, so
+    inputs containing negative values can produce results outside ``[0, 1]``.
+    """
+    ordered = sorted(values)
+    count = len(ordered)
+    total = sum(ordered) if count else 0
+    if count == 0 or total == 0:
+        return 0.0
+
+    rank_weighted = 0.0
+    rank = 0
+    for value in ordered:
+        rank += 1
+        rank_weighted += rank * value
+
+    return (2.0 * rank_weighted) / (count * total) - (count + 1) / count
+
+
+def shifted_nonnegative(values):
+    """Translate ``values`` so that the smallest element is strictly positive.
+
+    When the minimum is zero or negative, every element is raised by
+    ``1 - min(values)``, which makes the new minimum exactly 1. Otherwise the
+    values are returned unchanged with a shift of 0.
+
+    Returns:
+        A ``(new_list, shift)`` tuple; ``new_list`` is always a fresh list.
+    """
+    lowest = min(values)
+    if lowest > 0:
+        return list(values), 0
+    offset = 1 - lowest
+    return [v + offset for v in values], offset
+
+
+def _policy_action(turn, aid, balance, staked, poorest):
+    """Pick one agent's action for this turn; the first matching rule wins."""
+    idx = int(aid.split("_")[1])
+
+    if balance < -180:
+        return {"action": "job"}
+    if staked > 0 and turn % 15 == 0:
+        return {"action": "unstake"}
+    if balance > 360 and staked < 220 and (turn + idx) % 7 == 0:
+        return {"action": "stake", "amount": 80}
+    if balance > 600 and (turn + idx) % 11 == 0:
+        return {"action": "burn", "amount": 50}
+    if balance > 450 and turn % 13 == 0 and poorest != aid:
+        return {"action": "transfer", "to": poorest, "amount": 20, "fee": 1}
+    if balance > 250 and turn % 10 == 0:
+        return {"action": "study"}
+    return {"action": "mine"}
+
+
+def choose_actions(turn, state, rng):
+    """Build the per-agent input list submitted to the engine for one turn.
+
+    Args:
+        turn: 1-based turn number; it drives the modulo-based rule schedule.
+        state: Engine state dict whose ``"agents"`` list holds records with
+            ``agent_id``, ``balance`` and (optionally) ``staked``.
+        rng: ``random.Random``-like object used for the unit counts.
+
+    Returns:
+        One dict per entry in ``AGENTS``, in that order, carrying the chosen
+        action plus random ``thinking_units`` (50..350) and ``output_units``
+        (40..280). The thinking draw precedes the output draw for each agent.
+    """
+    roster = state["agents"]
+    lookup = {rec["agent_id"]: rec for rec in roster}
+    # Transfers, when triggered, go to the agent with the lowest balance.
+    poorest = min(roster, key=lambda rec: rec["balance"])["agent_id"]
+
+    submissions = []
+    for aid in AGENTS:
+        rec = lookup[aid]
+        action = _policy_action(
+            turn,
+            aid,
+            int(rec.get("balance", 0)),
+            int(rec.get("staked", 0)),
+            poorest,
+        )
+        submissions.append(
+            {
+                "agent_id": aid,
+                "thinking": "",
+                "action": action,
+                "speech": None,
+                "thinking_units": rng.randint(50, 350),
+                "output_units": rng.randint(40, 280),
+            }
+        )
+    return submissions
+
+
+def query_sqlite_metrics(db_path: str):
+    """Read block and transaction tallies from the engine's SQLite database.
+
+    The queries rely on a ``blocks`` table with ``turn`` and JSON ``data``
+    columns and a ``transactions`` table with a JSON ``data`` column. Rows
+    with ``turn = 0`` are excluded from the block figures.
+
+    Args:
+        db_path: Filesystem path of the SQLite database.
+
+    Returns:
+        ``(block_count, validator_counts, tx_counts)`` where
+        ``validator_counts`` maps ``$.validator_type`` to a block count and
+        ``tx_counts`` maps ``$.tx_type`` to a transaction count.
+
+    Raises:
+        sqlite3.Error: If the database cannot be queried. The connection is
+            closed in that case as well.
+    """
+    conn = sqlite3.connect(db_path)
+    try:
+        cur = conn.cursor()
+
+        cur.execute("SELECT COUNT(*) FROM blocks WHERE turn > 0")
+        block_count = cur.fetchone()[0]
+
+        cur.execute(
+            """
+        SELECT json_extract(data, '$.validator_type'), COUNT(*)
+        FROM blocks
+        WHERE turn > 0
+        GROUP BY json_extract(data, '$.validator_type')
+        """
+        )
+        validator_counts = {row[0]: row[1] for row in cur.fetchall()}
+
+        cur.execute(
+            """
+        SELECT json_extract(data, '$.tx_type'), COUNT(*)
+        FROM transactions
+        GROUP BY json_extract(data, '$.tx_type')
+        """
+        )
+        tx_counts = {row[0]: row[1] for row in cur.fetchall()}
+    finally:
+        # Release the handle even when a query fails, so the database file
+        # is not held open when the caller cleans up.
+        conn.close()
+
+    return block_count, validator_counts, tx_counts
+
+
+def _stop_engine(proc, grace_s: float = 3.0):
+    """Stop the engine subprocess and make sure it has been reaped.
+
+    SIGTERM is sent first and the process gets ``grace_s`` seconds to exit.
+    If that fails for any reason the process is killed and then waited on,
+    so the child has exited before the caller removes its working files.
+    """
+    try:
+        proc.send_signal(signal.SIGTERM)
+        proc.wait(timeout=grace_s)
+    except Exception:
+        proc.kill()
+        proc.wait()
+
+
+def run_one(run_id: int, turns: int, engine_bin: str):
+    """Run one simulation against a freshly started engine and collect metrics.
+
+    The engine binary is launched with ``DB_PATH`` pointing into a temporary
+    directory and ``PORT`` set to ``3100 + run_id``. The world is initialised
+    with ``WORLD_CONFIG`` and ``AGENTS``, then ``turns`` turns are played with
+    actions from ``choose_actions`` using an RNG seeded with ``1000 + run_id``.
+
+    Args:
+        run_id: Run number; determines both the RNG seed and the TCP port.
+        turns: Number of turns to play.
+        engine_bin: Path of the engine executable.
+
+    Returns:
+        A flat dict of per-run metrics. The key order matters to callers that
+        use it as the CSV column order.
+
+    Raises:
+        RuntimeError: If the engine never becomes ready (from ``wait_engine``).
+        Other exceptions from the HTTP calls or SQLite queries propagate; the
+        engine process is stopped in every case.
+    """
+    rng = random.Random(1000 + run_id)
+    port = 3100 + run_id
+    base = f"http://127.0.0.1:{port}"
+
+    with tempfile.TemporaryDirectory(prefix=f"sim_run_{run_id}_") as tmpd:
+        db_path = os.path.join(tmpd, "sim.db")
+        env = os.environ.copy()
+        env["DB_PATH"] = db_path
+        env["PORT"] = str(port)
+
+        proc = subprocess.Popen(
+            [engine_bin],
+            env=env,
+            stdout=subprocess.DEVNULL,
+            stderr=subprocess.DEVNULL,
+        )
+        try:
+            wait_engine(base)
+
+            http_json(
+                f"{base}/init",
+                method="POST",
+                payload={"config": WORLD_CONFIG, "agent_ids": AGENTS},
+            )
+
+            total_fees = 0
+            turns_with_block = 0
+            contracts_settled = 0
+            contracts_defaulted = 0
+            # Tally of actions *submitted* by the harness, by action name.
+            action_counts = {
+                "mine": 0,
+                "stake": 0,
+                "unstake": 0,
+                "burn": 0,
+                "study": 0,
+                "job": 0,
+                "transfer": 0,
+            }
+
+            for turn in range(1, turns + 1):
+                state = http_json(f"{base}/state")
+                inputs = choose_actions(turn, state, rng)
+                for it in inputs:
+                    name = it["action"]["action"]
+                    if name in action_counts:
+                        action_counts[name] += 1
+                out = http_json(f"{base}/turn", method="POST", payload={"inputs": inputs})
+                # Tolerate a non-dict response by treating it as having no data.
+                data = out.get("data", {}) if isinstance(out, dict) else {}
+
+                # "or 0" / "or []" guard against explicit nulls in the response.
+                total_fees += int(data.get("inference_fees_collected", 0) or 0)
+                if data.get("block_winner"):
+                    turns_with_block += 1
+                contracts_settled += len(data.get("contracts_settled", []) or [])
+                contracts_defaulted += len(data.get("contracts_defaulted", []) or [])
+
+            final_state = http_json(f"{base}/state")
+            balances = [int(a["balance"]) for a in final_state["agents"]]
+            staked = [int(a["staked"]) for a in final_state["agents"]]
+            # Wealth is liquid balance plus staked tokens, per agent.
+            wealth = [b + s for b, s in zip(balances, staked)]
+            total_wealth = sum(wealth)
+            # The raw top-1 share is reported as 0.0 when total wealth is not positive.
+            top1_share = max(wealth) / total_wealth if total_wealth > 0 else 0.0
+            wealth_shifted, shift_used = shifted_nonnegative(wealth)
+            top1_shifted_share = max(wealth_shifted) / sum(wealth_shifted)
+
+            # Read the database while the engine is still running, as before.
+            block_count_db, validator_counts, tx_counts = query_sqlite_metrics(db_path)
+
+            return {
+                "run_id": run_id,
+                "turns": turns,
+                "token_supply_final": int(final_state.get("token_supply", 0)),
+                "fees_total": int(total_fees),
+                "turns_with_block": int(turns_with_block),
+                "block_count_db": int(block_count_db),
+                "settled_contracts": int(contracts_settled),
+                "defaulted_contracts": int(contracts_defaulted),
+                "mean_balance_final": statistics.mean(balances),
+                "median_balance_final": statistics.median(balances),
+                "min_balance_final": min(balances),
+                "max_balance_final": max(balances),
+                "negative_balance_agents": sum(1 for b in balances if b < 0),
+                "gini_wealth_final": gini(wealth),
+                "top1_wealth_share_final": top1_share,
+                "wealth_shift_used": float(shift_used),
+                "gini_wealth_shifted": gini(wealth_shifted),
+                "top1_wealth_shifted_share_final": top1_shifted_share,
+                "validator_mine": int(validator_counts.get("mine", 0)),
+                "validator_stake": int(validator_counts.get("stake", 0)),
+                "validator_burn": int(validator_counts.get("burn", 0)),
+                "tx_transfer": int(tx_counts.get("transfer", 0)),
+                "tx_stake": int(tx_counts.get("stake", 0)),
+                "tx_unstake": int(tx_counts.get("unstake", 0)),
+                "tx_burn": int(tx_counts.get("burn", 0)),
+                "tx_study": int(tx_counts.get("study", 0)),
+                "tx_job": int(tx_counts.get("job", 0)),
+                "actions_mine": int(action_counts["mine"]),
+                "actions_stake": int(action_counts["stake"]),
+                "actions_unstake": int(action_counts["unstake"]),
+                "actions_burn": int(action_counts["burn"]),
+                "actions_study": int(action_counts["study"]),
+                "actions_job": int(action_counts["job"]),
+                "actions_transfer": int(action_counts["transfer"]),
+            }
+        finally:
+            _stop_engine(proc)
+
+def summarize(rows):
+    """Aggregate per-run metric rows into summary statistics per metric.
+
+    The set of metrics is taken from the first row: every key except
+    ``run_id`` whose value there is an ``int`` or ``float``.
+
+    Args:
+        rows: List of per-run metric dicts sharing the same keys.
+
+    Returns:
+        ``{metric: {"mean", "std", "ci95", "min", "max"}}``. ``std`` is the
+        sample standard deviation (0.0 for a single row) and ``ci95`` is the
+        normal-approximation half-width ``1.96 * std / sqrt(n)``. An empty
+        ``rows`` list yields an empty dict.
+    """
+    if not rows:
+        # Nothing to aggregate; avoid indexing rows[0] below.
+        return {}
+
+    first = rows[0]
+    numeric_keys = [
+        k
+        for k in first.keys()
+        if k not in {"run_id"}
+        and isinstance(first[k], (int, float))
+    ]
+    n = len(rows)
+    out = {}
+    for k in numeric_keys:
+        xs = [float(r[k]) for r in rows]
+        # statistics.stdev needs at least two points.
+        sd = statistics.stdev(xs) if n > 1 else 0.0
+        se = sd / math.sqrt(n)
+        out[k] = {
+            "mean": statistics.mean(xs),
+            "std": sd,
+            "ci95": 1.96 * se,
+            "min": min(xs),
+            "max": max(xs),
+        }
+    return out
+
+
+def main():
+    """Command-line entry point: run the experiments and write the results.
+
+    Runs ``--runs`` simulations of ``--turns`` turns each, one after another,
+    printing a progress line per run. It then writes ``run_metrics.csv`` (one
+    row per run) and ``summary.json`` (run count, turn count, world config and
+    aggregate statistics) into ``--out-dir``, creating the directory if needed.
+
+    Note that the defaults (20 runs, 50 turns) are only defaults; pass
+    ``--runs`` and ``--turns`` explicitly to reproduce a particular dataset.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--runs", type=int, default=20)
+    parser.add_argument("--turns", type=int, default=50)
+    parser.add_argument(
+        "--engine-bin",
+        default="sim-engine/target/release/sim-engine",
+    )
+    parser.add_argument("--out-dir", default="paper/results")
+    args = parser.parse_args()
+
+    if args.runs < 1:
+        # With no runs there is no first row to take the CSV header from.
+        parser.error("--runs must be at least 1")
+
+    os.makedirs(args.out_dir, exist_ok=True)
+
+    rows = []
+    for run_id in range(1, args.runs + 1):
+        row = run_one(run_id, args.turns, args.engine_bin)
+        rows.append(row)
+        print(
+            f"run {run_id:02d}/{args.runs} | supply={row['token_supply_final']} | "
+            f"gini={row['gini_wealth_final']:.3f} | blocks={row['turns_with_block']}"
+        )
+
+    csv_path = os.path.join(args.out_dir, "run_metrics.csv")
+    with open(csv_path, "w", newline="", encoding="utf-8") as f:
+        # Column order follows the key order of the first run's metrics dict.
+        writer = csv.DictWriter(f, fieldnames=list(rows[0].keys()))
+        writer.writeheader()
+        writer.writerows(rows)
+
+    summary = {
+        "runs": args.runs,
+        "turns": args.turns,
+        "world_config": WORLD_CONFIG,
+        "aggregate": summarize(rows),
+    }
+    summary_path = os.path.join(args.out_dir, "summary.json")
+    with open(summary_path, "w", encoding="utf-8") as f:
+        json.dump(summary, f, indent=2)
+
+    print(f"wrote {csv_path}")
+    print(f"wrote {summary_path}")
+
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,363 @@
+# Sim-Economy Under Compute Pricing Pressure: Evidence From Repeated Agent-Market Simulations and Implications for AI-Run Companies
+
+## Abstract
+
+This paper reports results from repeated runs of `sim-economy`, a blockchain-enforced multi-agent economy where agents must pay for inference and can choose among mining, staking, burning, labor (`job`), and transfer actions. We execute 24 independent simulations of 60 turns each using the production Rust engine and a deterministic policy harness that submits structured actions over the engine HTTP API. Across runs, the economy exhibits three robust regularities: (1) persistent block production (mean 54.75 blocks per 60 turns, 95% CI ±0.36); (2) high fee extraction relative to initial endowment (mean total inference fees 24,314 tokens against a starting nominal supply of 8,000 tokens, representing a 3.04× fee-to-endowment ratio); and (3) deep and near-universal balance compression into negative territory (mean final token supply −872; 7.96 of 8 agents in negative cash balance at turn 60 across runs). Inequality, measured after a standard positive-value shift required because wealth becomes negative, is substantial (shifted Gini mean 0.373, 95% CI ±0.035; shifted top-1 wealth share mean 0.257, 95% CI ±0.023) and exhibits run-to-run variability not attributable to fee volume alone. Behavioral composition converges toward a survival regime: `mine` and `job` account for approximately 94% of all agent actions. No contract activity emerges. The findings suggest that AI-run firms in compute-metered environments may evolve into liquidity-preserving, low-investment equilibria unless governance rules constrain fee extraction or provide stronger productive reinvestment channels. We discuss institutional design implications and identify five interventions relevant to practitioners building autonomous-agent organizations.
+
+---
+
+## 1. Background and Motivation
+
+A fundamental economic question facing AI-run organizations is whether autonomous agents can sustain positive-sum production when their own cognition is metered and billed. Unlike human employees whose thinking is effectively unpriced, AI agents in modern compute infrastructure operate at measurable and chargeable cost. If an organization internalizes those costs via direct billing—charging agents for each inference call from a shared treasury—the resulting incentive structure may be deeply unlike anything studied in conventional organizational economics.
+
+The `sim-economy` system makes this question concrete: each turn, agents select on-chain actions while the system charges inference fees based on tokenized compute usage. Fees are redistributed to core-share owners, debt accrues interest when balances are negative, and a validation lottery determines which agent receives block rewards. This creates a compact laboratory for observing emergent behavioral adaptation under compute scarcity.
+
+### 1.1 Institutional Economics Framing
+
+The economic mechanisms in `sim-economy` map naturally onto themes from institutional and organizational economics.
+
+**Resource governance in shared systems.** Ostrom's (1990) analysis of commons governance identified that open-access resources without appropriation rules tend toward overuse and degradation. In `sim-economy`, inference capacity is nominally commons-governed (below a per-turn threshold), but above that threshold it is priced. The interaction between commons-access and price-access regimes determines how much cognitive capital agents can affordably deploy.
+
+**Contracting under incomplete enforcement.** Williamson (1985) and Hart and Holmstrom (1987) emphasize that asset specificity and verification costs shape which transactions are internalized versus contracted out. In `sim-economy`, three-party contracts require explicit proposer, counterparty, and arbitrator coordination—a significant organizational overhead that may not be worth incurring under liquidity pressure.
+
+**Governance rents and the principal-agent problem.** Jensen and Meckling's (1976) theory of the firm identifies how ownership structures shape incentives and potential rent extraction by insiders. In `sim-economy`, core-share owners receive dividends from the inference fee pool—structurally analogous to equity holders extracting value from an operating subsidiary. If the subsidiary (the agent economy) bears the entire cost of compute but owners receive fee dividends, the resulting extraction can exceed productive surplus.
+
+**Organizational ecology and survival.** Population ecology approaches (Hannan and Freeman, 1977) suggest organizations under resource scarcity tend toward structural inertia and narrow strategy sets. Our results echo this: agents under fee pressure converge to two behavioral archetypes (`mine` and `job`) rather than maintaining diverse strategy portfolios.
+
+### 1.2 AI Firms and the New Compute Economics
+
+For AI-native firms—organizations where most operational activity is performed by LLM-driven agents—inference costs represent a new category of fundamental operating expense. Unlike human labor, which is difficult to finely meter, AI agent activity can be tracked, priced, and billed at individual inference granularity. Several real-world deployment patterns already approximate this:
+
+- Internal "AI agent as a service" platforms where business units pay per-query to shared inference infrastructure
+- Autonomous research or engineering agents deployed on usage-based cloud compute with budget limits
+- Multi-agent systems where agents hold token balances to pay for model calls in competitive markets
+
+In each case, the core tension is whether agents' productive outputs—block validation fees, labor subsidies, contract fulfillment—can keep pace with the inference burn rate. Our simulation results suggest the answer, under baseline parameters, is strongly negative.
+
+---
+
+## 2. System Overview
+
+The implementation under study is the `sim-package` codebase, consisting of two components:
+
+- **`sim-engine` (Rust)**: authoritative state machine, ledger, block production, contract settlement, fee and dividend accounting. Exposed as an HTTP API accepting turn-based action batches.
+- **`sim-orchestrator` (Python)**: reference LLM turn loop. Not used directly for the experiments here; replaced by our deterministic policy harness.
+
+The world initializes 8 agents, each with 1,000 tokens, for a nominal starting supply of 8,000 tokens. The full world configuration used in all experiments is given in Table 1.
+
+**Table 1. World Configuration Parameters**
+
+| Parameter | Value | Description |
+|-----------|-------|-------------|
+| `num_agents` | 8 | Number of economic agents |
+| `num_cores` | 4 | Number of core shares available |
+| `genesis_tokens_per_agent` | 1,000 | Initial token balance per agent |
+| `commons_threshold_per_turn` | 100 | Inference units below which no fee applies |
+| `base_inference_rate` | 1 | Token cost per inference unit above threshold |
+| `thinking_layer_discount` | 0.1 | Discount multiplier for thinking-layer units |
+| `mine_base_weight` | 10.0 | Base lottery weight for mine action |
+| `stake_weight_per_token` | 0.01 | Additional lottery weight per staked token |
+| `burn_weight_per_token` | 0.05 | Additional lottery weight per burned token |
+| `burn_decay_rate` | 0.02 | Per-turn decay of burn lottery weight |
+| `burn_maturity_turns` | 3 | Turns before burn weight activates |
+| `unstake_delay_turns` | 5 | Turns before staked tokens are returned |
+| `interest_rate_per_turn` | 0.01 | Per-turn interest on negative balances (1%) |
+| `signing_bonus` | 50 | Tokens paid to signing agent per block |
+| `block_threshold` | 20.0 | Minimum total weight required to produce a block |
+| `attested_confirmation_window` | 3 | Turns required for attestation confirmation |
+| `slash_both_on_timeout` | true | Slash both parties on arbitration timeout |
+
+### 2.1 Economic Mechanisms
+
+**Compute billing.** Each turn, each agent submits thinking units and output units. Units above `commons_threshold_per_turn` are billed at `base_inference_rate` tokens per unit (with a discount for thinking-layer units). This means that even an agent choosing `mine`—a minimal on-chain action—incurs inference fees for the processing overhead of forming that choice.
+
+**Validation lottery.** Block production follows a weighted lottery. An agent's lottery weight is `mine_base_weight + stake_weight_per_token × staked_amount + burn_weight_per_token × burn_weight_remaining`. A block is produced only if the aggregate weight across all agents meets `block_threshold`. The winning agent receives the `signing_bonus` plus any transaction fees for that block.
+
+**Debt dynamics.** When an agent's balance falls below zero, the engine charges interest on the negative balance at `interest_rate_per_turn` (1% per turn), compounding each subsequent turn. This creates an accelerating debt spiral for agents already in deficit.
+
+**Fee redistribution.** Inference fees collected each turn are pooled and distributed to core-share owners. In these experiments, shares are held by agents who have staked or burned—but because stake and burn activity is minimal (means of 8.1 and 1.9 actions per run, respectively, out of 480 total actions), dividend flows are modest relative to fee outflows.
+
+**Contract layer.** Three-party contracts can be proposed between agents with an arbitrator. Settlement requires both counterparty acceptance and arbitrator confirmation within the attested confirmation window. No contract proposals were submitted in any run.
+
+---
+
+## 3. Method
+
+### 3.1 Experimental Design
+
+We executed **24 independent runs**, each of **60 turns**, against the production Rust engine release binary at `sim-engine/target/release/sim-engine`. Each run used a freshly initialized SQLite ledger and an isolated engine process bound to a unique local port, preventing any state leakage between runs.
+
+The experiment runner is in `paper/run_sim_experiments.py`. Per-run outputs are written to `paper/results/run_metrics.csv`; aggregate statistics with 95% confidence intervals are written to `paper/results/summary.json`.
+
+### 3.2 Deterministic Policy Harness
+
+To ensure reproducibility without external LLM dependencies, agent actions were generated by a deterministic rule policy conditioned on each agent's current balance, staked position, and turn index. The policy implements a priority-ordered decision tree per agent per turn:
+
+```
+for each agent at turn t:
+  if balance < -180:          → job
+  elif staked > 0 and t % 15 == 0:   → unstake
+  elif balance > 360 and staked < 220 and (t + agent_idx) % 7 == 0:
+                               → stake(80)
+  elif balance > 600 and (t + agent_idx) % 11 == 0:
+                               → burn(50)
+  elif balance > 450 and t % 13 == 0 and not poorest:
+                               → transfer(20, to=poorest_agent)
+  elif balance > 250 and t % 10 == 0:
+                               → study
+  else:                        → mine
+```
+
+The `job` trigger at balance < −180 represents an austerity threshold: agents below this level prioritize the `job` labor subsidy over all other actions. The `mine` default captures the passive lottery-participation behavior.
+
+Inference unit consumption is sampled stochastically each turn per agent: thinking units from Uniform(50, 350) and output units from Uniform(40, 280). This stochasticity, seeded from the system clock per run, drives cross-run variation in fee totals and downstream balance dynamics while keeping strategic behavior deterministic.
+
+### 3.3 Metrics Collected
+
+Per run, we collect:
+
+- **Macro state**: final token supply (sum over agents of balance plus staked tokens, with negative balances included), total inference fees collected over 60 turns, block production count
+- **Agent distribution**: min, median, mean, and max balance at turn 60; count of agents with negative balances
+- **Behavioral volume**: action counts by type (`mine`, `stake`, `unstake`, `burn`, `study`, `job`, `transfer`)
+- **Ledger activity**: on-chain transaction counts by type; validator type (mine/stake/burn) per block
+- **Inequality**: Gini coefficient and top-1 share computed on both raw and shift-adjusted wealth
+- **Contract activity**: settled and defaulted contracts
+
+### 3.4 Wealth and Inequality Measurement Under Negative Balances
+
+Agent wealth at turn 60 is defined as `balance + staked`, capturing both liquid holdings and locked stake. Because many runs end with most or all agents in deep negative balance, the raw Gini coefficient applied to signed wealth values frequently becomes negative (a mathematical artifact of sign changes in the Lorenz integral) and loses its standard interpretation.
+
+We therefore report two inequality measures in parallel:
+
+1. **Raw Gini** (`gini_wealth_final`): computed directly on signed wealth; can be negative when the bottom of the wealth distribution is negative. Reported for completeness.
+2. **Shifted Gini** (`gini_wealth_shifted`): computed after adding a per-run constant shift `s = 1 − min(wealth)` to make all wealth strictly positive. This preserves within-run rank ordering and the absolute wealth gaps between agents (though not their ratios) while restoring interpretability. Our primary inequality analysis uses the shifted measure.
+
+The mean shift required across runs was 233 tokens (range: 163 to 343), reflecting the depth of wealth destruction in each run.
+
+---
+
+## 4. Results
+
+### 4.1 Per-Run Summary
+
+Table 2 presents the full per-run results for all 24 runs, enabling direct inspection of cross-run variation.
+
+**Table 2. Per-Run Outcomes (24 runs × 60 turns)**
+
+| Run | Final Supply | Total Fees | Blocks | Neg. Agents | Gini (sh.) | Top-1 (sh.) | Mine | Job |
+|-----|-------------|-----------|--------|-------------|-----------|------------|------|-----|
+| 1   | −546        | 24,753    | 55     | 8           | 0.296     | 0.185      | 260  | 197 |
+| 2   | −1,019      | 24,690    | 54     | 8           | 0.316     | 0.294      | 262  | 192 |
+| 3   | −1,244      | 24,382    | 53     | 8           | 0.501     | 0.327      | 258  | 193 |
+| 4   | −888        | 25,268    | 55     | 8           | 0.371     | 0.248      | 263  | 186 |
+| 5   | −711        | 24,946    | 55     | 8           | 0.373     | 0.235      | 244  | 207 |
+| 6   | −1,287      | 25,615    | 56     | 8           | 0.408     | 0.322      | 278  | 174 |
+| 7   | −512        | 23,740    | 55     | 8           | 0.385     | 0.234      | 256  | 193 |
+| 8   | −980        | 22,700    | 55     | 8           | 0.382     | 0.230      | 271  | 180 |
+| 9   | −909        | 24,198    | 55     | 8           | 0.408     | 0.326      | 260  | 191 |
+| 10  | −870        | 24,931    | 56     | 8           | 0.266     | 0.196      | 267  | 186 |
+| 11  | −1,212      | 23,754    | 55     | 8           | 0.364     | 0.246      | 273  | 184 |
+| 12  | −797        | 24,322    | 56     | 8           | 0.254     | 0.228      | 266  | 190 |
+| 13  | −879        | 25,235    | 55     | 8           | 0.283     | 0.201      | 254  | 190 |
+| 14  | −857        | 24,614    | 54     | 8           | 0.371     | 0.230      | 265  | 188 |
+| 15  | −591        | 24,257    | 54     | 8           | 0.316     | 0.234      | 252  | 200 |
+| 16  | −1,125      | 23,846    | 56     | 8           | 0.475     | 0.306      | 264  | 191 |
+| 17  | −800        | 24,068    | 54     | 8           | 0.607     | 0.414      | 268  | 182 |
+| 18  | −1,041      | 23,746    | 54     | 8           | 0.279     | 0.197      | 258  | 190 |
+| 19  | −628        | 24,893    | 54     | 8           | 0.326     | 0.226      | 275  | 178 |
+| 20  | −902        | 24,180    | 55     | 8           | 0.512     | 0.328      | 269  | 178 |
+| 21  | −1,029      | 23,660    | 53     | 8           | 0.354     | 0.216      | 266  | 190 |
+| 22  | −743        | 23,124    | 56     | 8           | 0.403     | 0.246      | 269  | 174 |
+| 23  | −588        | 23,892    | 55     | 7           | 0.419     | 0.285      | 264  | 185 |
+| 24  | −777        | 24,723    | 54     | 8           | 0.285     | 0.208      | 264  | 189 |
+
+Run 23 is the sole run where a terminal positive balance existed (max_balance_final = +11 tokens for one agent); all other 23 runs ended with every agent in negative cash balance.
+
+### 4.2 Aggregate Outcomes
+
+**Table 3. Aggregate Outcome Statistics (n = 24 runs)**
+
+| Metric | Mean | 95% CI | Min | Max |
+|--------|------|--------|-----|-----|
+| Final token supply (tokens) | −872 | ±87 | −1,287 | −512 |
+| Total inference fees (tokens) | 24,314 | ±278 | 22,700 | 25,615 |
+| Turns with block produced | 54.75 | ±0.36 | 53 | 56 |
+| Agents with negative balance | 7.96 / 8 | ±0.08 | 7 | 8 |
+| Mean balance at T=60 (tokens) | −190 | ±10 | −232 | −154 |
+| Median balance at T=60 (tokens) | −189 | ±10 | −235 | −152 |
+| Min balance at T=60 (tokens) | −303 | ±16 | −380 | −231 |
+| Max balance at T=60 (tokens) | −77 | ±16 | −132 | +11 |
+| Shifted Gini (wealth) | 0.373 | ±0.035 | 0.254 | 0.607 |
+| Shifted top-1 wealth share | 0.257 | ±0.023 | 0.185 | 0.414 |
+
+Three macro-level patterns emerge with high consistency across runs:
+
+**Fee burden is severe and systematic.** Total inference fees average 24,314 tokens over 60 turns against a starting supply of 8,000 tokens—a 3.04× fee-to-initial-endowment ratio. Crucially, fees vary relatively little across runs (CV = 2.9%) compared to final supply (CV = 25.0%), suggesting that fee extraction is structurally determined by the world configuration and action volume rather than sensitive to behavioral variation.
+
+**Wealth destruction is nearly total.** The economy begins with 8,000 tokens in positive balances and ends, on average, with −872 tokens in aggregate balance-plus-stake representation. This constitutes a net wealth destruction of approximately 8,872 tokens, or 110.9% of initial endowment. The median run final supply is −874.5 tokens, the midpoint of the 12th- and 13th-ranked runs in Table 2 (−870 and −879), with an interquartile range of −1,029 to −743.
+
+**Technical liveness is robust.** Despite universal financial distress, blocks are produced in 54.75 of 60 turns on average. The engine maintains functional block production throughout all runs. This decoupling of economic solvency from technical liveness is a notable structural feature: the network continues to operate even as all participants incur losses.
+
+### 4.3 Behavioral Decomposition
+
+**Table 4. Mean Action Counts per Run (n = 24)**
+
+| Action | Mean | 95% CI | Min | Max | Share of Total |
+|--------|------|--------|-----|-----|----------------|
+| `mine` | 263.6 | ±3.1 | 244 | 278 | 54.9% |
+| `job` | 187.8 | ±3.1 | 174 | 207 | 39.1% |
+| `unstake` | 16.0 | ±1.3 | 11 | 23 | 3.3% |
+| `stake` | 8.1 | ±0.4 | 7 | 10 | 1.7% |
+| `study` | 2.6 | ±0.3 | 1 | 4 | 0.5% |
+| `burn` | 1.9 | ±0.4 | 0 | 3 | 0.4% |
+| `transfer` | 0.0 | ±0.0 | 0 | 0 | 0.0% |
+| **Total** | **480.0** | | 480 | 480 | 100% |
+
+The behavioral distribution is dominated by `mine` (54.9%) and `job` (39.1%), together accounting for 94.0% of all agent actions. The remaining 6% is spread across `unstake`, `stake`, `study`, and `burn`. No `transfer` actions are executed across any run.
+
+The fixed total of 480 actions per run reflects deterministic timing: each of 8 agents makes exactly 1 action decision per turn × 60 turns. Cross-run variation in individual action counts reflects the policy's responsiveness to balance states, which evolve differently under stochastic inference costs.
+
+The dominance of `mine` and `job` reflects two distinct adaptive pressures:
+
+- **`mine`** is the default action when no priority condition fires. It participates in the block lottery at zero strategic cost and requires no treasury balance.
+- **`job`** fires when balance falls below −180, providing a labor subsidy that partially offsets debt accumulation. With most agents in deep negative balance from early turns, `job` becomes the dominant alternative to `mine`.
+
+Higher-value but balance-dependent actions—`stake` (requires positive liquidity surplus), `burn` (requires larger surplus), `study` (requires moderate surplus), `transfer` (requires both surplus and specific counterparty conditions)—are crowded out by the policy's austerity response to negative balances.
+
+### 4.4 Ledger and Validation Structure
+
+All 54.75 blocks produced per run on average are mine-validated. Zero stake-validated or burn-validated blocks appear in any run. This reflects the minimal stake and burn volumes: with only 8.1 stake actions and 1.9 burn actions per run average, agents accumulate insufficient stake weight and burn maturity to meaningfully compete in the lottery against the base `mine_base_weight` of 10.0.
+
+On-chain transaction records show a notable divergence from action volumes:
+
+| Transaction type | Mean finalized (per run) |
+|-----------------|--------------------------|
+| `stake` | 8.1 |
+| `burn` | 1.9 |
+| `unstake` | 0.0 |
+| `job`, `study`, `transfer` | 0.0 |
+
+The `unstake` divergence (16.0 actions attempted, 0.0 finalized on-chain) reflects the `unstake_delay_turns = 5` parameter: unstake requests require 5 turns to mature before returning staked tokens. In the engine's current implementation, the unstake initiation is not itself recorded as a finalized ledger transaction; only the matured return would create a ledger entry, and the 60-turn run horizon combined with late-run initiation means few or no unstakes complete the full delay cycle.
+
+Similarly, `job` and `study` actions do not produce independent on-chain transaction records in the current engine schema—their effects are reflected in balance changes captured in the state, but they do not generate entries in the `transactions` table.
+
+### 4.5 Inequality Dynamics
+
+The shifted Gini coefficient ranges from 0.254 (run 12) to 0.607 (run 17) across runs, with a coefficient of variation of 23.2%. This is substantial run-to-run inequality variability given that the policy and starting conditions are identical. The variability arises from inference unit stochasticity: because thinking and output units are sampled per-agent per-turn, some agents incur higher billing costs than others by chance, causing early divergence in balance trajectories that compounds through the debt interest mechanism.
+
+The correlation between shifted Gini and top-1 shifted share is 0.872, confirming that both inequality measures track the same underlying distribution dynamic. Runs where one agent survives comparatively better (lower debt accumulation by chance) show both higher Gini and higher top-1 share.
+
+Notably, fee volume does not strongly predict inequality (correlation −0.251): high-fee runs are not systematically more unequal than low-fee runs, because fee extraction affects all agents proportionally. Inequality instead reflects differential balance trajectories under stochastic billing and the debt interest compounding that widens early gaps over time.
+
+### 4.6 Contract Layer Utilization
+
+Across all 24 runs, zero contracts were settled and zero contracts defaulted. No contract proposals were submitted. Given that the policy harness contains no contract-proposal heuristics and that contract formation requires three-party coordination under active liquidity pressure, this absence is expected. It remains analytically useful: it confirms that contract affordances do not self-activate under pure fee pressure without exogenous coordination mechanisms, even in a system where contracts are technically available and potentially beneficial.
+
+---
+
+## 5. Discussion: Implications for AI-Run Companies
+
+### 5.1 Compute Pricing Can Dominate Strategy Space
+
+The most striking finding is the structural imbalance between inference billing and replenishment channels. Agents begin with 8,000 tokens in aggregate and incur 24,314 tokens in fees over 60 turns—a rate that the available productive actions (block signing bonuses, job subsidies) cannot offset. The economy's net wealth trajectory is monotonically negative in all runs.
+
+For AI firms, this maps directly to a scenario where computational operating costs exceed the revenue or productive surplus generated by AI agent activity. If a business unit's AI agents collectively bill more in compute than they return in measurable value, the organizational treasury faces continuous drain. Under the pressure of this drain, agents in our simulation converge to short-horizon survival behaviors rather than value-creating investment. The same dynamic could manifest in AI firms as a systematic tendency toward routine, low-cost tasks at the expense of higher-value but compute-intensive work (research, complex contract negotiation, multi-step planning).
+
+### 5.2 Survival Equilibria Are Narrow and Self-Reinforcing
+
+The convergence of ~94% of actions to `mine` + `job` represents a behavioral equilibrium that is stable but economically unproductive. Once most agents cross the −180 token austerity threshold (which happens quickly under 3× fee burden), the policy triggers `job` indefinitely, suppressing `stake`, `burn`, `study`, and `transfer`. This creates a feedback loop: austerity response → reduced stake → lower lottery weight for block production → lower signing bonus revenue → continued austerity.
+
+This structure has a corporate analogue: organizations facing cash pressure often eliminate training budgets, R&D spending, and cross-team collaboration (the corporate equivalents of `study`, `burn`, and `transfer`) in favor of revenue-generating and cost-preserving activities. The resulting narrowing of capability investment can accelerate long-term competitive decline even while preserving short-term solvency—or in our simulation's case, slowing the rate of balance deterioration without reversing it.
+
+### 5.3 Technical Liveness Does Not Imply Economic Health
+
+An important structural observation is the decoupling between block production (technical liveness) and agent solvency (economic health). Blocks are produced in 54.75/60 turns on average—a 91.3% block production rate—even as nearly every agent ends in negative balance (all 8 agents in 23 of 24 runs). This decoupling is relevant for AI firm governance: **operational metrics (uptime, throughput, task completion) may remain healthy while the underlying economic position deteriorates**.
+
+Organizations relying solely on operational KPIs to assess autonomous AI unit health risk missing the parallel dimension of compute cost accumulation. A billing dashboard and per-unit P&L attribution are necessary complements to performance monitoring.
+
+### 5.4 Governance Rents Need Guardrails
+
+The `sim-economy` fee redistribution mechanism models a platform-infrastructure dynamic where core-share owners receive dividends from the inference fee pool. In runs where fee extraction is highest (run 6: 25,615 tokens), the dividend pool is correspondingly large—but this does not translate to better outcomes for the agent economy; rather, the agents who bear the fee burden are distinct from those positioned to capture the redistribution.
+
+This structural asymmetry is directly relevant to AI firm design. If inference infrastructure is owned by a parent company or platform layer that charges usage fees to autonomous agent subsidiaries, the resulting economic transfer can be extractive at scale. The shifted top-1 wealth share range of 0.185 to 0.414 across runs—despite identical starting conditions—illustrates how structurally identical systems can produce substantially different concentration outcomes under stochastic cost variation.
+
+### 5.5 Contracts Do Not Emerge Automatically
+
+The complete absence of contract activity across all runs corroborates transaction cost theory predictions. Under severe liquidity constraints, the overhead of initiating, negotiating, and settling three-party contracts is not worth incurring when immediate survival actions dominate. The observation generalizes to AI firms: autonomous agents under compute pressure may default to unilateral, low-coordination actions rather than forming structured agreements, even when contracts would be mutually beneficial in expectation.
+
+This implies that multi-agent AI firms requiring structured inter-unit agreements will need to design explicit coordination scaffolding—pre-negotiated templates, automated matching, subsidized arbitration—rather than expecting contracts to emerge from economic pressure alone.
+
+---
+
+## 6. Design Recommendations
+
+Based on these results, we propose five practical governance interventions for AI-run firms adopting compute-metered internal economies:
+
+**1. Bound the effective fee load.** Cap the ratio of inference fees to available productive surplus over rolling windows (e.g., require that fees extractable in any 10-turn window cannot exceed 1.5× the maximum achievable block revenue in the same window). This directly addresses the structural imbalance that drives universal balance compression.
+
+**2. Reward productive reinvestment.** Tie fee rebates or dividend credits to validated long-horizon actions (`study`, contract fulfillment, sustained `stake`). If agents who invest in capability are partially shielded from fee extraction, the policy can sustain a richer behavioral repertoire under cost pressure.
+
+**3. Implement debt safety rails.** Enforce graduated debt service with automatic restructuring triggers before deep negative spirals. A debt-to-initial-endowment cap (e.g., −500 tokens), with forced renegotiation above it, would prevent the compounding interest dynamics observed in runs reaching −1,287 final supply.
+
+**4. Provide contract scaffolding.** Supply low-friction default contract templates between known counterparties, subsidize arbitration costs for small-value contracts, and maintain reputational priors that reduce counterparty search costs. These interventions lower the transaction cost threshold below which voluntary cooperation becomes rational even under pressure.
+
+**5. Monitor concentration continuously.** Track shifted top-1 share and Gini coefficient at each turn rather than at run end. Concentration dynamics in our data compound stochastic billing variation over time; early detection (e.g., Gini > 0.4 by turn 20) could trigger automatic balance redistribution or fee relief before debt spirals become irreversible.
+
+---
+
+## 7. Limitations
+
+This paper intentionally studies engine dynamics with a reproducible rule-based policy harness rather than live LLM strategic reasoning. Conclusions are therefore most robust for understanding mechanism-level behavior under a deterministic decision rule driven by stochastically sampled inference costs. Several limitations merit attention for future work:
+
+**Policy realism.** The rule harness produces a stylized response to liquidity stress that does not capture the strategic sophistication of actual LLM agents, which might pursue more complex multi-period strategies, attempt contract formation under different conditions, or adapt `stake` behavior more aggressively. Model-driven orchestration experiments with the `sim-orchestrator` component would provide an important complement.
+
+**Run horizon.** All runs use 60 turns. Longer horizons might reveal additional phase transitions—for example, whether debt interest compounding eventually causes full economic collapse, or whether a subset of agents can stabilize through persistent `job` use.
+
+**World configuration sensitivity.** All 24 runs use identical world parameters. Sensitivity analysis varying `base_inference_rate`, `commons_threshold_per_turn`, `interest_rate_per_turn`, and `signing_bonus` would clarify which parameters are most critical for governance design. Small changes to the commons threshold, in particular, may substantially alter fee burden.
+
+**Contract complexity.** No contract heuristics were implemented in the policy harness. A separate set of experiments specifically designed to test contract formation under varying liquidity conditions—including subsidy interventions—would more directly test the claim that contracts require institutional scaffolding.
+
+**Inequality measurement artifact.** The shifted Gini measure, while interpretable, introduces a per-run constant that prevents direct comparison of absolute inequality levels across runs. Future work could use a sign-robust inequality measure such as the coefficient of variation (computed against the absolute mean, since mean balances are negative in these runs) or a rank-based index.
+
+**No exogenous shocks.** The baseline runs include no productivity shocks, external demand variations, or regulatory changes. Real AI-run organizations operate in dynamic environments; testing resilience to shocks is a natural extension.
+
+---
+
+## 8. Conclusion
+
+Using 24 repeated, instrumented runs of the production `sim-economy` engine, we demonstrate that compute-metered autonomous economies can maintain technical liveness (91.3% block production rate) while simultaneously experiencing deep and near-universal economic stress (all 8 agents in negative balance in 23/24 runs; net wealth destruction averaging 110.9% of initial endowment). Behavioral repertoires narrow sharply under this pressure, converging to a `mine`+`job` survival equilibrium that suppresses capability investment and peer coordination. Inequality in the surviving distribution is substantial and variable (shifted Gini ranging from 0.254 to 0.607) despite identical initial conditions, driven by stochastic inference billing compounded through debt interest dynamics.
+
+For AI-run companies, the core institutional lesson is that **metering alone is not governance**. Without deliberate policy around fee recycling, debt management, and contract activation, autonomous agents appear likely to converge to short-horizon survival loops rather than compounding organizational capability. The five governance interventions proposed—fee load bounds, reinvestment rewards, debt safety rails, contract scaffolding, and concentration monitoring—provide a starting design vocabulary for organizations building compute-metered AI agent economies.
+
+---
+
+## Reproducibility Appendix
+
+From the repository root:
+
+```bash
+# Build the engine
+cargo build --release --manifest-path sim-engine/Cargo.toml
+
+# Run experiments (24 runs × 60 turns)
+python3 paper/run_sim_experiments.py --runs 24 --turns 60 \
+    --engine-bin sim-engine/target/release/sim-engine \
+    --out-dir paper/results
+```
+
+Primary output artifacts:
+
+- `paper/results/run_metrics.csv` — per-run metrics (24 rows, 34 columns)
+- `paper/results/summary.json` — aggregate statistics with 95% CIs, world config
+
+All numbers in this paper were verified against `paper/results/summary.json`. The experiment runner is fully self-contained (no external model calls) and produces deterministic aggregate statistics given the same run count and turn count.
+
+---
+
+## References
+
+- Hannan, M. T., and Freeman, J. (1977). The population ecology of organizations. *American Journal of Sociology*, 82(5), 929-964.
+- Hart, O., and Holmstrom, B. (1987). The theory of contracts. In T. F. Bewley (Ed.), *Advances in Economic Theory: Fifth World Congress*. Cambridge University Press.
+- Jensen, M. C., and Meckling, W. H. (1976). Theory of the firm: Managerial behavior, agency costs and ownership structure. *Journal of Financial Economics*, 3(4), 305-360.
+- Ostrom, E. (1990). *Governing the Commons: The Evolution of Institutions for Collective Action*. Cambridge University Press.
+- Williamson, O. E. (1985). *The Economic Institutions of Capitalism*. Free Press.