# coordinator/tests/test_orchestrator.py

"""Tests for the CEK-native review pipeline (meta-judge + 3 judges + consensus)."""
from __future__ import annotations

import logging
import pathlib
from datetime import datetime, timezone
from typing import Any

import pytest
import yaml

from coordinator.queue import DebateQueue, Round
from coordinator.orchestrator import (
    ACCEPT_MIN_SCORE,
    CONSENSUS_CRITERION_THRESHOLD,
    CONSENSUS_OVERALL_THRESHOLD,
    JUDGE_NAMES,
    MAX_DEBATE_ROUNDS,
    META_JUDGE_NAME,
    REWORK_INSTRUCTIONS,
    _advance_awaiting_debate,
    _advance_awaiting_judges,
    _advance_awaiting_rubric,
    _advance_round,
    _apply_verdict,
    _build_coordinator_note_no_agent,
    _build_debate_round_comment,
    _build_judge_mention_comment,
    _build_meta_judge_mention,
    _build_retrigger_comment,
    _check_consensus,
    _criterion_scores,
    _extract_yaml,
    _find_commit_url,
    _find_reply_by_agent,
    _overall_score,
    _parse_judge_report,
    _parse_rubric,
    _post_rejection_retrigger,
    _start_round,
    _utcnow,
)

# Shared logger passed as the ``logger`` argument to the orchestrator
# functions exercised by the tests in this module.
_logger = logging.getLogger("test.orchestrator")


# ---------------------------------------------------------------------------
# Fakes
# ---------------------------------------------------------------------------

class FakeConfig:
    """Static stand-in for the coordinator configuration object.

    Every setting is a class attribute, so values can be read either from the
    class itself or from an instance.
    """

    # Connection settings (dummy values).
    token = "tok"
    workspace_id = "wid"
    server_url = "http://x"

    # Scheduling knobs; the timeout tests age timestamps past round_timeout_s.
    max_concurrent_rounds = 3
    round_timeout_s = 600
    poll_interval_s = 30


class FakeClient:
    """In-memory substitute for the REST client handed to the orchestrator.

    State lives in plain attributes (``issue``, ``comments``, ``agents``) that
    tests mutate directly.  ``posted_comments`` records every body passed to
    :meth:`post_comment`, in call order.
    """

    def __init__(self) -> None:
        self._next_comment_id = 1000
        self.posted_comments: list[str] = []
        self.comments: list[dict[str, Any]] = []
        self.issue: dict[str, Any] = {
            "id": "issue-1",
            "title": "Do the thing",
            "description": "Please do the thing clearly.",
            "status": "in_review",
            "assignee_type": None,
            "assignee_id": None,
        }
        roster = (
            (META_JUDGE_NAME, "agent-meta"),
            ("Judge-GPT", "agent-gpt"),
            ("Judge-Claude", "agent-claude"),
            ("Judge-Gemini", "agent-gemini"),
        )
        self.agents: list[dict[str, Any]] = [
            {"name": agent_name, "id": agent_id} for agent_name, agent_id in roster
        ]

    def get_issue(self, issue_id: str) -> dict[str, Any]:
        """Return a shallow copy of the single stored issue (``issue_id`` is ignored)."""
        return self.issue.copy()

    def update_issue_status(self, issue_id: str, status: str) -> dict[str, Any]:
        """Overwrite the stored issue's status and echo the change back."""
        self.issue["status"] = status
        return {"id": issue_id, "status": status}

    def list_comments(self, issue_id: str) -> list[dict[str, Any]]:
        """Return a copy of the comment list so callers cannot mutate the fake."""
        return self.comments[:]

    def post_comment(self, issue_id: str, content: str) -> dict[str, Any]:
        """Record ``content`` and append a matching comment authored by ``coord-user``."""
        self.posted_comments.append(content)
        comment_id = f"posted-{self._next_comment_id}"
        self._next_comment_id += 1
        timestamp = _utcnow()
        stored = {
            "id": comment_id,
            "content": content,
            "author_id": "coord-user",
            "created_at": timestamp,
        }
        self.comments.append(stored)
        return {"id": comment_id, "created_at": timestamp}

    def list_agents(self) -> list[dict[str, Any]]:
        """Return a copy of the agent roster."""
        return self.agents[:]

    def find_agents_by_name(self, names):
        """Map each requested name that exists in the roster to its agent id."""
        wanted = set(names)
        matches = {}
        for agent in self.agents:
            if agent["name"] in wanted:
                matches[agent["name"]] = agent["id"]
        return matches

    def get_agent_name(self, agent_id: str) -> str | None:
        """Return the name of the agent with ``agent_id``, or ``None`` if unknown."""
        return next(
            (agent["name"] for agent in self.agents if agent["id"] == agent_id),
            None,
        )


def _past(seconds_ago: int) -> str:
    from datetime import timedelta
    t = datetime.now(timezone.utc) - timedelta(seconds=seconds_ago)
    return t.strftime("%Y-%m-%dT%H:%M:%SZ")


def _reply_comment(agent_id: str, content: str, created: str | None = None) -> dict[str, Any]:
    return {
        "id": f"reply-{agent_id}",
        "author_id": agent_id,
        "content": content,
        "created_at": created or _utcnow(),
    }


def _rubric_yaml_sample() -> str:
    """Serialise a small rubric (two checklist items, two dimensions) to YAML.

    Keys are emitted in insertion order (``sort_keys=False``).
    """
    checklist = [
        {
            "question": "Does the code compile?",
            "category": "hard_rule",
            "importance": "essential",
            "rationale": "basic",
        },
        {
            "question": "Is documentation present?",
            "category": "principle",
            "importance": "important",
            "rationale": "quality",
        },
    ]
    dimensions = [
        {
            "name": "Correctness",
            "description": "Does the code work",
            "scale": "1-5",
            "weight": 0.6,
            "score_definitions": {1: "no", 5: "perfect"},
        },
        {
            "name": "Clarity",
            "description": "Readability",
            "scale": "1-5",
            "weight": 0.4,
            "score_definitions": {1: "opaque", 5: "crystal"},
        },
    ]
    return yaml.safe_dump(
        {"checklist": checklist, "rubric_dimensions": dimensions},
        sort_keys=False,
    )


def _judge_report(final_score: float, criteria: dict[str, float]) -> str:
    """Render a fenced YAML judge report.

    Each entry of ``criteria`` becomes a ``rubric_scores`` item with a fixed
    weight of 0.5; ``final_score`` is stored under ``score_calculation``.
    """
    rubric_scores = []
    for criterion, score in criteria.items():
        rubric_scores.append({"name": criterion, "score": score, "weight": 0.5})
    body = {
        "score_calculation": {"final_score": final_score},
        "rubric_scores": rubric_scores,
        "executive_summary": f"score {final_score}",
    }
    dumped = yaml.safe_dump({"evaluation_report": body}, sort_keys=False)
    return f"```yaml\n{dumped}\n```"


def _make_round(tmp_path: pathlib.Path, **overrides) -> tuple[Round, DebateQueue]:
    """Create a running, ``convened`` round and persist it in a queue under ``tmp_path``.

    ``overrides`` are applied with ``setattr`` after construction, so any
    attribute of the round can be replaced or added by the caller.
    """
    defaults = dict(
        round_id="r1",
        issue_id="issue-1",
        identifier="WYL-X",
        title="Do the thing",
        enqueued_at=_utcnow(),
        status="running",
        phase="convened",
        phase_entered_at=_utcnow(),
    )
    round_ = Round(**defaults)
    for attr, value in overrides.items():
        setattr(round_, attr, value)

    queue = DebateQueue.load(tmp_path / "queue.json")
    queue.rounds.append(round_)
    queue.save()
    return round_, queue


# ---------------------------------------------------------------------------
# Pure helpers
# ---------------------------------------------------------------------------

def test_extract_yaml_from_fenced_block():
    """The YAML inside a fenced block is extracted from surrounding prose."""
    comment = (
        "Here is the rubric:\n\n"
        "```yaml\n"
        "checklist:\n"
        "  - question: foo\n"
        "```\n\n"
        "Done."
    )
    extracted = _extract_yaml(comment)
    assert extracted.startswith("checklist:")
    assert "question: foo" in extracted


def test_extract_yaml_from_unfenced_content():
    """Content without a fence comes back as-is, stripped."""
    raw = "checklist:\n  - question: foo"
    assert _extract_yaml(raw) == raw.strip()


def test_extract_yaml_unescapes_html_entities():
    """HTML entities in the comment body are decoded before YAML is returned."""
    # Multica REST API returns comment content with `"` as `&#34;`, `>` as `&gt;`, etc.
    encoded = (
        "checklist:\n"
        "  - id: &#34;CK-001&#34;\n"
        "    question: &#34;does it work?&#34;"
    )
    decoded = _extract_yaml(encoded)
    assert "&#34;" not in decoded
    assert '"CK-001"' in decoded


def test_parse_rubric_accepts_html_encoded_input():
    """A rubric whose quotes arrive as ``&#34;`` still parses and exposes ``checklist``."""
    lines = [
        "rrd_cycle_applied: true",
        "evaluation_specification:",
        "  checklist:",
        "    - id: &#34;CK-001&#34;",
        "      question: &#34;does it work?&#34;",
        "      category: &#34;hard_rule&#34;",
        "      importance: &#34;essential&#34;",
    ]
    parsed = _parse_rubric("".join(f"{line}\n" for line in lines))
    assert parsed is not None
    assert "checklist" in parsed


def test_extract_yaml_repairs_backslash_backtick():
    """Backslash-escaped backticks are rewritten to plain backticks."""
    # Gemini (and similar) emit \` inside double-quoted YAML strings, imitating
    # markdown escaping.  \` is not a valid YAML escape, so we repair it.
    content = (
        "evaluation_report:\n"
        "  rubric_scores:\n"
        "    - name: X\n"
        "      score: 4\n"
        "      evidence: \"see \\`foo.py\\` and \\`bar.py\\`\"\n"
    )
    repaired = _extract_yaml(content)
    assert "`foo.py`" in repaired
    assert "\\`" not in repaired


def test_parse_judge_report_tolerates_backslash_backtick():
    """A report containing ``\\``` inside a quoted string still parses."""
    content = "\n".join([
        "```yaml",
        "evaluation_report:",
        "  score_calculation:",
        "    final_score: 4.0",
        "  rubric_scores:",
        "    - name: Correctness",
        "      score: 4",
        "      weight: 1.0",
        "      evidence: \"see \\`foo.py\\`\"",
        "```",
    ])
    parsed = _parse_judge_report(content)
    assert parsed is not None
    assert parsed["score_calculation"]["final_score"] == 4.0


def test_parse_rubric_valid_flat():
    """A rubric with top-level keys (no wrapper) is accepted."""
    fenced = "```yaml\n" + _rubric_yaml_sample() + "\n```"
    parsed = _parse_rubric(fenced)
    assert parsed is not None
    assert any(key in parsed for key in ("checklist", "rubric_dimensions"))


def test_parse_rubric_valid_wrapped():
    """A rubric nested under ``evaluation_specification`` is unwrapped."""
    inner = yaml.safe_load(_rubric_yaml_sample())
    wrapped = yaml.safe_dump({"evaluation_specification": inner})
    parsed = _parse_rubric("```yaml\n" + wrapped + "\n```")
    assert parsed is not None
    assert "rubric_dimensions" in parsed


def test_parse_rubric_rejects_malformed_yaml():
    """Syntactically invalid YAML yields ``None``."""
    malformed = "```yaml\nnot: valid: nested: without: quotes\n```"
    parsed = _parse_rubric(malformed)
    assert parsed is None


def test_parse_rubric_rejects_yaml_without_expected_keys():
    """Valid YAML that carries no rubric keys yields ``None``."""
    unrelated = "```yaml\njust_some: random\n```"
    parsed = _parse_rubric(unrelated)
    assert parsed is None


def test_parse_judge_report_valid_with_final_score():
    """A complete report parses and exposes its ``final_score``."""
    criteria = {"Correctness": 4.0, "Clarity": 3.5}
    parsed = _parse_judge_report(_judge_report(3.7, criteria))
    assert parsed is not None
    assert parsed["score_calculation"]["final_score"] == 3.7


def test_parse_judge_report_valid_without_final_score_but_rubric_scores():
    """A report with only ``rubric_scores`` (no ``final_score``) is still accepted."""
    scores = [{"name": "Correctness", "score": 4.0, "weight": 1.0}]
    dumped = yaml.safe_dump({"evaluation_report": {"rubric_scores": scores}})
    parsed = _parse_judge_report(f"```yaml\n{dumped}\n```")
    assert parsed is not None


def test_parse_judge_report_rejects_empty():
    """YAML without any report content yields ``None``."""
    unrelated = "```yaml\nnothing: here\n```"
    parsed = _parse_judge_report(unrelated)
    assert parsed is None


def test_overall_score_prefers_final_score():
    """An explicit ``final_score`` wins over the rubric scores."""
    report = {
        "score_calculation": {"final_score": 2.8},
        "rubric_scores": [{"name": "x", "score": 5, "weight": 1}],
    }
    assert _overall_score(report) == 2.8


def test_overall_score_falls_back_to_weighted_average():
    """Without ``final_score`` the weighted mean is used: 4.0*0.6 + 2.0*0.4 = 3.2."""
    rubric_scores = [
        {"name": "a", "score": 4.0, "weight": 0.6},
        {"name": "b", "score": 2.0, "weight": 0.4},
    ]
    overall = _overall_score({"rubric_scores": rubric_scores})
    assert overall == pytest.approx(3.2)


def test_overall_score_none_when_nothing_to_extract():
    """An empty report has no overall score."""
    empty_report = {}
    assert _overall_score(empty_report) is None


def test_criterion_scores_extracts_names_and_scores():
    """``rubric_scores`` entries are flattened into a ``{name: score}`` mapping."""
    expected = {"Correctness": 4.0, "Clarity": 3.0}
    report = {
        "rubric_scores": [
            {"name": "Correctness", "score": 4.0},
            {"name": "Clarity", "score": 3.0},
        ]
    }
    assert _criterion_scores(report) == expected


def test_check_consensus_converged_accept():
    """Tightly clustered high scores converge on ACCEPT with the mean as average."""
    finals = {"Judge-GPT": 4.0, "Judge-Claude": 4.2, "Judge-Gemini": 4.1}
    reports = {
        judge: {
            "score_calculation": {"final_score": final},
            "rubric_scores": [{"name": "C", "score": 4}],
        }
        for judge, final in finals.items()
    }
    converged, verdict, avg = _check_consensus(reports)
    assert converged is True
    assert verdict == "ACCEPT"
    assert avg == pytest.approx(sum(finals.values()) / len(finals))


def test_check_consensus_converged_reject_low_score():
    """Identical low scores from every judge converge on REJECT.

    All judges report a final score of 2.5, so the average returned alongside
    the verdict must be 2.5 as well.  The other consensus tests in this module
    pin the average to the mean of the final scores; this one previously
    unpacked it without checking it.
    """
    reports = {
        n: {"score_calculation": {"final_score": 2.5}, "rubric_scores": [{"name": "C", "score": 2}]}
        for n in JUDGE_NAMES
    }
    converged, verdict, avg = _check_consensus(reports)
    assert converged is True
    assert verdict == "REJECT"
    assert avg == pytest.approx(2.5)


def test_check_consensus_not_converged_overall_spread():
    """Final scores of 2/4/3 are too far apart: no verdict, average still reported."""
    finals = (("Judge-GPT", 2.0), ("Judge-Claude", 4.0), ("Judge-Gemini", 3.0))
    reports = {
        judge: {"score_calculation": {"final_score": final}}
        for judge, final in finals
    }
    converged, verdict, avg = _check_consensus(reports)
    assert converged is False
    assert verdict is None
    assert avg == pytest.approx(3.0)


def test_check_consensus_not_converged_criterion_spread():
    """Close overall scores do not converge when one criterion ranges from 2 to 5."""
    per_judge = (
        ("Judge-GPT", 3.0, 2),
        ("Judge-Claude", 3.1, 5),
        ("Judge-Gemini", 3.0, 3),
    )
    reports = {
        judge: {
            "score_calculation": {"final_score": final},
            "rubric_scores": [{"name": "C", "score": criterion}],
        }
        for judge, final, criterion in per_judge
    }
    outcome = _check_consensus(reports)
    converged, _, _ = outcome
    assert converged is False


def test_check_consensus_no_overalls_returns_false():
    """Empty reports from every judge give no convergence and no average."""
    empty_reports = {}
    for judge in JUDGE_NAMES:
        empty_reports[judge] = {}
    converged, _verdict, avg = _check_consensus(empty_reports)
    assert converged is False
    assert avg is None


# ---------------------------------------------------------------------------
# Comment builders
# ---------------------------------------------------------------------------

def test_meta_judge_mention_contains_mention_and_description():
    """The meta-judge comment carries the mention link, the judge name and every description line."""
    description = "Description line 1\nDescription line 2"
    body = _build_meta_judge_mention("agent-meta", "Title", description)
    required = (
        "mention://agent/agent-meta",
        META_JUDGE_NAME,
        "Description line 1",
        "Description line 2",
    )
    for fragment in required:
        assert fragment in body


def test_judge_mention_contains_all_three_mentions_and_rubric():
    """The judge comment names and mentions every judge and embeds rubric and commit URL."""
    commit_url = "https://example.com/commit/abc"
    rubric = "rubric: yes"
    judge_ids = {"Judge-GPT": "a", "Judge-Claude": "b", "Judge-Gemini": "c"}
    body = _build_judge_mention_comment(judge_ids, "Title", "Desc", commit_url, rubric)
    for judge_name in JUDGE_NAMES:
        assert judge_name in body
    for agent_id in ("a", "b", "c"):
        assert f"mention://agent/{agent_id}" in body
    assert rubric in body
    assert commit_url in body


def test_debate_round_comment_quotes_all_prior_reports():
    """The debate comment announces the round and includes each judge's prior report."""
    judge_ids = {}
    prior_reports = {}
    for judge_name in JUDGE_NAMES:
        judge_ids[judge_name] = f"id-{judge_name}"
        prior_reports[judge_name] = f"REPORT FROM {judge_name}"
    body = _build_debate_round_comment(judge_ids, 1, prior_reports)
    assert "Debate round 1" in body
    for judge_name, report in prior_reports.items():
        assert judge_name in body
        assert report in body


def test_debate_round_comment_contains_cek_anti_sycophancy_language():
    """Regression guard: CEK's critical debate instructions must stay verbatim.

    The first live run converged sycophantically because an earlier, softer
    wording had dropped CEK's explicit "only revise if compelling / defend
    original if you still believe them" instructions.  Removing any of the
    exact clauses below must fail this test.
    """
    placeholder_reports = {name: "x" for name in JUDGE_NAMES}
    judge_ids = {name: f"id-{name}" for name in JUDGE_NAMES}
    body = _build_debate_round_comment(judge_ids, 1, placeholder_reports)

    required_clauses = (
        # CEK's structural instructions
        "Identify disagreements (where your scores differ by >1 point)",
        "Defend your position with evidence",
        "Challenge the other judge's position with counter-evidence",
        # CEK's CRITICAL anti-sycophancy list
        "Only revise if you find their evidence compelling.",
        "Defend your original scores if you still believe them.",
        # APPEND not REVISE
        "APPENDS to your prior report",
    )
    for clause in required_clauses:
        assert clause in body
    assert "REVISED" not in body  # the old softer phrasing is gone


def test_retrigger_comment_has_anchor_and_no_drift_instructions():
    """The retrigger comment mentions the worker, anchors the description and adds the rework text."""
    body = _build_retrigger_comment(
        "Worker", "agent-worker", "Original desc line.", "VERDICT: REJECT", "r1",
    )
    required = (
        "mention://agent/agent-worker",
        "ANCHOR",
        "Original desc line.",
        REWORK_INSTRUCTIONS,
    )
    for fragment in required:
        assert fragment in body


def test_coordinator_note_no_agent_has_no_mention():
    """The no-agent note asks for manual follow-up and contains no mention link."""
    note = _build_coordinator_note_no_agent("r1", "no assignee set")
    assert "Manual follow-up required" in note
    assert "mention://" not in note


# ---------------------------------------------------------------------------
# _find_commit_url
# ---------------------------------------------------------------------------

def test_find_commit_url_picks_latest():
    """When several comments carry commit URLs, the newest one is returned."""
    older = {
        "content": "older https://git.example/commit/abc123",
        "created_at": "2026-01-01T00:00:00Z",
    }
    newer = {
        "content": "newer https://git.example/commit/def456",
        "created_at": "2026-01-02T00:00:00Z",
    }
    url = _find_commit_url([older, newer])
    assert url == "https://git.example/commit/def456"


def test_find_commit_url_empty_when_absent():
    """No commit URL in any comment gives the empty string."""
    comments = [{"content": "no urls"}]
    url = _find_commit_url(comments)
    assert url == ""


# ---------------------------------------------------------------------------
# _find_reply_by_agent
# ---------------------------------------------------------------------------

def test_find_reply_by_agent_respects_cutoff():
    """Only the agent's comment posted after the cutoff comment counts as a reply."""
    rows = (
        ("before", "agent-x", "early", 100),
        ("cutoff", "coord", "mention", 50),
        ("after", "agent-x", "late", 10),
    )
    comments = [
        {"id": cid, "author_id": author, "content": text, "created_at": _past(age)}
        for cid, author, text, age in rows
    ]
    reply = _find_reply_by_agent(comments, "agent-x", "cutoff")
    assert reply == ("after", "late")


def test_find_reply_by_agent_none_when_no_match():
    """Comments from other authors never count as the agent's reply."""
    unrelated = {
        "id": "x",
        "author_id": "other",
        "content": "hi",
        "created_at": _utcnow(),
    }
    reply = _find_reply_by_agent([unrelated], "agent-x", "somewhere")
    assert reply is None


# ---------------------------------------------------------------------------
# _start_round
# ---------------------------------------------------------------------------

def test_start_round_posts_meta_judge_mention_and_sets_phase(tmp_path):
    """Starting a pending round posts one meta-judge mention and moves to ``awaiting_rubric``."""
    round_, queue = _make_round(tmp_path, phase="convened", status="pending")
    client = FakeClient()

    _start_round(round_, client, queue, FakeConfig(), _logger)

    assert round_.status == "running"
    posted = client.posted_comments
    assert len(posted) == 1
    assert "mention://agent/agent-meta" in posted[0]
    assert round_.phase == "awaiting_rubric"
    assert round_.meta_judge_comment_id != ""


def test_start_round_marks_error_on_api_failure(tmp_path):
    """If fetching the issue raises, the round ends with status ``error``."""

    class BrokenClient(FakeClient):
        # Simulates an API failure on the first call the test expects to matter.
        def get_issue(self, issue_id):
            raise RuntimeError("no issue")

    round_, queue = _make_round(tmp_path, phase="convened", status="pending")
    _start_round(round_, BrokenClient(), queue, FakeConfig(), _logger)
    assert round_.status == "error"


# ---------------------------------------------------------------------------
# _advance_awaiting_rubric
# ---------------------------------------------------------------------------

def test_advance_awaiting_rubric_waits_when_meta_judge_silent(tmp_path):
    """With no meta-judge reply and no timeout, the round stays in ``awaiting_rubric``."""
    round_, queue = _make_round(
        tmp_path,
        phase="awaiting_rubric",
        status="running",
        meta_judge_comment_id="meta-c",
        phase_entered_at=_utcnow(),
    )
    mention = {"id": "meta-c", "author_id": "coord", "content": "mention", "created_at": _past(5)}
    client = FakeClient()
    client.comments = [mention]

    _advance_awaiting_rubric(round_, client, queue, FakeConfig(), _logger)

    assert round_.phase == "awaiting_rubric"
    assert round_.status == "running"


def test_advance_awaiting_rubric_errors_on_malformed_yaml(tmp_path):
    """A meta-judge reply with unparseable YAML puts the round into ``error``."""
    round_, queue = _make_round(
        tmp_path,
        phase="awaiting_rubric",
        status="running",
        meta_judge_comment_id="meta-c",
        phase_entered_at=_utcnow(),
    )
    mention = {"id": "meta-c", "author_id": "coord", "content": "mention", "created_at": _past(5)}
    bad_reply = {
        "id": "reply",
        "author_id": "agent-meta",
        "content": "```yaml\nnot: valid: nested\n```",
        "created_at": _utcnow(),
    }
    client = FakeClient()
    client.comments = [mention, bad_reply]

    _advance_awaiting_rubric(round_, client, queue, FakeConfig(), _logger)

    assert round_.phase == "error"
    assert round_.status == "error"


def test_advance_awaiting_rubric_moves_to_awaiting_judges_on_valid_rubric(tmp_path):
    """A valid rubric is stored, all three judges are mentioned, and the phase advances."""
    round_, queue = _make_round(
        tmp_path,
        phase="awaiting_rubric",
        status="running",
        meta_judge_comment_id="meta-c",
        phase_entered_at=_utcnow(),
    )
    mention = {"id": "meta-c", "author_id": "coord", "content": "mention", "created_at": _past(5)}
    rubric_reply = {
        "id": "reply",
        "author_id": "agent-meta",
        "content": f"```yaml\n{_rubric_yaml_sample()}\n```",
        "created_at": _utcnow(),
    }
    client = FakeClient()
    client.comments = [mention, rubric_reply]

    _advance_awaiting_rubric(round_, client, queue, FakeConfig(), _logger)

    assert round_.phase == "awaiting_judges"
    assert round_.rubric_yaml != ""
    for agent_id in ("agent-gpt", "agent-claude", "agent-gemini"):
        link = f"mention://agent/{agent_id}"
        assert any(link in posted for posted in client.posted_comments)


def test_advance_awaiting_rubric_errors_on_timeout_without_reply(tmp_path):
    """A phase older than ``round_timeout_s`` with no reply ends in ``error``."""
    cfg = FakeConfig()
    overdue = cfg.round_timeout_s + 5
    round_, queue = _make_round(
        tmp_path,
        phase="awaiting_rubric",
        status="running",
        meta_judge_comment_id="meta-c",
        phase_entered_at=_past(overdue),
    )
    client = FakeClient()
    client.comments = [
        {"id": "meta-c", "author_id": "coord", "content": "mention", "created_at": _past(overdue)},
    ]

    _advance_awaiting_rubric(round_, client, queue, cfg, _logger)

    assert round_.phase == "error"
    assert round_.status == "error"


# ---------------------------------------------------------------------------
# _advance_awaiting_judges
# ---------------------------------------------------------------------------

def test_advance_awaiting_judges_waits_when_missing_reports(tmp_path):
    """With only one of three judge reports in, the phase does not change."""
    round_, queue = _make_round(
        tmp_path,
        phase="awaiting_judges",
        status="running",
        judge_mention_comment_id="jm-c",
        phase_entered_at=_utcnow(),
    )
    mention = {"id": "jm-c", "author_id": "coord", "content": "mention", "created_at": _past(5)}
    only_report = _reply_comment("agent-gpt", _judge_report(3.5, {"Correctness": 4}))
    client = FakeClient()
    client.comments = [mention, only_report]

    _advance_awaiting_judges(round_, client, queue, FakeConfig(), _logger)

    assert round_.phase == "awaiting_judges"


def test_advance_awaiting_judges_accepts_on_consensus(tmp_path):
    """Three agreeing high scores accept the round and mark the issue ``done``."""
    round_, queue = _make_round(
        tmp_path,
        phase="awaiting_judges",
        status="running",
        judge_mention_comment_id="jm-c",
        phase_entered_at=_utcnow(),
    )
    client = FakeClient()
    comments = [
        {"id": "jm-c", "author_id": "coord", "content": "mention", "created_at": _past(5)},
    ]
    for agent_id, final in (("agent-gpt", 4.0), ("agent-claude", 4.1), ("agent-gemini", 4.2)):
        comments.append(_reply_comment(agent_id, _judge_report(final, {"Correctness": 4})))
    client.comments = comments

    _advance_awaiting_judges(round_, client, queue, FakeConfig(), _logger)

    assert round_.phase == "accepted"
    assert round_.status == "done"
    assert client.issue["status"] == "done"


def test_advance_awaiting_judges_rejects_on_consensus_low(tmp_path):
    """Three agreeing low scores reject the round, reopen the issue and ping the worker."""
    round_, queue = _make_round(
        tmp_path,
        phase="awaiting_judges",
        status="running",
        judge_mention_comment_id="jm-c",
        phase_entered_at=_utcnow(),
    )
    client = FakeClient()
    # The issue is assigned to an agent so a retrigger mention can be posted.
    client.issue["assignee_type"] = "agent"
    client.issue["assignee_id"] = "agent-worker"
    client.agents.append({"name": "Worker", "id": "agent-worker"})

    comments = [
        {"id": "jm-c", "author_id": "coord", "content": "mention", "created_at": _past(5)},
    ]
    for agent_id, final in (("agent-gpt", 2.0), ("agent-claude", 2.2), ("agent-gemini", 2.1)):
        comments.append(_reply_comment(agent_id, _judge_report(final, {"Correctness": 2})))
    client.comments = comments

    _advance_awaiting_judges(round_, client, queue, FakeConfig(), _logger)

    assert round_.phase == "rejected"
    assert round_.status == "done"
    assert client.issue["status"] == "in_progress"
    worker_link = "mention://agent/agent-worker"
    assert any(worker_link in posted for posted in client.posted_comments)


def test_advance_awaiting_judges_starts_debate_round_when_spread(tmp_path):
    """Divergent scores (2/4/3) open debate round 1 instead of producing a verdict."""
    round_, queue = _make_round(
        tmp_path,
        phase="awaiting_judges",
        status="running",
        judge_mention_comment_id="jm-c",
        phase_entered_at=_utcnow(),
        debate_round=0,
    )
    client = FakeClient()
    comments = [
        {"id": "jm-c", "author_id": "coord", "content": "mention", "created_at": _past(5)},
    ]
    for agent_id, final, criterion in (
        ("agent-gpt", 2.0, 2),
        ("agent-claude", 4.0, 4),
        ("agent-gemini", 3.0, 3),
    ):
        comments.append(_reply_comment(agent_id, _judge_report(final, {"Correctness": criterion})))
    client.comments = comments

    _advance_awaiting_judges(round_, client, queue, FakeConfig(), _logger)

    assert round_.phase == "awaiting_debate"
    assert round_.debate_round == 1


def test_advance_awaiting_judges_errors_when_no_parseable(tmp_path):
    """After the timeout, three unparseable judge replies put the round into ``error``."""
    overdue = FakeConfig.round_timeout_s + 5
    round_, queue = _make_round(
        tmp_path,
        phase="awaiting_judges",
        status="running",
        judge_mention_comment_id="jm-c",
        phase_entered_at=_past(overdue),
    )
    client = FakeClient()
    comments = [
        {"id": "jm-c", "author_id": "coord", "content": "mention", "created_at": _past(overdue)},
    ]
    for agent_id, junk in (
        ("agent-gpt", "not yaml at all"),
        ("agent-claude", "also not yaml"),
        ("agent-gemini", "garbage"),
    ):
        comments.append(_reply_comment(agent_id, junk))
    client.comments = comments

    _advance_awaiting_judges(round_, client, queue, FakeConfig(), _logger)

    assert round_.phase == "error"
    assert round_.status == "error"


# ---------------------------------------------------------------------------
# Debate round cap
# ---------------------------------------------------------------------------

def test_awaiting_debate_errors_out_at_cap(tmp_path):
    """Still-divergent scores at ``MAX_DEBATE_ROUNDS`` end the round in ``error``."""
    round_, queue = _make_round(
        tmp_path,
        phase="awaiting_debate",
        status="running",
        judge_mention_comment_id="jm-c",
        phase_entered_at=_utcnow(),
        debate_round=MAX_DEBATE_ROUNDS,
    )
    client = FakeClient()
    comments = [
        {"id": "jm-c", "author_id": "coord", "content": "mention", "created_at": _past(5)},
    ]
    for agent_id, final, criterion in (
        ("agent-gpt", 2.0, 2),
        ("agent-claude", 4.0, 4),
        ("agent-gemini", 3.0, 3),
    ):
        comments.append(_reply_comment(agent_id, _judge_report(final, {"Correctness": criterion})))
    client.comments = comments

    _advance_awaiting_debate(round_, client, queue, FakeConfig(), _logger)

    assert round_.phase == "error"
    assert round_.status == "error"


# ---------------------------------------------------------------------------
# Race fix: issue status moves BEFORE round marked done
# ---------------------------------------------------------------------------

def test_apply_verdict_updates_issue_before_marking_round_done(tmp_path):
    """On ACCEPT the issue status is updated before the round is marked done.

    Both the client and the queue are subclassed to log their status updates
    into one shared list, whose order is then checked.
    """
    events: list[str] = []

    class RecordingClient(FakeClient):
        def update_issue_status(self, issue_id, status):
            events.append(f"issue:{status}")
            return super().update_issue_status(issue_id, status)

    class RecordingQueue(DebateQueue):
        def update_status(self, round_id, status):
            events.append(f"round:{status}")
            super().update_status(round_id, status)

    round_ = Round(
        round_id="r1",
        issue_id="issue-1",
        identifier="WYL-X",
        title="t",
        enqueued_at=_utcnow(),
        status="running",
        phase="awaiting_judges",
        phase_entered_at=_utcnow(),
    )
    queue = RecordingQueue.load(tmp_path / "queue.json")
    queue.rounds.append(round_)
    queue.save()
    client = RecordingClient()

    _apply_verdict(round_, client, queue, "ACCEPT", "VERDICT: ACCEPT\nScore 4.0", _logger)

    assert "issue:done" in events
    assert "round:done" in events
    issue_at = events.index("issue:done")
    round_at = events.index("round:done")
    assert issue_at < round_at


# ---------------------------------------------------------------------------
# _post_rejection_retrigger corner cases
# ---------------------------------------------------------------------------

def test_reject_retrigger_skipped_for_member_assignee(tmp_path):
    """A member assignee gets no mention; a single manual follow-up note is posted."""
    round_, _queue = _make_round(tmp_path)
    client = FakeClient()
    client.issue.update(assignee_type="member", assignee_id="user-1")

    comment_id = _post_rejection_retrigger(
        round_, client, client.issue, "VERDICT: REJECT", _logger,
    )

    assert comment_id is None
    posted = client.posted_comments
    assert len(posted) == 1
    assert "mention://" not in posted[0]
    assert "Manual follow-up" in posted[0]


def test_reject_retrigger_skipped_when_no_assignee(tmp_path):
    """An unassigned issue yields no retrigger comment id."""
    round_, _queue = _make_round(tmp_path)
    client = FakeClient()
    client.issue.update(assignee_type=None, assignee_id=None)

    comment_id = _post_rejection_retrigger(
        round_, client, client.issue, "VERDICT: REJECT", _logger,
    )

    assert comment_id is None


def test_reject_retrigger_includes_verbatim_description(tmp_path):
    """The retrigger comment repeats every non-blank description line plus the rework text."""
    description = "Line A.\n\nLine B with unique-marker-123.\n\nLine C."
    round_, _queue = _make_round(tmp_path)
    client = FakeClient()
    client.issue["assignee_type"] = "agent"
    client.issue["assignee_id"] = "agent-worker"
    client.agents.append({"name": "Worker", "id": "agent-worker"})
    client.issue["description"] = description

    comment_id = _post_rejection_retrigger(
        round_, client, client.issue, "VERDICT: REJECT", _logger,
    )

    assert comment_id is not None
    body = client.posted_comments[-1]
    non_blank_lines = [line for line in description.splitlines() if line.strip()]
    for line in non_blank_lines:
        assert line in body
    assert REWORK_INSTRUCTIONS in body


# ---------------------------------------------------------------------------
# _advance_round dispatch
# ---------------------------------------------------------------------------

def test_advance_round_dispatches_convened_to_start(tmp_path):
    """A pending ``convened`` round ends up in ``awaiting_rubric`` after one advance."""
    round_, queue = _make_round(tmp_path, phase="convened", status="pending")
    _advance_round(round_, FakeClient(), queue, FakeConfig(), _logger)
    assert round_.phase == "awaiting_rubric"


def test_advance_round_corrects_terminal_phase_with_wrong_status(tmp_path):
    """An ``accepted`` round still marked ``running`` is corrected to ``done``."""
    round_, queue = _make_round(tmp_path, phase="accepted", status="running")
    _advance_round(round_, FakeClient(), queue, FakeConfig(), _logger)
    assert round_.status == "done"