# coordinator/tests/test_orchestrator.py

"""Unit tests for coordinator.orchestrator (WYL-45)."""
from __future__ import annotations

import logging
import uuid
from dataclasses import dataclass, field
from datetime import datetime, timezone, timedelta
from pathlib import Path
from typing import Any

import pytest

from coordinator.orchestrator import (
    DEBATER_NAMES,
    JUDGE_NAME,
    _advance_awaiting_debaters,
    _advance_awaiting_judge,
    _collect_debater_replies,
    _collect_judge_reply,
    _find_commit_url,
    _parse_verdict,
    _start_round,
    orchestrate_pending,
)
from coordinator.queue import DebateQueue, Round


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

def _utcnow() -> str:
    """Return the current UTC time formatted as ``YYYY-MM-DDTHH:MM:SSZ``."""
    current = datetime.now(timezone.utc)
    return current.strftime("%Y-%m-%dT%H:%M:%SZ")


def _past(seconds: float) -> str:
    """Return the UTC timestamp *seconds* before now, as ``YYYY-MM-DDTHH:MM:SSZ``."""
    offset = timedelta(seconds=seconds)
    moment = datetime.now(timezone.utc) - offset
    return moment.strftime("%Y-%m-%dT%H:%M:%SZ")


@dataclass
class FakeConfig:
    """Configuration stand-in passed as the config argument to the orchestrator functions.

    Every field has a default, so tests construct it with a bare ``FakeConfig()``.
    NOTE(review): presumably mirrors the attribute names of the real coordinator
    config object — confirm against the production class.
    """

    server_url: str = "http://fake"
    workspace_id: str = "ws-1"
    token: str = "tok"
    poll_interval_s: int = 30
    # The timeout tests in this file back-date a round by 700 s to exceed this value.
    round_timeout_s: int = 600
    max_concurrent_rounds: int = 3
    # Fixed placeholder paths under /tmp.
    seen_file: Path = Path("/tmp/seen.json")
    queue_file: Path = Path("/tmp/queue.json")
    log_file: Path = Path("/tmp/coordinator.log")


@dataclass
class FakeClient:
    """In-memory test double used in place of MulticaClient.

    State lives in plain lists/dicts so tests can seed ``comments`` and
    ``agents`` up front and inspect ``posted_comments`` and
    ``status_updates`` afterwards. The ``issue_id`` arguments are accepted
    but ignored by the read methods: there is only one issue.
    """

    issue: dict[str, Any] = field(default_factory=lambda: {
        "id": "issue-1",
        "title": "Test issue",
        "description": "Test description. Commit: https://git.example.com/commit/abc123",
        "status": "in_review",
    })
    comments: list[dict[str, Any]] = field(default_factory=list)
    agents: list[dict[str, Any]] = field(default_factory=list)
    posted_comments: list[str] = field(default_factory=list)
    status_updates: list[tuple[str, str]] = field(default_factory=list)
    post_comment_returns_id: str = "comment-coord"

    def get_issue(self, issue_id: str) -> dict[str, Any]:
        """Return the single stored issue dict (the same object, not a copy)."""
        return self.issue

    def list_comments(self, issue_id: str) -> list[dict[str, Any]]:
        """Return a shallow copy of the comment thread."""
        return [*self.comments]

    def post_comment(self, issue_id: str, content: str) -> dict[str, Any]:
        """Record *content*, append a coordinator-authored comment to the thread, and return it."""
        issued_id = self.post_comment_returns_id
        self.posted_comments.append(content)
        # The next call hands out "comment-<number of posts so far>".
        self.post_comment_returns_id = "comment-" + str(len(self.posted_comments))
        record = {
            "id": issued_id,
            "content": content,
            "created_at": _utcnow(),
            "author_id": "coord",
            "author_type": "member",
        }
        self.comments.append(record)
        return record

    def list_agents(self) -> list[dict[str, Any]]:
        """Return a shallow copy of the seeded agent list."""
        return [*self.agents]

    def find_agents_by_name(self, names) -> dict[str, str]:
        """Map each requested agent name that exists in ``agents`` to its ID."""
        wanted = set(names)
        matches: dict[str, str] = {}
        for agent in self.agents:
            agent_name = agent["name"]
            if agent_name in wanted:
                matches[agent_name] = agent["id"]
        return matches

    def update_issue_status(self, issue_id: str, status: str) -> dict[str, Any]:
        """Log the ``(issue_id, status)`` pair, apply the status to the issue, and return the issue."""
        self.status_updates.append((issue_id, status))
        self.issue["status"] = status
        return self.issue

    def update_issue(self, issue_id: str, **fields: Any) -> dict[str, Any]:
        """Merge the given keyword fields into the stored issue and return it."""
        self.issue.update(fields)
        return self.issue


def _make_round(
    *,
    status: str = "pending",
    phase: str = "convened",
    phase_entered_at: str = "",
    coordinator_comment_id: str = "",
    judge_comment_id: str = "",
) -> Round:
    """Build a ``Round`` for issue ``issue-1`` with a fresh random round ID.

    All arguments are keyword-only. An empty ``phase_entered_at`` is replaced
    by the current UTC timestamp.
    """
    fresh_id = str(uuid.uuid4())
    enqueued = _utcnow()
    entered = phase_entered_at if phase_entered_at else _utcnow()
    return Round(
        round_id=fresh_id,
        issue_id="issue-1",
        identifier="WYL-99",
        title="Test issue",
        enqueued_at=enqueued,
        status=status,
        phase=phase,
        phase_entered_at=entered,
        coordinator_comment_id=coordinator_comment_id,
        judge_comment_id=judge_comment_id,
    )


def _make_queue(tmp_path: Path, *rounds: Round) -> DebateQueue:
    """Load a queue from ``tmp_path / "queue.json"``, append *rounds*, save it, and return it."""
    queue_path = tmp_path / "queue.json"
    queue = DebateQueue.load(queue_path)
    for item in rounds:
        queue.rounds.append(item)
    queue.save()
    return queue


# Shared logger handed to every orchestrator function called by the tests below.
_logger = logging.getLogger("test.orchestrator")


# ---------------------------------------------------------------------------
# _parse_verdict
# ---------------------------------------------------------------------------

def test_parse_verdict_accept():
    """An upper-case ACCEPT verdict line followed by prose parses as "ACCEPT"."""
    body = "VERDICT: ACCEPT\n\nGreat work."
    parsed = _parse_verdict(body)
    assert parsed == "ACCEPT"


def test_parse_verdict_reject():
    """An upper-case REJECT verdict line followed by prose parses as "REJECT"."""
    body = "VERDICT: REJECT\n\nMissing tests."
    parsed = _parse_verdict(body)
    assert parsed == "REJECT"


def test_parse_verdict_case_insensitive():
    """A lower-case verdict line is accepted and returned in upper case."""
    body = "verdict: accept"
    parsed = _parse_verdict(body)
    assert parsed == "ACCEPT"


def test_parse_verdict_none_when_absent():
    """Text without a verdict line yields ``None``."""
    parsed = _parse_verdict("No verdict here.")
    assert parsed is None


# ---------------------------------------------------------------------------
# _find_commit_url
# ---------------------------------------------------------------------------

def test_find_commit_url_found():
    """A commit link embedded in a comment body is returned verbatim."""
    only_comment = {
        "content": "Watcher implemented. Commit: https://git.example.com/multica/foo/commit/abc123",
    }
    found = _find_commit_url([only_comment])
    assert found == "https://git.example.com/multica/foo/commit/abc123"


def test_find_commit_url_returns_last():
    """When several comments carry commit links, the link from the later comment wins."""
    first = {"content": "Commit: https://git.example.com/multica/foo/commit/aaa111"}
    second = {"content": "Follow-up commit: https://git.example.com/multica/foo/commit/bbb222"}
    found = _find_commit_url([first, second])
    assert found == "https://git.example.com/multica/foo/commit/bbb222"


def test_find_commit_url_empty_when_absent():
    """Comments without any commit link produce an empty string."""
    thread = [{"content": "No link here."}]
    found = _find_commit_url(thread)
    assert found == ""


# ---------------------------------------------------------------------------
# _collect_debater_replies
# ---------------------------------------------------------------------------

def _make_comment(
    cid: str,
    content: str,
    author_id: str = "anon",
    ts: str = "",
) -> dict[str, Any]:
    """Build an agent-authored comment dict.

    An empty *ts* is replaced by the current UTC timestamp.
    """
    comment: dict[str, Any] = {
        "id": cid,
        "content": content,
        "author_id": author_id,
        "author_type": "agent",
    }
    comment["created_at"] = ts if ts else _utcnow()
    return comment


def test_collect_replies_by_agent_id():
    """A reply authored by a mapped agent ID is attributed to that debater."""
    opener = {
        "id": "coord",
        "content": "Debate opened",
        "author_id": "coord",
        "author_type": "member",
        "created_at": _past(10),
    }
    reply = _make_comment("c1", "Evidence here.", author_id="agent-sd", ts=_utcnow())

    collected = _collect_debater_replies(
        [opener, reply], {"Senior Developer": "agent-sd"}, "coord"
    )

    assert "Senior Developer" in collected


def test_collect_replies_content_fallback():
    """Content-based marker accepted when agent ID not in map."""
    opener = {
        "id": "coord",
        "content": "Debate opened",
        "author_id": "coord",
        "author_type": "member",
        "created_at": _past(10),
    }
    # The author is unknown to the (empty) agent map; only the "[<name> response]"
    # prefix identifies the debater.
    tagged_reply = _make_comment(
        "c1",
        "[Senior Developer response]: grep output here.",
        author_id="someone-else",
        ts=_utcnow(),
    )

    collected = _collect_debater_replies([opener, tagged_reply], {}, "coord")

    assert "Senior Developer" in collected


def test_collect_replies_skips_before_cutoff():
    """Comments before coordinator's mention are ignored."""
    premature = _make_comment("early", "Early reply", author_id="agent-sd", ts=_past(20))
    opener = {
        "id": "coord",
        "content": "Debate opened",
        "author_id": "coord",
        "author_type": "member",
        "created_at": _past(10),
    }

    collected = _collect_debater_replies(
        [premature, opener], {"Senior Developer": "agent-sd"}, "coord"
    )

    assert "Senior Developer" not in collected


def test_collect_replies_no_duplicate_per_debater():
    """Only the first reply from a debater is kept when the same agent replies twice."""
    opener = {
        "id": "coord",
        "content": "x",
        "author_id": "coord",
        "author_type": "member",
        "created_at": _past(10),
    }
    thread = [
        opener,
        _make_comment("c1", "First reply", author_id="agent-sd", ts=_utcnow()),
        _make_comment("c2", "Second reply", author_id="agent-sd", ts=_utcnow()),
    ]

    collected = _collect_debater_replies(thread, {"Senior Developer": "agent-sd"}, "coord")

    assert collected["Senior Developer"] == "First reply"


# ---------------------------------------------------------------------------
# _collect_judge_reply
# ---------------------------------------------------------------------------

def test_collect_judge_reply_by_agent_id():
    """A verdict comment authored by the judge's agent ID is returned."""
    request = {
        "id": "jc",
        "content": "Verdict requested",
        "author_id": "coord",
        "created_at": _past(10),
    }
    verdict = _make_comment(
        "j1", "VERDICT: ACCEPT\n\nGreat.", author_id="agent-judge", ts=_utcnow()
    )

    reply = _collect_judge_reply([request, verdict], "agent-judge", "jc")

    assert reply is not None
    assert "ACCEPT" in reply


def test_collect_judge_reply_content_fallback():
    """With an empty judge ID, a comment containing a verdict line is still picked up."""
    request = {"id": "jc", "content": "x", "author_id": "coord", "created_at": _past(10)}
    verdict = _make_comment(
        "j1", "VERDICT: REJECT\n\nMissing tests.", author_id="anyone", ts=_utcnow()
    )

    reply = _collect_judge_reply([request, verdict], "", "jc")

    assert reply is not None
    assert "REJECT" in reply


def test_collect_judge_reply_none_when_absent():
    """A thread holding only the verdict request yields ``None``."""
    request = {"id": "jc", "content": "x", "author_id": "coord", "created_at": _past(10)}
    reply = _collect_judge_reply([request], "agent-judge", "jc")
    assert reply is None


# ---------------------------------------------------------------------------
# _start_round
# ---------------------------------------------------------------------------

def test_start_round_marks_running_before_post(tmp_path):
    """Status must be 'running' before any API call (early guard against double-enqueue).

    The queue and the client are both subclassed to append to a shared
    ``call_order`` log, so the relative order of the status write and the
    comment post can be checked after ``_start_round`` returns.
    """
    call_order: list[str] = []

    class TrackingQueue(DebateQueue):
        def update_status(self, round_id, status):
            call_order.append(f"update_status:{status}")
            super().update_status(round_id, status)

    class TrackingClient(FakeClient):
        def post_comment(self, issue_id, content):
            call_order.append("post_comment")
            return super().post_comment(issue_id, content)

    r = _make_round()
    q = TrackingQueue.load(tmp_path / "queue.json")
    q.rounds.append(r)
    q.save()

    _start_round(r, TrackingClient(), q, FakeConfig(), _logger)

    # Check presence first: a missing call then fails with a readable
    # assertion message rather than an exception from the index lookup.
    assert "update_status:running" in call_order, (
        f"round was never marked running, got order: {call_order}"
    )
    assert "post_comment" in call_order, (
        f"no comment was posted, got order: {call_order}"
    )
    # update_status("running") must precede post_comment
    running_idx = call_order.index("update_status:running")
    post_idx = call_order.index("post_comment")
    assert running_idx < post_idx, (
        f"Expected update_status:running before post_comment, got order: {call_order}"
    )


def test_start_round_posts_debater_comment(tmp_path):
    """Starting a round posts exactly one opening comment that names every debater."""
    client = FakeClient()
    pending = _make_round()
    queue = _make_queue(tmp_path, pending)

    _start_round(pending, client, queue, FakeConfig(), _logger)

    posted = client.posted_comments
    assert len(posted) == 1
    opening = posted[0]
    assert "Debate round opened" in opening
    assert all(name in opening for name in DEBATER_NAMES)


def test_start_round_sets_phase_awaiting_debaters(tmp_path):
    """After start, the round is running, awaiting debaters, and has a coordinator comment ID."""
    pending = _make_round()
    queue = _make_queue(tmp_path, pending)

    _start_round(pending, FakeClient(), queue, FakeConfig(), _logger)

    assert pending.phase == "awaiting_debaters"
    assert pending.status == "running"
    # must be set
    assert pending.coordinator_comment_id


def test_start_round_error_on_api_failure(tmp_path):
    """If posting the opening comment raises, the round ends up in status 'error'."""

    class BrokenClient(FakeClient):
        def post_comment(self, issue_id, content):
            raise RuntimeError("API down")

    pending = _make_round()
    queue = _make_queue(tmp_path, pending)
    failing_client = BrokenClient()

    _start_round(pending, failing_client, queue, FakeConfig(), _logger)

    assert pending.status == "error"


# ---------------------------------------------------------------------------
# _advance_awaiting_debaters — debater replies
# ---------------------------------------------------------------------------

def _debater_round(tmp_path: Path) -> tuple[Round, DebateQueue, FakeClient]:
    """Return ``(round, queue, client)`` for a round already in the awaiting_debaters phase.

    The client is seeded with the coordinator's opening comment (dated five
    seconds ago) and with one agent per debater name plus the judge.
    """
    active = _make_round(
        status="running",
        phase="awaiting_debaters",
        phase_entered_at=_utcnow(),
        coordinator_comment_id="coord-comment",
    )
    queue = _make_queue(tmp_path, active)

    opening_comment = {
        "id": "coord-comment",
        "content": "Debate opened",
        "author_id": "coord",
        "author_type": "member",
        "created_at": _past(5),
    }
    # Debaters get IDs agent-0, agent-1, ... in DEBATER_NAMES order; the judge goes last.
    roster = [{"name": name, "id": f"agent-{i}"} for i, name in enumerate(DEBATER_NAMES)]
    roster.append({"name": JUDGE_NAME, "id": "agent-judge"})

    client = FakeClient()
    client.comments.append(opening_comment)
    client.agents = roster
    return active, queue, client


def test_advance_debaters_waits_when_not_all_replied(tmp_path):
    """With no debater replies yet, the round stays put and no verdict request is posted."""
    active, queue, client = _debater_round(tmp_path)

    _advance_awaiting_debaters(active, client, queue, FakeConfig(), _logger)

    # Not enough replies → no judge comment posted
    assert active.phase == "awaiting_debaters"
    assert not any("Verdict requested" in posted for posted in client.posted_comments)


def test_advance_debaters_proceeds_when_all_replied(tmp_path):
    """Once every debater has replied, a verdict request naming each debater is posted."""
    active, queue, client = _debater_round(tmp_path)

    # One reply per debater, authored by the matching seeded agent ID.
    client.comments.extend(
        {
            "id": f"reply-{i}",
            "content": f"[{name} response]: Evidence here.",
            "author_id": f"agent-{i}",
            "author_type": "agent",
            "created_at": _utcnow(),
        }
        for i, name in enumerate(DEBATER_NAMES)
    )

    _advance_awaiting_debaters(active, client, queue, FakeConfig(), _logger)

    assert active.phase == "awaiting_judge"
    assert active.judge_comment_id
    verdict_request = client.posted_comments[-1]
    assert "Verdict requested" in verdict_request
    assert all(name in verdict_request for name in DEBATER_NAMES)


def test_advance_debaters_timeout_with_partial_evidence(tmp_path):
    """After timeout, proceed with partial evidence (missing debaters noted in transcript)."""
    active, queue, client = _debater_round(tmp_path)

    # Only the first debater answers.
    lone_reply = {
        "id": "r1",
        "content": f"[{DEBATER_NAMES[0]} response]: My evidence.",
        "author_id": "agent-0",
        "author_type": "agent",
        "created_at": _utcnow(),
    }
    client.comments.append(lone_reply)

    # Back-date the phase entry by 700 s, past FakeConfig.round_timeout_s (600).
    active.phase_entered_at = _past(700)
    queue.save()

    _advance_awaiting_debaters(active, client, queue, FakeConfig(), _logger)

    assert active.phase == "awaiting_judge"
    verdict_request = client.posted_comments[-1]
    assert any(marker in verdict_request for marker in ("timed out", "no response"))


# ---------------------------------------------------------------------------
# _advance_awaiting_judge — verdict handling
# ---------------------------------------------------------------------------

def _judge_round(tmp_path: Path, *, phase_entered_at: str = "") -> tuple[Round, DebateQueue, FakeClient]:
    """Return ``(round, queue, client)`` for a round already in the awaiting_judge phase.

    The client knows only the judge agent and holds only the verdict-request
    comment (dated five seconds ago). An empty ``phase_entered_at`` means "now".
    """
    entered = phase_entered_at if phase_entered_at else _utcnow()
    judging = _make_round(
        status="running",
        phase="awaiting_judge",
        phase_entered_at=entered,
        coordinator_comment_id="coord-comment",
        judge_comment_id="judge-comment",
    )
    queue = _make_queue(tmp_path, judging)

    verdict_request = {
        "id": "judge-comment",
        "content": "Verdict requested",
        "author_id": "coord",
        "author_type": "member",
        "created_at": _past(5),
    }
    client = FakeClient()
    client.agents = [{"name": JUDGE_NAME, "id": "agent-judge"}]
    client.comments = [verdict_request]
    return judging, queue, client


def test_advance_judge_waits_when_no_verdict(tmp_path):
    """Without a judge reply the round stays running/awaiting_judge and the issue is untouched."""
    judging, queue, client = _judge_round(tmp_path)

    _advance_awaiting_judge(judging, client, queue, FakeConfig(), _logger)

    assert (judging.phase, judging.status) == ("awaiting_judge", "running")
    assert not client.status_updates


def test_advance_judge_accept_updates_issue_status_to_done(tmp_path):
    """An ACCEPT verdict finishes the round as accepted and moves the issue to 'done'."""
    judging, queue, client = _judge_round(tmp_path)
    accept_verdict = {
        "id": "verdict1",
        "content": "VERDICT: ACCEPT\n\nLooks good.",
        "author_id": "agent-judge",
        "author_type": "agent",
        "created_at": _utcnow(),
    }
    client.comments.append(accept_verdict)

    _advance_awaiting_judge(judging, client, queue, FakeConfig(), _logger)

    assert judging.status == "done"
    assert judging.phase == "accepted"
    assert client.status_updates == [("issue-1", "done")]


def test_advance_judge_reject_updates_issue_status_to_in_progress(tmp_path):
    """A REJECT verdict finishes the round as rejected and moves the issue to 'in_progress'."""
    judging, queue, client = _judge_round(tmp_path)
    reject_verdict = {
        "id": "verdict1",
        "content": "VERDICT: REJECT\n\nMissing tests.",
        "author_id": "agent-judge",
        "author_type": "agent",
        "created_at": _utcnow(),
    }
    client.comments.append(reject_verdict)

    _advance_awaiting_judge(judging, client, queue, FakeConfig(), _logger)

    assert judging.status == "done"
    assert judging.phase == "rejected"
    assert client.status_updates == [("issue-1", "in_progress")]


def test_advance_judge_timeout_marks_error(tmp_path):
    """Judge timeout: round → error, issue left in_review for human escalation."""
    # 700 s in the past exceeds FakeConfig.round_timeout_s (600).
    stale_entry = _past(700)
    judging, queue, client = _judge_round(tmp_path, phase_entered_at=stale_entry)

    _advance_awaiting_judge(judging, client, queue, FakeConfig(), _logger)

    assert (judging.status, judging.phase) == ("error", "error")
    # Issue status must NOT be changed — leave in_review for humans
    assert not client.status_updates


# ---------------------------------------------------------------------------
# Race condition: issue status before round done  (CRITICAL)
# ---------------------------------------------------------------------------

def test_issue_status_updated_before_round_marked_done(tmp_path):
    """RACE FIX: client.update_issue_status MUST precede queue.update_status('done')."""
    events: list[str] = []

    class TrackingQueue(DebateQueue):
        def update_status(self, round_id, status):
            events.append(f"round:{status}")
            super().update_status(round_id, status)

    class TrackingClient(FakeClient):
        def update_issue_status(self, issue_id, status):
            events.append(f"issue:{status}")
            return super().update_issue_status(issue_id, status)

    judging = _make_round(
        status="running",
        phase="awaiting_judge",
        phase_entered_at=_utcnow(),
        coordinator_comment_id="coord-comment",
        judge_comment_id="judge-comment",
    )
    queue = TrackingQueue.load(tmp_path / "queue.json")
    queue.rounds.append(judging)
    queue.save()

    verdict_request = {
        "id": "judge-comment",
        "content": "x",
        "author_id": "coord",
        "created_at": _past(5),
    }
    accept_verdict = {
        "id": "v1",
        "content": "VERDICT: ACCEPT\n\nAll good.",
        "author_id": "agent-judge",
        "author_type": "agent",
        "created_at": _utcnow(),
    }
    client = TrackingClient()
    client.agents = [{"name": JUDGE_NAME, "id": "agent-judge"}]
    client.comments = [verdict_request, accept_verdict]

    _advance_awaiting_judge(judging, client, queue, FakeConfig(), _logger)

    # Both calls must have happened
    for expected in ("issue:done", "round:done"):
        assert expected in events
    # Issue status MUST precede round-done
    issue_at = events.index("issue:done")
    round_at = events.index("round:done")
    assert issue_at < round_at, (
        f"Expected issue:done before round:done, got order: {events}"
    )


def test_round_not_marked_done_if_issue_update_fails(tmp_path):
    """If issue status update fails, don't mark round done (retry next cycle)."""

    class FailingClient(FakeClient):
        def update_issue_status(self, issue_id, status):
            raise RuntimeError("network error")

    judging = _make_round(
        status="running",
        phase="awaiting_judge",
        phase_entered_at=_utcnow(),
        coordinator_comment_id="coord-comment",
        judge_comment_id="judge-comment",
    )
    queue = _make_queue(tmp_path, judging)

    verdict_request = {
        "id": "judge-comment",
        "content": "x",
        "author_id": "coord",
        "created_at": _past(5),
    }
    accept_verdict = {
        "id": "v1",
        "content": "VERDICT: ACCEPT",
        "author_id": "agent-judge",
        "created_at": _utcnow(),
    }
    client = FailingClient()
    client.agents = [{"name": JUDGE_NAME, "id": "agent-judge"}]
    client.comments = [verdict_request, accept_verdict]

    _advance_awaiting_judge(judging, client, queue, FakeConfig(), _logger)

    # Round must remain running for retry
    assert judging.status == "running"


# ---------------------------------------------------------------------------
# Phase transitions: full cycle
# ---------------------------------------------------------------------------

def test_full_phase_cycle(tmp_path):
    """pending → running/awaiting_debaters → awaiting_judge → accepted/done.

    Drives one round through all three orchestrator steps against a single
    FakeClient, checking the round's status/phase after each step and the
    issue's final status at the end.
    """
    r = _make_round()
    q = _make_queue(tmp_path, r)

    # Step 1: start_round
    client = FakeClient()
    client.agents = [
        *[{"name": n, "id": f"agent-{i}"} for i, n in enumerate(DEBATER_NAMES)],
        {"name": JUDGE_NAME, "id": "agent-judge"},
    ]
    _start_round(r, client, q, FakeConfig(), _logger)
    assert r.status == "running"
    assert r.phase == "awaiting_debaters"
    # The opening comment's ID must have been recorded on the round.
    assert r.coordinator_comment_id

    # Step 2: debaters reply
    for i, name in enumerate(DEBATER_NAMES):
        client.comments.append({
            "id": f"reply-{i}", "content": f"[{name} response]: Evidence.",
            "author_id": f"agent-{i}", "author_type": "agent", "created_at": _utcnow(),
        })
    _advance_awaiting_debaters(r, client, q, FakeConfig(), _logger)
    assert r.phase == "awaiting_judge"
    # Likewise the verdict-request comment's ID.
    assert r.judge_comment_id

    # Step 3: judge replies
    client.comments.append({
        "id": "verdict1", "content": "VERDICT: ACCEPT\n\nShipped.",
        "author_id": "agent-judge", "author_type": "agent", "created_at": _utcnow(),
    })
    _advance_awaiting_judge(r, client, q, FakeConfig(), _logger)
    assert r.phase == "accepted"
    assert r.status == "done"
    assert client.issue["status"] == "done"


# ---------------------------------------------------------------------------
# Restart resume: in-flight rounds resume from correct phase
# ---------------------------------------------------------------------------

def test_restart_resumes_awaiting_debaters(tmp_path):
    """On restart, a running/awaiting_debaters round picks up without re-posting comment."""
    in_flight = _make_round(
        status="running",
        phase="awaiting_debaters",
        phase_entered_at=_utcnow(),
        coordinator_comment_id="existing-coord-comment",
    )
    queue = _make_queue(tmp_path, in_flight)

    existing_opening = {
        "id": "existing-coord-comment",
        "content": "Debate opened",
        "author_id": "coord",
        "created_at": _past(60),
    }
    client = FakeClient()
    client.comments = [existing_opening]

    orchestrate_pending(queue, FakeConfig(), _logger, client=client)

    # Must NOT post another debater comment
    assert not any("Debate round opened" in posted for posted in client.posted_comments)
    # Phase should still be awaiting_debaters (no replies)
    assert in_flight.phase == "awaiting_debaters"


def test_restart_resumes_awaiting_judge(tmp_path):
    """On restart, a running/awaiting_judge round resumes without re-posting judge comment."""
    in_flight = _make_round(
        status="running",
        phase="awaiting_judge",
        phase_entered_at=_utcnow(),
        coordinator_comment_id="coord-c",
        judge_comment_id="judge-c",
    )
    queue = _make_queue(tmp_path, in_flight)

    existing_request = {
        "id": "judge-c",
        "content": "Verdict requested",
        "author_id": "coord",
        "created_at": _past(30),
    }
    client = FakeClient()
    client.comments = [existing_request]

    orchestrate_pending(queue, FakeConfig(), _logger, client=client)

    # Must NOT post another judge comment
    assert not any("Verdict requested" in posted for posted in client.posted_comments)
    assert in_flight.phase == "awaiting_judge"


# ---------------------------------------------------------------------------
# orchestrate_pending: pending rounds are started, running rounds advanced
# ---------------------------------------------------------------------------

def test_orchestrate_pending_starts_pending(tmp_path):
    """A pending round is started: it becomes running and the opening comment is posted."""
    client = FakeClient()
    pending = _make_round()
    queue = _make_queue(tmp_path, pending)

    orchestrate_pending(queue, FakeConfig(), _logger, client=client)

    assert pending.status == "running"
    opening_posts = [c for c in client.posted_comments if "Debate round opened" in c]
    assert opening_posts


def test_orchestrate_pending_advances_running(tmp_path):
    """Running/awaiting_judge round with a verdict is completed."""
    in_flight = _make_round(
        status="running",
        phase="awaiting_judge",
        phase_entered_at=_utcnow(),
        coordinator_comment_id="coord-c",
        judge_comment_id="judge-c",
    )
    queue = _make_queue(tmp_path, in_flight)

    verdict_request = {
        "id": "judge-c",
        "content": "x",
        "author_id": "coord",
        "created_at": _past(10),
    }
    reject_verdict = {
        "id": "v1",
        "content": "VERDICT: REJECT\n\nNeeds work.",
        "author_id": "agent-judge",
        "created_at": _utcnow(),
    }
    client = FakeClient()
    client.agents = [{"name": JUDGE_NAME, "id": "agent-judge"}]
    client.comments = [verdict_request, reject_verdict]

    orchestrate_pending(queue, FakeConfig(), _logger, client=client)

    assert in_flight.status == "done"
    assert in_flight.phase == "rejected"