0e44846032
New module: src/coordinator/orchestrator.py
- DEBATER_NAMES, JUDGE_NAME, DEBATER_PROMPTS, JUDGE_PROMPT_TEMPLATE hardcoded for v1
- Per-debater prompts tell each debater exactly which tool output to ground evidence in
- orchestrate_pending() is the main entry point called from watch_loop
- _start_round(): pending→running, posts debater mention comment, phase→awaiting_debaters
- _advance_awaiting_debaters(): polls for replies, handles timeout with partial evidence, posts judge comment, phase→awaiting_judge
- _advance_awaiting_judge(): polls for verdict; RACE FIX — update_issue_status() is called BEFORE queue.update_status("done") so poll_once can never double-enqueue
- Detection: primary=author_id match, fallback=[{name} response]: content marker (enables tests)
- Restart-safe: phase field persisted on every mutation; in-flight rounds resume correctly
Extended src/coordinator/queue.py:
- Round gains phase, phase_entered_at, coordinator_comment_id, judge_comment_id fields
- DebateQueue.update_phase() and running() added
- All new fields default-empty so existing queue.json files load cleanly
Extended src/coordinator/multica_client.py:
- update_issue_status() convenience wrapper
- create_issue() for integration / smoke tests
Updated src/coordinator/__main__.py:
- _orchestrate_pending stub replaced with real import from orchestrator
Tests:
- tests/test_orchestrator.py: 32 new unit tests covering phase transitions, timeouts, race fix ordering, restart resume, full lifecycle
- tests/test_integration.py: @pytest.mark.integration test against real API
- smoke_test.py: standalone end-to-end script; ran against real API, verdict OK
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
737 lines
24 KiB
Python
737 lines
24 KiB
Python
"""Unit tests for coordinator.orchestrator (WYL-45)."""
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
import uuid
|
|
from dataclasses import dataclass, field
|
|
from datetime import datetime, timezone, timedelta
|
|
from pathlib import Path
|
|
from typing import Any
|
|
|
|
import pytest
|
|
|
|
from coordinator.orchestrator import (
|
|
DEBATER_NAMES,
|
|
JUDGE_NAME,
|
|
_advance_awaiting_debaters,
|
|
_advance_awaiting_judge,
|
|
_collect_debater_replies,
|
|
_collect_judge_reply,
|
|
_find_commit_url,
|
|
_parse_verdict,
|
|
_start_round,
|
|
orchestrate_pending,
|
|
)
|
|
from coordinator.queue import DebateQueue, Round
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Helpers
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def _utcnow() -> str:
    """Return the current UTC time formatted as ``YYYY-MM-DDTHH:MM:SSZ``."""
    now = datetime.now(timezone.utc)
    return now.strftime("%Y-%m-%dT%H:%M:%SZ")
|
|
|
|
|
|
def _past(seconds: float) -> str:
    """Return the UTC timestamp *seconds* ago, formatted as ``YYYY-MM-DDTHH:MM:SSZ``."""
    offset = timedelta(seconds=seconds)
    return (datetime.now(timezone.utc) - offset).strftime("%Y-%m-%dT%H:%M:%SZ")
|
|
|
|
|
|
@dataclass
class FakeConfig:
    """Plain-data configuration object handed to the orchestrator functions under test.

    Every field has a fixed default so a bare ``FakeConfig()`` is enough for a test.
    """

    # Connection settings (dummy values).
    server_url: str = "http://fake"
    workspace_id: str = "ws-1"
    token: str = "tok"

    # Timing / concurrency knobs. The timeout tests in this module age a round by
    # 700 seconds, i.e. beyond the 600 second ``round_timeout_s`` default.
    poll_interval_s: int = 30
    round_timeout_s: int = 600
    max_concurrent_rounds: int = 3

    # File locations (dummy paths).
    seen_file: Path = Path("/tmp/seen.json")
    queue_file: Path = Path("/tmp/queue.json")
    log_file: Path = Path("/tmp/coordinator.log")
|
|
|
|
|
|
@dataclass
class FakeClient:
    """Controllable stand-in for MulticaClient.

    All state lives in plain attributes so tests can seed it up front and
    inspect it afterwards; no network access is performed.
    """

    # The single issue returned by get_issue(); the update_* methods mutate it in place.
    issue: dict[str, Any] = field(
        default_factory=lambda: {
            "id": "issue-1",
            "title": "Test issue",
            "description": "Test description. Commit: https://git.example.com/commit/abc123",
            "status": "in_review",
        }
    )
    # Comment thread served by list_comments(); post_comment() appends to it.
    comments: list[dict[str, Any]] = field(default_factory=list)
    # Agent records served by list_agents() / find_agents_by_name().
    agents: list[dict[str, Any]] = field(default_factory=list)
    # Raw content of every comment passed to post_comment(), in call order.
    posted_comments: list[str] = field(default_factory=list)
    # (issue_id, status) pairs recorded by update_issue_status(), in call order.
    status_updates: list[tuple[str, str]] = field(default_factory=list)
    # ID assigned to the next posted comment.
    post_comment_returns_id: str = "comment-coord"

    def get_issue(self, issue_id: str) -> dict[str, Any]:
        """Return the stored issue dict; *issue_id* is ignored."""
        return self.issue

    def list_comments(self, issue_id: str) -> list[dict[str, Any]]:
        """Return a shallow copy of the comment thread; *issue_id* is ignored."""
        return [*self.comments]

    def post_comment(self, issue_id: str, content: str) -> dict[str, Any]:
        """Record *content*, append a coordinator-authored comment to the thread, and return it."""
        self.posted_comments.append(content)
        # Hand out the currently configured ID, then derive the ID for the
        # following call from the number of comments posted so far.
        assigned_id, self.post_comment_returns_id = (
            self.post_comment_returns_id,
            f"comment-{len(self.posted_comments)}",
        )
        record = {
            "id": assigned_id,
            "content": content,
            "created_at": _utcnow(),
            "author_id": "coord",
            "author_type": "member",
        }
        self.comments.append(record)
        return record

    def list_agents(self) -> list[dict[str, Any]]:
        """Return a shallow copy of the agent records."""
        return [*self.agents]

    def find_agents_by_name(self, names) -> dict[str, str]:
        """Map each requested name that has an agent record to that agent's ID."""
        wanted = set(names)
        matches: dict[str, str] = {}
        for agent in self.agents:
            if agent["name"] in wanted:
                matches[agent["name"]] = agent["id"]
        return matches

    def update_issue_status(self, issue_id: str, status: str) -> dict[str, Any]:
        """Log the status change, apply it to the stored issue, and return the issue."""
        self.status_updates.append((issue_id, status))
        self.issue["status"] = status
        return self.issue

    def update_issue(self, issue_id: str, **fields: Any) -> dict[str, Any]:
        """Merge arbitrary *fields* into the stored issue and return it."""
        for key, value in fields.items():
            self.issue[key] = value
        return self.issue
|
|
|
|
|
|
def _make_round(
    *,
    status: str = "pending",
    phase: str = "convened",
    phase_entered_at: str = "",
    coordinator_comment_id: str = "",
    judge_comment_id: str = "",
) -> Round:
    """Build a Round for issue ``issue-1`` with a fresh random ``round_id``.

    An empty ``phase_entered_at`` is replaced by the current UTC timestamp;
    all other keyword arguments are forwarded to ``Round`` unchanged.
    """
    attrs: dict[str, Any] = {
        "round_id": str(uuid.uuid4()),
        "issue_id": "issue-1",
        "identifier": "WYL-99",
        "title": "Test issue",
        "enqueued_at": _utcnow(),
        "status": status,
        "phase": phase,
        "phase_entered_at": phase_entered_at if phase_entered_at else _utcnow(),
        "coordinator_comment_id": coordinator_comment_id,
        "judge_comment_id": judge_comment_id,
    }
    return Round(**attrs)
|
|
|
|
|
|
def _make_queue(tmp_path: Path, *rounds: Round) -> DebateQueue:
    """Load a DebateQueue backed by ``tmp_path / "queue.json"``, add *rounds*, save, and return it."""
    queue = DebateQueue.load(tmp_path / "queue.json")
    queue.rounds.extend(rounds)
    queue.save()
    return queue
|
|
|
|
|
|
# Shared logger passed to every orchestrator call made by the tests in this module.
_logger = logging.getLogger("test.orchestrator")
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# _parse_verdict
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def test_parse_verdict_accept():
    """A leading ``VERDICT: ACCEPT`` line parses to ``"ACCEPT"``."""
    verdict = _parse_verdict("VERDICT: ACCEPT\n\nGreat work.")
    assert verdict == "ACCEPT"
|
|
|
|
|
|
def test_parse_verdict_reject():
    """A leading ``VERDICT: REJECT`` line parses to ``"REJECT"``."""
    verdict = _parse_verdict("VERDICT: REJECT\n\nMissing tests.")
    assert verdict == "REJECT"
|
|
|
|
|
|
def test_parse_verdict_case_insensitive():
    """A lower-case verdict line is still reported in upper case."""
    verdict = _parse_verdict("verdict: accept")
    assert verdict == "ACCEPT"
|
|
|
|
|
|
def test_parse_verdict_none_when_absent():
    """Text without a verdict marker yields ``None``."""
    verdict = _parse_verdict("No verdict here.")
    assert verdict is None
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# _find_commit_url
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def test_find_commit_url_found():
    """The commit URL embedded in a comment body is extracted."""
    expected = "https://git.example.com/multica/foo/commit/abc123"
    thread = [
        {"content": "Watcher implemented. Commit: https://git.example.com/multica/foo/commit/abc123"},
    ]
    assert _find_commit_url(thread) == expected
|
|
|
|
|
|
def test_find_commit_url_returns_last():
    """With several commit links in the thread, the one from the later comment wins."""
    older = {"content": "Commit: https://git.example.com/multica/foo/commit/aaa111"}
    newer = {"content": "Follow-up commit: https://git.example.com/multica/foo/commit/bbb222"}
    found = _find_commit_url([older, newer])
    assert found == "https://git.example.com/multica/foo/commit/bbb222"
|
|
|
|
|
|
def test_find_commit_url_empty_when_absent():
    """A thread without any commit link yields the empty string."""
    thread = [{"content": "No link here."}]
    assert _find_commit_url(thread) == ""
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# _collect_debater_replies
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def _make_comment(
    cid: str,
    content: str,
    author_id: str = "anon",
    ts: str = "",
) -> dict[str, Any]:
    """Build an agent-authored comment dict; an empty *ts* defaults to the current UTC time."""
    created_at = ts if ts else _utcnow()
    return dict(
        id=cid,
        content=content,
        author_id=author_id,
        author_type="agent",
        created_at=created_at,
    )
|
|
|
|
|
|
def test_collect_replies_by_agent_id():
    """A reply whose author ID is in the agent map is attributed to that debater."""
    cutoff_ts = _past(10)
    opener = {
        "id": "coord",
        "content": "Debate opened",
        "author_id": "coord",
        "author_type": "member",
        "created_at": cutoff_ts,
    }
    reply = _make_comment("c1", "Evidence here.", author_id="agent-sd", ts=_utcnow())

    replies = _collect_debater_replies(
        [opener, reply], {"Senior Developer": "agent-sd"}, "coord"
    )

    assert "Senior Developer" in replies
|
|
|
|
|
|
def test_collect_replies_content_fallback():
    """Content-based marker accepted when agent ID not in map."""
    cutoff_ts = _past(10)
    opener = {
        "id": "coord",
        "content": "Debate opened",
        "author_id": "coord",
        "author_type": "member",
        "created_at": cutoff_ts,
    }
    # Authored by an unknown ID; only the "[<name> response]:" prefix identifies the debater.
    marked_reply = _make_comment(
        "c1",
        "[Senior Developer response]: grep output here.",
        author_id="someone-else",
        ts=_utcnow(),
    )

    replies = _collect_debater_replies([opener, marked_reply], {}, "coord")

    assert "Senior Developer" in replies
|
|
|
|
|
|
def test_collect_replies_skips_before_cutoff():
    """Comments before coordinator's mention are ignored."""
    early = _past(20)
    cutoff_ts = _past(10)
    thread = [
        _make_comment("early", "Early reply", author_id="agent-sd", ts=early),
        {
            "id": "coord",
            "content": "Debate opened",
            "author_id": "coord",
            "author_type": "member",
            "created_at": cutoff_ts,
        },
    ]

    replies = _collect_debater_replies(thread, {"Senior Developer": "agent-sd"}, "coord")

    assert "Senior Developer" not in replies
|
|
|
|
|
|
def test_collect_replies_no_duplicate_per_debater():
    """When a debater replies twice, only the first reply is kept."""
    cutoff_ts = _past(10)
    thread = [
        {
            "id": "coord",
            "content": "x",
            "author_id": "coord",
            "author_type": "member",
            "created_at": cutoff_ts,
        },
    ]
    thread.append(_make_comment("c1", "First reply", author_id="agent-sd", ts=_utcnow()))
    thread.append(_make_comment("c2", "Second reply", author_id="agent-sd", ts=_utcnow()))

    replies = _collect_debater_replies(thread, {"Senior Developer": "agent-sd"}, "coord")

    assert replies["Senior Developer"] == "First reply"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# _collect_judge_reply
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def test_collect_judge_reply_by_agent_id():
    """A comment authored by the judge's agent ID is returned as the verdict text."""
    cutoff_ts = _past(10)
    request = {
        "id": "jc",
        "content": "Verdict requested",
        "author_id": "coord",
        "created_at": cutoff_ts,
    }
    verdict = _make_comment(
        "j1", "VERDICT: ACCEPT\n\nGreat.", author_id="agent-judge", ts=_utcnow()
    )

    result = _collect_judge_reply([request, verdict], "agent-judge", "jc")

    assert result is not None and "ACCEPT" in result
|
|
|
|
|
|
def test_collect_judge_reply_content_fallback():
    """With no judge agent ID supplied, a comment carrying a verdict is still picked up."""
    cutoff_ts = _past(10)
    request = {"id": "jc", "content": "x", "author_id": "coord", "created_at": cutoff_ts}
    verdict = _make_comment(
        "j1", "VERDICT: REJECT\n\nMissing tests.", author_id="anyone", ts=_utcnow()
    )

    result = _collect_judge_reply([request, verdict], "", "jc")

    assert result is not None and "REJECT" in result
|
|
|
|
|
|
def test_collect_judge_reply_none_when_absent():
    """A thread holding only the verdict request yields ``None``."""
    cutoff_ts = _past(10)
    thread = [
        {"id": "jc", "content": "x", "author_id": "coord", "created_at": cutoff_ts},
    ]
    result = _collect_judge_reply(thread, "agent-judge", "jc")
    assert result is None
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# _start_round
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def test_start_round_marks_running_before_post(tmp_path):
    """Status must be 'running' before any API call (early guard against double-enqueue).

    Both the queue and the client are wrapped so that every
    ``update_status`` / ``post_comment`` call is appended to one shared
    list, which is then checked for relative ordering.
    """
    call_order: list[str] = []

    class TrackingQueue(DebateQueue):
        def update_status(self, round_id, status):
            call_order.append(f"update_status:{status}")
            super().update_status(round_id, status)

    class TrackingClient(FakeClient):
        def post_comment(self, issue_id, content):
            call_order.append("post_comment")
            return super().post_comment(issue_id, content)

    r = _make_round()
    q = TrackingQueue.load(tmp_path / "queue.json")
    q.rounds.append(r)
    q.save()

    _start_round(r, TrackingClient(), q, FakeConfig(), _logger)

    # Check presence explicitly first: a missing call should fail with a
    # readable assertion rather than an opaque StopIteration from next().
    assert "update_status:running" in call_order, (
        f"update_status('running') was never called, got order: {call_order}"
    )
    assert "post_comment" in call_order, (
        f"post_comment was never called, got order: {call_order}"
    )
    # update_status("running") must precede post_comment
    assert call_order.index("update_status:running") < call_order.index("post_comment"), (
        f"Expected update_status:running before post_comment, got order: {call_order}"
    )
|
|
|
|
|
|
def test_start_round_posts_debater_comment(tmp_path):
    """Starting a round posts exactly one opening comment that names every debater."""
    r = _make_round()
    q = _make_queue(tmp_path, r)
    client = FakeClient()

    _start_round(r, client, q, FakeConfig(), _logger)

    posted = client.posted_comments
    assert len(posted) == 1
    opening = posted[0]
    assert "Debate round opened" in opening
    unnamed = [name for name in DEBATER_NAMES if name not in opening]
    assert not unnamed
|
|
|
|
|
|
def test_start_round_sets_phase_awaiting_debaters(tmp_path):
    """After start, the round is running, awaiting debaters, and has a coordinator comment ID."""
    r = _make_round()
    q = _make_queue(tmp_path, r)

    _start_round(r, FakeClient(), q, FakeConfig(), _logger)

    assert (r.phase, r.status) == ("awaiting_debaters", "running")
    assert r.coordinator_comment_id  # must be set
|
|
|
|
|
|
def test_start_round_error_on_api_failure(tmp_path):
    """If posting the opening comment raises, the round ends up in status 'error'."""

    class BrokenClient(FakeClient):
        def post_comment(self, issue_id, content):
            raise RuntimeError("API down")

    r = _make_round()
    q = _make_queue(tmp_path, r)
    failing_client = BrokenClient()

    _start_round(r, failing_client, q, FakeConfig(), _logger)

    assert r.status == "error"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# _advance_awaiting_debaters — debater replies
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def _debater_round(tmp_path: Path) -> tuple[Round, DebateQueue, FakeClient]:
    """Return a round already in awaiting_debaters phase.

    The accompanying client is seeded with the coordinator's opening comment
    (dated five seconds ago) and with one agent record per debater plus the judge.
    """
    r = _make_round(
        status="running",
        phase="awaiting_debaters",
        phase_entered_at=_utcnow(),
        coordinator_comment_id="coord-comment",
    )
    q = _make_queue(tmp_path, r)

    client = FakeClient()
    # Seed coordinator comment in the comment list
    opening = {
        "id": "coord-comment",
        "content": "Debate opened",
        "author_id": "coord",
        "author_type": "member",
        "created_at": _past(5),
    }
    client.comments.append(opening)

    # Seed debater agents, then the judge
    roster = []
    for i, name in enumerate(DEBATER_NAMES):
        roster.append({"name": name, "id": f"agent-{i}"})
    roster.append({"name": JUDGE_NAME, "id": "agent-judge"})
    client.agents = roster

    return r, q, client
|
|
|
|
|
|
def test_advance_debaters_waits_when_not_all_replied(tmp_path):
    """With no debater replies and no timeout, the round stays put and no verdict is requested."""
    r, q, client = _debater_round(tmp_path)

    _advance_awaiting_debaters(r, client, q, FakeConfig(), _logger)

    # Not enough replies → no judge comment posted
    assert r.phase == "awaiting_debaters"
    assert not any("Verdict requested" in c for c in client.posted_comments)
|
|
|
|
|
|
def test_advance_debaters_proceeds_when_all_replied(tmp_path):
    """Once every debater has replied, a verdict request naming them all is posted."""
    r, q, client = _debater_round(tmp_path)

    # Add a reply from each debater
    client.comments.extend(
        {
            "id": f"reply-{i}",
            "content": f"[{name} response]: Evidence here.",
            "author_id": f"agent-{i}",
            "author_type": "agent",
            "created_at": _utcnow(),
        }
        for i, name in enumerate(DEBATER_NAMES)
    )

    _advance_awaiting_debaters(r, client, q, FakeConfig(), _logger)

    assert r.phase == "awaiting_judge"
    assert r.judge_comment_id
    judge_comment = client.posted_comments[-1]
    assert "Verdict requested" in judge_comment
    unnamed = [name for name in DEBATER_NAMES if name not in judge_comment]
    assert not unnamed
|
|
|
|
|
|
def test_advance_debaters_timeout_with_partial_evidence(tmp_path):
    """After timeout, proceed with partial evidence (missing debaters noted in transcript)."""
    r, q, client = _debater_round(tmp_path)

    # Only one debater replies
    lone_reply = {
        "id": "r1",
        "content": f"[{DEBATER_NAMES[0]} response]: My evidence.",
        "author_id": "agent-0",
        "author_type": "agent",
        "created_at": _utcnow(),
    }
    client.comments.append(lone_reply)

    # Simulate timeout by setting phase_entered_at far in the past
    r.phase_entered_at = _past(700)
    q.save()

    cfg = FakeConfig()
    _advance_awaiting_debaters(r, client, q, cfg, _logger)

    assert r.phase == "awaiting_judge"
    judge_comment = client.posted_comments[-1]
    assert any(marker in judge_comment for marker in ("timed out", "no response"))
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# _advance_awaiting_judge — verdict handling
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def _judge_round(tmp_path: Path, *, phase_entered_at: str = "") -> tuple[Round, DebateQueue, FakeClient]:
    """Return a round already in awaiting_judge phase, with its queue and client.

    The client knows only the judge agent, and its thread holds just the
    coordinator's verdict request (dated five seconds ago). An empty
    *phase_entered_at* defaults to the current UTC time.
    """
    entered = phase_entered_at if phase_entered_at else _utcnow()
    r = _make_round(
        status="running",
        phase="awaiting_judge",
        phase_entered_at=entered,
        coordinator_comment_id="coord-comment",
        judge_comment_id="judge-comment",
    )
    q = _make_queue(tmp_path, r)

    client = FakeClient()
    client.agents = [{"name": JUDGE_NAME, "id": "agent-judge"}]
    verdict_request = {
        "id": "judge-comment",
        "content": "Verdict requested",
        "author_id": "coord",
        "author_type": "member",
        "created_at": _past(5),
    }
    client.comments = [verdict_request]
    return r, q, client
|
|
|
|
|
|
def test_advance_judge_waits_when_no_verdict(tmp_path):
    """Without a verdict (and before timeout) nothing changes and the issue is untouched."""
    r, q, client = _judge_round(tmp_path)

    _advance_awaiting_judge(r, client, q, FakeConfig(), _logger)

    assert (r.phase, r.status) == ("awaiting_judge", "running")
    assert not client.status_updates
|
|
|
|
|
|
def test_advance_judge_accept_updates_issue_status_to_done(tmp_path):
    """An ACCEPT verdict finishes the round as accepted and sets the issue to 'done'."""
    r, q, client = _judge_round(tmp_path)
    verdict = {
        "id": "verdict1",
        "content": "VERDICT: ACCEPT\n\nLooks good.",
        "author_id": "agent-judge",
        "author_type": "agent",
        "created_at": _utcnow(),
    }
    client.comments.append(verdict)

    _advance_awaiting_judge(r, client, q, FakeConfig(), _logger)

    assert (r.status, r.phase) == ("done", "accepted")
    assert client.status_updates == [("issue-1", "done")]
|
|
|
|
|
|
def test_advance_judge_reject_updates_issue_status_to_in_progress(tmp_path):
    """A REJECT verdict finishes the round as rejected and sets the issue to 'in_progress'."""
    r, q, client = _judge_round(tmp_path)
    verdict = {
        "id": "verdict1",
        "content": "VERDICT: REJECT\n\nMissing tests.",
        "author_id": "agent-judge",
        "author_type": "agent",
        "created_at": _utcnow(),
    }
    client.comments.append(verdict)

    _advance_awaiting_judge(r, client, q, FakeConfig(), _logger)

    assert (r.status, r.phase) == ("done", "rejected")
    assert client.status_updates == [("issue-1", "in_progress")]
|
|
|
|
|
|
def test_advance_judge_timeout_marks_error(tmp_path):
    """Judge timeout: round → error, issue left in_review for human escalation."""
    stale_since = _past(700)
    r, q, client = _judge_round(tmp_path, phase_entered_at=stale_since)

    _advance_awaiting_judge(r, client, q, FakeConfig(), _logger)

    assert (r.status, r.phase) == ("error", "error")
    # Issue status must NOT be changed — leave in_review for humans
    assert not client.status_updates
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Race condition: issue status before round done (CRITICAL)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def test_issue_status_updated_before_round_marked_done(tmp_path):
    """RACE FIX: client.update_issue_status MUST precede queue.update_status('done')."""
    call_order: list[str] = []

    class TrackingClient(FakeClient):
        def update_issue_status(self, issue_id, status):
            call_order.append(f"issue:{status}")
            return super().update_issue_status(issue_id, status)

    class TrackingQueue(DebateQueue):
        def update_status(self, round_id, status):
            call_order.append(f"round:{status}")
            super().update_status(round_id, status)

    r = _make_round(
        status="running",
        phase="awaiting_judge",
        phase_entered_at=_utcnow(),
        coordinator_comment_id="coord-comment",
        judge_comment_id="judge-comment",
    )
    q = TrackingQueue.load(tmp_path / "queue.json")
    q.rounds.append(r)
    q.save()

    client = TrackingClient()
    client.agents = [{"name": JUDGE_NAME, "id": "agent-judge"}]
    verdict_request = {
        "id": "judge-comment",
        "content": "x",
        "author_id": "coord",
        "created_at": _past(5),
    }
    verdict = {
        "id": "v1",
        "content": "VERDICT: ACCEPT\n\nAll good.",
        "author_id": "agent-judge",
        "author_type": "agent",
        "created_at": _utcnow(),
    }
    client.comments = [verdict_request, verdict]

    _advance_awaiting_judge(r, client, q, FakeConfig(), _logger)

    # Both calls must have happened
    for marker in ("issue:done", "round:done"):
        assert marker in call_order

    # Issue status MUST precede round-done
    issue_idx = call_order.index("issue:done")
    round_idx = call_order.index("round:done")
    assert issue_idx < round_idx, (
        f"Expected issue:done before round:done, got order: {call_order}"
    )
|
|
|
|
|
|
def test_round_not_marked_done_if_issue_update_fails(tmp_path):
    """If issue status update fails, don't mark round done (retry next cycle)."""

    class FailingClient(FakeClient):
        def update_issue_status(self, issue_id, status):
            raise RuntimeError("network error")

    r = _make_round(
        status="running",
        phase="awaiting_judge",
        phase_entered_at=_utcnow(),
        coordinator_comment_id="coord-comment",
        judge_comment_id="judge-comment",
    )
    q = _make_queue(tmp_path, r)

    client = FailingClient()
    client.agents = [{"name": JUDGE_NAME, "id": "agent-judge"}]
    verdict_request = {
        "id": "judge-comment",
        "content": "x",
        "author_id": "coord",
        "created_at": _past(5),
    }
    verdict = {
        "id": "v1",
        "content": "VERDICT: ACCEPT",
        "author_id": "agent-judge",
        "created_at": _utcnow(),
    }
    client.comments = [verdict_request, verdict]

    _advance_awaiting_judge(r, client, q, FakeConfig(), _logger)

    # Round must remain running for retry
    assert r.status == "running"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Phase transitions: full cycle
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def test_full_phase_cycle(tmp_path):
    """pending → running/awaiting_debaters → awaiting_judge → accepted/done.

    Drives one round through all three orchestrator steps against a single
    FakeClient, checking the round's status/phase after each step and the
    issue status at the end.
    """
    r = _make_round()
    q = _make_queue(tmp_path, r)

    # Step 1: start_round
    client = FakeClient()
    client.agents = [
        *[{"name": n, "id": f"agent-{i}"} for i, n in enumerate(DEBATER_NAMES)],
        {"name": JUDGE_NAME, "id": "agent-judge"},
    ]
    _start_round(r, client, q, FakeConfig(), _logger)
    assert r.status == "running"
    assert r.phase == "awaiting_debaters"
    # The opening comment's ID must be recorded on the round (it was
    # previously copied into an unused local and never checked).
    assert r.coordinator_comment_id

    # Step 2: debaters reply
    for i, name in enumerate(DEBATER_NAMES):
        client.comments.append({
            "id": f"reply-{i}", "content": f"[{name} response]: Evidence.",
            "author_id": f"agent-{i}", "author_type": "agent", "created_at": _utcnow(),
        })
    _advance_awaiting_debaters(r, client, q, FakeConfig(), _logger)
    assert r.phase == "awaiting_judge"
    # Likewise the verdict-request comment ID must be recorded.
    assert r.judge_comment_id

    # Step 3: judge replies
    client.comments.append({
        "id": "verdict1", "content": "VERDICT: ACCEPT\n\nShipped.",
        "author_id": "agent-judge", "author_type": "agent", "created_at": _utcnow(),
    })
    _advance_awaiting_judge(r, client, q, FakeConfig(), _logger)
    assert r.phase == "accepted"
    assert r.status == "done"
    assert client.issue["status"] == "done"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Restart resume: in-flight rounds resume from correct phase
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def test_restart_resumes_awaiting_debaters(tmp_path):
    """On restart, a running/awaiting_debaters round picks up without re-posting comment."""
    r = _make_round(
        status="running",
        phase="awaiting_debaters",
        phase_entered_at=_utcnow(),
        coordinator_comment_id="existing-coord-comment",
    )
    q = _make_queue(tmp_path, r)

    client = FakeClient()
    existing_opening = {
        "id": "existing-coord-comment",
        "content": "Debate opened",
        "author_id": "coord",
        "created_at": _past(60),
    }
    client.comments = [existing_opening]

    orchestrate_pending(q, FakeConfig(), _logger, client=client)

    # Must NOT post another debater comment
    assert not any("Debate round opened" in c for c in client.posted_comments)
    # Phase should still be awaiting_debaters (no replies)
    assert r.phase == "awaiting_debaters"
|
|
|
|
|
|
def test_restart_resumes_awaiting_judge(tmp_path):
    """On restart, a running/awaiting_judge round resumes without re-posting judge comment."""
    r = _make_round(
        status="running",
        phase="awaiting_judge",
        phase_entered_at=_utcnow(),
        coordinator_comment_id="coord-c",
        judge_comment_id="judge-c",
    )
    q = _make_queue(tmp_path, r)

    client = FakeClient()
    existing_request = {
        "id": "judge-c",
        "content": "Verdict requested",
        "author_id": "coord",
        "created_at": _past(30),
    }
    client.comments = [existing_request]

    orchestrate_pending(q, FakeConfig(), _logger, client=client)

    # Must NOT post another judge comment
    assert not any("Verdict requested" in c for c in client.posted_comments)
    assert r.phase == "awaiting_judge"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# orchestrate_pending: pending rounds are started, running rounds advanced
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def test_orchestrate_pending_starts_pending(tmp_path):
    """A pending round is moved to running and its opening comment is posted."""
    r = _make_round()
    q = _make_queue(tmp_path, r)
    client = FakeClient()

    orchestrate_pending(q, FakeConfig(), _logger, client=client)

    assert r.status == "running"
    openers = [c for c in client.posted_comments if "Debate round opened" in c]
    assert openers
|
|
|
|
|
|
def test_orchestrate_pending_advances_running(tmp_path):
    """Running/awaiting_judge round with a verdict is completed."""
    r = _make_round(
        status="running",
        phase="awaiting_judge",
        phase_entered_at=_utcnow(),
        coordinator_comment_id="coord-c",
        judge_comment_id="judge-c",
    )
    q = _make_queue(tmp_path, r)

    client = FakeClient()
    client.agents = [{"name": JUDGE_NAME, "id": "agent-judge"}]
    verdict_request = {
        "id": "judge-c",
        "content": "x",
        "author_id": "coord",
        "created_at": _past(10),
    }
    verdict = {
        "id": "v1",
        "content": "VERDICT: REJECT\n\nNeeds work.",
        "author_id": "agent-judge",
        "created_at": _utcnow(),
    }
    client.comments = [verdict_request, verdict]

    orchestrate_pending(q, FakeConfig(), _logger, client=client)

    assert (r.status, r.phase) == ("done", "rejected")
|