"""Tests for the CEK-native review pipeline (meta-judge + 3 judges + consensus).""" from __future__ import annotations import logging import pathlib from datetime import datetime, timezone from typing import Any import pytest import yaml from coordinator.queue import DebateQueue, Round from coordinator.orchestrator import ( ACCEPT_MIN_SCORE, CONSENSUS_CRITERION_THRESHOLD, CONSENSUS_OVERALL_THRESHOLD, JUDGE_NAMES, MAX_DEBATE_ROUNDS, META_JUDGE_NAME, REWORK_INSTRUCTIONS, _advance_awaiting_debate, _advance_awaiting_judges, _advance_awaiting_rubric, _advance_round, _apply_verdict, _build_coordinator_note_no_agent, _build_debate_round_comment, _build_judge_mention_comment, _build_meta_judge_mention, _build_retrigger_comment, _check_consensus, _criterion_scores, _extract_yaml, _find_commit_url, _find_reply_by_agent, _overall_score, _parse_judge_report, _parse_rubric, _post_rejection_retrigger, _start_round, _utcnow, ) _logger = logging.getLogger("test.orchestrator") # --------------------------------------------------------------------------- # Fakes # --------------------------------------------------------------------------- class FakeConfig: server_url = "http://x" workspace_id = "wid" token = "tok" poll_interval_s = 30 round_timeout_s = 600 max_concurrent_rounds = 3 class FakeClient: def __init__(self) -> None: self.issue: dict[str, Any] = { "id": "issue-1", "title": "Do the thing", "description": "Please do the thing clearly.", "status": "in_review", "assignee_type": None, "assignee_id": None, } self.comments: list[dict[str, Any]] = [] self.posted_comments: list[str] = [] self.agents: list[dict[str, Any]] = [ {"name": META_JUDGE_NAME, "id": "agent-meta"}, {"name": "Judge-GPT", "id": "agent-gpt"}, {"name": "Judge-Claude", "id": "agent-claude"}, {"name": "Judge-Gemini", "id": "agent-gemini"}, ] self._next_comment_id = 1000 def get_issue(self, issue_id: str) -> dict[str, Any]: return dict(self.issue) def update_issue_status(self, issue_id: str, status: str) -> dict[str, Any]: self.issue["status"] = status return {"id": issue_id, "status": status} def list_comments(self, issue_id: str) -> list[dict[str, Any]]: return list(self.comments) def post_comment(self, issue_id: str, content: str) -> dict[str, Any]: self.posted_comments.append(content) cid = f"posted-{self._next_comment_id}" self._next_comment_id += 1 created = _utcnow() self.comments.append({ "id": cid, "content": content, "author_id": "coord-user", "created_at": created, }) return {"id": cid, "created_at": created} def list_agents(self) -> list[dict[str, Any]]: return list(self.agents) def find_agents_by_name(self, names): want = set(names) return {a["name"]: a["id"] for a in self.agents if a["name"] in want} def get_agent_name(self, agent_id: str) -> str | None: for a in self.agents: if a["id"] == agent_id: return a["name"] return None def _past(seconds_ago: int) -> str: from datetime import timedelta t = datetime.now(timezone.utc) - timedelta(seconds=seconds_ago) return t.strftime("%Y-%m-%dT%H:%M:%SZ") def _reply_comment(agent_id: str, content: str, created: str | None = None) -> dict[str, Any]: return { "id": f"reply-{agent_id}", "author_id": agent_id, "content": content, "created_at": created or _utcnow(), } def _rubric_yaml_sample() -> str: spec = { "checklist": [ {"question": "Does the code compile?", "category": "hard_rule", "importance": "essential", "rationale": "basic"}, {"question": "Is documentation present?", "category": "principle", "importance": "important", "rationale": "quality"}, ], "rubric_dimensions": [ {"name": "Correctness", "description": "Does the code work", "scale": "1-5", "weight": 0.6, "score_definitions": {1: "no", 5: "perfect"}}, {"name": "Clarity", "description": "Readability", "scale": "1-5", "weight": 0.4, "score_definitions": {1: "opaque", 5: "crystal"}}, ], } return yaml.safe_dump(spec, sort_keys=False) def _judge_report(final_score: float, criteria: dict[str, float]) -> str: report = { "evaluation_report": { "score_calculation": {"final_score": final_score}, "rubric_scores": [ {"name": k, "score": v, "weight": 0.5} for k, v in criteria.items() ], "executive_summary": f"score {final_score}", } } return "```yaml\n" + yaml.safe_dump(report, sort_keys=False) + "\n```" def _make_round(tmp_path: pathlib.Path, **overrides) -> tuple[Round, DebateQueue]: r = Round( round_id="r1", issue_id="issue-1", identifier="WYL-X", title="Do the thing", enqueued_at=_utcnow(), status="running", phase="convened", phase_entered_at=_utcnow(), ) for k, v in overrides.items(): setattr(r, k, v) q = DebateQueue.load(tmp_path / "queue.json") q.rounds.append(r) q.save() return r, q # --------------------------------------------------------------------------- # Pure helpers # --------------------------------------------------------------------------- def test_extract_yaml_from_fenced_block(): content = "Here is the rubric:\n\n```yaml\nchecklist:\n - question: foo\n```\n\nDone." y = _extract_yaml(content) assert y.startswith("checklist:") assert "question: foo" in y def test_extract_yaml_from_unfenced_content(): content = "checklist:\n - question: foo" y = _extract_yaml(content) assert y == content.strip() def test_extract_yaml_unescapes_html_entities(): # Multica REST API returns comment content with `"` as `"`, `>` as `>`, etc. content = "checklist:\n - id: "CK-001"\n question: "does it work?"" y = _extract_yaml(content) assert '"CK-001"' in y assert """ not in y def test_parse_rubric_accepts_html_encoded_input(): encoded = ( "rrd_cycle_applied: true\n" "evaluation_specification:\n" " checklist:\n" " - id: "CK-001"\n" " question: "does it work?"\n" " category: "hard_rule"\n" " importance: "essential"\n" ) spec = _parse_rubric(encoded) assert spec is not None assert "checklist" in spec def test_extract_yaml_repairs_backslash_backtick(): # Gemini (and similar) emit \` inside double-quoted YAML strings, imitating # markdown escaping. \` is not a valid YAML escape, so we repair it. content = "evaluation_report:\n rubric_scores:\n - name: X\n score: 4\n evidence: \"see \\`foo.py\\` and \\`bar.py\\`\"\n" y = _extract_yaml(content) assert "\\`" not in y assert "`foo.py`" in y def test_parse_judge_report_tolerates_backslash_backtick(): content = ( "```yaml\n" "evaluation_report:\n" " score_calculation:\n" " final_score: 4.0\n" " rubric_scores:\n" " - name: Correctness\n" " score: 4\n" " weight: 1.0\n" " evidence: \"see \\`foo.py\\`\"\n" "```" ) r = _parse_judge_report(content) assert r is not None assert r["score_calculation"]["final_score"] == 4.0 def test_parse_rubric_valid_flat(): spec = _parse_rubric(f"```yaml\n{_rubric_yaml_sample()}\n```") assert spec is not None assert "checklist" in spec or "rubric_dimensions" in spec def test_parse_rubric_valid_wrapped(): wrapped = yaml.safe_dump({"evaluation_specification": yaml.safe_load(_rubric_yaml_sample())}) spec = _parse_rubric(f"```yaml\n{wrapped}\n```") assert spec is not None assert "rubric_dimensions" in spec def test_parse_rubric_rejects_malformed_yaml(): assert _parse_rubric("```yaml\nnot: valid: nested: without: quotes\n```") is None def test_parse_rubric_rejects_yaml_without_expected_keys(): assert _parse_rubric("```yaml\njust_some: random\n```") is None def test_parse_judge_report_valid_with_final_score(): content = _judge_report(3.7, {"Correctness": 4.0, "Clarity": 3.5}) r = _parse_judge_report(content) assert r is not None assert r["score_calculation"]["final_score"] == 3.7 def test_parse_judge_report_valid_without_final_score_but_rubric_scores(): report = { "evaluation_report": { "rubric_scores": [ {"name": "Correctness", "score": 4.0, "weight": 1.0}, ] } } content = "```yaml\n" + yaml.safe_dump(report) + "\n```" assert _parse_judge_report(content) is not None def test_parse_judge_report_rejects_empty(): assert _parse_judge_report("```yaml\nnothing: here\n```") is None def test_overall_score_prefers_final_score(): r = {"score_calculation": {"final_score": 2.8}, "rubric_scores": [{"name": "x", "score": 5, "weight": 1}]} assert _overall_score(r) == 2.8 def test_overall_score_falls_back_to_weighted_average(): r = {"rubric_scores": [ {"name": "a", "score": 4.0, "weight": 0.6}, {"name": "b", "score": 2.0, "weight": 0.4}, ]} assert _overall_score(r) == pytest.approx(3.2) def test_overall_score_none_when_nothing_to_extract(): assert _overall_score({}) is None def test_criterion_scores_extracts_names_and_scores(): r = {"rubric_scores": [ {"name": "Correctness", "score": 4.0}, {"name": "Clarity", "score": 3.0}, ]} s = _criterion_scores(r) assert s == {"Correctness": 4.0, "Clarity": 3.0} def test_check_consensus_converged_accept(): reports = { "Judge-GPT": {"score_calculation": {"final_score": 4.0}, "rubric_scores": [{"name": "C", "score": 4}]}, "Judge-Claude": {"score_calculation": {"final_score": 4.2}, "rubric_scores": [{"name": "C", "score": 4}]}, "Judge-Gemini": {"score_calculation": {"final_score": 4.1}, "rubric_scores": [{"name": "C", "score": 4}]}, } converged, verdict, avg = _check_consensus(reports) assert converged is True assert verdict == "ACCEPT" assert avg == pytest.approx((4.0 + 4.2 + 4.1) / 3) def test_check_consensus_converged_reject_low_score(): reports = { n: {"score_calculation": {"final_score": 2.5}, "rubric_scores": [{"name": "C", "score": 2}]} for n in JUDGE_NAMES } converged, verdict, avg = _check_consensus(reports) assert converged is True assert verdict == "REJECT" def test_check_consensus_not_converged_overall_spread(): reports = { "Judge-GPT": {"score_calculation": {"final_score": 2.0}}, "Judge-Claude": {"score_calculation": {"final_score": 4.0}}, "Judge-Gemini": {"score_calculation": {"final_score": 3.0}}, } converged, verdict, avg = _check_consensus(reports) assert converged is False assert verdict is None assert avg == pytest.approx(3.0) def test_check_consensus_not_converged_criterion_spread(): reports = { "Judge-GPT": {"score_calculation": {"final_score": 3.0}, "rubric_scores": [{"name": "C", "score": 2}]}, "Judge-Claude": {"score_calculation": {"final_score": 3.1}, "rubric_scores": [{"name": "C", "score": 5}]}, "Judge-Gemini": {"score_calculation": {"final_score": 3.0}, "rubric_scores": [{"name": "C", "score": 3}]}, } converged, _, _ = _check_consensus(reports) assert converged is False def test_check_consensus_no_overalls_returns_false(): reports = {n: {} for n in JUDGE_NAMES} converged, verdict, avg = _check_consensus(reports) assert converged is False assert avg is None # --------------------------------------------------------------------------- # Comment builders # --------------------------------------------------------------------------- def test_meta_judge_mention_contains_mention_and_description(): body = _build_meta_judge_mention("agent-meta", "Title", "Description line 1\nDescription line 2") assert "mention://agent/agent-meta" in body assert META_JUDGE_NAME in body assert "Description line 1" in body assert "Description line 2" in body def test_judge_mention_contains_all_three_mentions_and_rubric(): judge_ids = {"Judge-GPT": "a", "Judge-Claude": "b", "Judge-Gemini": "c"} body = _build_judge_mention_comment(judge_ids, "Title", "Desc", "https://example.com/commit/abc", "rubric: yes") for n in JUDGE_NAMES: assert n in body for agent_id in ("a", "b", "c"): assert f"mention://agent/{agent_id}" in body assert "rubric: yes" in body assert "https://example.com/commit/abc" in body def test_debate_round_comment_quotes_all_prior_reports(): judge_ids = {n: f"id-{n}" for n in JUDGE_NAMES} prior = {n: f"REPORT FROM {n}" for n in JUDGE_NAMES} body = _build_debate_round_comment(judge_ids, 1, prior) assert "Debate round 1" in body for n in JUDGE_NAMES: assert n in body assert f"REPORT FROM {n}" in body def test_debate_round_comment_contains_cek_anti_sycophancy_language(): """Regression: do not soften CEK's critical debate instructions. The first live run produced sycophantic convergence because the earlier, softer phrasing dropped CEK's explicit 'only revise if compelling / defend original if you still believe them' instructions. Any future edit that removes these exact clauses should be caught here. """ judge_ids = {n: f"id-{n}" for n in JUDGE_NAMES} body = _build_debate_round_comment(judge_ids, 1, {n: "x" for n in JUDGE_NAMES}) # CEK's structural instructions assert "Identify disagreements (where your scores differ by >1 point)" in body assert "Defend your position with evidence" in body assert "Challenge the other judge's position with counter-evidence" in body # CEK's CRITICAL anti-sycophancy list assert "Only revise if you find their evidence compelling." in body assert "Defend your original scores if you still believe them." in body # APPEND not REVISE assert "APPENDS to your prior report" in body assert "REVISED" not in body # the old softer phrasing is gone def test_retrigger_comment_has_anchor_and_no_drift_instructions(): body = _build_retrigger_comment("Worker", "agent-worker", "Original desc line.", "VERDICT: REJECT", "r1") assert "mention://agent/agent-worker" in body assert "ANCHOR" in body assert "Original desc line." in body assert REWORK_INSTRUCTIONS in body def test_coordinator_note_no_agent_has_no_mention(): body = _build_coordinator_note_no_agent("r1", "no assignee set") assert "mention://" not in body assert "Manual follow-up required" in body # --------------------------------------------------------------------------- # _find_commit_url # --------------------------------------------------------------------------- def test_find_commit_url_picks_latest(): comments = [ {"content": "older https://git.example/commit/abc123", "created_at": "2026-01-01T00:00:00Z"}, {"content": "newer https://git.example/commit/def456", "created_at": "2026-01-02T00:00:00Z"}, ] assert _find_commit_url(comments) == "https://git.example/commit/def456" def test_find_commit_url_empty_when_absent(): assert _find_commit_url([{"content": "no urls"}]) == "" # --------------------------------------------------------------------------- # _find_reply_by_agent # --------------------------------------------------------------------------- def test_find_reply_by_agent_respects_cutoff(): comments = [ {"id": "before", "author_id": "agent-x", "content": "early", "created_at": _past(100)}, {"id": "cutoff", "author_id": "coord", "content": "mention", "created_at": _past(50)}, {"id": "after", "author_id": "agent-x", "content": "late", "created_at": _past(10)}, ] found = _find_reply_by_agent(comments, "agent-x", "cutoff") assert found == ("after", "late") def test_find_reply_by_agent_none_when_no_match(): comments = [{"id": "x", "author_id": "other", "content": "hi", "created_at": _utcnow()}] assert _find_reply_by_agent(comments, "agent-x", "somewhere") is None # --------------------------------------------------------------------------- # _start_round # --------------------------------------------------------------------------- def test_start_round_posts_meta_judge_mention_and_sets_phase(tmp_path): r, q = _make_round(tmp_path, phase="convened", status="pending") client = FakeClient() _start_round(r, client, q, FakeConfig(), _logger) assert r.status == "running" assert len(client.posted_comments) == 1 body = client.posted_comments[0] assert "mention://agent/agent-meta" in body assert r.phase == "awaiting_rubric" assert r.meta_judge_comment_id != "" def test_start_round_marks_error_on_api_failure(tmp_path): r, q = _make_round(tmp_path, phase="convened", status="pending") class FailingClient(FakeClient): def get_issue(self, issue_id): raise RuntimeError("no issue") client = FailingClient() _start_round(r, client, q, FakeConfig(), _logger) assert r.status == "error" # --------------------------------------------------------------------------- # _advance_awaiting_rubric # --------------------------------------------------------------------------- def test_advance_awaiting_rubric_waits_when_meta_judge_silent(tmp_path): r, q = _make_round( tmp_path, phase="awaiting_rubric", status="running", meta_judge_comment_id="meta-c", phase_entered_at=_utcnow(), ) client = FakeClient() client.comments = [{"id": "meta-c", "author_id": "coord", "content": "mention", "created_at": _past(5)}] _advance_awaiting_rubric(r, client, q, FakeConfig(), _logger) assert r.phase == "awaiting_rubric" assert r.status == "running" def test_advance_awaiting_rubric_errors_on_malformed_yaml(tmp_path): r, q = _make_round( tmp_path, phase="awaiting_rubric", status="running", meta_judge_comment_id="meta-c", phase_entered_at=_utcnow(), ) client = FakeClient() client.comments = [ {"id": "meta-c", "author_id": "coord", "content": "mention", "created_at": _past(5)}, {"id": "reply", "author_id": "agent-meta", "content": "```yaml\nnot: valid: nested\n```", "created_at": _utcnow()}, ] _advance_awaiting_rubric(r, client, q, FakeConfig(), _logger) assert r.phase == "error" assert r.status == "error" def test_advance_awaiting_rubric_moves_to_awaiting_judges_on_valid_rubric(tmp_path): r, q = _make_round( tmp_path, phase="awaiting_rubric", status="running", meta_judge_comment_id="meta-c", phase_entered_at=_utcnow(), ) client = FakeClient() client.comments = [ {"id": "meta-c", "author_id": "coord", "content": "mention", "created_at": _past(5)}, {"id": "reply", "author_id": "agent-meta", "content": f"```yaml\n{_rubric_yaml_sample()}\n```", "created_at": _utcnow()}, ] _advance_awaiting_rubric(r, client, q, FakeConfig(), _logger) assert r.phase == "awaiting_judges" assert r.rubric_yaml != "" assert any("mention://agent/agent-gpt" in c for c in client.posted_comments) assert any("mention://agent/agent-claude" in c for c in client.posted_comments) assert any("mention://agent/agent-gemini" in c for c in client.posted_comments) def test_advance_awaiting_rubric_errors_on_timeout_without_reply(tmp_path): cfg = FakeConfig() r, q = _make_round( tmp_path, phase="awaiting_rubric", status="running", meta_judge_comment_id="meta-c", phase_entered_at=_past(cfg.round_timeout_s + 5), ) client = FakeClient() client.comments = [{"id": "meta-c", "author_id": "coord", "content": "mention", "created_at": _past(cfg.round_timeout_s + 5)}] _advance_awaiting_rubric(r, client, q, cfg, _logger) assert r.phase == "error" assert r.status == "error" # --------------------------------------------------------------------------- # _advance_awaiting_judges # --------------------------------------------------------------------------- def test_advance_awaiting_judges_waits_when_missing_reports(tmp_path): r, q = _make_round( tmp_path, phase="awaiting_judges", status="running", judge_mention_comment_id="jm-c", phase_entered_at=_utcnow(), ) client = FakeClient() client.comments = [ {"id": "jm-c", "author_id": "coord", "content": "mention", "created_at": _past(5)}, _reply_comment("agent-gpt", _judge_report(3.5, {"Correctness": 4})), ] _advance_awaiting_judges(r, client, q, FakeConfig(), _logger) assert r.phase == "awaiting_judges" def test_advance_awaiting_judges_accepts_on_consensus(tmp_path): r, q = _make_round( tmp_path, phase="awaiting_judges", status="running", judge_mention_comment_id="jm-c", phase_entered_at=_utcnow(), ) client = FakeClient() client.comments = [ {"id": "jm-c", "author_id": "coord", "content": "mention", "created_at": _past(5)}, _reply_comment("agent-gpt", _judge_report(4.0, {"Correctness": 4})), _reply_comment("agent-claude", _judge_report(4.1, {"Correctness": 4})), _reply_comment("agent-gemini", _judge_report(4.2, {"Correctness": 4})), ] _advance_awaiting_judges(r, client, q, FakeConfig(), _logger) assert r.phase == "accepted" assert r.status == "done" assert client.issue["status"] == "done" def test_advance_awaiting_judges_rejects_on_consensus_low(tmp_path): r, q = _make_round( tmp_path, phase="awaiting_judges", status="running", judge_mention_comment_id="jm-c", phase_entered_at=_utcnow(), ) client = FakeClient() client.issue["assignee_type"] = "agent" client.issue["assignee_id"] = "agent-worker" client.agents.append({"name": "Worker", "id": "agent-worker"}) client.comments = [ {"id": "jm-c", "author_id": "coord", "content": "mention", "created_at": _past(5)}, _reply_comment("agent-gpt", _judge_report(2.0, {"Correctness": 2})), _reply_comment("agent-claude", _judge_report(2.2, {"Correctness": 2})), _reply_comment("agent-gemini", _judge_report(2.1, {"Correctness": 2})), ] _advance_awaiting_judges(r, client, q, FakeConfig(), _logger) assert r.phase == "rejected" assert r.status == "done" assert client.issue["status"] == "in_progress" assert any("mention://agent/agent-worker" in c for c in client.posted_comments) def test_advance_awaiting_judges_starts_debate_round_when_spread(tmp_path): r, q = _make_round( tmp_path, phase="awaiting_judges", status="running", judge_mention_comment_id="jm-c", phase_entered_at=_utcnow(), debate_round=0, ) client = FakeClient() client.comments = [ {"id": "jm-c", "author_id": "coord", "content": "mention", "created_at": _past(5)}, _reply_comment("agent-gpt", _judge_report(2.0, {"Correctness": 2})), _reply_comment("agent-claude", _judge_report(4.0, {"Correctness": 4})), _reply_comment("agent-gemini", _judge_report(3.0, {"Correctness": 3})), ] _advance_awaiting_judges(r, client, q, FakeConfig(), _logger) assert r.phase == "awaiting_debate" assert r.debate_round == 1 def test_advance_awaiting_judges_errors_when_no_parseable(tmp_path): r, q = _make_round( tmp_path, phase="awaiting_judges", status="running", judge_mention_comment_id="jm-c", phase_entered_at=_past(FakeConfig.round_timeout_s + 5), ) client = FakeClient() client.comments = [ {"id": "jm-c", "author_id": "coord", "content": "mention", "created_at": _past(FakeConfig.round_timeout_s + 5)}, _reply_comment("agent-gpt", "not yaml at all"), _reply_comment("agent-claude", "also not yaml"), _reply_comment("agent-gemini", "garbage"), ] _advance_awaiting_judges(r, client, q, FakeConfig(), _logger) assert r.phase == "error" assert r.status == "error" # --------------------------------------------------------------------------- # Debate round cap # --------------------------------------------------------------------------- def test_awaiting_debate_errors_out_at_cap(tmp_path): r, q = _make_round( tmp_path, phase="awaiting_debate", status="running", judge_mention_comment_id="jm-c", phase_entered_at=_utcnow(), debate_round=MAX_DEBATE_ROUNDS, ) client = FakeClient() client.comments = [ {"id": "jm-c", "author_id": "coord", "content": "mention", "created_at": _past(5)}, _reply_comment("agent-gpt", _judge_report(2.0, {"Correctness": 2})), _reply_comment("agent-claude", _judge_report(4.0, {"Correctness": 4})), _reply_comment("agent-gemini", _judge_report(3.0, {"Correctness": 3})), ] _advance_awaiting_debate(r, client, q, FakeConfig(), _logger) assert r.phase == "error" assert r.status == "error" # --------------------------------------------------------------------------- # Race fix: issue status moves BEFORE round marked done # --------------------------------------------------------------------------- def test_apply_verdict_updates_issue_before_marking_round_done(tmp_path): call_order: list[str] = [] class TrackingClient(FakeClient): def update_issue_status(self, issue_id, status): call_order.append(f"issue:{status}") return super().update_issue_status(issue_id, status) class TrackingQueue(DebateQueue): def update_status(self, round_id, status): call_order.append(f"round:{status}") super().update_status(round_id, status) r = Round( round_id="r1", issue_id="issue-1", identifier="WYL-X", title="t", enqueued_at=_utcnow(), status="running", phase="awaiting_judges", phase_entered_at=_utcnow(), ) q = TrackingQueue.load(tmp_path / "queue.json") q.rounds.append(r) q.save() client = TrackingClient() _apply_verdict(r, client, q, "ACCEPT", "VERDICT: ACCEPT\nScore 4.0", _logger) assert "issue:done" in call_order assert "round:done" in call_order assert call_order.index("issue:done") < call_order.index("round:done") # --------------------------------------------------------------------------- # _post_rejection_retrigger corner cases # --------------------------------------------------------------------------- def test_reject_retrigger_skipped_for_member_assignee(tmp_path): r, q = _make_round(tmp_path) client = FakeClient() client.issue["assignee_type"] = "member" client.issue["assignee_id"] = "user-1" cid = _post_rejection_retrigger(r, client, client.issue, "VERDICT: REJECT", _logger) assert cid is None assert len(client.posted_comments) == 1 assert "mention://" not in client.posted_comments[0] assert "Manual follow-up" in client.posted_comments[0] def test_reject_retrigger_skipped_when_no_assignee(tmp_path): r, q = _make_round(tmp_path) client = FakeClient() client.issue["assignee_type"] = None client.issue["assignee_id"] = None cid = _post_rejection_retrigger(r, client, client.issue, "VERDICT: REJECT", _logger) assert cid is None def test_reject_retrigger_includes_verbatim_description(tmp_path): r, q = _make_round(tmp_path) client = FakeClient() client.issue["assignee_type"] = "agent" client.issue["assignee_id"] = "agent-worker" client.agents.append({"name": "Worker", "id": "agent-worker"}) desc = "Line A.\n\nLine B with unique-marker-123.\n\nLine C." client.issue["description"] = desc cid = _post_rejection_retrigger(r, client, client.issue, "VERDICT: REJECT", _logger) assert cid is not None body = client.posted_comments[-1] for line in desc.splitlines(): if line.strip(): assert line in body assert REWORK_INSTRUCTIONS in body # --------------------------------------------------------------------------- # _advance_round dispatch # --------------------------------------------------------------------------- def test_advance_round_dispatches_convened_to_start(tmp_path): r, q = _make_round(tmp_path, phase="convened", status="pending") client = FakeClient() _advance_round(r, client, q, FakeConfig(), _logger) assert r.phase == "awaiting_rubric" def test_advance_round_corrects_terminal_phase_with_wrong_status(tmp_path): r, q = _make_round(tmp_path, phase="accepted", status="running") client = FakeClient() _advance_round(r, client, q, FakeConfig(), _logger) assert r.status == "done"