WYL-51 round 2: requirement anchoring retrigger comment

Replace the round-1 summariser approach with the full anchor pattern:

A. _post_rejection_retrigger(round_, client, issue, verdict_comment_content, logger)
   - Renamed from _notify_assignee_on_reject
   - Non-agent/no-assignee path: post a non-mentioning coordinator note and return
   - Agent path: build full anchor comment via _build_retrigger_comment

B. Anchor comment structure (verbatim, no summarising):
   1. [@AssigneeName](mention://agent/<id>)
   2. Verdict: REJECT (round <id>)
   3. ## ANCHOR — Original requirements + full issue description in blockquote
   4. ## Why this was rejected + full judge verdict in blockquote
   5. ## Instructions for rework + REWORK_INSTRUCTIONS constant (verbatim)
   6. Trailing audit line

C. Round.retrigger_comment_id + DebateQueue.set_retrigger_comment_id

D. 8 required tests (D1–D8): mention, verbatim description, no-drift constant,
   member skip, no-assignee skip, accept no-op, id persistence, race regression

E. test_retrigger_on_reject_end_to_end integration test

Removed: _extract_rejection_reasons, _build_rejection_followup (summarisers)
Added: REWORK_INSTRUCTIONS constant, MulticaClient.get_agent_name

Unit: 63 passed. Integration: 1 passed.
This commit is contained in:
2026-04-15 22:24:39 +00:00
parent cb6156a4a1
commit 11d1d675dd
5 changed files with 453 additions and 220 deletions
+7
View File
@@ -118,3 +118,10 @@ class MulticaClient:
if agent.get("name") in wanted:
found[agent["name"]] = agent["id"]
return found
def get_agent_name(self, agent_id: str) -> str | None:
"""Return the display name of an agent by ID, or None if not found."""
for agent in self.list_agents():
if agent.get("id") == agent_id:
return agent.get("name") or None
return None
+141 -91
View File
@@ -106,6 +106,27 @@ _VERDICT_RE = re.compile(
r"^VERDICT:\s*(ACCEPT|REJECT)", re.MULTILINE | re.IGNORECASE
)
# ---------------------------------------------------------------------------
# Requirement anchoring — the single most important coordinator property.
# DO NOT paraphrase, summarise, or rewrite this constant. It is copied
# verbatim into every rejection retrigger comment so that the worker sees
# exactly the same text every time it is bounced back.
# ---------------------------------------------------------------------------
REWORK_INSTRUCTIONS = (
"You are being re-fired on this issue because the previous delivery did not satisfy "
"the ANCHOR requirements quoted above. You MUST compare your next delivery against "
"the ANCHOR text verbatim \u2014 not against your earlier framing, and not against "
"what you think the issue meant. If you find that the anchor cannot be satisfied with "
"the tools you have, set the issue status to `blocked` and post a comment explaining "
"exactly which anchor requirement cannot be met and why. Do NOT redefine scope. Do "
"NOT substitute an easier experiment, artifact, or deliverable for a harder one. Do "
"NOT ship a reinterpretation as if it were the requested thing. If any previous "
'debater evidence suggested your work was "mostly correct just with one gap," that '
"is advice about THIS rejection, not a license to make a one-line patch \u2014 "
"re-check the anchor first, then decide what the minimum change is."
)
# ---------------------------------------------------------------------------
# Internal helpers
@@ -258,121 +279,132 @@ def _build_debater_comment(
return "\n".join(parts)
def _extract_rejection_reasons(judge_reply: str) -> list[str]:
"""Extract up to 3 failure reasons from a REJECT verdict text.
Prefers explicit bullet / numbered list items; falls back to the first
two sentences of the reasoning block.
"""
# Strip the VERDICT line itself
text = _VERDICT_RE.sub("", judge_reply, count=1).strip()
bullets: list[str] = []
for line in text.splitlines():
stripped = line.strip()
if re.match(r"^[-*•]\s+\S", stripped) or re.match(r"^\d+[.)]\s+\S", stripped):
reason = re.sub(r"^[-*•\d.)\s]+", "", stripped).strip()
if reason:
bullets.append(reason)
if len(bullets) >= 3:
break
if bullets:
return bullets[:3]
# Fallback: first two non-empty sentences
sentences = [s.strip() for s in re.split(r"(?<=[.!?])\s+", text) if s.strip()]
return sentences[:2]
def _to_blockquote(text: str) -> str:
"""Wrap each line of text in a Markdown blockquote (``> `` prefix)."""
return "\n".join(f"> {line}" if line else ">" for line in text.splitlines())
def _build_rejection_followup(
def _build_retrigger_comment(
assignee_name: str,
assignee_agent_id: str,
judge_reply: str,
issue_description: str,
verdict_content: str,
round_id: str,
) -> str:
"""Short comment @-mentioning the assignee with the REJECT summary."""
mention = (
f"[@{assignee_name}](mention://agent/{assignee_agent_id})"
if assignee_agent_id
else f"@{assignee_name}"
)
reasons = _extract_rejection_reasons(judge_reply)
if reasons:
reason_block = "\n".join(f"- {r}" for r in reasons)
body = f"Top issues:\n{reason_block}"
else:
body = judge_reply[:400].strip()
"""Build the full anchor-bearing retrigger comment body.
return (
f"{mention} — VERDICT: REJECT\n\n"
f"{body}\n\n"
"Please address these issues and resubmit."
)
def _notify_assignee_on_reject(
round_: Round,
judge_reply: str,
client: MulticaClient,
logger: logging.Logger,
) -> None:
"""Post a rejection followup comment @-mentioning the original assignee.
Only fires when ``assignee_type == 'agent'``. Any failure is logged and
swallowed — round completion must not depend on this side-effect.
Structure (verbatim — do not rearrange or summarise any section):
1. @-mention to fire the agent
2. One-line verdict summary
3. ## ANCHOR — Original requirements (verbatim description, blockquoted)
4. ## Why this was rejected (verbatim verdict, blockquoted)
5. ## Instructions for rework (REWORK_INSTRUCTIONS constant)
6. Trailing audit line
"""
try:
issue = client.get_issue(round_.issue_id)
except Exception as exc:
logger.warning(
"round %s: cannot fetch issue for reject notification: %s",
round_.round_id, exc,
)
return
mention = f"[@{assignee_name}](mention://agent/{assignee_agent_id})"
desc_bq = _to_blockquote(issue_description.strip())
verdict_bq = _to_blockquote(verdict_content.strip())
return "\n\n".join([
mention,
f"Verdict: REJECT (round {round_id})",
f"## ANCHOR \u2014 Original requirements\n\n{desc_bq}",
f"## Why this was rejected\n\n{verdict_bq}",
f"## Instructions for rework\n\n{REWORK_INSTRUCTIONS}",
f"\u2014 Coordinator (auto-retrigger on reject, round {round_id})",
])
def _build_coordinator_note_no_agent(round_id: str, reason: str) -> str:
"""Coordinator-only note (no @-mention) when the assignee is not an agent."""
return (
f"Verdict: REJECT (round {round_id}) \u2014 {reason}. "
"Manual follow-up required."
)
def _post_rejection_retrigger(
round_: Round,
client: MulticaClient,
issue: dict[str, Any],
verdict_comment_content: str,
logger: logging.Logger,
) -> str | None:
"""Post the retrigger comment after a REJECT verdict.
Returns the comment ID of the retrigger comment when the assignee is an
agent (caller should persist it), or ``None`` when a non-mentioning
coordinator note was posted instead (member/no-assignee) or when posting
failed entirely.
Any failure is logged and swallowed — round completion must not depend on
this side-effect.
"""
assignee_type = issue.get("assignee_type", "")
assignee_id = issue.get("assignee_id", "")
assignee_id = issue.get("assignee_id", "") or ""
if assignee_type != "agent" or not assignee_id:
logger.debug(
"round %s: assignee_type=%r — skipping reject notification",
round_.round_id, assignee_type,
reason = (
f"original assignee is not an agent (assignee_type: {assignee_type!r})"
if assignee_type
else "no assignee set"
)
return
# Resolve the agent name from their ID
try:
agents = client.list_agents()
except Exception as exc:
logger.warning(
"round %s: cannot list agents for reject notification: %s",
round_.round_id, exc,
logger.info(
"round %s: %s — posting non-mentioning coordinator note",
round_.round_id, reason,
)
return
try:
client.post_comment(
round_.issue_id,
_build_coordinator_note_no_agent(round_.round_id, reason),
)
except Exception as exc:
logger.warning(
"round %s: cannot post coordinator note: %s",
round_.round_id, exc,
)
return None
assignee_name = next(
(a.get("name", "") for a in agents if a.get("id") == assignee_id),
"",
)
# Agent path — look up name (calls list_agents once)
assignee_name = client.get_agent_name(assignee_id)
if not assignee_name:
logger.warning(
"round %s: assignee agent %s not found in workspace — skipping reject notification",
"round %s: assignee agent %s not found in workspace — posting note without mention",
round_.round_id, assignee_id,
)
return
try:
client.post_comment(
round_.issue_id,
_build_coordinator_note_no_agent(
round_.round_id,
f"assignee agent {assignee_id!r} not found in workspace",
),
)
except Exception as exc:
logger.warning(
"round %s: cannot post coordinator note: %s",
round_.round_id, exc,
)
return None
body = _build_rejection_followup(assignee_name, assignee_id, judge_reply)
issue_description = issue.get("description", "") or ""
body = _build_retrigger_comment(
assignee_name, assignee_id, issue_description, verdict_comment_content, round_.round_id,
)
try:
client.post_comment(round_.issue_id, body)
posted = client.post_comment(round_.issue_id, body)
cid = posted.get("id", "")
logger.info(
"round %s: reject notification posted to @%s",
round_.round_id, assignee_name,
"round %s: retrigger comment posted to @%s (id=%s)",
round_.round_id, assignee_name, cid,
)
return cid
except Exception as exc:
logger.warning(
"round %s: cannot post reject notification: %s",
"round %s: cannot post retrigger comment: %s",
round_.round_id, exc,
)
return None
def _build_judge_comment(
@@ -625,9 +657,27 @@ def _advance_awaiting_judge(
)
return # do NOT mark done; retry next cycle
# On REJECT: re-trigger the original assignee via a short followup comment.
# On REJECT: fetch issue and post the full anchor retrigger comment.
if verdict == "REJECT":
_notify_assignee_on_reject(round_, judge_reply, client, logger)
try:
issue = client.get_issue(round_.issue_id)
except Exception as exc:
logger.warning(
"round %s: cannot fetch issue for retrigger — using empty dict: %s",
round_.round_id, exc,
)
issue = {}
retrigger_cid = _post_rejection_retrigger(
round_, client, issue, judge_reply, logger,
)
if retrigger_cid:
try:
queue.set_retrigger_comment_id(round_.round_id, retrigger_cid)
except Exception as exc:
logger.warning(
"round %s: cannot persist retrigger_comment_id: %s",
round_.round_id, exc,
)
phase = "accepted" if verdict == "ACCEPT" else "rejected"
queue.update_phase(round_.round_id, phase)
+11
View File
@@ -53,6 +53,7 @@ class Round:
phase_entered_at: str = "" # ISO8601 when current phase was entered (for timeout)
coordinator_comment_id: str = "" # ID of debater-mention comment posted by coordinator
judge_comment_id: str = "" # ID of judge-mention comment posted by coordinator
retrigger_comment_id: str = "" # ID of retrigger comment posted after REJECT (audit)
def to_dict(self) -> dict[str, Any]:
return asdict(self)
@@ -70,6 +71,7 @@ class Round:
phase_entered_at=d.get("phase_entered_at", ""),
coordinator_comment_id=d.get("coordinator_comment_id", ""),
judge_comment_id=d.get("judge_comment_id", ""),
retrigger_comment_id=d.get("retrigger_comment_id", ""),
)
@@ -128,6 +130,15 @@ class DebateQueue:
return
raise KeyError(f"round {round_id!r} not found in queue")
def set_retrigger_comment_id(self, round_id: str, comment_id: str) -> None:
"""Persist the retrigger comment ID for audit purposes."""
for r in self.rounds:
if r.round_id == round_id:
r.retrigger_comment_id = comment_id
self.save()
return
raise KeyError(f"round {round_id!r} not found in queue")
def update_phase(self, round_id: str, phase: str, **kwargs: str) -> None:
"""Update round phase (and optionally other string fields) in place and persist.
+143
View File
@@ -15,6 +15,7 @@ using the content-based fallback detector), verifies the result, then cleans up.
"""
from __future__ import annotations
import html
import logging
import os
import time
@@ -209,3 +210,145 @@ def test_full_debate_round_lifecycle(cfg, client, tmp_path):
logger.info("cleanup: issue %s set to done", issue_id)
except Exception as exc:
logger.warning("cleanup failed for issue %s: %s", issue_id, exc)
# ---------------------------------------------------------------------------
# Integration test: REJECT retrigger + anchor (WYL-51)
# ---------------------------------------------------------------------------
@pytest.mark.integration
def test_retrigger_on_reject_end_to_end(cfg, client, tmp_path):
"""End-to-end test: REJECT verdict triggers an anchor retrigger comment.
Steps:
1. Create scratch issue with multi-paragraph description + an agent assignee
2. Manually enqueue a Round at phase=awaiting_judge
3. Inject a VERDICT: REJECT comment via the content-marker fallback
4. Run _advance_awaiting_judge
5. Assert: issue=in_progress, retrigger comment exists with @mention,
verbatim description, and no-drift instruction
6. Cleanup
"""
from coordinator.orchestrator import (
REWORK_INSTRUCTIONS,
_advance_awaiting_judge,
)
from coordinator.queue import DebateQueue, Round
logger = logging.getLogger("test.integration.retrigger")
issue_id = None
# Multi-paragraph description with unique sentinel values for verbatim check
DESCRIPTION = (
"Integration test anchor paragraph one. Sentinel: ANCHOR-INTEG-PARA1.\n\n"
"Integration test anchor paragraph two. Sentinel: ANCHOR-INTEG-PARA2.\n\n"
"Acceptance criteria: must contain ANCHOR-INTEG-CRITERIA."
)
try:
# --- 1. Create scratch issue ---
issue = client.create_issue(
title="[DEBATE-SMOKE-TEST] WYL-51 retrigger smoke — auto-delete",
description=DESCRIPTION,
status="in_review",
)
issue_id = issue["id"]
logger.info("created scratch issue %s", issue_id)
# Set an agent assignee so retrigger fires with a mention.
# Pick the first available agent in the workspace.
agents = client.list_agents()
assignee = agents[0] if agents else None
if assignee:
client.update_issue(
issue_id,
assignee_id=assignee["id"],
assignee_type="agent",
)
logger.info("set assignee to %s (%s)", assignee.get("name"), assignee["id"])
# --- 2. Manually enqueue a Round at phase=awaiting_judge ---
queue = DebateQueue.load(tmp_path / "queue.json")
round_ = queue.enqueue(issue_id, "WYL-51-SMOKE", "Retrigger smoke test")
queue.update_status(round_.round_id, "running")
queue.update_phase(
round_.round_id,
"awaiting_judge",
phase_entered_at=time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
coordinator_comment_id="",
)
# --- 3. Inject VERDICT: REJECT comment ---
verdict_body = (
"VERDICT: REJECT\n\n"
"Integration test rejection. The smoke test intentionally rejects this "
"delivery to verify the retrigger mechanism fires correctly."
)
client.post_comment(issue_id, verdict_body)
# Set judge_comment_id to empty so content-fallback triggers
round_.judge_comment_id = ""
queue.save()
# --- 4. Run _advance_awaiting_judge ---
_advance_awaiting_judge(round_, client, queue, cfg, logger)
# --- 5. Assertions ---
assert round_.status == "done", f"Expected round done, got {round_.status}"
assert round_.phase == "rejected", f"Expected rejected, got {round_.phase}"
# (a) Issue status must be in_progress
refreshed = client.get_issue(issue_id)
assert refreshed.get("status") == "in_progress", (
f"Issue should be in_progress after REJECT, got {refreshed.get('status')!r}"
)
# (b) A retrigger comment must exist on the issue
real_comments = client.list_comments(issue_id)
retrigger_cid = round_.retrigger_comment_id
if assignee:
# Agent path: retrigger_comment_id must be set
assert retrigger_cid, "retrigger_comment_id must be persisted on the round"
retrigger_comment = next(
(c for c in real_comments if c.get("id") == retrigger_cid),
None,
)
assert retrigger_comment is not None, (
f"Retrigger comment {retrigger_cid} not found in issue comments"
)
# The API HTML-encodes some chars (e.g. " → &#34;, > → &gt;) in returned
# content. Decode before comparing so assertions match plain-string constants.
body = html.unescape(retrigger_comment["content"])
# (b) @-mention present
assert assignee["id"] in body, (
"Retrigger comment must contain the assignee agent ID"
)
# (c) Full verbatim description present
for line in DESCRIPTION.splitlines():
if line.strip():
assert line in body, (
f"Description line {line!r} not found verbatim in retrigger comment"
)
# (d) No-drift instruction present
assert REWORK_INSTRUCTIONS in body, (
"REWORK_INSTRUCTIONS must appear verbatim in retrigger comment"
)
logger.info(
"retrigger comment (first 400 chars):\n%s",
body[:400],
)
logger.info("VERDICT OK — retrigger end-to-end passed")
finally:
# --- 6. Cleanup ---
if issue_id:
try:
client.update_issue_status(issue_id, "done")
logger.info("cleanup: issue %s set to done", issue_id)
except Exception as exc:
logger.warning("cleanup failed for issue %s: %s", issue_id, exc)
+151 -129
View File
@@ -13,13 +13,14 @@ import pytest
from coordinator.orchestrator import (
DEBATER_NAMES,
JUDGE_NAME,
REWORK_INSTRUCTIONS,
_advance_awaiting_debaters,
_advance_awaiting_judge,
_collect_debater_replies,
_collect_judge_reply,
_extract_rejection_reasons,
_find_commit_url,
_parse_verdict,
_post_rejection_retrigger,
_start_round,
orchestrate_pending,
)
@@ -92,6 +93,12 @@ class FakeClient:
wanted = set(names)
return {a["name"]: a["id"] for a in self.agents if a["name"] in wanted}
def get_agent_name(self, agent_id: str) -> str | None:
for a in self.agents:
if a.get("id") == agent_id:
return a.get("name")
return None
def update_issue_status(self, issue_id: str, status: str) -> dict[str, Any]:
self.status_updates.append((issue_id, status))
self.issue["status"] = status
@@ -738,40 +745,7 @@ def test_orchestrate_pending_advances_running(tmp_path):
# ---------------------------------------------------------------------------
# _extract_rejection_reasons
# ---------------------------------------------------------------------------
def test_extract_reasons_from_bullets():
text = "VERDICT: REJECT\n\n- Missing tests\n- No error handling\n- Bad performance"
reasons = _extract_rejection_reasons(text)
assert reasons == ["Missing tests", "No error handling", "Bad performance"]
def test_extract_reasons_from_numbered_list():
text = "VERDICT: REJECT\n\n1. Unit tests missing\n2. Docs incomplete"
reasons = _extract_rejection_reasons(text)
assert reasons == ["Unit tests missing", "Docs incomplete"]
def test_extract_reasons_capped_at_three():
text = "VERDICT: REJECT\n\n- R1\n- R2\n- R3\n- R4\n- R5"
assert len(_extract_rejection_reasons(text)) == 3
def test_extract_reasons_fallback_to_sentences():
text = "VERDICT: REJECT\n\nImplementation is broken. Tests are absent. Nothing works."
reasons = _extract_rejection_reasons(text)
assert len(reasons) <= 2
assert any("broken" in r.lower() for r in reasons)
def test_extract_reasons_empty_text():
reasons = _extract_rejection_reasons("VERDICT: REJECT")
assert isinstance(reasons, list)
# ---------------------------------------------------------------------------
# _advance_awaiting_judge — REJECT assignee notification (WYL-51)
# _advance_awaiting_judge — REJECT retrigger + anchor (WYL-51)
# ---------------------------------------------------------------------------
def _judge_round_with_agent_assignee(
@@ -779,128 +753,204 @@ def _judge_round_with_agent_assignee(
*,
assignee_id: str = "agent-worker",
assignee_name: str = "Senior Developer",
description: str = "Test issue description.",
) -> tuple[Round, DebateQueue, FakeClient]:
"""_judge_round helper pre-configured with an agent assignee."""
r, q, client = _judge_round(tmp_path)
client.issue["assignee_type"] = "agent"
client.issue["assignee_id"] = assignee_id
client.issue["description"] = description
client.agents.append({"name": assignee_name, "id": assignee_id})
return r, q, client
def test_reject_posts_followup_with_assignee_mention(tmp_path):
"""REJECT path: a followup comment is posted that @-mentions the agent assignee."""
r, q, client = _judge_round_with_agent_assignee(tmp_path)
client.comments.append({
def _verdict_comment(content: str = "VERDICT: REJECT\n\nDoes not satisfy requirements.") -> dict:
return {
"id": "verdict1",
"content": "VERDICT: REJECT\n\n- Missing tests\n- No error handling",
"content": content,
"author_id": "agent-judge",
"author_type": "agent",
"created_at": _utcnow(),
})
}
# D1
def test_reject_posts_retrigger_with_assignee_mention(tmp_path):
"""REJECT path: a retrigger comment is posted that @-mentions the agent assignee."""
r, q, client = _judge_round_with_agent_assignee(tmp_path)
client.comments.append(_verdict_comment())
_advance_awaiting_judge(r, client, q, FakeConfig(), _logger)
assert r.status == "done"
assert r.phase == "rejected"
followup = next(
retrigger = next(
(c for c in client.posted_comments if "mention://agent/agent-worker" in c),
None,
)
assert followup is not None, "Expected followup comment with @-mention of assignee"
assert "REJECT" in followup
assert retrigger is not None, "Expected retrigger comment with @-mention of assignee"
assert "Senior Developer" in retrigger
assert "REJECT" in retrigger
def test_reject_followup_contains_verdict_reasoning(tmp_path):
"""REJECT followup contains extracted failure reasons from the judge reply."""
r, q, client = _judge_round_with_agent_assignee(tmp_path)
client.comments.append({
"id": "verdict1",
"content": (
"VERDICT: REJECT\n\n"
"- Unit tests are missing\n"
"- Error handling is incomplete\n"
"- Performance is unacceptable"
),
"author_id": "agent-judge",
"author_type": "agent",
"created_at": _utcnow(),
})
# D2
def test_reject_retrigger_includes_verbatim_original_description(tmp_path):
"""REJECT retrigger: full verbatim issue description appears in comment body."""
desc = (
"First paragraph of specific requirements.\n\n"
"Second paragraph with unique detail: abc-xyz-123.\n\n"
"Third paragraph with acceptance criteria: must-pass-qa-check."
)
r, q, client = _judge_round_with_agent_assignee(tmp_path, description=desc)
client.comments.append(_verdict_comment())
_advance_awaiting_judge(r, client, q, FakeConfig(), _logger)
followup = next(
retrigger = next(
(c for c in client.posted_comments if "mention://agent/agent-worker" in c),
None,
)
assert followup is not None
assert "tests" in followup.lower() or "error" in followup.lower() or "performance" in followup.lower()
assert retrigger is not None
# Every non-empty line of the description must appear verbatim
for line in desc.splitlines():
if line.strip():
assert line in retrigger, (
f"Line {line!r} not found verbatim in retrigger comment"
)
def test_reject_followup_includes_retry_request(tmp_path):
"""REJECT followup asks the assignee to retry."""
# D3
def test_reject_retrigger_includes_no_drift_instruction(tmp_path):
"""REJECT retrigger: REWORK_INSTRUCTIONS constant appears verbatim in comment."""
r, q, client = _judge_round_with_agent_assignee(tmp_path)
client.comments.append({
"id": "verdict1",
"content": "VERDICT: REJECT\n\nDoes not meet acceptance criteria.",
"author_id": "agent-judge",
"author_type": "agent",
"created_at": _utcnow(),
})
client.comments.append(_verdict_comment())
_advance_awaiting_judge(r, client, q, FakeConfig(), _logger)
followup = next(
retrigger = next(
(c for c in client.posted_comments if "mention://agent/agent-worker" in c),
None,
)
assert followup is not None
assert "resubmit" in followup.lower() or "retry" in followup.lower()
assert retrigger is not None
assert REWORK_INSTRUCTIONS in retrigger, (
"REWORK_INSTRUCTIONS constant must appear verbatim in retrigger comment"
)
assert "Do NOT redefine scope" in retrigger
def test_reject_no_followup_for_member_assignee(tmp_path):
"""REJECT path: no followup when assignee_type == 'member' (leave to human)."""
# D4
def test_reject_retrigger_skipped_if_assignee_is_member(tmp_path):
"""REJECT: member assignee → non-mentioning coordinator comment, no @-mention."""
r, q, client = _judge_round(tmp_path)
client.issue["assignee_type"] = "member"
client.issue["assignee_id"] = "user-123"
client.comments.append({
"id": "verdict1",
"content": "VERDICT: REJECT\n\nNeeds more work.",
"author_id": "agent-judge",
"author_type": "agent",
"created_at": _utcnow(),
})
client.comments.append(_verdict_comment())
_advance_awaiting_judge(r, client, q, FakeConfig(), _logger)
assert r.status == "done"
assert r.phase == "rejected"
assert all("mention://" not in c for c in client.posted_comments)
# A coordinator comment IS posted (for audit), but without any @-mention
assert len(client.posted_comments) == 1
assert "mention://" not in client.posted_comments[0]
assert "Manual follow-up required" in client.posted_comments[0]
def test_reject_no_followup_when_no_assignee(tmp_path):
"""REJECT path: no followup when issue has no assignee set."""
# D5
def test_reject_retrigger_skipped_if_no_assignee(tmp_path):
"""REJECT: no assignee → non-mentioning coordinator comment, no @-mention."""
r, q, client = _judge_round(tmp_path)
# Default FakeClient issue has no assignee fields
client.comments.append({
"id": "verdict1",
"content": "VERDICT: REJECT\n\nNo work done.",
"author_id": "agent-judge",
"author_type": "agent",
"created_at": _utcnow(),
})
client.comments.append(_verdict_comment())
_advance_awaiting_judge(r, client, q, FakeConfig(), _logger)
assert r.status == "done"
assert r.phase == "rejected"
assert len(client.posted_comments) == 0
assert len(client.posted_comments) == 1
assert "mention://" not in client.posted_comments[0]
assert "Manual follow-up required" in client.posted_comments[0]
def test_reject_notification_failure_does_not_block_round_completion(tmp_path):
"""REJECT followup: round completes even if posting the notification fails."""
# D6
def test_accept_does_not_post_retrigger(tmp_path):
"""ACCEPT path: no retrigger comment is posted."""
r, q, client = _judge_round_with_agent_assignee(tmp_path)
client.comments.append(_verdict_comment("VERDICT: ACCEPT\n\nExcellent work!"))
_advance_awaiting_judge(r, client, q, FakeConfig(), _logger)
assert r.status == "done"
assert r.phase == "accepted"
assert all("mention://agent/agent-worker" not in c for c in client.posted_comments)
# D7
def test_retrigger_comment_id_persisted_on_round(tmp_path):
"""After REJECT, round.retrigger_comment_id is persisted to queue.json."""
r, q, client = _judge_round_with_agent_assignee(tmp_path)
client.comments.append(_verdict_comment())
_advance_awaiting_judge(r, client, q, FakeConfig(), _logger)
assert r.status == "done"
assert r.retrigger_comment_id, "retrigger_comment_id should be set on the round"
# Reload from disk and verify persistence
q2 = DebateQueue.load(tmp_path / "queue.json")
r2 = q2.rounds[0]
assert r2.retrigger_comment_id == r.retrigger_comment_id, (
"retrigger_comment_id must survive a queue reload"
)
# D8
def test_retrigger_happens_after_status_update_not_before(tmp_path):
"""Race regression: issue:in_progress must precede the retrigger post_comment."""
call_order: list[str] = []
class TrackingClient(FakeClient):
def update_issue_status(self, issue_id, status):
call_order.append(f"issue:{status}")
return super().update_issue_status(issue_id, status)
def post_comment(self, issue_id, content):
call_order.append("post_comment")
return super().post_comment(issue_id, content)
r = _make_round(
status="running",
phase="awaiting_judge",
phase_entered_at=_utcnow(),
coordinator_comment_id="coord-c",
judge_comment_id="judge-c",
)
q = _make_queue(tmp_path, r)
client = TrackingClient()
client.issue["assignee_type"] = "agent"
client.issue["assignee_id"] = "agent-worker"
client.agents = [
{"name": JUDGE_NAME, "id": "agent-judge"},
{"name": "Senior Developer", "id": "agent-worker"},
]
client.comments = [
{"id": "judge-c", "content": "x", "author_id": "coord", "created_at": _past(5)},
{"id": "v1", "content": "VERDICT: REJECT\n\nFailed.", "author_id": "agent-judge",
"author_type": "agent", "created_at": _utcnow()},
]
_advance_awaiting_judge(r, client, q, FakeConfig(), _logger)
assert "issue:in_progress" in call_order, "issue status update must happen"
assert "post_comment" in call_order, "retrigger post_comment must happen"
assert call_order.index("issue:in_progress") < call_order.index("post_comment"), (
f"issue:in_progress must precede post_comment, got: {call_order}"
)
def test_reject_retrigger_failure_does_not_block_round_completion(tmp_path):
"""REJECT retrigger: round completes even if posting the retrigger comment fails."""
r, q, client = _judge_round_with_agent_assignee(tmp_path)
original_post = FakeClient.post_comment
@@ -909,42 +959,14 @@ def test_reject_notification_failure_does_not_block_round_completion(tmp_path):
def post_with_failure(self, issue_id, content):
call_count[0] += 1
if call_count[0] > 1:
raise RuntimeError("API error on notification")
raise RuntimeError("API error on retrigger")
return original_post(self, issue_id, content)
FakeClient.post_comment = post_with_failure
try:
client.comments.append({
"id": "verdict1",
"content": "VERDICT: REJECT\n\nFails acceptance criteria.",
"author_id": "agent-judge",
"author_type": "agent",
"created_at": _utcnow(),
})
client.comments.append(_verdict_comment())
_advance_awaiting_judge(r, client, q, FakeConfig(), _logger)
assert r.status == "done", "Round must complete even if notification fails"
assert r.status == "done", "Round must complete even if retrigger posting fails"
assert r.phase == "rejected"
finally:
FakeClient.post_comment = original_post
def test_accept_does_not_post_rejection_followup(tmp_path):
"""ACCEPT path: no assignee followup comment is posted."""
r, q, client = _judge_round_with_agent_assignee(tmp_path)
client.comments.append({
"id": "verdict1",
"content": "VERDICT: ACCEPT\n\nExcellent work!",
"author_id": "agent-judge",
"author_type": "agent",
"created_at": _utcnow(),
})
_advance_awaiting_judge(r, client, q, FakeConfig(), _logger)
assert r.status == "done"
assert r.phase == "accepted"
# No followup @-mention to the assignee
assert all("mention://agent/agent-worker" not in c for c in client.posted_comments)