WYL-51 round 2: requirement anchoring retrigger comment
Replace the round-1 summariser approach with the full anchor pattern: A. _post_rejection_retrigger(round_, client, issue, verdict_comment_content, logger) - Renamed from _notify_assignee_on_reject - Non-agent/no-assignee path: post a non-mentioning coordinator note and return - Agent path: build full anchor comment via _build_retrigger_comment B. Anchor comment structure (verbatim, no summarising): 1. [@AssigneeName](mention://agent/<id>) 2. Verdict: REJECT (round <id>) 3. ## ANCHOR — Original requirements + full issue description in blockquote 4. ## Why this was rejected + full judge verdict in blockquote 5. ## Instructions for rework + REWORK_INSTRUCTIONS constant (verbatim) 6. Trailing audit line C. Round.retrigger_comment_id + DebateQueue.set_retrigger_comment_id D. 8 required tests (D1–D8): mention, verbatim description, no-drift constant, member skip, no-assignee skip, accept no-op, id persistence, race regression E. test_retrigger_on_reject_end_to_end integration test Removed: _extract_rejection_reasons, _build_rejection_followup (summarisers) Added: REWORK_INSTRUCTIONS constant, MulticaClient.get_agent_name Unit: 63 passed. Integration: 1 passed.
This commit is contained in:
@@ -118,3 +118,10 @@ class MulticaClient:
|
||||
if agent.get("name") in wanted:
|
||||
found[agent["name"]] = agent["id"]
|
||||
return found
|
||||
|
||||
def get_agent_name(self, agent_id: str) -> str | None:
|
||||
"""Return the display name of an agent by ID, or None if not found."""
|
||||
for agent in self.list_agents():
|
||||
if agent.get("id") == agent_id:
|
||||
return agent.get("name") or None
|
||||
return None
|
||||
|
||||
+141
-91
@@ -106,6 +106,27 @@ _VERDICT_RE = re.compile(
|
||||
r"^VERDICT:\s*(ACCEPT|REJECT)", re.MULTILINE | re.IGNORECASE
|
||||
)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Requirement anchoring — the single most important coordinator property.
|
||||
# DO NOT paraphrase, summarise, or rewrite this constant. It is copied
|
||||
# verbatim into every rejection retrigger comment so that the worker sees
|
||||
# exactly the same text every time it is bounced back.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
REWORK_INSTRUCTIONS = (
|
||||
"You are being re-fired on this issue because the previous delivery did not satisfy "
|
||||
"the ANCHOR requirements quoted above. You MUST compare your next delivery against "
|
||||
"the ANCHOR text verbatim \u2014 not against your earlier framing, and not against "
|
||||
"what you think the issue meant. If you find that the anchor cannot be satisfied with "
|
||||
"the tools you have, set the issue status to `blocked` and post a comment explaining "
|
||||
"exactly which anchor requirement cannot be met and why. Do NOT redefine scope. Do "
|
||||
"NOT substitute an easier experiment, artifact, or deliverable for a harder one. Do "
|
||||
"NOT ship a reinterpretation as if it were the requested thing. If any previous "
|
||||
'debater evidence suggested your work was "mostly correct just with one gap," that '
|
||||
"is advice about THIS rejection, not a license to make a one-line patch \u2014 "
|
||||
"re-check the anchor first, then decide what the minimum change is."
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Internal helpers
|
||||
@@ -258,121 +279,132 @@ def _build_debater_comment(
|
||||
return "\n".join(parts)
|
||||
|
||||
|
||||
def _extract_rejection_reasons(judge_reply: str) -> list[str]:
|
||||
"""Extract up to 3 failure reasons from a REJECT verdict text.
|
||||
|
||||
Prefers explicit bullet / numbered list items; falls back to the first
|
||||
two sentences of the reasoning block.
|
||||
"""
|
||||
# Strip the VERDICT line itself
|
||||
text = _VERDICT_RE.sub("", judge_reply, count=1).strip()
|
||||
|
||||
bullets: list[str] = []
|
||||
for line in text.splitlines():
|
||||
stripped = line.strip()
|
||||
if re.match(r"^[-*•]\s+\S", stripped) or re.match(r"^\d+[.)]\s+\S", stripped):
|
||||
reason = re.sub(r"^[-*•\d.)\s]+", "", stripped).strip()
|
||||
if reason:
|
||||
bullets.append(reason)
|
||||
if len(bullets) >= 3:
|
||||
break
|
||||
|
||||
if bullets:
|
||||
return bullets[:3]
|
||||
|
||||
# Fallback: first two non-empty sentences
|
||||
sentences = [s.strip() for s in re.split(r"(?<=[.!?])\s+", text) if s.strip()]
|
||||
return sentences[:2]
|
||||
def _to_blockquote(text: str) -> str:
|
||||
"""Wrap each line of text in a Markdown blockquote (``> `` prefix)."""
|
||||
return "\n".join(f"> {line}" if line else ">" for line in text.splitlines())
|
||||
|
||||
|
||||
def _build_rejection_followup(
|
||||
def _build_retrigger_comment(
|
||||
assignee_name: str,
|
||||
assignee_agent_id: str,
|
||||
judge_reply: str,
|
||||
issue_description: str,
|
||||
verdict_content: str,
|
||||
round_id: str,
|
||||
) -> str:
|
||||
"""Short comment @-mentioning the assignee with the REJECT summary."""
|
||||
mention = (
|
||||
f"[@{assignee_name}](mention://agent/{assignee_agent_id})"
|
||||
if assignee_agent_id
|
||||
else f"@{assignee_name}"
|
||||
)
|
||||
reasons = _extract_rejection_reasons(judge_reply)
|
||||
if reasons:
|
||||
reason_block = "\n".join(f"- {r}" for r in reasons)
|
||||
body = f"Top issues:\n{reason_block}"
|
||||
else:
|
||||
body = judge_reply[:400].strip()
|
||||
"""Build the full anchor-bearing retrigger comment body.
|
||||
|
||||
return (
|
||||
f"{mention} — VERDICT: REJECT\n\n"
|
||||
f"{body}\n\n"
|
||||
"Please address these issues and resubmit."
|
||||
)
|
||||
|
||||
|
||||
def _notify_assignee_on_reject(
|
||||
round_: Round,
|
||||
judge_reply: str,
|
||||
client: MulticaClient,
|
||||
logger: logging.Logger,
|
||||
) -> None:
|
||||
"""Post a rejection followup comment @-mentioning the original assignee.
|
||||
|
||||
Only fires when ``assignee_type == 'agent'``. Any failure is logged and
|
||||
swallowed — round completion must not depend on this side-effect.
|
||||
Structure (verbatim — do not rearrange or summarise any section):
|
||||
1. @-mention to fire the agent
|
||||
2. One-line verdict summary
|
||||
3. ## ANCHOR — Original requirements (verbatim description, blockquoted)
|
||||
4. ## Why this was rejected (verbatim verdict, blockquoted)
|
||||
5. ## Instructions for rework (REWORK_INSTRUCTIONS constant)
|
||||
6. Trailing audit line
|
||||
"""
|
||||
try:
|
||||
issue = client.get_issue(round_.issue_id)
|
||||
except Exception as exc:
|
||||
logger.warning(
|
||||
"round %s: cannot fetch issue for reject notification: %s",
|
||||
round_.round_id, exc,
|
||||
)
|
||||
return
|
||||
mention = f"[@{assignee_name}](mention://agent/{assignee_agent_id})"
|
||||
desc_bq = _to_blockquote(issue_description.strip())
|
||||
verdict_bq = _to_blockquote(verdict_content.strip())
|
||||
|
||||
return "\n\n".join([
|
||||
mention,
|
||||
f"Verdict: REJECT (round {round_id})",
|
||||
f"## ANCHOR \u2014 Original requirements\n\n{desc_bq}",
|
||||
f"## Why this was rejected\n\n{verdict_bq}",
|
||||
f"## Instructions for rework\n\n{REWORK_INSTRUCTIONS}",
|
||||
f"\u2014 Coordinator (auto-retrigger on reject, round {round_id})",
|
||||
])
|
||||
|
||||
|
||||
def _build_coordinator_note_no_agent(round_id: str, reason: str) -> str:
|
||||
"""Coordinator-only note (no @-mention) when the assignee is not an agent."""
|
||||
return (
|
||||
f"Verdict: REJECT (round {round_id}) \u2014 {reason}. "
|
||||
"Manual follow-up required."
|
||||
)
|
||||
|
||||
|
||||
def _post_rejection_retrigger(
|
||||
round_: Round,
|
||||
client: MulticaClient,
|
||||
issue: dict[str, Any],
|
||||
verdict_comment_content: str,
|
||||
logger: logging.Logger,
|
||||
) -> str | None:
|
||||
"""Post the retrigger comment after a REJECT verdict.
|
||||
|
||||
Returns the comment ID of the retrigger comment when the assignee is an
|
||||
agent (caller should persist it), or ``None`` when a non-mentioning
|
||||
coordinator note was posted instead (member/no-assignee) or when posting
|
||||
failed entirely.
|
||||
|
||||
Any failure is logged and swallowed — round completion must not depend on
|
||||
this side-effect.
|
||||
"""
|
||||
assignee_type = issue.get("assignee_type", "")
|
||||
assignee_id = issue.get("assignee_id", "")
|
||||
assignee_id = issue.get("assignee_id", "") or ""
|
||||
|
||||
if assignee_type != "agent" or not assignee_id:
|
||||
logger.debug(
|
||||
"round %s: assignee_type=%r — skipping reject notification",
|
||||
round_.round_id, assignee_type,
|
||||
reason = (
|
||||
f"original assignee is not an agent (assignee_type: {assignee_type!r})"
|
||||
if assignee_type
|
||||
else "no assignee set"
|
||||
)
|
||||
return
|
||||
|
||||
# Resolve the agent name from their ID
|
||||
try:
|
||||
agents = client.list_agents()
|
||||
except Exception as exc:
|
||||
logger.warning(
|
||||
"round %s: cannot list agents for reject notification: %s",
|
||||
round_.round_id, exc,
|
||||
logger.info(
|
||||
"round %s: %s — posting non-mentioning coordinator note",
|
||||
round_.round_id, reason,
|
||||
)
|
||||
return
|
||||
try:
|
||||
client.post_comment(
|
||||
round_.issue_id,
|
||||
_build_coordinator_note_no_agent(round_.round_id, reason),
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.warning(
|
||||
"round %s: cannot post coordinator note: %s",
|
||||
round_.round_id, exc,
|
||||
)
|
||||
return None
|
||||
|
||||
assignee_name = next(
|
||||
(a.get("name", "") for a in agents if a.get("id") == assignee_id),
|
||||
"",
|
||||
)
|
||||
# Agent path — look up name (calls list_agents once)
|
||||
assignee_name = client.get_agent_name(assignee_id)
|
||||
if not assignee_name:
|
||||
logger.warning(
|
||||
"round %s: assignee agent %s not found in workspace — skipping reject notification",
|
||||
"round %s: assignee agent %s not found in workspace — posting note without mention",
|
||||
round_.round_id, assignee_id,
|
||||
)
|
||||
return
|
||||
try:
|
||||
client.post_comment(
|
||||
round_.issue_id,
|
||||
_build_coordinator_note_no_agent(
|
||||
round_.round_id,
|
||||
f"assignee agent {assignee_id!r} not found in workspace",
|
||||
),
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.warning(
|
||||
"round %s: cannot post coordinator note: %s",
|
||||
round_.round_id, exc,
|
||||
)
|
||||
return None
|
||||
|
||||
body = _build_rejection_followup(assignee_name, assignee_id, judge_reply)
|
||||
issue_description = issue.get("description", "") or ""
|
||||
body = _build_retrigger_comment(
|
||||
assignee_name, assignee_id, issue_description, verdict_comment_content, round_.round_id,
|
||||
)
|
||||
try:
|
||||
client.post_comment(round_.issue_id, body)
|
||||
posted = client.post_comment(round_.issue_id, body)
|
||||
cid = posted.get("id", "")
|
||||
logger.info(
|
||||
"round %s: reject notification posted to @%s",
|
||||
round_.round_id, assignee_name,
|
||||
"round %s: retrigger comment posted to @%s (id=%s)",
|
||||
round_.round_id, assignee_name, cid,
|
||||
)
|
||||
return cid
|
||||
except Exception as exc:
|
||||
logger.warning(
|
||||
"round %s: cannot post reject notification: %s",
|
||||
"round %s: cannot post retrigger comment: %s",
|
||||
round_.round_id, exc,
|
||||
)
|
||||
return None
|
||||
|
||||
|
||||
def _build_judge_comment(
|
||||
@@ -625,9 +657,27 @@ def _advance_awaiting_judge(
|
||||
)
|
||||
return # do NOT mark done; retry next cycle
|
||||
|
||||
# On REJECT: re-trigger the original assignee via a short followup comment.
|
||||
# On REJECT: fetch issue and post the full anchor retrigger comment.
|
||||
if verdict == "REJECT":
|
||||
_notify_assignee_on_reject(round_, judge_reply, client, logger)
|
||||
try:
|
||||
issue = client.get_issue(round_.issue_id)
|
||||
except Exception as exc:
|
||||
logger.warning(
|
||||
"round %s: cannot fetch issue for retrigger — using empty dict: %s",
|
||||
round_.round_id, exc,
|
||||
)
|
||||
issue = {}
|
||||
retrigger_cid = _post_rejection_retrigger(
|
||||
round_, client, issue, judge_reply, logger,
|
||||
)
|
||||
if retrigger_cid:
|
||||
try:
|
||||
queue.set_retrigger_comment_id(round_.round_id, retrigger_cid)
|
||||
except Exception as exc:
|
||||
logger.warning(
|
||||
"round %s: cannot persist retrigger_comment_id: %s",
|
||||
round_.round_id, exc,
|
||||
)
|
||||
|
||||
phase = "accepted" if verdict == "ACCEPT" else "rejected"
|
||||
queue.update_phase(round_.round_id, phase)
|
||||
|
||||
@@ -53,6 +53,7 @@ class Round:
|
||||
phase_entered_at: str = "" # ISO8601 when current phase was entered (for timeout)
|
||||
coordinator_comment_id: str = "" # ID of debater-mention comment posted by coordinator
|
||||
judge_comment_id: str = "" # ID of judge-mention comment posted by coordinator
|
||||
retrigger_comment_id: str = "" # ID of retrigger comment posted after REJECT (audit)
|
||||
|
||||
def to_dict(self) -> dict[str, Any]:
|
||||
return asdict(self)
|
||||
@@ -70,6 +71,7 @@ class Round:
|
||||
phase_entered_at=d.get("phase_entered_at", ""),
|
||||
coordinator_comment_id=d.get("coordinator_comment_id", ""),
|
||||
judge_comment_id=d.get("judge_comment_id", ""),
|
||||
retrigger_comment_id=d.get("retrigger_comment_id", ""),
|
||||
)
|
||||
|
||||
|
||||
@@ -128,6 +130,15 @@ class DebateQueue:
|
||||
return
|
||||
raise KeyError(f"round {round_id!r} not found in queue")
|
||||
|
||||
def set_retrigger_comment_id(self, round_id: str, comment_id: str) -> None:
|
||||
"""Persist the retrigger comment ID for audit purposes."""
|
||||
for r in self.rounds:
|
||||
if r.round_id == round_id:
|
||||
r.retrigger_comment_id = comment_id
|
||||
self.save()
|
||||
return
|
||||
raise KeyError(f"round {round_id!r} not found in queue")
|
||||
|
||||
def update_phase(self, round_id: str, phase: str, **kwargs: str) -> None:
|
||||
"""Update round phase (and optionally other string fields) in place and persist.
|
||||
|
||||
|
||||
@@ -15,6 +15,7 @@ using the content-based fallback detector), verifies the result, then cleans up.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import html
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
@@ -209,3 +210,145 @@ def test_full_debate_round_lifecycle(cfg, client, tmp_path):
|
||||
logger.info("cleanup: issue %s set to done", issue_id)
|
||||
except Exception as exc:
|
||||
logger.warning("cleanup failed for issue %s: %s", issue_id, exc)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Integration test: REJECT retrigger + anchor (WYL-51)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@pytest.mark.integration
|
||||
def test_retrigger_on_reject_end_to_end(cfg, client, tmp_path):
|
||||
"""End-to-end test: REJECT verdict triggers an anchor retrigger comment.
|
||||
|
||||
Steps:
|
||||
1. Create scratch issue with multi-paragraph description + an agent assignee
|
||||
2. Manually enqueue a Round at phase=awaiting_judge
|
||||
3. Inject a VERDICT: REJECT comment via the content-marker fallback
|
||||
4. Run _advance_awaiting_judge
|
||||
5. Assert: issue=in_progress, retrigger comment exists with @mention,
|
||||
verbatim description, and no-drift instruction
|
||||
6. Cleanup
|
||||
"""
|
||||
from coordinator.orchestrator import (
|
||||
REWORK_INSTRUCTIONS,
|
||||
_advance_awaiting_judge,
|
||||
)
|
||||
from coordinator.queue import DebateQueue, Round
|
||||
|
||||
logger = logging.getLogger("test.integration.retrigger")
|
||||
issue_id = None
|
||||
|
||||
# Multi-paragraph description with unique sentinel values for verbatim check
|
||||
DESCRIPTION = (
|
||||
"Integration test anchor paragraph one. Sentinel: ANCHOR-INTEG-PARA1.\n\n"
|
||||
"Integration test anchor paragraph two. Sentinel: ANCHOR-INTEG-PARA2.\n\n"
|
||||
"Acceptance criteria: must contain ANCHOR-INTEG-CRITERIA."
|
||||
)
|
||||
|
||||
try:
|
||||
# --- 1. Create scratch issue ---
|
||||
issue = client.create_issue(
|
||||
title="[DEBATE-SMOKE-TEST] WYL-51 retrigger smoke — auto-delete",
|
||||
description=DESCRIPTION,
|
||||
status="in_review",
|
||||
)
|
||||
issue_id = issue["id"]
|
||||
logger.info("created scratch issue %s", issue_id)
|
||||
|
||||
# Set an agent assignee so retrigger fires with a mention.
|
||||
# Pick the first available agent in the workspace.
|
||||
agents = client.list_agents()
|
||||
assignee = agents[0] if agents else None
|
||||
if assignee:
|
||||
client.update_issue(
|
||||
issue_id,
|
||||
assignee_id=assignee["id"],
|
||||
assignee_type="agent",
|
||||
)
|
||||
logger.info("set assignee to %s (%s)", assignee.get("name"), assignee["id"])
|
||||
|
||||
# --- 2. Manually enqueue a Round at phase=awaiting_judge ---
|
||||
queue = DebateQueue.load(tmp_path / "queue.json")
|
||||
round_ = queue.enqueue(issue_id, "WYL-51-SMOKE", "Retrigger smoke test")
|
||||
queue.update_status(round_.round_id, "running")
|
||||
queue.update_phase(
|
||||
round_.round_id,
|
||||
"awaiting_judge",
|
||||
phase_entered_at=time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
|
||||
coordinator_comment_id="",
|
||||
)
|
||||
|
||||
# --- 3. Inject VERDICT: REJECT comment ---
|
||||
verdict_body = (
|
||||
"VERDICT: REJECT\n\n"
|
||||
"Integration test rejection. The smoke test intentionally rejects this "
|
||||
"delivery to verify the retrigger mechanism fires correctly."
|
||||
)
|
||||
client.post_comment(issue_id, verdict_body)
|
||||
|
||||
# Set judge_comment_id to empty so content-fallback triggers
|
||||
round_.judge_comment_id = ""
|
||||
queue.save()
|
||||
|
||||
# --- 4. Run _advance_awaiting_judge ---
|
||||
_advance_awaiting_judge(round_, client, queue, cfg, logger)
|
||||
|
||||
# --- 5. Assertions ---
|
||||
assert round_.status == "done", f"Expected round done, got {round_.status}"
|
||||
assert round_.phase == "rejected", f"Expected rejected, got {round_.phase}"
|
||||
|
||||
# (a) Issue status must be in_progress
|
||||
refreshed = client.get_issue(issue_id)
|
||||
assert refreshed.get("status") == "in_progress", (
|
||||
f"Issue should be in_progress after REJECT, got {refreshed.get('status')!r}"
|
||||
)
|
||||
|
||||
# (b) A retrigger comment must exist on the issue
|
||||
real_comments = client.list_comments(issue_id)
|
||||
retrigger_cid = round_.retrigger_comment_id
|
||||
if assignee:
|
||||
# Agent path: retrigger_comment_id must be set
|
||||
assert retrigger_cid, "retrigger_comment_id must be persisted on the round"
|
||||
retrigger_comment = next(
|
||||
(c for c in real_comments if c.get("id") == retrigger_cid),
|
||||
None,
|
||||
)
|
||||
assert retrigger_comment is not None, (
|
||||
f"Retrigger comment {retrigger_cid} not found in issue comments"
|
||||
)
|
||||
# The API HTML-encodes some chars (e.g. " → ", > → >) in returned
|
||||
# content. Decode before comparing so assertions match plain-string constants.
|
||||
body = html.unescape(retrigger_comment["content"])
|
||||
|
||||
# (b) @-mention present
|
||||
assert assignee["id"] in body, (
|
||||
"Retrigger comment must contain the assignee agent ID"
|
||||
)
|
||||
|
||||
# (c) Full verbatim description present
|
||||
for line in DESCRIPTION.splitlines():
|
||||
if line.strip():
|
||||
assert line in body, (
|
||||
f"Description line {line!r} not found verbatim in retrigger comment"
|
||||
)
|
||||
|
||||
# (d) No-drift instruction present
|
||||
assert REWORK_INSTRUCTIONS in body, (
|
||||
"REWORK_INSTRUCTIONS must appear verbatim in retrigger comment"
|
||||
)
|
||||
|
||||
logger.info(
|
||||
"retrigger comment (first 400 chars):\n%s",
|
||||
body[:400],
|
||||
)
|
||||
|
||||
logger.info("VERDICT OK — retrigger end-to-end passed")
|
||||
|
||||
finally:
|
||||
# --- 6. Cleanup ---
|
||||
if issue_id:
|
||||
try:
|
||||
client.update_issue_status(issue_id, "done")
|
||||
logger.info("cleanup: issue %s set to done", issue_id)
|
||||
except Exception as exc:
|
||||
logger.warning("cleanup failed for issue %s: %s", issue_id, exc)
|
||||
|
||||
+151
-129
@@ -13,13 +13,14 @@ import pytest
|
||||
from coordinator.orchestrator import (
|
||||
DEBATER_NAMES,
|
||||
JUDGE_NAME,
|
||||
REWORK_INSTRUCTIONS,
|
||||
_advance_awaiting_debaters,
|
||||
_advance_awaiting_judge,
|
||||
_collect_debater_replies,
|
||||
_collect_judge_reply,
|
||||
_extract_rejection_reasons,
|
||||
_find_commit_url,
|
||||
_parse_verdict,
|
||||
_post_rejection_retrigger,
|
||||
_start_round,
|
||||
orchestrate_pending,
|
||||
)
|
||||
@@ -92,6 +93,12 @@ class FakeClient:
|
||||
wanted = set(names)
|
||||
return {a["name"]: a["id"] for a in self.agents if a["name"] in wanted}
|
||||
|
||||
def get_agent_name(self, agent_id: str) -> str | None:
|
||||
for a in self.agents:
|
||||
if a.get("id") == agent_id:
|
||||
return a.get("name")
|
||||
return None
|
||||
|
||||
def update_issue_status(self, issue_id: str, status: str) -> dict[str, Any]:
|
||||
self.status_updates.append((issue_id, status))
|
||||
self.issue["status"] = status
|
||||
@@ -738,40 +745,7 @@ def test_orchestrate_pending_advances_running(tmp_path):
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _extract_rejection_reasons
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_extract_reasons_from_bullets():
|
||||
text = "VERDICT: REJECT\n\n- Missing tests\n- No error handling\n- Bad performance"
|
||||
reasons = _extract_rejection_reasons(text)
|
||||
assert reasons == ["Missing tests", "No error handling", "Bad performance"]
|
||||
|
||||
|
||||
def test_extract_reasons_from_numbered_list():
|
||||
text = "VERDICT: REJECT\n\n1. Unit tests missing\n2. Docs incomplete"
|
||||
reasons = _extract_rejection_reasons(text)
|
||||
assert reasons == ["Unit tests missing", "Docs incomplete"]
|
||||
|
||||
|
||||
def test_extract_reasons_capped_at_three():
|
||||
text = "VERDICT: REJECT\n\n- R1\n- R2\n- R3\n- R4\n- R5"
|
||||
assert len(_extract_rejection_reasons(text)) == 3
|
||||
|
||||
|
||||
def test_extract_reasons_fallback_to_sentences():
|
||||
text = "VERDICT: REJECT\n\nImplementation is broken. Tests are absent. Nothing works."
|
||||
reasons = _extract_rejection_reasons(text)
|
||||
assert len(reasons) <= 2
|
||||
assert any("broken" in r.lower() for r in reasons)
|
||||
|
||||
|
||||
def test_extract_reasons_empty_text():
|
||||
reasons = _extract_rejection_reasons("VERDICT: REJECT")
|
||||
assert isinstance(reasons, list)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _advance_awaiting_judge — REJECT assignee notification (WYL-51)
|
||||
# _advance_awaiting_judge — REJECT retrigger + anchor (WYL-51)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _judge_round_with_agent_assignee(
|
||||
@@ -779,128 +753,204 @@ def _judge_round_with_agent_assignee(
|
||||
*,
|
||||
assignee_id: str = "agent-worker",
|
||||
assignee_name: str = "Senior Developer",
|
||||
description: str = "Test issue description.",
|
||||
) -> tuple[Round, DebateQueue, FakeClient]:
|
||||
"""_judge_round helper pre-configured with an agent assignee."""
|
||||
r, q, client = _judge_round(tmp_path)
|
||||
client.issue["assignee_type"] = "agent"
|
||||
client.issue["assignee_id"] = assignee_id
|
||||
client.issue["description"] = description
|
||||
client.agents.append({"name": assignee_name, "id": assignee_id})
|
||||
return r, q, client
|
||||
|
||||
|
||||
def test_reject_posts_followup_with_assignee_mention(tmp_path):
|
||||
"""REJECT path: a followup comment is posted that @-mentions the agent assignee."""
|
||||
r, q, client = _judge_round_with_agent_assignee(tmp_path)
|
||||
client.comments.append({
|
||||
def _verdict_comment(content: str = "VERDICT: REJECT\n\nDoes not satisfy requirements.") -> dict:
|
||||
return {
|
||||
"id": "verdict1",
|
||||
"content": "VERDICT: REJECT\n\n- Missing tests\n- No error handling",
|
||||
"content": content,
|
||||
"author_id": "agent-judge",
|
||||
"author_type": "agent",
|
||||
"created_at": _utcnow(),
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
# D1
|
||||
def test_reject_posts_retrigger_with_assignee_mention(tmp_path):
|
||||
"""REJECT path: a retrigger comment is posted that @-mentions the agent assignee."""
|
||||
r, q, client = _judge_round_with_agent_assignee(tmp_path)
|
||||
client.comments.append(_verdict_comment())
|
||||
|
||||
_advance_awaiting_judge(r, client, q, FakeConfig(), _logger)
|
||||
|
||||
assert r.status == "done"
|
||||
assert r.phase == "rejected"
|
||||
followup = next(
|
||||
retrigger = next(
|
||||
(c for c in client.posted_comments if "mention://agent/agent-worker" in c),
|
||||
None,
|
||||
)
|
||||
assert followup is not None, "Expected followup comment with @-mention of assignee"
|
||||
assert "REJECT" in followup
|
||||
assert retrigger is not None, "Expected retrigger comment with @-mention of assignee"
|
||||
assert "Senior Developer" in retrigger
|
||||
assert "REJECT" in retrigger
|
||||
|
||||
|
||||
def test_reject_followup_contains_verdict_reasoning(tmp_path):
|
||||
"""REJECT followup contains extracted failure reasons from the judge reply."""
|
||||
r, q, client = _judge_round_with_agent_assignee(tmp_path)
|
||||
client.comments.append({
|
||||
"id": "verdict1",
|
||||
"content": (
|
||||
"VERDICT: REJECT\n\n"
|
||||
"- Unit tests are missing\n"
|
||||
"- Error handling is incomplete\n"
|
||||
"- Performance is unacceptable"
|
||||
),
|
||||
"author_id": "agent-judge",
|
||||
"author_type": "agent",
|
||||
"created_at": _utcnow(),
|
||||
})
|
||||
# D2
|
||||
def test_reject_retrigger_includes_verbatim_original_description(tmp_path):
|
||||
"""REJECT retrigger: full verbatim issue description appears in comment body."""
|
||||
desc = (
|
||||
"First paragraph of specific requirements.\n\n"
|
||||
"Second paragraph with unique detail: abc-xyz-123.\n\n"
|
||||
"Third paragraph with acceptance criteria: must-pass-qa-check."
|
||||
)
|
||||
r, q, client = _judge_round_with_agent_assignee(tmp_path, description=desc)
|
||||
client.comments.append(_verdict_comment())
|
||||
|
||||
_advance_awaiting_judge(r, client, q, FakeConfig(), _logger)
|
||||
|
||||
followup = next(
|
||||
retrigger = next(
|
||||
(c for c in client.posted_comments if "mention://agent/agent-worker" in c),
|
||||
None,
|
||||
)
|
||||
assert followup is not None
|
||||
assert "tests" in followup.lower() or "error" in followup.lower() or "performance" in followup.lower()
|
||||
assert retrigger is not None
|
||||
# Every non-empty line of the description must appear verbatim
|
||||
for line in desc.splitlines():
|
||||
if line.strip():
|
||||
assert line in retrigger, (
|
||||
f"Line {line!r} not found verbatim in retrigger comment"
|
||||
)
|
||||
|
||||
|
||||
def test_reject_followup_includes_retry_request(tmp_path):
|
||||
"""REJECT followup asks the assignee to retry."""
|
||||
# D3
|
||||
def test_reject_retrigger_includes_no_drift_instruction(tmp_path):
|
||||
"""REJECT retrigger: REWORK_INSTRUCTIONS constant appears verbatim in comment."""
|
||||
r, q, client = _judge_round_with_agent_assignee(tmp_path)
|
||||
client.comments.append({
|
||||
"id": "verdict1",
|
||||
"content": "VERDICT: REJECT\n\nDoes not meet acceptance criteria.",
|
||||
"author_id": "agent-judge",
|
||||
"author_type": "agent",
|
||||
"created_at": _utcnow(),
|
||||
})
|
||||
client.comments.append(_verdict_comment())
|
||||
|
||||
_advance_awaiting_judge(r, client, q, FakeConfig(), _logger)
|
||||
|
||||
followup = next(
|
||||
retrigger = next(
|
||||
(c for c in client.posted_comments if "mention://agent/agent-worker" in c),
|
||||
None,
|
||||
)
|
||||
assert followup is not None
|
||||
assert "resubmit" in followup.lower() or "retry" in followup.lower()
|
||||
assert retrigger is not None
|
||||
assert REWORK_INSTRUCTIONS in retrigger, (
|
||||
"REWORK_INSTRUCTIONS constant must appear verbatim in retrigger comment"
|
||||
)
|
||||
assert "Do NOT redefine scope" in retrigger
|
||||
|
||||
|
||||
def test_reject_no_followup_for_member_assignee(tmp_path):
|
||||
"""REJECT path: no followup when assignee_type == 'member' (leave to human)."""
|
||||
# D4
|
||||
def test_reject_retrigger_skipped_if_assignee_is_member(tmp_path):
|
||||
"""REJECT: member assignee → non-mentioning coordinator comment, no @-mention."""
|
||||
r, q, client = _judge_round(tmp_path)
|
||||
client.issue["assignee_type"] = "member"
|
||||
client.issue["assignee_id"] = "user-123"
|
||||
|
||||
client.comments.append({
|
||||
"id": "verdict1",
|
||||
"content": "VERDICT: REJECT\n\nNeeds more work.",
|
||||
"author_id": "agent-judge",
|
||||
"author_type": "agent",
|
||||
"created_at": _utcnow(),
|
||||
})
|
||||
client.comments.append(_verdict_comment())
|
||||
|
||||
_advance_awaiting_judge(r, client, q, FakeConfig(), _logger)
|
||||
|
||||
assert r.status == "done"
|
||||
assert r.phase == "rejected"
|
||||
assert all("mention://" not in c for c in client.posted_comments)
|
||||
# A coordinator comment IS posted (for audit), but without any @-mention
|
||||
assert len(client.posted_comments) == 1
|
||||
assert "mention://" not in client.posted_comments[0]
|
||||
assert "Manual follow-up required" in client.posted_comments[0]
|
||||
|
||||
|
||||
def test_reject_no_followup_when_no_assignee(tmp_path):
|
||||
"""REJECT path: no followup when issue has no assignee set."""
|
||||
# D5
|
||||
def test_reject_retrigger_skipped_if_no_assignee(tmp_path):
|
||||
"""REJECT: no assignee → non-mentioning coordinator comment, no @-mention."""
|
||||
r, q, client = _judge_round(tmp_path)
|
||||
# Default FakeClient issue has no assignee fields
|
||||
|
||||
client.comments.append({
|
||||
"id": "verdict1",
|
||||
"content": "VERDICT: REJECT\n\nNo work done.",
|
||||
"author_id": "agent-judge",
|
||||
"author_type": "agent",
|
||||
"created_at": _utcnow(),
|
||||
})
|
||||
client.comments.append(_verdict_comment())
|
||||
|
||||
_advance_awaiting_judge(r, client, q, FakeConfig(), _logger)
|
||||
|
||||
assert r.status == "done"
|
||||
assert r.phase == "rejected"
|
||||
assert len(client.posted_comments) == 0
|
||||
assert len(client.posted_comments) == 1
|
||||
assert "mention://" not in client.posted_comments[0]
|
||||
assert "Manual follow-up required" in client.posted_comments[0]
|
||||
|
||||
|
||||
def test_reject_notification_failure_does_not_block_round_completion(tmp_path):
|
||||
"""REJECT followup: round completes even if posting the notification fails."""
|
||||
# D6
|
||||
def test_accept_does_not_post_retrigger(tmp_path):
|
||||
"""ACCEPT path: no retrigger comment is posted."""
|
||||
r, q, client = _judge_round_with_agent_assignee(tmp_path)
|
||||
client.comments.append(_verdict_comment("VERDICT: ACCEPT\n\nExcellent work!"))
|
||||
|
||||
_advance_awaiting_judge(r, client, q, FakeConfig(), _logger)
|
||||
|
||||
assert r.status == "done"
|
||||
assert r.phase == "accepted"
|
||||
assert all("mention://agent/agent-worker" not in c for c in client.posted_comments)
|
||||
|
||||
|
||||
# D7
|
||||
def test_retrigger_comment_id_persisted_on_round(tmp_path):
|
||||
"""After REJECT, round.retrigger_comment_id is persisted to queue.json."""
|
||||
r, q, client = _judge_round_with_agent_assignee(tmp_path)
|
||||
client.comments.append(_verdict_comment())
|
||||
|
||||
_advance_awaiting_judge(r, client, q, FakeConfig(), _logger)
|
||||
|
||||
assert r.status == "done"
|
||||
assert r.retrigger_comment_id, "retrigger_comment_id should be set on the round"
|
||||
|
||||
# Reload from disk and verify persistence
|
||||
q2 = DebateQueue.load(tmp_path / "queue.json")
|
||||
r2 = q2.rounds[0]
|
||||
assert r2.retrigger_comment_id == r.retrigger_comment_id, (
|
||||
"retrigger_comment_id must survive a queue reload"
|
||||
)
|
||||
|
||||
|
||||
# D8
|
||||
def test_retrigger_happens_after_status_update_not_before(tmp_path):
|
||||
"""Race regression: issue:in_progress must precede the retrigger post_comment."""
|
||||
call_order: list[str] = []
|
||||
|
||||
class TrackingClient(FakeClient):
|
||||
def update_issue_status(self, issue_id, status):
|
||||
call_order.append(f"issue:{status}")
|
||||
return super().update_issue_status(issue_id, status)
|
||||
|
||||
def post_comment(self, issue_id, content):
|
||||
call_order.append("post_comment")
|
||||
return super().post_comment(issue_id, content)
|
||||
|
||||
r = _make_round(
|
||||
status="running",
|
||||
phase="awaiting_judge",
|
||||
phase_entered_at=_utcnow(),
|
||||
coordinator_comment_id="coord-c",
|
||||
judge_comment_id="judge-c",
|
||||
)
|
||||
q = _make_queue(tmp_path, r)
|
||||
|
||||
client = TrackingClient()
|
||||
client.issue["assignee_type"] = "agent"
|
||||
client.issue["assignee_id"] = "agent-worker"
|
||||
client.agents = [
|
||||
{"name": JUDGE_NAME, "id": "agent-judge"},
|
||||
{"name": "Senior Developer", "id": "agent-worker"},
|
||||
]
|
||||
client.comments = [
|
||||
{"id": "judge-c", "content": "x", "author_id": "coord", "created_at": _past(5)},
|
||||
{"id": "v1", "content": "VERDICT: REJECT\n\nFailed.", "author_id": "agent-judge",
|
||||
"author_type": "agent", "created_at": _utcnow()},
|
||||
]
|
||||
|
||||
_advance_awaiting_judge(r, client, q, FakeConfig(), _logger)
|
||||
|
||||
assert "issue:in_progress" in call_order, "issue status update must happen"
|
||||
assert "post_comment" in call_order, "retrigger post_comment must happen"
|
||||
assert call_order.index("issue:in_progress") < call_order.index("post_comment"), (
|
||||
f"issue:in_progress must precede post_comment, got: {call_order}"
|
||||
)
|
||||
|
||||
|
||||
def test_reject_retrigger_failure_does_not_block_round_completion(tmp_path):
|
||||
"""REJECT retrigger: round completes even if posting the retrigger comment fails."""
|
||||
r, q, client = _judge_round_with_agent_assignee(tmp_path)
|
||||
|
||||
original_post = FakeClient.post_comment
|
||||
@@ -909,42 +959,14 @@ def test_reject_notification_failure_does_not_block_round_completion(tmp_path):
|
||||
def post_with_failure(self, issue_id, content):
|
||||
call_count[0] += 1
|
||||
if call_count[0] > 1:
|
||||
raise RuntimeError("API error on notification")
|
||||
raise RuntimeError("API error on retrigger")
|
||||
return original_post(self, issue_id, content)
|
||||
|
||||
FakeClient.post_comment = post_with_failure
|
||||
try:
|
||||
client.comments.append({
|
||||
"id": "verdict1",
|
||||
"content": "VERDICT: REJECT\n\nFails acceptance criteria.",
|
||||
"author_id": "agent-judge",
|
||||
"author_type": "agent",
|
||||
"created_at": _utcnow(),
|
||||
})
|
||||
|
||||
client.comments.append(_verdict_comment())
|
||||
_advance_awaiting_judge(r, client, q, FakeConfig(), _logger)
|
||||
|
||||
assert r.status == "done", "Round must complete even if notification fails"
|
||||
assert r.status == "done", "Round must complete even if retrigger posting fails"
|
||||
assert r.phase == "rejected"
|
||||
finally:
|
||||
FakeClient.post_comment = original_post
|
||||
|
||||
|
||||
def test_accept_does_not_post_rejection_followup(tmp_path):
|
||||
"""ACCEPT path: no assignee followup comment is posted."""
|
||||
r, q, client = _judge_round_with_agent_assignee(tmp_path)
|
||||
|
||||
client.comments.append({
|
||||
"id": "verdict1",
|
||||
"content": "VERDICT: ACCEPT\n\nExcellent work!",
|
||||
"author_id": "agent-judge",
|
||||
"author_type": "agent",
|
||||
"created_at": _utcnow(),
|
||||
})
|
||||
|
||||
_advance_awaiting_judge(r, client, q, FakeConfig(), _logger)
|
||||
|
||||
assert r.status == "done"
|
||||
assert r.phase == "accepted"
|
||||
# No followup @-mention to the assignee
|
||||
assert all("mention://agent/agent-worker" not in c for c in client.posted_comments)
|
||||
|
||||
Reference in New Issue
Block a user