fix(server/child-done): trigger agent parent assignee on child done (MUL-2808) (#3507)

Remove the agent-path self-trigger guard in triggerChildDoneAgent so a child going done wakes its parent agent even when the same agent owns both — a serial sub-task handoff across two different issues, not a loop. Runaway re-triggering stays bounded by HasPendingTaskForIssueAndAgent. Squad path unchanged. Closes #3374.
2026-06-17 03:38:32 +02:00 · 2026-05-29 12:49:07 +08:00
parent c730e906b9
commit ca1ea5716e
2 changed files with 50 additions and 37 deletions
--- a/server/internal/handler/issue_child_done.go
+++ b/server/internal/handler/issue_child_done.go
@@ -226,13 +226,19 @@ func sanitizeMentionLabel(name string) string {
 //
 // Guards applied here:
 //   - No-op when the parent has no assignee row.
-//   - Loop guard: skip when the agent that effectively "owns" the child
-//     (its agent assignee, or the leader of its squad assignee) is the same
-//     agent the parent would trigger (the parent agent itself, or the
-//     parent squad's leader). Without this an agent that drives both child
-//     and parent immediately re-runs on the parent and can post another
-//     child, looping — including the cross-squad case where two different
-//     squads share a leader.
+//   - Squad loop guard (squad parent only): skip when the finished child is
+//     the same squad, or its effective owner is the parent squad's leader. A
+//     squad leader already observes same-squad work through its own
+//     coordination cycle — the worker's completion comment wakes the leader
+//     via shouldEnqueueSquadLeaderOnComment — so the child-done trigger would
+//     be redundant; this also closes the cross-squad shared-leader loop. The
+//     AGENT parent path intentionally has NO such guard (MUL-2808): a lone
+//     agent that decomposes its parent into sub-issues it owns itself has no
+//     other wake path, and waking the parent agent when its child finishes is
+//     a serial sub-task handoff across two DIFFERENT issues — explicitly not a
+//     self-loop per isAgentRunningOnIssue, and consistent with the @mention
+//     self-trigger path (enqueueMentionedAgentTasks). Runaway re-triggering is
+//     bounded by the idempotency guard below, not by suppressing the trigger.
 //   - Idempotency: HasPendingTaskForIssueAndAgent dedupes rapid-fire enqueues
 //     for the same parent (e.g. two children finishing back-to-back).
 //   - Readiness: archived agents / missing runtimes are silently skipped
@@ -244,21 +250,26 @@ func (h *Handler) dispatchParentAssigneeTrigger(ctx context.Context, parent, chi

 	switch parent.AssigneeType.String {
 	case "agent":
-		h.triggerChildDoneAgent(ctx, parent, child, systemComment.ID)
+		h.triggerChildDoneAgent(ctx, parent, systemComment.ID)
 	case "squad":
 		h.triggerChildDoneSquad(ctx, parent, child, systemComment.ID)
 	}
 }

 // triggerChildDoneAgent enqueues a mention-style task for the parent's
-// agent assignee, applying the self-trigger guard documented on
-// dispatchParentAssigneeTrigger.
-func (h *Handler) triggerChildDoneAgent(ctx context.Context, parent, child db.Issue, triggerCommentID pgtype.UUID) {
-	if owner := h.effectiveChildAgentOwner(ctx, child); owner.Valid &&
-		uuidToString(owner) == uuidToString(parent.AssigneeID) {
-		return
-	}
-
+// agent assignee.
+//
+// There is intentionally NO same-agent self-trigger guard here, unlike the
+// squad path. Waking the parent agent when one of its children finishes is a
+// serial sub-task handoff between two DIFFERENT issues, which the platform
+// loop model treats as legitimate ("not a loop and must fire" — see
+// isAgentRunningOnIssue); only re-entering the SAME issue is a loop. A lone
+// agent that decomposes its parent into sub-issues it owns itself has no
+// other wake path, so the old "child owner == parent agent" guard silently
+// stranded those parents (MUL-2808). Runaway re-triggering is prevented by
+// the HasPendingTaskForIssueAndAgent dedup below, exactly as the @mention
+// self-trigger path relies on it (see enqueueMentionedAgentTasks).
+func (h *Handler) triggerChildDoneAgent(ctx context.Context, parent db.Issue, triggerCommentID pgtype.UUID) {
 	agent, err := h.Queries.GetAgentInWorkspace(ctx, db.GetAgentInWorkspaceParams{
 		ID:          parent.AssigneeID,
 		WorkspaceID: parent.WorkspaceID,
--- a/server/internal/handler/issue_child_done_test.go
+++ b/server/internal/handler/issue_child_done_test.go
@@ -388,12 +388,16 @@ func TestChildDoneMentionsParentAssignee_Squad(t *testing.T) {
 	}
 }

-// TestChildDoneSelfTriggerGuard_SameAgent — when the parent assignee is
-// the same agent that owns the child, the comment still records the
-// completion (so the timeline tells the full story) but no new task is
-// enqueued. Without this guard the agent immediately re-runs on the
-// parent and can post another child, looping.
-func TestChildDoneSelfTriggerGuard_SameAgent(t *testing.T) {
+// TestChildDoneTriggersParentAgentWhenSameAgentOwnsChild — when the parent
+// agent assignee is the SAME agent that owns the just-finished child, the
+// parent agent must still be triggered (MUL-2808). A child finishing and
+// waking its parent is a serial sub-task handoff between two different
+// issues, not a self-loop — and the lone-agent decomposition pattern (one
+// agent owns both the parent and the sub-issues it created) has no other
+// wake path. The comment is created AND exactly one task is enqueued on the
+// parent; runaway re-triggering is bounded by the HasPendingTaskForIssueAndAgent
+// dedup, not by suppressing the trigger.
+func TestChildDoneTriggersParentAgentWhenSameAgentOwnsChild(t *testing.T) {
 	fx := newChildDoneFixture(t, "in_progress")

 	var agentID string
@@ -416,26 +420,24 @@ func TestChildDoneSelfTriggerGuard_SameAgent(t *testing.T) {

 	updateChildStatus(t, fx.child.ID, "done")

-	// The comment should still be created (the human user reading the
-	// timeline still benefits from seeing that the child finished). The
-	// task enqueue is what gets skipped.
 	content := parentSystemCommentContent(t, fx.parent.ID)
 	if !strings.Contains(content, "mention://agent/"+agentID) {
 		t.Errorf("expected parent-assignee mention in system comment, got: %s", content)
 	}
-	if got := countPendingTasksForAgent(t, fx.parent.ID, agentID); got != 0 {
-		t.Errorf("expected 0 pending tasks on parent (self-trigger guard), got %d", got)
+	if got := countPendingTasksForAgent(t, fx.parent.ID, agentID); got != 1 {
+		t.Errorf("expected 1 pending task on parent (serial sub-task handoff), got %d", got)
 	}
 }

-// TestChildDoneSelfTriggerGuard_AgentParentSquadChildSameLeader — the
-// cross-type loop case Elon called out. Parent is assigned to agent A
-// directly; child is assigned to a squad whose leader is also agent A.
-// Without `effectiveChildAgentOwner`, the parent-agent path only checked
-// the child's direct assignee and would happily enqueue agent A on the
-// parent, restarting the loop through the squad. The system comment is
-// still created (timeline parity); only the task enqueue is suppressed.
-func TestChildDoneSelfTriggerGuard_AgentParentSquadChildSameLeader(t *testing.T) {
+// TestChildDoneTriggersParentAgentWhenChildSquadSharesLeader — parent is
+// assigned to agent A directly; the finished child is assigned to a squad
+// whose leader is also agent A. Because the parent is an AGENT, dispatch
+// routes through the agent path, which (post-MUL-2808) has no self-trigger
+// guard: A coordinates the parent and must be woken to advance it when the
+// child completes, regardless of who executed the child. The genuinely
+// loop-prone case — BOTH sides squads sharing a leader — is still guarded on
+// the squad path (see TestChildDoneSelfTriggerGuard_SquadParentDifferentSquadSameLeader).
+func TestChildDoneTriggersParentAgentWhenChildSquadSharesLeader(t *testing.T) {
 	fx := newChildDoneFixture(t, "in_progress")
 	sq := newSquadCommentTriggerFixture(t)

@@ -454,8 +456,8 @@ func TestChildDoneSelfTriggerGuard_AgentParentSquadChildSameLeader(t *testing.T)
 	if !strings.Contains(content, "mention://agent/"+sq.LeaderID) {
 		t.Errorf("expected parent-agent mention in system comment, got: %s", content)
 	}
-	if got := countPendingTasksForAgent(t, fx.parent.ID, sq.LeaderID); got != 0 {
-		t.Errorf("expected 0 pending tasks on parent (shared-leader guard), got %d", got)
+	if got := countPendingTasksForAgent(t, fx.parent.ID, sq.LeaderID); got != 1 {
+		t.Errorf("expected 1 pending task on parent (serial sub-task handoff), got %d", got)
 	}
 }