diff --git a/server/internal/handler/issue_child_done.go b/server/internal/handler/issue_child_done.go index d9d1ca63b..5d137f48d 100644 --- a/server/internal/handler/issue_child_done.go +++ b/server/internal/handler/issue_child_done.go @@ -226,13 +226,19 @@ func sanitizeMentionLabel(name string) string { // // Guards applied here: // - No-op when the parent has no assignee row. -// - Loop guard: skip when the agent that effectively "owns" the child -// (its agent assignee, or the leader of its squad assignee) is the same -// agent the parent would trigger (the parent agent itself, or the -// parent squad's leader). Without this an agent that drives both child -// and parent immediately re-runs on the parent and can post another -// child, looping — including the cross-squad case where two different -// squads share a leader. +// - Squad loop guard (squad parent only): skip when the finished child is +// the same squad, or its effective owner is the parent squad's leader. A +// squad leader already observes same-squad work through its own +// coordination cycle — the worker's completion comment wakes the leader +// via shouldEnqueueSquadLeaderOnComment — so the child-done trigger would +// be redundant; this also closes the cross-squad shared-leader loop. The +// AGENT parent path intentionally has NO such guard (MUL-2808): a lone +// agent that decomposes its parent into sub-issues it owns itself has no +// other wake path, and waking the parent agent when its child finishes is +// a serial sub-task handoff across two DIFFERENT issues — explicitly not a +// self-loop per isAgentRunningOnIssue, and consistent with the @mention +// self-trigger path (enqueueMentionedAgentTasks). Runaway re-triggering is +// bounded by the idempotency guard below, not by suppressing the trigger. // - Idempotency: HasPendingTaskForIssueAndAgent dedupes rapid-fire enqueues // for the same parent (e.g. two children finishing back-to-back). // - Readiness: archived agents / missing runtimes are silently skipped @@ -244,21 +250,26 @@ func (h *Handler) dispatchParentAssigneeTrigger(ctx context.Context, parent, chi switch parent.AssigneeType.String { case "agent": - h.triggerChildDoneAgent(ctx, parent, child, systemComment.ID) + h.triggerChildDoneAgent(ctx, parent, systemComment.ID) case "squad": h.triggerChildDoneSquad(ctx, parent, child, systemComment.ID) } } // triggerChildDoneAgent enqueues a mention-style task for the parent's -// agent assignee, applying the self-trigger guard documented on -// dispatchParentAssigneeTrigger. -func (h *Handler) triggerChildDoneAgent(ctx context.Context, parent, child db.Issue, triggerCommentID pgtype.UUID) { - if owner := h.effectiveChildAgentOwner(ctx, child); owner.Valid && - uuidToString(owner) == uuidToString(parent.AssigneeID) { - return - } - +// agent assignee. +// +// There is intentionally NO same-agent self-trigger guard here, unlike the +// squad path. Waking the parent agent when one of its children finishes is a +// serial sub-task handoff between two DIFFERENT issues, which the platform +// loop model treats as legitimate ("not a loop and must fire" — see +// isAgentRunningOnIssue); only re-entering the SAME issue is a loop. A lone +// agent that decomposes its parent into sub-issues it owns itself has no +// other wake path, so the old "child owner == parent agent" guard silently +// stranded those parents (MUL-2808). Runaway re-triggering is prevented by +// the HasPendingTaskForIssueAndAgent dedup below, exactly as the @mention +// self-trigger path relies on it (see enqueueMentionedAgentTasks). +func (h *Handler) triggerChildDoneAgent(ctx context.Context, parent db.Issue, triggerCommentID pgtype.UUID) { agent, err := h.Queries.GetAgentInWorkspace(ctx, db.GetAgentInWorkspaceParams{ ID: parent.AssigneeID, WorkspaceID: parent.WorkspaceID, diff --git a/server/internal/handler/issue_child_done_test.go b/server/internal/handler/issue_child_done_test.go index 61da70b79..2bea9e8a8 100644 --- a/server/internal/handler/issue_child_done_test.go +++ b/server/internal/handler/issue_child_done_test.go @@ -388,12 +388,16 @@ func TestChildDoneMentionsParentAssignee_Squad(t *testing.T) { } } -// TestChildDoneSelfTriggerGuard_SameAgent — when the parent assignee is -// the same agent that owns the child, the comment still records the -// completion (so the timeline tells the full story) but no new task is -// enqueued. Without this guard the agent immediately re-runs on the -// parent and can post another child, looping. -func TestChildDoneSelfTriggerGuard_SameAgent(t *testing.T) { +// TestChildDoneTriggersParentAgentWhenSameAgentOwnsChild — when the parent +// agent assignee is the SAME agent that owns the just-finished child, the +// parent agent must still be triggered (MUL-2808). A child finishing and +// waking its parent is a serial sub-task handoff between two different +// issues, not a self-loop — and the lone-agent decomposition pattern (one +// agent owns both the parent and the sub-issues it created) has no other +// wake path. The comment is created AND exactly one task is enqueued on the +// parent; runaway re-triggering is bounded by the HasPendingTaskForIssueAndAgent +// dedup, not by suppressing the trigger. +func TestChildDoneTriggersParentAgentWhenSameAgentOwnsChild(t *testing.T) { fx := newChildDoneFixture(t, "in_progress") var agentID string @@ -416,26 +420,24 @@ func TestChildDoneSelfTriggerGuard_SameAgent(t *testing.T) { updateChildStatus(t, fx.child.ID, "done") - // The comment should still be created (the human user reading the - // timeline still benefits from seeing that the child finished). The - // task enqueue is what gets skipped. content := parentSystemCommentContent(t, fx.parent.ID) if !strings.Contains(content, "mention://agent/"+agentID) { t.Errorf("expected parent-assignee mention in system comment, got: %s", content) } - if got := countPendingTasksForAgent(t, fx.parent.ID, agentID); got != 0 { - t.Errorf("expected 0 pending tasks on parent (self-trigger guard), got %d", got) + if got := countPendingTasksForAgent(t, fx.parent.ID, agentID); got != 1 { + t.Errorf("expected 1 pending task on parent (serial sub-task handoff), got %d", got) } } -// TestChildDoneSelfTriggerGuard_AgentParentSquadChildSameLeader — the -// cross-type loop case Elon called out. Parent is assigned to agent A -// directly; child is assigned to a squad whose leader is also agent A. -// Without `effectiveChildAgentOwner`, the parent-agent path only checked -// the child's direct assignee and would happily enqueue agent A on the -// parent, restarting the loop through the squad. The system comment is -// still created (timeline parity); only the task enqueue is suppressed. -func TestChildDoneSelfTriggerGuard_AgentParentSquadChildSameLeader(t *testing.T) { +// TestChildDoneTriggersParentAgentWhenChildSquadSharesLeader — parent is +// assigned to agent A directly; the finished child is assigned to a squad +// whose leader is also agent A. Because the parent is an AGENT, dispatch +// routes through the agent path, which (post-MUL-2808) has no self-trigger +// guard: A coordinates the parent and must be woken to advance it when the +// child completes, regardless of who executed the child. The genuinely +// loop-prone case — BOTH sides squads sharing a leader — is still guarded on +// the squad path (see TestChildDoneSelfTriggerGuard_SquadParentDifferentSquadSameLeader). +func TestChildDoneTriggersParentAgentWhenChildSquadSharesLeader(t *testing.T) { fx := newChildDoneFixture(t, "in_progress") sq := newSquadCommentTriggerFixture(t) @@ -454,8 +456,8 @@ func TestChildDoneSelfTriggerGuard_AgentParentSquadChildSameLeader(t *testing.T) if !strings.Contains(content, "mention://agent/"+sq.LeaderID) { t.Errorf("expected parent-agent mention in system comment, got: %s", content) } - if got := countPendingTasksForAgent(t, fx.parent.ID, sq.LeaderID); got != 0 { - t.Errorf("expected 0 pending tasks on parent (shared-leader guard), got %d", got) + if got := countPendingTasksForAgent(t, fx.parent.ID, sq.LeaderID); got != 1 { + t.Errorf("expected 1 pending task on parent (serial sub-task handoff), got %d", got) } }