Files
multica/server/internal/handler/agent_test.go
Bohan Jiang 270fb6aa73 MUL-2792 fix(agent): preserve skills in update/archive/restore response (#3464)
* MUL-2792 fix(agent): preserve skills in update/archive/restore response (#3459)

agentToResponse always initialises Skills as []; the mutation handlers
relied on the caller to refresh it, but only GetAgent and ListAgents
actually did. UpdateAgent / ArchiveAgent / RestoreAgent therefore
returned "skills": [] regardless of what the agent_skill junction table
contained.

The DB write path was never wrong — skills weren't actually deleted —
but the misleading response (and its matching agent:status / archived /
restored WS broadcast) scared users into manually re-running
`agent skills set` and risked scripted clients writing the empty set
back as truth.

Extract the existing GetAgent skill-reload block into attachAgentSkills
and call it from the three buggy handlers. Add regression tests that
attach skills, hit each mutation endpoint, and assert both the response
and the junction table.

Co-authored-by: multica-agent <github@multica.ai>

* fix(agent): attach skills before env/template broadcasts (#3459)

Two follow-up sites flagged in PR #3464 review that shared the same
"agentToResponse zeroes Skills, callers forget to reload" pattern as the
mutation handlers:

- agent_env.go: the agent:status broadcast after UpdateAgentEnv used a
  bare agentToResponse, so subscribers saw skills wiped on every env
  rotation. HTTP body is AgentEnvResponse so the response itself is
  unaffected, but the WS event still misleads any cache that ingests it.
- agent_template.go: CreateAgentFromTemplate attaches imported and extra
  skills inside the tx, then builds the response/agent:created broadcast
  without reloading them — so callers (and any client tracking the
  create event) see the freshly created agent as skill-less despite the
  template having just imported them.

Both call sites now reuse attachAgentSkills introduced for UpdateAgent.

Co-authored-by: multica-agent <github@multica.ai>

---------

Co-authored-by: J <j@multica.ai>
Co-authored-by: multica-agent <github@multica.ai>
2026-05-28 19:06:57 +08:00

936 lines
35 KiB
Go

package handler
import (
"bytes"
"context"
"encoding/json"
"fmt"
"net/http"
"net/http/httptest"
"reflect"
"strings"
"testing"
)
// TestListWorkspaceAgentTaskSnapshot covers the agent presence snapshot endpoint:
// every active task (queued/dispatched/running) PLUS each agent's most recent
// OUTCOME task (completed/failed only). Cancelled tasks are excluded by design
// from the outcome half — they're a procedural signal, not an outcome, and
// must NOT mask a prior failure.
//
// The fixtures cover every branch the SQL must classify:
// - actives are always returned, no dedup
// - outcomes are deduped to "latest per agent" by completed_at
// - the OLD 2-minute window must be irrelevant (a 5-minute-old failure is
// still returned if it's the latest outcome)
// - cancelled rows are NEVER returned, even when they are temporally newer
// than a failure — this is what keeps the failed signal sticky after the
// user cancels their queued retry
func TestListWorkspaceAgentTaskSnapshot(t *testing.T) {
if testHandler == nil {
t.Skip("database not available")
}
ctx := context.Background()
// Three agents so we can verify per-agent semantics independently.
agentA := createHandlerTestAgent(t, "snapshot-agent-a", []byte(`{}`))
agentB := createHandlerTestAgent(t, "snapshot-agent-b", []byte(`{}`))
agentC := createHandlerTestAgent(t, "snapshot-agent-c", []byte(`{}`))
type taskFixture struct {
agentID string
status string
completedAt string // SQL expression; "" for NULL
label string
}
fixtures := []taskFixture{
// Agent A — actives + a newer completed supersedes an older failed.
{agentA, "queued", "", "A.queued"},
{agentA, "dispatched", "", "A.dispatched"},
{agentA, "running", "", "A.running"},
{agentA, "failed", "now() - interval '10 minutes'", "A.old_failed"},
{agentA, "completed", "now() - interval '30 seconds'", "A.latest_completed"},
// Agent B — old failure with no later outcome stays visible (no
// time window).
{agentB, "failed", "now() - interval '5 minutes'", "B.stale_failed_kept"},
// Agent C — failure followed by a NEWER cancelled. The cancelled
// must be skipped by the SQL filter so the failure remains visible.
// This is the scenario where a user fails, then cancels their
// queued retry to debug.
{agentC, "failed", "now() - interval '5 minutes'", "C.failure"},
{agentC, "cancelled", "now() - interval '30 seconds'", "C.newer_cancelled_must_be_ignored"},
}
insertedIDs := make([]string, 0, len(fixtures))
for _, f := range fixtures {
var id string
var query string
if f.completedAt == "" {
query = `INSERT INTO agent_task_queue (agent_id, runtime_id, status, priority)
VALUES ($1, $2, $3, 0) RETURNING id`
} else {
query = `INSERT INTO agent_task_queue (agent_id, runtime_id, status, priority, completed_at)
VALUES ($1, $2, $3, 0, ` + f.completedAt + `) RETURNING id`
}
if err := testPool.QueryRow(ctx, query, f.agentID, testRuntimeID, f.status).Scan(&id); err != nil {
t.Fatalf("insert %s: %v", f.label, err)
}
insertedIDs = append(insertedIDs, id)
}
t.Cleanup(func() {
for _, id := range insertedIDs {
testPool.Exec(ctx, `DELETE FROM agent_task_queue WHERE id = $1`, id)
}
})
w := httptest.NewRecorder()
req := newRequest(http.MethodGet, "/api/agent-task-snapshot", nil)
testHandler.ListWorkspaceAgentTaskSnapshot(w, req)
if w.Code != http.StatusOK {
t.Fatalf("ListWorkspaceAgentTaskSnapshot: expected 200, got %d: %s", w.Code, w.Body.String())
}
var tasks []AgentTaskResponse
if err := json.NewDecoder(w.Body).Decode(&tasks); err != nil {
t.Fatalf("decode response: %v", err)
}
// Per-agent breakdown so leftover tasks from other tests in this package
// don't pollute the assertions.
type key struct{ agent, status string }
counts := map[key]int{}
for _, task := range tasks {
if task.AgentID != agentA && task.AgentID != agentB && task.AgentID != agentC {
continue
}
counts[key{task.AgentID, task.Status}]++
}
wantCounts := map[key]int{
// Agent A: 3 actives + the latest outcome (completed). The older
// failed must be excluded by DISTINCT ON.
{agentA, "queued"}: 1,
{agentA, "dispatched"}: 1,
{agentA, "running"}: 1,
{agentA, "completed"}: 1,
// Agent B: just the failed outcome.
{agentB, "failed"}: 1,
// Agent C: the failed outcome must survive the temporally newer
// cancellation — that's the whole point of excluding cancelled
// from the outcome half.
{agentC, "failed"}: 1,
}
for k, expected := range wantCounts {
if got := counts[k]; got != expected {
t.Errorf("agent=%s status=%s: expected %d, got %d", k.agent, k.status, expected, got)
}
}
// The OLD failed terminal on agent A must be excluded.
if counts[key{agentA, "failed"}] != 0 {
t.Errorf("agent A old failed must be superseded by newer completed; got %d", counts[key{agentA, "failed"}])
}
// No cancelled row may ever appear in the snapshot — they're filtered at
// SQL level so the front-end's "cancel doesn't mask failure" rule lands
// without any front-end logic.
for _, agentID := range []string{agentA, agentB, agentC} {
if counts[key{agentID, "cancelled"}] != 0 {
t.Errorf("agent %s: cancelled rows must be excluded from snapshot; got %d",
agentID, counts[key{agentID, "cancelled"}])
}
}
}
func TestCreateAgent_RejectsDuplicateName(t *testing.T) {
if testHandler == nil {
t.Skip("database not available")
}
// Clean up any agents created by this test.
t.Cleanup(func() {
testPool.Exec(context.Background(),
`DELETE FROM agent WHERE workspace_id = $1 AND name = $2`,
testWorkspaceID, "duplicate-name-test-agent",
)
})
body := map[string]any{
"name": "duplicate-name-test-agent",
"description": "first description",
"runtime_id": testRuntimeID,
"visibility": "private",
"max_concurrent_tasks": 1,
}
// First call — creates the agent.
w1 := httptest.NewRecorder()
testHandler.CreateAgent(w1, newRequest(http.MethodPost, "/api/agents", body))
if w1.Code != http.StatusCreated {
t.Fatalf("first CreateAgent: expected 201, got %d: %s", w1.Code, w1.Body.String())
}
var resp1 map[string]any
if err := json.NewDecoder(w1.Body).Decode(&resp1); err != nil {
t.Fatalf("decode first response: %v", err)
}
agentID1, _ := resp1["id"].(string)
if agentID1 == "" {
t.Fatalf("first CreateAgent: no id in response: %v", resp1)
}
// Second call — same name must be rejected with 409 Conflict.
// The unique constraint prevents silent duplicates; the UI shows a clear error.
body["description"] = "updated description"
w2 := httptest.NewRecorder()
testHandler.CreateAgent(w2, newRequest(http.MethodPost, "/api/agents", body))
if w2.Code != http.StatusConflict {
t.Fatalf("second CreateAgent with duplicate name: expected 409, got %d: %s", w2.Code, w2.Body.String())
}
}
func TestWorkspaceAlwaysRedactSecrets(t *testing.T) {
tests := []struct {
name string
settings []byte
want bool
}{
{"nil settings", nil, false},
{"empty settings", []byte(`{}`), false},
{"false", []byte(`{"always_redact_env": false}`), false},
{"true", []byte(`{"always_redact_env": true}`), true},
{"invalid json", []byte(`not json`), false},
{"other fields only", []byte(`{"theme": "dark"}`), false},
{"true among other fields", []byte(`{"theme": "dark", "always_redact_env": true}`), true},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if got := workspaceAlwaysRedactSecrets(tt.settings); got != tt.want {
t.Errorf("workspaceAlwaysRedactSecrets(%q) = %v, want %v", tt.settings, got, tt.want)
}
})
}
}
// rawJSONResponse decodes the raw map so we can assert the literal
// JSON shape — `custom_env` MUST be absent from the wire output, not
// merely empty, otherwise a future caller decoding into a wider struct
// could still see masked or partial values.
func rawJSONResponse(t *testing.T, body []byte) map[string]any {
t.Helper()
var out map[string]any
if err := json.Unmarshal(body, &out); err != nil {
t.Fatalf("failed to decode response: %v", err)
}
return out
}
// TestGetAgent_ResponseHasNoCustomEnv guards the core invariant from
// MUL-2600: the generic agent resource response NEVER carries the
// custom_env field, even for the agent's owner. Only the dedicated
// env endpoint exposes secret values.
func TestGetAgent_ResponseHasNoCustomEnv(t *testing.T) {
if testHandler == nil {
t.Skip("database not available")
}
ctx := context.Background()
agentID := createHandlerTestAgent(t, "noenv-get-agent", nil)
if _, err := testPool.Exec(ctx, `UPDATE agent SET custom_env = '{"SECRET_KEY": "super-secret"}' WHERE id = $1`, agentID); err != nil {
t.Fatalf("failed to set custom_env: %v", err)
}
req := newRequest("GET", "/agents/"+agentID, nil)
req = withURLParam(req, "id", agentID)
w := httptest.NewRecorder()
testHandler.GetAgent(w, req)
if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
}
raw := rawJSONResponse(t, w.Body.Bytes())
if _, ok := raw["custom_env"]; ok {
t.Errorf("custom_env field must not appear in agent response, got %v", raw["custom_env"])
}
if _, ok := raw["custom_env_redacted"]; ok {
t.Errorf("custom_env_redacted field must not appear in agent response (use has_custom_env)")
}
if got, _ := raw["has_custom_env"].(bool); !got {
t.Errorf("has_custom_env expected true, got %v", raw["has_custom_env"])
}
if got, _ := raw["custom_env_key_count"].(float64); got != 1 {
t.Errorf("custom_env_key_count expected 1, got %v", raw["custom_env_key_count"])
}
// Sanity-check the typed shape too — the struct must not have
// rehydrated the masked map.
var typed AgentResponse
if err := json.Unmarshal(w.Body.Bytes(), &typed); err != nil {
t.Fatalf("typed decode failed: %v", err)
}
if typed.HasCustomEnv != true {
t.Errorf("typed.HasCustomEnv expected true")
}
if typed.CustomEnvKeyCount != 1 {
t.Errorf("typed.CustomEnvKeyCount expected 1, got %d", typed.CustomEnvKeyCount)
}
}
// TestListAgents_ResponseHasNoCustomEnv mirrors the GetAgent guard for
// the list endpoint. Same invariant: no custom_env field on the wire,
// only coarse metadata.
func TestListAgents_ResponseHasNoCustomEnv(t *testing.T) {
if testHandler == nil {
t.Skip("database not available")
}
ctx := context.Background()
agentName := "noenv-list-agent"
agentID := createHandlerTestAgent(t, agentName, nil)
if _, err := testPool.Exec(ctx, `UPDATE agent SET custom_env = '{"SECRET_KEY": "super-secret", "OTHER": "y"}' WHERE id = $1`, agentID); err != nil {
t.Fatalf("failed to set custom_env: %v", err)
}
req := newRequest("GET", "/agents", nil)
w := httptest.NewRecorder()
testHandler.ListAgents(w, req)
if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
}
var rawAgents []map[string]any
if err := json.Unmarshal(w.Body.Bytes(), &rawAgents); err != nil {
t.Fatalf("failed to decode response: %v", err)
}
var found map[string]any
for _, a := range rawAgents {
if name, _ := a["name"].(string); name == agentName {
found = a
break
}
}
if found == nil {
t.Fatal("agent not found in list response")
}
if _, ok := found["custom_env"]; ok {
t.Errorf("custom_env must not appear in list response")
}
if got, _ := found["custom_env_key_count"].(float64); got != 2 {
t.Errorf("custom_env_key_count expected 2, got %v", found["custom_env_key_count"])
}
if got, _ := found["has_custom_env"].(bool); !got {
t.Errorf("has_custom_env expected true")
}
}
// TestGetAgentEnv_OwnerSucceedsAndAudits exercises the happy path: an
// agent owner reveals env, and the response carries the plaintext map.
// The activity_log row is checked at the end so the audit trail is
// proven to land in the same transaction window.
func TestGetAgentEnv_OwnerSucceedsAndAudits(t *testing.T) {
if testHandler == nil {
t.Skip("database not available")
}
ctx := context.Background()
agentID := createHandlerTestAgent(t, "env-reveal-owner-agent", nil)
if _, err := testPool.Exec(ctx, `UPDATE agent SET custom_env = '{"KEY_ONE": "v1", "KEY_TWO": "v2"}' WHERE id = $1`, agentID); err != nil {
t.Fatalf("failed to set custom_env: %v", err)
}
req := newRequest("GET", "/api/agents/"+agentID+"/env", nil)
req = withURLParam(req, "id", agentID)
w := httptest.NewRecorder()
testHandler.GetAgentEnv(w, req)
if w.Code != http.StatusOK {
t.Fatalf("GetAgentEnv: expected 200, got %d: %s", w.Code, w.Body.String())
}
var resp AgentEnvResponse
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
t.Fatalf("decode response: %v", err)
}
if resp.AgentID != agentID {
t.Errorf("agent_id mismatch: got %q", resp.AgentID)
}
expected := map[string]string{"KEY_ONE": "v1", "KEY_TWO": "v2"}
if !reflect.DeepEqual(resp.CustomEnv, expected) {
t.Errorf("CustomEnv mismatch: got %v, want %v", resp.CustomEnv, expected)
}
// Audit row must exist; keys but not values must be recorded.
var revealedKeysJSON string
if err := testPool.QueryRow(ctx, `
SELECT details::text FROM activity_log
WHERE workspace_id = $1 AND action = 'agent_env_revealed'
AND details->>'agent_id' = $2
ORDER BY created_at DESC LIMIT 1
`, testWorkspaceID, agentID).Scan(&revealedKeysJSON); err != nil {
t.Fatalf("no agent_env_revealed activity row found: %v", err)
}
if !strings.Contains(revealedKeysJSON, `"KEY_ONE"`) || !strings.Contains(revealedKeysJSON, `"KEY_TWO"`) {
t.Errorf("expected revealed_keys to contain KEY_ONE and KEY_TWO, got: %s", revealedKeysJSON)
}
if strings.Contains(revealedKeysJSON, `"v1"`) || strings.Contains(revealedKeysJSON, `"v2"`) {
t.Errorf("activity details must NOT contain env values, got: %s", revealedKeysJSON)
}
}
// TestAgentEnv_AgentActorRejected proves the security-critical actor
// guard: even when the underlying user is a workspace owner, a request
// arriving from inside a running agent task is denied 403. This is
// the lateral-movement fix — an agent running with its owner's token
// cannot reveal a sibling agent's secrets.
func TestAgentEnv_AgentActorRejected(t *testing.T) {
if testHandler == nil {
t.Skip("database not available")
}
targetID := createHandlerTestAgent(t, "env-target-agent", nil)
if _, err := testPool.Exec(context.Background(), `UPDATE agent SET custom_env = '{"K":"v"}' WHERE id = $1`, targetID); err != nil {
t.Fatalf("failed to set custom_env: %v", err)
}
// Spin up a separate agent + task that authorises the X-Agent-ID /
// X-Task-ID header pair resolveActor checks. The owning member of
// the host agent is the same testUserID (workspace owner), which is
// the exact lateral-movement shape we want to block.
hostAgentID := createHandlerTestAgent(t, "env-host-agent", nil)
hostTaskID := createHandlerTestTaskForAgent(t, hostAgentID)
cases := []struct {
name string
fn func(http.ResponseWriter, *http.Request)
body any
}{
{"reveal", testHandler.GetAgentEnv, nil},
{"update", testHandler.UpdateAgentEnv, map[string]any{"custom_env": map[string]string{"K": "v2"}}},
}
for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
method := http.MethodGet
if tc.body != nil {
method = http.MethodPut
}
req := newRequest(method, "/api/agents/"+targetID+"/env", tc.body)
req = withURLParam(req, "id", targetID)
req.Header.Set("X-Agent-ID", hostAgentID)
req.Header.Set("X-Task-ID", hostTaskID)
w := httptest.NewRecorder()
tc.fn(w, req)
if w.Code != http.StatusForbidden {
t.Fatalf("expected 403 from agent actor, got %d: %s", w.Code, w.Body.String())
}
})
}
}
// TestAgentEnv_TaskTokenActorSource locks in the post-MUL-2600 attack
// model: an agent process that strips its identifying headers
// (X-Agent-ID / X-Task-ID) but is still authenticated by an `mat_`
// task token MUST be recognized as actor=agent and rejected on the
// env endpoint. The auth middleware sets X-Actor-Source=task_token
// from the token row; resolveActor honors that header before the
// header-pair fallback. Without this guard the lateral-movement fix
// would only block "honest" CLIs that voluntarily set both headers.
func TestAgentEnv_TaskTokenActorSource(t *testing.T) {
if testHandler == nil {
t.Skip("database not available")
}
targetID := createHandlerTestAgent(t, "env-tt-target-agent", nil)
if _, err := testPool.Exec(context.Background(), `UPDATE agent SET custom_env = '{"K":"v"}' WHERE id = $1`, targetID); err != nil {
t.Fatalf("failed to set custom_env: %v", err)
}
req := newRequest(http.MethodGet, "/api/agents/"+targetID+"/env", nil)
req = withURLParam(req, "id", targetID)
// Simulate the auth middleware's post-mat_-resolution state: the
// only header touching actor identity is X-Actor-Source. The agent
// process stripped X-Agent-ID and X-Task-ID, hoping to fall back
// to the member auth path — the server-set X-Actor-Source must
// short-circuit that escape.
req.Header.Set("X-Actor-Source", "task_token")
req.Header.Del("X-Agent-ID")
req.Header.Del("X-Task-ID")
w := httptest.NewRecorder()
testHandler.GetAgentEnv(w, req)
if w.Code != http.StatusForbidden {
t.Fatalf("expected 403 when X-Actor-Source=task_token, got %d: %s", w.Code, w.Body.String())
}
}
// TestUpdateAgentEnv_PreservesSentinelValues verifies the **** guard.
// A naive write would clobber real secrets with the masked
// placeholder; we want any key whose value comes in as **** to keep
// its stored value.
func TestUpdateAgentEnv_PreservesSentinelValues(t *testing.T) {
if testHandler == nil {
t.Skip("database not available")
}
ctx := context.Background()
agentID := createHandlerTestAgent(t, "env-sentinel-agent", nil)
if _, err := testPool.Exec(ctx, `UPDATE agent SET custom_env = '{"KEEP_ME":"real-secret","ALSO":"another-secret"}' WHERE id = $1`, agentID); err != nil {
t.Fatalf("failed to seed custom_env: %v", err)
}
// Client sends one key with a real new value, one with **** (should
// be preserved), and one new key that isn't in the existing map but
// arrives as **** (must be dropped, never written as literal).
body := map[string]any{
"custom_env": map[string]string{
"KEEP_ME": "****",
"ALSO": "rotated",
"PHANTOM": "****",
"BRAND_NEW": "fresh",
},
}
req := newRequest(http.MethodPut, "/api/agents/"+agentID+"/env", body)
req = withURLParam(req, "id", agentID)
w := httptest.NewRecorder()
testHandler.UpdateAgentEnv(w, req)
if w.Code != http.StatusOK {
t.Fatalf("UpdateAgentEnv: expected 200, got %d: %s", w.Code, w.Body.String())
}
// Refetch from DB so we don't rely on the response body alone.
var stored string
if err := testPool.QueryRow(ctx, `SELECT custom_env::text FROM agent WHERE id = $1`, agentID).Scan(&stored); err != nil {
t.Fatalf("failed to read back custom_env: %v", err)
}
var got map[string]string
if err := json.Unmarshal([]byte(stored), &got); err != nil {
t.Fatalf("failed to decode stored custom_env: %v", err)
}
want := map[string]string{
"KEEP_ME": "real-secret", // **** must preserve the existing value
"ALSO": "rotated", // explicit overwrite
"BRAND_NEW": "fresh", // new addition
// PHANTOM is intentionally absent — **** for a non-existent key
// is dropped, never persisted as literal `****`.
}
if !reflect.DeepEqual(got, want) {
t.Errorf("stored custom_env mismatch:\n got: %v\n want: %v", got, want)
}
// Audit row should reflect the diff. We decode the jsonb back into a
// typed map and compare semantically — postgres serializes jsonb with
// canonicalised whitespace (`"added_keys": ["BRAND_NEW"]`), so a raw
// substring match on the dense form silently fails on real database
// output.
var details string
if err := testPool.QueryRow(ctx, `
SELECT details::text FROM activity_log
WHERE workspace_id = $1 AND action = 'agent_env_updated' AND details->>'agent_id' = $2
ORDER BY created_at DESC LIMIT 1
`, testWorkspaceID, agentID).Scan(&details); err != nil {
t.Fatalf("expected agent_env_updated activity row: %v", err)
}
var auditFields struct {
AddedKeys []string `json:"added_keys"`
ChangedKeys []string `json:"changed_keys"`
PreservedKeys []string `json:"preserved_keys"`
}
if err := json.Unmarshal([]byte(details), &auditFields); err != nil {
t.Fatalf("failed to decode audit details: %v (raw=%s)", err, details)
}
if !reflect.DeepEqual(auditFields.AddedKeys, []string{"BRAND_NEW"}) {
t.Errorf("added_keys: got %v, want [BRAND_NEW]; raw=%s", auditFields.AddedKeys, details)
}
if !reflect.DeepEqual(auditFields.ChangedKeys, []string{"ALSO"}) {
t.Errorf("changed_keys: got %v, want [ALSO]; raw=%s", auditFields.ChangedKeys, details)
}
if !reflect.DeepEqual(auditFields.PreservedKeys, []string{"KEEP_ME"}) {
t.Errorf("preserved_keys: got %v, want [KEEP_ME]; raw=%s", auditFields.PreservedKeys, details)
}
// Audit must never contain values.
for _, leak := range []string{"real-secret", "another-secret", "rotated", "fresh"} {
if strings.Contains(details, leak) {
t.Errorf("audit details leaked value %q: %s", leak, details)
}
}
}
func TestUpdateAgent_RejectsCustomEnvInBody(t *testing.T) {
if testHandler == nil {
t.Skip("database not available")
}
ctx := context.Background()
agentID := createHandlerTestAgent(t, "update-no-env-agent", nil)
if _, err := testPool.Exec(ctx, `UPDATE agent SET custom_env = '{"PRE":"existing"}' WHERE id = $1`, agentID); err != nil {
t.Fatalf("failed to seed custom_env: %v", err)
}
// Sending custom_env via the generic PUT /api/agents/{id} must fail
// loudly with a 400 — see the comment on the rejection in agent.go.
// Silently dropping the field used to make scripted clients believe
// they had rotated a secret when nothing actually happened.
body := map[string]any{
"description": "still updating description",
"custom_env": map[string]string{"INJECTED": "should-not-stick"},
}
req := newRequest(http.MethodPut, "/api/agents/"+agentID, body)
req = withURLParam(req, "id", agentID)
w := httptest.NewRecorder()
testHandler.UpdateAgent(w, req)
if w.Code != http.StatusBadRequest {
t.Fatalf("UpdateAgent: expected 400, got %d: %s", w.Code, w.Body.String())
}
if !strings.Contains(w.Body.String(), "custom_env") || !strings.Contains(w.Body.String(), "/env") {
t.Errorf("error body should mention custom_env and the env endpoint; got %s", w.Body.String())
}
// The stored env must be untouched by the rejected request.
var stored string
if err := testPool.QueryRow(ctx, `SELECT custom_env::text FROM agent WHERE id = $1`, agentID).Scan(&stored); err != nil {
t.Fatalf("failed to read custom_env: %v", err)
}
if !strings.Contains(stored, `"PRE": "existing"`) && !strings.Contains(stored, `"PRE":"existing"`) {
t.Errorf("UpdateAgent must NOT touch custom_env; got %q", stored)
}
if strings.Contains(stored, "INJECTED") {
t.Errorf("UpdateAgent should have rejected custom_env in body; got %q", stored)
}
}
// TestMergeAgentEnv_PureFunction exercises the diff/sentinel logic
// without the DB round-trip — keeps the contract front-and-centre in
// case someone refactors the handler later.
func TestMergeAgentEnv_PureFunction(t *testing.T) {
cases := []struct {
name string
existing map[string]string
request map[string]string
want map[string]string
audit envAudit
}{
{
name: "preserve sentinel",
existing: map[string]string{"A": "real"},
request: map[string]string{"A": "****"},
want: map[string]string{"A": "real"},
audit: envAudit{preserved: []string{"A"}},
},
{
name: "drop sentinel for missing key",
existing: map[string]string{},
request: map[string]string{"A": "****"},
want: map[string]string{},
audit: envAudit{},
},
{
name: "add new key",
existing: map[string]string{},
request: map[string]string{"B": "v"},
want: map[string]string{"B": "v"},
audit: envAudit{added: []string{"B"}},
},
{
name: "change existing value",
existing: map[string]string{"B": "old"},
request: map[string]string{"B": "new"},
want: map[string]string{"B": "new"},
audit: envAudit{changed: []string{"B"}},
},
{
name: "remove key absent from request",
existing: map[string]string{"B": "v"},
request: map[string]string{},
want: map[string]string{},
audit: envAudit{removed: []string{"B"}},
},
{
name: "noop when value unchanged",
existing: map[string]string{"B": "same"},
request: map[string]string{"B": "same"},
want: map[string]string{"B": "same"},
audit: envAudit{},
},
}
for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
got, audit := mergeAgentEnv(tc.existing, tc.request)
if !reflect.DeepEqual(got, tc.want) {
t.Errorf("merged map: got %v, want %v", got, tc.want)
}
if !reflect.DeepEqual(audit, tc.audit) {
t.Errorf("audit: got %+v, want %+v", audit, tc.audit)
}
})
}
}
// Compile-time guard: AgentResponse must NOT carry the legacy env
// fields. Reintroducing them is a security regression — this test
// fails to compile rather than fails at runtime so reviewers see the
// breakage in the diff. Kept as a runtime test because the package
// boundary makes a struct-tag introspection cheap and obvious.
func TestAgentResponseShape_HasNoLegacyEnvFields(t *testing.T) {
typ := reflect.TypeOf(AgentResponse{})
for i := 0; i < typ.NumField(); i++ {
f := typ.Field(i)
tag := strings.Split(f.Tag.Get("json"), ",")[0]
switch tag {
case "custom_env", "custom_env_redacted", "custom_env_redacted_reason":
t.Errorf("AgentResponse must not carry %q field (MUL-2600)", tag)
}
}
}
// TestUpdateAgent_RedactsMcpConfigForAgentActor closes the second leg
// of MUL-2600 review #2: an agent process with a task token (or with
// the X-Actor-Source server marker) must not be able to scrape another
// agent's mcp_config via an unrelated mutation response. Even when the
// host PAT would otherwise satisfy canManageAgent, the response body
// must come back with mcp_config redacted.
func TestUpdateAgent_RedactsMcpConfigForAgentActor(t *testing.T) {
if testHandler == nil {
t.Skip("database not available")
}
// The target agent has a populated mcp_config that historically would
// be leaked back via the UpdateAgent / ArchiveAgent / RestoreAgent
// HTTP response.
target := createHandlerTestAgent(t, "mut-mcp-target", []byte(`{"server":"secret-config"}`))
// A second agent acts as the "calling" agent process whose task
// token authenticated the request. It is registered in the same
// workspace so resolveActor recognises X-Agent-ID as valid.
caller := createHandlerTestAgent(t, "mut-mcp-caller", nil)
taskID := insertHandlerTestTask(t, caller)
desc := "trivial mutation that should NOT leak target mcp_config"
req := newRequest(http.MethodPut, "/api/agents/"+target, map[string]any{
"description": desc,
})
req = withURLParam(req, "id", target)
// Simulate a task-token-authenticated agent request. The auth
// middleware would normally set these; we mimic both the modern
// path (X-Actor-Source) and the legacy header pair so the test is
// resilient to either resolveActor branch.
req.Header.Set("X-Actor-Source", "task_token")
req.Header.Set("X-Agent-ID", caller)
req.Header.Set("X-Task-ID", taskID)
w := httptest.NewRecorder()
testHandler.UpdateAgent(w, req)
if w.Code != http.StatusOK {
t.Fatalf("UpdateAgent: expected 200, got %d: %s", w.Code, w.Body.String())
}
var resp AgentResponse
if err := json.NewDecoder(w.Body).Decode(&resp); err != nil {
t.Fatalf("decode response: %v", err)
}
// The response contract keeps `mcp_config` always-present so clients
// can distinguish "no config" vs "redacted" via the companion flag.
// `json.RawMessage` of a JSON null decodes to the literal bytes
// `null`, not Go nil — so check for "no secret-bearing content"
// rather than `!= nil`.
if len(resp.McpConfig) > 0 && !bytes.Equal(bytes.TrimSpace(resp.McpConfig), []byte("null")) {
t.Errorf("UpdateAgent response leaked mcp_config to agent actor: %s", string(resp.McpConfig))
}
if !resp.McpConfigRedacted {
t.Errorf("UpdateAgent response should set mcp_config_redacted=true for agent actor")
}
}
// TestUpdateAgent_KeepsMcpConfigForMemberActor is the matching positive
// test — a normal member request (owner/admin) still receives the full
// mcp_config in the mutation response, so the redaction does not
// accidentally regress the legitimate Web admin flow.
func TestUpdateAgent_KeepsMcpConfigForMemberActor(t *testing.T) {
if testHandler == nil {
t.Skip("database not available")
}
target := createHandlerTestAgent(t, "mut-mcp-member", []byte(`{"server":"member-visible"}`))
req := newRequest(http.MethodPut, "/api/agents/"+target, map[string]any{
"description": "owner-visible mutation",
})
req = withURLParam(req, "id", target)
w := httptest.NewRecorder()
testHandler.UpdateAgent(w, req)
if w.Code != http.StatusOK {
t.Fatalf("UpdateAgent: expected 200, got %d: %s", w.Code, w.Body.String())
}
var resp AgentResponse
if err := json.NewDecoder(w.Body).Decode(&resp); err != nil {
t.Fatalf("decode response: %v", err)
}
if resp.McpConfig == nil {
t.Errorf("UpdateAgent response should keep mcp_config for member actor; got nil")
}
if resp.McpConfigRedacted {
t.Errorf("UpdateAgent response should NOT mark mcp_config redacted for member actor")
}
}
// TestUpdateAgent_PreservesSkillsInResponse is the regression for #3459:
// updating only description/instructions used to return "skills": []
// because the handler skipped the skill reload that GetAgent does. The
// DB row was always preserved; the response just lied about it, which
// scared users into manually re-running `agent skills set` and risked
// scripted clients writing the empty set back. We assert (a) the
// response carries the bound skills, (b) the DB row is unchanged, and
// (c) GetAgent reports the same shape so the two endpoints don't drift.
func TestUpdateAgent_PreservesSkillsInResponse(t *testing.T) {
if testHandler == nil {
t.Skip("database not available")
}
ctx := context.Background()
agentID := createHandlerTestAgent(t, "update-preserves-skills-agent", nil)
skillA := insertHandlerTestSkill(t, "update-preserve-a", "alpha body")
skillB := insertHandlerTestSkill(t, "update-preserve-b", "beta body")
for _, sid := range []string{skillA, skillB} {
if _, err := testPool.Exec(ctx,
`INSERT INTO agent_skill (agent_id, skill_id) VALUES ($1, $2)`,
agentID, sid,
); err != nil {
t.Fatalf("attach skill %s: %v", sid, err)
}
}
req := newRequest(http.MethodPut, "/api/agents/"+agentID, map[string]any{
"description": "metadata-only update",
})
req = withURLParam(req, "id", agentID)
w := httptest.NewRecorder()
testHandler.UpdateAgent(w, req)
if w.Code != http.StatusOK {
t.Fatalf("UpdateAgent: expected 200, got %d: %s", w.Code, w.Body.String())
}
var resp AgentResponse
if err := json.NewDecoder(w.Body).Decode(&resp); err != nil {
t.Fatalf("decode response: %v", err)
}
gotIDs := map[string]bool{}
for _, s := range resp.Skills {
gotIDs[s.ID] = true
}
for _, want := range []string{skillA, skillB} {
if !gotIDs[want] {
t.Errorf("UpdateAgent response missing skill %s; got %+v", want, resp.Skills)
}
}
// Defence in depth: the junction table must be untouched too. Without
// this check a future regression that DOES wipe agent_skill rows but
// reloads them into the response would silently pass.
var rowCount int
if err := testPool.QueryRow(ctx,
`SELECT COUNT(*) FROM agent_skill WHERE agent_id = $1`,
agentID,
).Scan(&rowCount); err != nil {
t.Fatalf("count agent_skill: %v", err)
}
if rowCount != 2 {
t.Errorf("agent_skill row count: expected 2, got %d", rowCount)
}
// GetAgent must agree with UpdateAgent on the skill list — otherwise
// CLI users will see one shape from the mutation and a different one
// on the very next read.
getReq := newRequest(http.MethodGet, "/api/agents/"+agentID, nil)
getReq = withURLParam(getReq, "id", agentID)
getW := httptest.NewRecorder()
testHandler.GetAgent(getW, getReq)
if getW.Code != http.StatusOK {
t.Fatalf("GetAgent: expected 200, got %d: %s", getW.Code, getW.Body.String())
}
var getResp AgentResponse
if err := json.NewDecoder(getW.Body).Decode(&getResp); err != nil {
t.Fatalf("decode GetAgent: %v", err)
}
if len(getResp.Skills) != len(resp.Skills) {
t.Errorf("GetAgent skill count %d != UpdateAgent skill count %d",
len(getResp.Skills), len(resp.Skills))
}
}
// TestArchiveRestoreAgent_PreservesSkillsInResponse is the sister
// regression for #3459: ArchiveAgent / RestoreAgent share the same
// agentToResponse path as UpdateAgent and previously also returned
// "skills": [] regardless of what was in the junction table. The
// archive/restore broadcasts are the only place where mobile clients
// learn about state flips, so an empty skills array there would propagate
// to every connected client until the next refetch.
func TestArchiveRestoreAgent_PreservesSkillsInResponse(t *testing.T) {
if testHandler == nil {
t.Skip("database not available")
}
ctx := context.Background()
agentID := createHandlerTestAgent(t, "archive-preserves-skills-agent", nil)
skillID := insertHandlerTestSkill(t, "archive-preserve", "body")
if _, err := testPool.Exec(ctx,
`INSERT INTO agent_skill (agent_id, skill_id) VALUES ($1, $2)`,
agentID, skillID,
); err != nil {
t.Fatalf("attach skill: %v", err)
}
archiveReq := newRequest(http.MethodPost, "/api/agents/"+agentID+"/archive", nil)
archiveReq = withURLParam(archiveReq, "id", agentID)
archiveW := httptest.NewRecorder()
testHandler.ArchiveAgent(archiveW, archiveReq)
if archiveW.Code != http.StatusOK {
t.Fatalf("ArchiveAgent: expected 200, got %d: %s", archiveW.Code, archiveW.Body.String())
}
var archived AgentResponse
if err := json.NewDecoder(archiveW.Body).Decode(&archived); err != nil {
t.Fatalf("decode archive: %v", err)
}
if len(archived.Skills) != 1 || archived.Skills[0].ID != skillID {
t.Errorf("ArchiveAgent: expected 1 skill %s, got %+v", skillID, archived.Skills)
}
restoreReq := newRequest(http.MethodPost, "/api/agents/"+agentID+"/restore", nil)
restoreReq = withURLParam(restoreReq, "id", agentID)
restoreW := httptest.NewRecorder()
testHandler.RestoreAgent(restoreW, restoreReq)
if restoreW.Code != http.StatusOK {
t.Fatalf("RestoreAgent: expected 200, got %d: %s", restoreW.Code, restoreW.Body.String())
}
var restored AgentResponse
if err := json.NewDecoder(restoreW.Body).Decode(&restored); err != nil {
t.Fatalf("decode restore: %v", err)
}
if len(restored.Skills) != 1 || restored.Skills[0].ID != skillID {
t.Errorf("RestoreAgent: expected 1 skill %s, got %+v", skillID, restored.Skills)
}
}
// insertHandlerTestTask creates an in_progress task for the given
// agent so resolveActor's GetAgentTask lookup succeeds without
// dragging the full TaskService into the test.
func insertHandlerTestTask(t *testing.T, agentID string) string {
t.Helper()
ctx := context.Background()
var taskID string
if err := testPool.QueryRow(ctx, `
INSERT INTO agent_task_queue (agent_id, runtime_id, status, priority)
VALUES ($1, $2, 'running', 0)
RETURNING id
`, agentID, handlerTestRuntimeID(t)).Scan(&taskID); err != nil {
t.Fatalf("insert test task: %v", err)
}
t.Cleanup(func() {
testPool.Exec(ctx, `DELETE FROM agent_task_queue WHERE id = $1`, taskID)
})
return taskID
}
// Defence-in-depth: spot-check that the package compiles a small
// fmt.Sprintf so accidental imports stay tidy.
var _ = fmt.Sprintf