Files
multica/server/internal/handler/runtime_update.go
Bohan Jiang f628e48775 refactor(server): error-returning ParseUUID to prevent silent data loss
* refactor(server): make ParseUUID error-returning to prevent silent data loss (MUL-1410)

util.ParseUUID previously swallowed errors and returned a zero pgtype.UUID
on invalid input. When this zero UUID reached a write query (DELETE/UPDATE),
the SQL matched zero rows and the handler returned 2xx success — producing
silent data corruption. #1661 (DeleteIssue with identifier-style ID) was the
visible symptom; PR #1680 patched that one site, this commit closes the
class of bug.

Changes:

- util.ParseUUID now returns (pgtype.UUID, error). Add util.MustParseUUID
  for trusted round-trips that should panic on invalid input.
- handler/handler.go: parseUUID wrapper now calls MustParseUUID — any
  unguarded user-input string reaching it surfaces as a recovered panic
  (chi middleware.Recoverer → 500) instead of silently corrupting data.
  Add parseUUIDOrBadRequest(w, s, fieldName) for handler entry points.
- Convert every Queries.Delete*/Update* call site reachable from raw user
  input (autopilot, comment, project, skill, skill_file, label, pin,
  attachment, feedback, issue assignee, daemon runtime, workspace) to
  validate UUIDs explicitly with parseUUIDOrBadRequest, returning 400 on
  invalid input. Where a resolved entity.ID is already in scope, write
  queries now use it directly instead of re-parsing the URL string.
- Update getWorkspaceMember + loadIssueForUser to handle invalid UUIDs
  gracefully (404/400 instead of panic).
- Update util/middleware/cmd-level callers (subscriber_listeners,
  notification_listeners, activity_listeners, scope_authorizer,
  middleware/workspace) to use the error-returning API.
- Add server/internal/util/pgx_test.go covering valid/invalid input and
  the MustParseUUID panic contract.
- Add TestDeleteIssueByIdentifier + TestDeleteIssueRejectsInvalidUUID
  regression tests in handler_test.go (the original #1661 bug + the
  invalid-input case).
- Document the handler UUID parsing convention in CLAUDE.md so the rule
  is enforceable in future PR review.

* fix(server): address GPT-Boy review of #1748

P1 fixes from PR #1748 review:

1. Migrate remaining request-boundary UUIDs to parseUUIDOrBadRequest so
   malformed input returns 400 instead of panic/500. Was missing on:
   - issue.go: workspace_id in CreateIssue/ChildIssueProgress/ListIssues/
     SearchIssues/BatchUpdateIssues/BatchDeleteIssues; project_id /
     parent_issue_id / lead_id / assignee_id / assignee_ids / creator_id
     filters; batch issue_ids and assignee/parent/project fields in
     BatchUpdateIssues (skip on bad input via util.ParseUUID, matching
     the existing per-row continue semantics).
   - project.go: project id + workspace_id in GetProject/UpdateProject/
     DeleteProject; lead_id in CreateProject/UpdateProject;
     workspace_id in ListProjects + SearchProjects.
   - handler.go: resolveActor now uses util.ParseUUID for X-Agent-ID /
     X-Task-ID headers; invalid UUID falls back to "member" (matches
     pre-existing semantics) instead of panicking.
   - issue.go: validateAssigneePair returns 400 on invalid workspace_id
     instead of panicking.

2. Fix issue:deleted WS event payloads to emit uuidToString(issue.ID)
   instead of the raw URL string. After an identifier-path delete
   ("MUL-7"), the previous payload would have leaked the identifier to
   subscribers, leaving stale entries in frontend caches that key by
   UUID. Updated DeleteIssue (issue.go:1341) and BatchDeleteIssues
   (issue.go:1641). The slog "issue deleted" log line also now records
   the resolved UUID so logs match the WS payload.

3. Extend TestDeleteIssueByIdentifier to subscribe to the bus and
   assert issue:deleted.payload.issue_id is the resolved UUID, not
   the identifier.

* fix(server): validate remaining reviewed UUID inputs

* fix(server): validate remaining handler UUID inputs

* fix(server): finish request boundary UUID audit

* fix(server): validate remaining request body UUIDs

* fix(server): validate runtime path UUIDs

* fix(server): validate remaining audit UUID inputs

---------

Co-authored-by: Eve <eve@multica.ai>
2026-04-28 14:50:28 +08:00

233 lines
6.2 KiB
Go

package handler
import (
"encoding/json"
"net/http"
"sync"
"time"
"github.com/go-chi/chi/v5"
)
// ---------------------------------------------------------------------------
// In-memory update store
// ---------------------------------------------------------------------------
type UpdateStatus string
const (
UpdatePending UpdateStatus = "pending"
UpdateRunning UpdateStatus = "running"
UpdateCompleted UpdateStatus = "completed"
UpdateFailed UpdateStatus = "failed"
UpdateTimeout UpdateStatus = "timeout"
)
// UpdateRequest represents a pending or completed CLI update request.
type UpdateRequest struct {
ID string `json:"id"`
RuntimeID string `json:"runtime_id"`
Status UpdateStatus `json:"status"`
TargetVersion string `json:"target_version"`
Output string `json:"output,omitempty"`
Error string `json:"error,omitempty"`
CreatedAt time.Time `json:"created_at"`
UpdatedAt time.Time `json:"updated_at"`
}
// UpdateStore is a thread-safe in-memory store for CLI update requests.
type UpdateStore struct {
mu sync.Mutex
requests map[string]*UpdateRequest // keyed by update ID
}
func NewUpdateStore() *UpdateStore {
return &UpdateStore{
requests: make(map[string]*UpdateRequest),
}
}
func (s *UpdateStore) Create(runtimeID, targetVersion string) (*UpdateRequest, error) {
s.mu.Lock()
defer s.mu.Unlock()
// Clean up old requests (>5 minutes).
for id, req := range s.requests {
if time.Since(req.CreatedAt) > 5*time.Minute {
delete(s.requests, id)
}
}
// Reject if there is already a pending or running update for this runtime.
for _, req := range s.requests {
if req.RuntimeID == runtimeID && (req.Status == UpdatePending || req.Status == UpdateRunning) {
return nil, errUpdateInProgress
}
}
req := &UpdateRequest{
ID: randomID(),
RuntimeID: runtimeID,
Status: UpdatePending,
TargetVersion: targetVersion,
CreatedAt: time.Now(),
UpdatedAt: time.Now(),
}
s.requests[req.ID] = req
return req, nil
}
var errUpdateInProgress = &updateError{msg: "an update is already in progress for this runtime"}
type updateError struct{ msg string }
func (e *updateError) Error() string { return e.msg }
func (s *UpdateStore) Get(id string) *UpdateRequest {
s.mu.Lock()
defer s.mu.Unlock()
req, ok := s.requests[id]
if !ok {
return nil
}
// Check for timeout (both pending and running states).
if (req.Status == UpdatePending || req.Status == UpdateRunning) && time.Since(req.CreatedAt) > 120*time.Second {
req.Status = UpdateTimeout
req.Error = "update did not complete within 120 seconds"
req.UpdatedAt = time.Now()
}
return req
}
// PopPending returns and marks as running the pending update for a runtime.
func (s *UpdateStore) PopPending(runtimeID string) *UpdateRequest {
s.mu.Lock()
defer s.mu.Unlock()
for _, req := range s.requests {
if req.RuntimeID == runtimeID && req.Status == UpdatePending {
req.Status = UpdateRunning
req.UpdatedAt = time.Now()
return req
}
}
return nil
}
func (s *UpdateStore) Complete(id string, output string) {
s.mu.Lock()
defer s.mu.Unlock()
if req, ok := s.requests[id]; ok {
req.Status = UpdateCompleted
req.Output = output
req.UpdatedAt = time.Now()
}
}
func (s *UpdateStore) Fail(id string, errMsg string) {
s.mu.Lock()
defer s.mu.Unlock()
if req, ok := s.requests[id]; ok {
req.Status = UpdateFailed
req.Error = errMsg
req.UpdatedAt = time.Now()
}
}
// ---------------------------------------------------------------------------
// Handlers
// ---------------------------------------------------------------------------
// InitiateUpdate creates a new CLI update request (protected route, called by frontend).
func (h *Handler) InitiateUpdate(w http.ResponseWriter, r *http.Request) {
runtimeID := chi.URLParam(r, "runtimeId")
runtimeUUID, ok := parseUUIDOrBadRequest(w, runtimeID, "runtime_id")
if !ok {
return
}
rt, err := h.Queries.GetAgentRuntime(r.Context(), runtimeUUID)
if err != nil {
writeError(w, http.StatusNotFound, "runtime not found")
return
}
if _, ok := h.requireWorkspaceMember(w, r, uuidToString(rt.WorkspaceID), "runtime not found"); !ok {
return
}
var req struct {
TargetVersion string `json:"target_version"`
}
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
writeError(w, http.StatusBadRequest, "invalid request body")
return
}
if req.TargetVersion == "" {
writeError(w, http.StatusBadRequest, "target_version is required")
return
}
update, err := h.UpdateStore.Create(uuidToString(rt.ID), req.TargetVersion)
if err != nil {
writeError(w, http.StatusConflict, err.Error())
return
}
writeJSON(w, http.StatusOK, update)
}
// GetUpdate returns the status of an update request (protected route, called by frontend).
func (h *Handler) GetUpdate(w http.ResponseWriter, r *http.Request) {
updateID := chi.URLParam(r, "updateId")
update := h.UpdateStore.Get(updateID)
if update == nil {
writeError(w, http.StatusNotFound, "update not found")
return
}
writeJSON(w, http.StatusOK, update)
}
// ReportUpdateResult receives the update result from the daemon.
func (h *Handler) ReportUpdateResult(w http.ResponseWriter, r *http.Request) {
runtimeID := chi.URLParam(r, "runtimeId")
// Verify the caller owns this runtime's workspace.
if _, ok := h.requireDaemonRuntimeAccess(w, r, runtimeID); !ok {
return
}
updateID := chi.URLParam(r, "updateId")
var req struct {
Status string `json:"status"` // "running", "completed", or "failed"
Output string `json:"output"`
Error string `json:"error"`
}
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
writeError(w, http.StatusBadRequest, "invalid request body")
return
}
switch req.Status {
case "completed":
h.UpdateStore.Complete(updateID, req.Output)
case "failed":
h.UpdateStore.Fail(updateID, req.Error)
case "running":
// No-op: status is already "running" from PopPending. This call is
// just a progress signal from the daemon to confirm it received the
// update command and is executing it.
default:
writeError(w, http.StatusBadRequest, "invalid status: "+req.Status)
return
}
writeJSON(w, http.StatusOK, map[string]string{"status": "ok"})
}