mirror of
https://github.com/multica-ai/multica.git
synced 2026-06-17 11:48:42 +02:00
* refactor(server): make ParseUUID error-returning to prevent silent data loss (MUL-1410)

  util.ParseUUID previously swallowed errors and returned a zero pgtype.UUID on invalid input. When this zero UUID reached a write query (DELETE/UPDATE), the SQL matched zero rows and the handler returned 2xx success — producing silent data corruption. #1661 (DeleteIssue with identifier-style ID) was the visible symptom; PR #1680 patched that one site, and this commit closes the class of bug.

  Changes:
  - util.ParseUUID now returns (pgtype.UUID, error). Add util.MustParseUUID for trusted round-trips that should panic on invalid input.
  - handler/handler.go: parseUUID wrapper now calls MustParseUUID — any unguarded user-input string reaching it surfaces as a recovered panic (chi middleware.Recoverer → 500) instead of silently corrupting data. Add parseUUIDOrBadRequest(w, s, fieldName) for handler entry points.
  - Convert every Queries.Delete*/Update* call site reachable from raw user input (autopilot, comment, project, skill, skill_file, label, pin, attachment, feedback, issue assignee, daemon runtime, workspace) to validate UUIDs explicitly with parseUUIDOrBadRequest, returning 400 on invalid input. Where a resolved entity.ID is already in scope, write queries now use it directly instead of re-parsing the URL string.
  - Update getWorkspaceMember + loadIssueForUser to handle invalid UUIDs gracefully (404/400 instead of panic).
  - Update util/middleware/cmd-level callers (subscriber_listeners, notification_listeners, activity_listeners, scope_authorizer, middleware/workspace) to use the error-returning API.
  - Add server/internal/util/pgx_test.go covering valid/invalid input and the MustParseUUID panic contract.
  - Add TestDeleteIssueByIdentifier + TestDeleteIssueRejectsInvalidUUID regression tests in handler_test.go (the original #1661 bug + the invalid-input case).
  - Document the handler UUID parsing convention in CLAUDE.md so the rule is enforceable in future PR review.
* fix(server): address GPT-Boy review of #1748

  P1 fixes from PR #1748 review:

  1. Migrate remaining request-boundary UUIDs to parseUUIDOrBadRequest so malformed input returns 400 instead of panic/500. Was missing on:
     - issue.go: workspace_id in CreateIssue/ChildIssueProgress/ListIssues/SearchIssues/BatchUpdateIssues/BatchDeleteIssues; project_id / parent_issue_id / lead_id / assignee_id / assignee_ids / creator_id filters; batch issue_ids and assignee/parent/project fields in BatchUpdateIssues (skip on bad input via util.ParseUUID, matching the existing per-row continue semantics).
     - project.go: project id + workspace_id in GetProject/UpdateProject/DeleteProject; lead_id in CreateProject/UpdateProject; workspace_id in ListProjects + SearchProjects.
     - handler.go: resolveActor now uses util.ParseUUID for X-Agent-ID / X-Task-ID headers; invalid UUID falls back to "member" (matches pre-existing semantics) instead of panicking.
     - issue.go: validateAssigneePair returns 400 on invalid workspace_id instead of panicking.
  2. Fix issue:deleted WS event payloads to emit uuidToString(issue.ID) instead of the raw URL string. After an identifier-path delete ("MUL-7"), the previous payload would have leaked the identifier to subscribers, leaving stale entries in frontend caches that key by UUID. Updated DeleteIssue (issue.go:1341) and BatchDeleteIssues (issue.go:1641). The slog "issue deleted" log line also now records the resolved UUID so logs match the WS payload.
  3. Extend TestDeleteIssueByIdentifier to subscribe to the bus and assert issue:deleted.payload.issue_id is the resolved UUID, not the identifier.

* fix(server): validate remaining reviewed UUID inputs
* fix(server): validate remaining handler UUID inputs
* fix(server): finish request boundary UUID audit
* fix(server): validate remaining request body UUIDs
* fix(server): validate runtime path UUIDs
* fix(server): validate remaining audit UUID inputs

---------

Co-authored-by: Eve <eve@multica.ai>
266 lines
8.4 KiB
Go
266 lines
8.4 KiB
Go
package handler
|
|
|
|
import (
|
|
"encoding/json"
|
|
"log/slog"
|
|
"net/http"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/go-chi/chi/v5"
|
|
)
|
|
|
|
// ---------------------------------------------------------------------------
// In-memory model-list request store
// ---------------------------------------------------------------------------
//
// The server cannot call the daemon directly (the daemon is behind the user's
// NAT and only polls the server). So "list models for this runtime" uses a
// pending-request pattern: the server creates a pending request, the daemon
// pops it on the next heartbeat, executes locally, and reports the result back.
|
|
|
|
// ModelListStatus is the lifecycle state of a model list request as it is
// serialized on the wire.
type ModelListStatus string

// Lifecycle states of a model list request.
const (
	// ModelListPending: created by the store, not yet claimed via PopPending.
	ModelListPending = ModelListStatus("pending")
	// ModelListRunning: claimed via PopPending, result not yet reported.
	ModelListRunning = ModelListStatus("running")
	// ModelListCompleted: a result was recorded via Complete.
	ModelListCompleted = ModelListStatus("completed")
	// ModelListFailed: a failure was recorded via Fail.
	ModelListFailed = ModelListStatus("failed")
	// ModelListTimeout: the request sat in pending or running for too long.
	ModelListTimeout = ModelListStatus("timeout")
)
|
|
|
|
// ModelListRequest represents a pending or completed model list request.
|
|
// Supported is false when the provider ignores per-agent model
|
|
// selection entirely (currently: hermes). The UI uses this to
|
|
// disable its dropdown rather than silently accepting a value the
|
|
// backend will drop.
|
|
type ModelListRequest struct {
|
|
ID string `json:"id"`
|
|
RuntimeID string `json:"runtime_id"`
|
|
Status ModelListStatus `json:"status"`
|
|
Models []ModelEntry `json:"models,omitempty"`
|
|
Supported bool `json:"supported"`
|
|
Error string `json:"error,omitempty"`
|
|
CreatedAt time.Time `json:"created_at"`
|
|
UpdatedAt time.Time `json:"updated_at"`
|
|
}
|
|
|
|
// ModelEntry is the wire representation of agent.Model.
//
// Default marks the model the runtime advertises as its preferred pick
// (e.g. Claude Code's shipped default, or hermes' currentModelId). The UI
// badges that entry, so the field must survive marshalling; it is only
// omitted from the JSON when false.
type ModelEntry struct {
	ID       string `json:"id"`
	Label    string `json:"label"`
	Provider string `json:"provider,omitempty"`
	Default  bool   `json:"default,omitempty"`
}
|
|
|
|
// Thresholds after which a non-terminal request is reported as timed out.
const (
	// modelListPendingTimeout is the longest a request may stay pending
	// before the UI is told "daemon didn't pick this up".
	modelListPendingTimeout time.Duration = 30 * time.Second

	// modelListRunningTimeout is the longest a claimed (running) request may
	// go without a reported result before the UI is told "daemon picked this
	// up but never reported a result". It covers the case where the heartbeat
	// response carrying `pending_model_list` is lost in transit (e.g. an HTTP
	// client timeout after PopPending already mutated store state). Without
	// it the UI would keep polling a record stuck in `running` until the
	// 2-minute memory GC sweeps it.
	modelListRunningTimeout time.Duration = 60 * time.Second
)
|
|
|
|
// ModelListStore is a thread-safe in-memory store. Entries expire after 2 min
|
|
// to bound memory use; the UI polls /requests/:id until status is terminal.
|
|
type ModelListStore struct {
|
|
mu sync.Mutex
|
|
requests map[string]*ModelListRequest
|
|
}
|
|
|
|
func NewModelListStore() *ModelListStore {
|
|
return &ModelListStore{requests: make(map[string]*ModelListRequest)}
|
|
}
|
|
|
|
func (s *ModelListStore) Create(runtimeID string) *ModelListRequest {
|
|
s.mu.Lock()
|
|
defer s.mu.Unlock()
|
|
|
|
// Garbage-collect stale entries so the map can't grow unbounded.
|
|
for id, req := range s.requests {
|
|
if time.Since(req.CreatedAt) > 2*time.Minute {
|
|
delete(s.requests, id)
|
|
}
|
|
}
|
|
|
|
req := &ModelListRequest{
|
|
ID: randomID(),
|
|
RuntimeID: runtimeID,
|
|
Status: ModelListPending,
|
|
// Default to true; the daemon overrides this in the report
|
|
// for providers that don't support per-agent model selection.
|
|
Supported: true,
|
|
CreatedAt: time.Now(),
|
|
UpdatedAt: time.Now(),
|
|
}
|
|
s.requests[req.ID] = req
|
|
return req
|
|
}
|
|
|
|
func (s *ModelListStore) Get(id string) *ModelListRequest {
|
|
s.mu.Lock()
|
|
defer s.mu.Unlock()
|
|
|
|
req, ok := s.requests[id]
|
|
if !ok {
|
|
return nil
|
|
}
|
|
applyModelListTimeout(req, time.Now())
|
|
return req
|
|
}
|
|
|
|
// applyModelListTimeout transitions a request to ModelListTimeout when it has
|
|
// been stuck in a non-terminal state past its threshold. The pending threshold
|
|
// catches "daemon never picked this up"; the running threshold catches
|
|
// "daemon picked it up but the result report was lost" — previously the only
|
|
// escape from running was the 2-minute memory GC, which exceeded the UI's
|
|
// polling window and surfaced as a silent discovery failure.
|
|
func applyModelListTimeout(req *ModelListRequest, now time.Time) {
|
|
switch req.Status {
|
|
case ModelListPending:
|
|
if now.Sub(req.CreatedAt) > modelListPendingTimeout {
|
|
req.Status = ModelListTimeout
|
|
req.Error = "daemon did not respond within 30 seconds"
|
|
req.UpdatedAt = now
|
|
}
|
|
case ModelListRunning:
|
|
if now.Sub(req.UpdatedAt) > modelListRunningTimeout {
|
|
req.Status = ModelListTimeout
|
|
req.Error = "daemon did not finish within 60 seconds"
|
|
req.UpdatedAt = now
|
|
}
|
|
}
|
|
}
|
|
|
|
// PopPending returns and marks-running the oldest pending request for a runtime.
|
|
func (s *ModelListStore) PopPending(runtimeID string) *ModelListRequest {
|
|
s.mu.Lock()
|
|
defer s.mu.Unlock()
|
|
|
|
var oldest *ModelListRequest
|
|
for _, req := range s.requests {
|
|
if req.RuntimeID == runtimeID && req.Status == ModelListPending {
|
|
if oldest == nil || req.CreatedAt.Before(oldest.CreatedAt) {
|
|
oldest = req
|
|
}
|
|
}
|
|
}
|
|
if oldest != nil {
|
|
oldest.Status = ModelListRunning
|
|
oldest.UpdatedAt = time.Now()
|
|
}
|
|
return oldest
|
|
}
|
|
|
|
func (s *ModelListStore) Complete(id string, models []ModelEntry, supported bool) {
|
|
s.mu.Lock()
|
|
defer s.mu.Unlock()
|
|
|
|
if req, ok := s.requests[id]; ok {
|
|
req.Status = ModelListCompleted
|
|
req.Models = models
|
|
req.Supported = supported
|
|
req.UpdatedAt = time.Now()
|
|
}
|
|
}
|
|
|
|
func (s *ModelListStore) Fail(id string, errMsg string) {
|
|
s.mu.Lock()
|
|
defer s.mu.Unlock()
|
|
|
|
if req, ok := s.requests[id]; ok {
|
|
req.Status = ModelListFailed
|
|
req.Error = errMsg
|
|
req.UpdatedAt = time.Now()
|
|
}
|
|
}
|
|
|
|
// ---------------------------------------------------------------------------
// Handlers
// ---------------------------------------------------------------------------
|
|
|
|
// InitiateListModels creates a pending model list request for a runtime.
|
|
// Called by the frontend; the daemon picks it up on its next heartbeat.
|
|
func (h *Handler) InitiateListModels(w http.ResponseWriter, r *http.Request) {
|
|
runtimeID := chi.URLParam(r, "runtimeId")
|
|
runtimeUUID, ok := parseUUIDOrBadRequest(w, runtimeID, "runtime_id")
|
|
if !ok {
|
|
return
|
|
}
|
|
|
|
rt, err := h.Queries.GetAgentRuntime(r.Context(), runtimeUUID)
|
|
if err != nil {
|
|
writeError(w, http.StatusNotFound, "runtime not found")
|
|
return
|
|
}
|
|
if _, ok := h.requireWorkspaceMember(w, r, uuidToString(rt.WorkspaceID), "runtime not found"); !ok {
|
|
return
|
|
}
|
|
if rt.Status != "online" {
|
|
writeError(w, http.StatusServiceUnavailable, "runtime is offline")
|
|
return
|
|
}
|
|
|
|
req := h.ModelListStore.Create(uuidToString(rt.ID))
|
|
writeJSON(w, http.StatusOK, req)
|
|
}
|
|
|
|
// GetModelListRequest returns the status of a model list request.
|
|
func (h *Handler) GetModelListRequest(w http.ResponseWriter, r *http.Request) {
|
|
requestID := chi.URLParam(r, "requestId")
|
|
|
|
req := h.ModelListStore.Get(requestID)
|
|
if req == nil {
|
|
writeError(w, http.StatusNotFound, "request not found")
|
|
return
|
|
}
|
|
writeJSON(w, http.StatusOK, req)
|
|
}
|
|
|
|
// ReportModelListResult receives the list result from the daemon.
|
|
func (h *Handler) ReportModelListResult(w http.ResponseWriter, r *http.Request) {
|
|
runtimeID := chi.URLParam(r, "runtimeId")
|
|
|
|
if _, ok := h.requireDaemonRuntimeAccess(w, r, runtimeID); !ok {
|
|
return
|
|
}
|
|
|
|
requestID := chi.URLParam(r, "requestId")
|
|
|
|
var body struct {
|
|
Status string `json:"status"` // "completed" or "failed"
|
|
Models []ModelEntry `json:"models"`
|
|
Supported *bool `json:"supported"`
|
|
Error string `json:"error"`
|
|
}
|
|
if err := json.NewDecoder(r.Body).Decode(&body); err != nil {
|
|
writeError(w, http.StatusBadRequest, "invalid request body")
|
|
return
|
|
}
|
|
|
|
if body.Status == "completed" {
|
|
// Older daemons may omit `supported`; default to true to keep
|
|
// the UI usable while they haven't been redeployed yet.
|
|
supported := true
|
|
if body.Supported != nil {
|
|
supported = *body.Supported
|
|
}
|
|
h.ModelListStore.Complete(requestID, body.Models, supported)
|
|
} else {
|
|
h.ModelListStore.Fail(requestID, body.Error)
|
|
}
|
|
|
|
slog.Debug("model list report", "runtime_id", runtimeID, "request_id", requestID, "status", body.Status, "count", len(body.Models))
|
|
writeJSON(w, http.StatusOK, map[string]string{"status": "ok"})
|
|
}
|