Files
multica/server/pkg/db/queries/runtime.sql
Jiayuan Zhang ff5f6ac2ee fix(daemon): prevent duplicate runtime registration on profile switch (#906)
* fix(daemon): prevent duplicate runtime registration on profile switch

The daemon_id included a profile name suffix (e.g. "hostname-staging"),
so switching profiles created a new daemon_id that bypassed the UPSERT
dedup constraint, leaving orphaned runtime records in the database.

Three changes:
- Remove profile suffix from daemon_id — use stable hostname only.
  The unique constraint (workspace_id, daemon_id, provider) already
  prevents collisions within the same workspace.
- Auto-migrate agents from old offline runtimes to the newly registered
  runtime during DaemonRegister (same workspace/provider/owner).
- Add TTL-based GC in the runtime sweeper to delete offline runtimes
  with no active agents after 7 days.

Closes MUL-695

* fix(daemon): address code review issues on PR #906

1. Move gcRuntimes() to the main sweep loop — previously it was inside
   sweepStaleRuntimes() after an early return, so it only ran when new
   runtimes were marked stale. Now it runs every sweep cycle independently.

2. Fix DeleteStaleOfflineRuntimes to exclude runtimes with ANY agent
   reference (not just active ones). The FK agent.runtime_id is ON DELETE
   RESTRICT, so archived agents also block deletion.

3. Scope MigrateAgentsToRuntime to the same machine by matching
   daemon_id LIKE '<current_daemon_id>-%'. This prevents cross-machine
   agent migration when the same user has multiple devices.
2026-04-14 01:52:34 +08:00

110 lines
3.6 KiB
SQL

-- name: ListAgentRuntimes :many
SELECT * FROM agent_runtime
WHERE workspace_id = $1
ORDER BY created_at ASC;
-- name: GetAgentRuntime :one
SELECT * FROM agent_runtime
WHERE id = $1;
-- name: GetAgentRuntimeForWorkspace :one
SELECT * FROM agent_runtime
WHERE id = $1 AND workspace_id = $2;
-- name: UpsertAgentRuntime :one
INSERT INTO agent_runtime (
workspace_id,
daemon_id,
name,
runtime_mode,
provider,
status,
device_info,
metadata,
owner_id,
last_seen_at
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, now())
ON CONFLICT (workspace_id, daemon_id, provider)
DO UPDATE SET
name = EXCLUDED.name,
runtime_mode = EXCLUDED.runtime_mode,
status = EXCLUDED.status,
device_info = EXCLUDED.device_info,
metadata = EXCLUDED.metadata,
owner_id = COALESCE(EXCLUDED.owner_id, agent_runtime.owner_id),
last_seen_at = now(),
updated_at = now()
RETURNING *;
-- name: UpdateAgentRuntimeHeartbeat :one
UPDATE agent_runtime
SET status = 'online', last_seen_at = now(), updated_at = now()
WHERE id = $1
RETURNING *;
-- name: SetAgentRuntimeOffline :exec
UPDATE agent_runtime
SET status = 'offline', updated_at = now()
WHERE id = $1;
-- name: MarkStaleRuntimesOffline :many
UPDATE agent_runtime
SET status = 'offline', updated_at = now()
WHERE status = 'online'
AND last_seen_at < now() - make_interval(secs => @stale_seconds::double precision)
RETURNING id, workspace_id;
-- name: FailTasksForOfflineRuntimes :many
-- Marks dispatched/running tasks as failed when their runtime is offline.
-- This cleans up orphaned tasks after a daemon crash or network partition.
UPDATE agent_task_queue
SET status = 'failed', completed_at = now(), error = 'runtime went offline'
WHERE status IN ('dispatched', 'running')
AND runtime_id IN (
SELECT id FROM agent_runtime WHERE status = 'offline'
)
RETURNING id, agent_id, issue_id;
-- name: ListAgentRuntimesByOwner :many
SELECT * FROM agent_runtime
WHERE workspace_id = $1 AND owner_id = $2
ORDER BY created_at ASC;
-- name: DeleteAgentRuntime :exec
DELETE FROM agent_runtime WHERE id = $1;
-- name: CountActiveAgentsByRuntime :one
SELECT count(*) FROM agent WHERE runtime_id = $1 AND archived_at IS NULL;
-- name: DeleteArchivedAgentsByRuntime :exec
DELETE FROM agent WHERE runtime_id = $1 AND archived_at IS NOT NULL;
-- name: MigrateAgentsToRuntime :execrows
-- Migrates agents from stale offline runtimes to the newly registered runtime.
-- Only migrates from runtimes that match the same workspace, provider, owner,
-- AND whose daemon_id starts with the current daemon_id followed by '-'.
-- This scopes migration to old profile-suffixed runtimes from the same machine
-- (e.g. "MacBook-staging" matches daemon_id_prefix "MacBook") without touching
-- runtimes from other machines belonging to the same user.
UPDATE agent
SET runtime_id = @new_runtime_id
WHERE runtime_id IN (
SELECT ar.id FROM agent_runtime ar
WHERE ar.workspace_id = @workspace_id
AND ar.provider = @provider
AND ar.owner_id = @owner_id
AND ar.id != @new_runtime_id
AND ar.status = 'offline'
AND ar.daemon_id LIKE @daemon_id_prefix || '-%'
);
-- name: DeleteStaleOfflineRuntimes :many
-- Deletes runtimes that have been offline for longer than the TTL and have
-- no agents bound (active or archived). The FK constraint on agent.runtime_id
-- is ON DELETE RESTRICT, so we must exclude all agent references.
DELETE FROM agent_runtime
WHERE status = 'offline'
AND last_seen_at < now() - make_interval(secs => @stale_seconds::double precision)
AND id NOT IN (SELECT DISTINCT runtime_id FROM agent)
RETURNING id, workspace_id;