mirror of
https://github.com/multica-ai/multica.git
synced 2026-07-05 21:39:54 +02:00
* fix(daemon): prevent duplicate runtime registration on profile switch The daemon_id included a profile name suffix (e.g. "hostname-staging"), so switching profiles created a new daemon_id that bypassed the UPSERT dedup constraint, leaving orphaned runtime records in the database. Three changes: - Remove profile suffix from daemon_id — use stable hostname only. The unique constraint (workspace_id, daemon_id, provider) already prevents collisions within the same workspace. - Auto-migrate agents from old offline runtimes to the newly registered runtime during DaemonRegister (same workspace/provider/owner). - Add TTL-based GC in the runtime sweeper to delete offline runtimes with no active agents after 7 days. Closes MUL-695 * fix(daemon): address code review issues on PR #906 1. Move gcRuntimes() to the main sweep loop — previously it was inside sweepStaleRuntimes() after an early return, so it only ran when new runtimes were marked stale. Now it runs every sweep cycle independently. 2. Fix DeleteStaleOfflineRuntimes to exclude runtimes with ANY agent reference (not just active ones). The FK agent.runtime_id is ON DELETE RESTRICT, so archived agents also block deletion. 3. Scope MigrateAgentsToRuntime to the same machine by matching daemon_id LIKE '<current_daemon_id>-%'. This prevents cross-machine agent migration when the same user has multiple devices.
110 lines
3.6 KiB
SQL
110 lines
3.6 KiB
SQL
-- name: ListAgentRuntimes :many
-- Lists every runtime registered in the given workspace, oldest first.
-- Rows are tie-broken on id so the order stays stable when several runtimes
-- carry the same created_at value.
SELECT *
FROM agent_runtime
WHERE workspace_id = $1
ORDER BY created_at ASC, id ASC;
|
|
|
|
-- name: GetAgentRuntime :one
-- Fetches a single runtime row by id, with no workspace filter applied.
SELECT *
FROM agent_runtime
WHERE id = $1;
|
|
|
|
-- name: GetAgentRuntimeForWorkspace :one
-- Fetches a runtime by id, but only when it belongs to the given workspace.
SELECT *
FROM agent_runtime
WHERE id = $1
  AND workspace_id = $2;
|
|
|
|
-- name: UpsertAgentRuntime :one
-- Registers a runtime, or refreshes the existing row that has the same
-- (workspace_id, daemon_id, provider) triple.
-- On conflict the descriptive columns are overwritten with the incoming values,
-- owner_id is replaced only when the incoming value is non-NULL, and both
-- last_seen_at and updated_at are set to now(). Returns the resulting row.
INSERT INTO agent_runtime (
    workspace_id, daemon_id, name, runtime_mode, provider,
    status, device_info, metadata, owner_id, last_seen_at
)
VALUES (
    $1, $2, $3, $4, $5,
    $6, $7, $8, $9, now()
)
ON CONFLICT (workspace_id, daemon_id, provider) DO UPDATE
SET
    name         = EXCLUDED.name,
    runtime_mode = EXCLUDED.runtime_mode,
    status       = EXCLUDED.status,
    device_info  = EXCLUDED.device_info,
    metadata     = EXCLUDED.metadata,
    owner_id     = COALESCE(EXCLUDED.owner_id, agent_runtime.owner_id),
    last_seen_at = now(),
    updated_at   = now()
RETURNING *;
|
|
|
|
-- name: UpdateAgentRuntimeHeartbeat :one
-- Records a heartbeat for one runtime: sets status to 'online' and stamps
-- last_seen_at and updated_at with the current time. Returns the updated row.
UPDATE agent_runtime
SET
    status       = 'online',
    last_seen_at = now(),
    updated_at   = now()
WHERE id = $1
RETURNING *;
|
|
|
|
-- name: SetAgentRuntimeOffline :exec
-- Marks one runtime as 'offline' and bumps updated_at.
-- last_seen_at is left untouched.
UPDATE agent_runtime
SET
    status     = 'offline',
    updated_at = now()
WHERE id = $1;
|
|
|
|
-- name: MarkStaleRuntimesOffline :many
-- Flips every 'online' runtime whose last_seen_at is older than stale_seconds
-- (in seconds) to 'offline', and returns the id and workspace_id of each
-- runtime that was changed.
UPDATE agent_runtime
SET
    status     = 'offline',
    updated_at = now()
WHERE status = 'online'
  AND last_seen_at < now() - make_interval(secs => @stale_seconds::double precision)
RETURNING id, workspace_id;
|
|
|
|
-- name: FailTasksForOfflineRuntimes :many
-- Fails every task that is still 'dispatched' or 'running' while the runtime
-- it is assigned to is marked 'offline'.
-- This cleans up tasks orphaned by a daemon crash or a network partition.
-- Returns id, agent_id and issue_id of each task that was failed.
UPDATE agent_task_queue
SET
    status       = 'failed',
    completed_at = now(),
    error        = 'runtime went offline'
WHERE status IN ('dispatched', 'running')
  AND EXISTS (
      SELECT 1
      FROM agent_runtime AS ar
      WHERE ar.id = agent_task_queue.runtime_id
        AND ar.status = 'offline'
  )
RETURNING id, agent_id, issue_id;
|
|
|
|
-- name: ListAgentRuntimesByOwner :many
-- Lists the runtimes in a workspace that belong to one owner, oldest first.
-- Rows are tie-broken on id so the order stays stable when several runtimes
-- carry the same created_at value.
SELECT *
FROM agent_runtime
WHERE workspace_id = $1
  AND owner_id = $2
ORDER BY created_at ASC, id ASC;
|
|
|
|
-- name: DeleteAgentRuntime :exec
-- Removes one runtime row by id.
-- NOTE(review): presumably rejected by the database while any agent row still
-- references the runtime (agent.runtime_id is described elsewhere in this file
-- as ON DELETE RESTRICT) - confirm against the schema.
DELETE FROM agent_runtime
WHERE id = $1;
|
|
|
|
-- name: CountActiveAgentsByRuntime :one
-- Counts the agents bound to a runtime that are not archived
-- (archived_at IS NULL).
SELECT count(*)
FROM agent
WHERE runtime_id = $1
  AND archived_at IS NULL;
|
|
|
|
-- name: DeleteArchivedAgentsByRuntime :exec
-- Deletes the agents bound to a runtime that have been archived
-- (archived_at IS NOT NULL). Non-archived agents are left in place.
DELETE FROM agent
WHERE runtime_id = $1
  AND archived_at IS NOT NULL;
|
|
|
|
-- name: MigrateAgentsToRuntime :execrows
-- Re-points agents from stale offline runtimes to the newly registered runtime
-- and reports how many agent rows were moved.
-- A source runtime qualifies only when it is a different row from the new
-- runtime, is 'offline', has the same workspace, provider and owner, and its
-- daemon_id starts with the current daemon id followed by '-'.
-- The prefix rule limits migration to old profile-suffixed runtimes from the
-- same machine (e.g. "MacBook-staging" matches daemon_id_prefix "MacBook")
-- and leaves runtimes from the same user's other machines alone.
-- NOTE(review): '_' and '%' inside daemon_id_prefix act as LIKE wildcards, and
-- a different machine whose daemon id itself begins with the prefix plus '-'
-- would also match - confirm daemon ids cannot take those shapes.
UPDATE agent
SET runtime_id = @new_runtime_id
WHERE EXISTS (
    SELECT 1
    FROM agent_runtime AS ar
    WHERE ar.id = agent.runtime_id
      AND ar.workspace_id = @workspace_id
      AND ar.provider = @provider
      AND ar.owner_id = @owner_id
      AND ar.id <> @new_runtime_id
      AND ar.status = 'offline'
      AND ar.daemon_id LIKE @daemon_id_prefix || '-%'
);
|
|
|
|
-- name: DeleteStaleOfflineRuntimes :many
-- Garbage-collects runtimes that are 'offline', were last seen more than
-- stale_seconds (in seconds) ago, and have no agents bound (active or
-- archived). Returns the id and workspace_id of each deleted runtime.
-- The FK constraint on agent.runtime_id is ON DELETE RESTRICT, so every agent
-- reference must be excluded or the DELETE would be rejected.
-- NOT EXISTS is used rather than NOT IN: with NOT IN, a single NULL
-- agent.runtime_id would make the predicate UNKNOWN for every runtime and
-- silently stop anything from being deleted.
DELETE FROM agent_runtime
WHERE status = 'offline'
  AND last_seen_at < now() - make_interval(secs => @stale_seconds::double precision)
  AND NOT EXISTS (
      SELECT 1
      FROM agent
      WHERE agent.runtime_id = agent_runtime.id
  )
RETURNING id, workspace_id;
|