Files
multica/server/internal/daemon/auto_update.go
Bohan Jiang ffba2607aa fix(daemon): default auto-update off for self-host instances (MUL-2381) (#2807)
A self-host operator running a fork of Multica with their own patches would
have their daemon silently upgraded to the upstream GitHub release, clobbering
the fork. Self-host setups also routinely pin to an older server, so a fresh
CLI may no longer talk to it.

Flip the default: auto-update stays on by default for api.multica.ai and
defaults to off for any other server URL. Either default can be overridden via
MULTICA_DAEMON_AUTO_UPDATE.

Co-authored-by: multica-agent <github@multica.ai>
2026-05-18 18:19:07 +08:00

177 lines
6.6 KiB
Go

package daemon
import (
"context"
"time"
"github.com/multica-ai/multica/server/internal/cli"
)
// Indirections over the real release / version helpers so tests can run the
// auto-update loop deterministically without reaching out to GitHub or
// shelling out to brew/curl. Mirrors the pattern used at the top of daemon.go
// for `isBrewInstall` / `getBrewPrefix` / `matchKnownBrewPrefix`.
//
// Production code in this file must call these variables, never the cli
// functions directly, otherwise a test that swaps them out is bypassed.
var (
// fetchLatestRelease is called with no arguments by tryAutoUpdate and yields
// (release, error); the release's TagName is the upgrade target.
fetchLatestRelease = cli.FetchLatestRelease
// isReleaseVersion gates autoUpdateLoop: when it returns false for the
// running CLI version the loop logs "not a release build" and never starts.
isReleaseVersion = cli.IsReleaseVersion
// isNewerVersion is called as isNewerVersion(latestTag, currentVersion);
// tryAutoUpdate proceeds with an upgrade only when it returns true.
isNewerVersion = cli.IsNewerVersion
)
// autoUpdateInitialDelay is how long autoUpdateLoop sleeps, once it has
// decided auto-update is active, before performing its first version check.
// The daemon has plenty to do at startup (auth, register, sync workspaces,
// kick off heartbeats); we don't want to add an outbound HTTPS call to GitHub
// on top of that. The delay is also short enough that a brand-new install
// with an available update still self-updates within a couple of minutes
// rather than after the full check interval.
//
// NOTE(review): declared as a var rather than a const, presumably so tests
// can shorten it — confirm against the test files.
var autoUpdateInitialDelay = 2 * time.Minute
// autoUpdateLoop is the daemon's background self-update poller. On a fixed
// interval it asks GitHub whether a newer CLI release exists and, when one
// does and the daemon is idle, runs the same brew-or-download upgrade path as
// the server-triggered update, then restarts gracefully into the new binary.
//
// The loop exits immediately, logging the reason, when any of these hold:
//   - auto-update is off in the config (--no-auto-update,
//     MULTICA_DAEMON_AUTO_UPDATE=false, or the default-off applied to
//     self-hosted servers, which MULTICA_DAEMON_AUTO_UPDATE=true overrides);
//   - Desktop spawned the daemon (the Electron app owns the binary);
//   - the running version is not a tagged release (dev builds).
//
// Otherwise it sleeps autoUpdateInitialDelay, performs one check, and then
// re-checks once per interval until ctx is cancelled. A check that finds the
// daemon already on the latest release logs nothing, keeping the log quiet
// for daemons that run for weeks at a time.
func (d *Daemon) autoUpdateLoop(ctx context.Context) {
	version := d.cfg.CLIVersion

	// Preconditions, evaluated in order; the first one that matches wins.
	switch {
	case !d.cfg.AutoUpdateEnabled:
		d.logger.Info("auto-update: disabled")
		return
	case d.cfg.LaunchedBy == "desktop":
		// Desktop installs and replaces the CLI binary on its own, so a
		// self-update would just be overwritten at the next launch.
		d.logger.Info("auto-update: skipped (managed by Desktop)")
		return
	case !isReleaseVersion(version):
		// `make daemon` and other ad-hoc builds carry a `git describe`-style
		// version. Replacing them with a public release would silently
		// downgrade whatever the developer has checked out.
		d.logger.Info("auto-update: skipped (not a release build)", "version", version)
		return
	}

	// A missing or non-positive configured interval falls back to the default.
	period := d.cfg.AutoUpdateCheckInterval
	if period <= 0 {
		period = DefaultAutoUpdateCheckInterval
	}
	d.logger.Info("auto-update: started", "interval", period, "current", version)

	// Hold off on the first check; a non-nil result means ctx ended first.
	if sleepWithContext(ctx, autoUpdateInitialDelay) != nil {
		return
	}
	d.tryAutoUpdate(ctx)

	// The ticker is created only after the first check has finished, so the
	// second check lands one full period after it.
	tick := time.NewTicker(period)
	defer tick.Stop()

	done := ctx.Done()
	for {
		select {
		case <-tick.C:
			d.tryAutoUpdate(ctx)
		case <-done:
			return
		}
	}
}
// tryAutoUpdate performs a single check-and-maybe-upgrade pass. It gives up
// quietly, leaving the work for the next tick, when:
//   - ctx is already cancelled;
//   - another update (e.g. a server-triggered one) is in flight;
//   - tasks are running or being claimed — running agents are never
//     interrupted;
//   - the release lookup fails or returns nothing usable;
//   - the latest release is not newer than the running version;
//   - the upgrade command itself fails.
//
// It deliberately returns no error: a failed check is retried on the next
// tick, and a transient network blip must not escalate into a process-level
// shutdown. On a successful upgrade it calls triggerRestart with both the
// updating flag and the claim barrier still held.
func (d *Daemon) tryAutoUpdate(ctx context.Context) {
	if ctx.Err() != nil {
		return
	}

	// Yield to the server-triggered update path. A manual update started from
	// the Runtimes page either lands us on the new binary or fails, and in
	// both cases the next tick re-evaluates.
	if d.updating.Load() {
		d.logger.Debug("auto-update: skip — update already in progress")
		return
	}

	// Cheap idle pre-check so we don't spend an HTTPS round-trip to GitHub
	// (and rate-limit budget) when we already know we will defer. Tasks that
	// start after this read are caught by the strict barrier check further
	// down, which re-checks under claimMu.
	active := d.activeTasks.Load()
	if active > 0 {
		d.logger.Debug("auto-update: skip — tasks running", "active", active)
		return
	}

	latest, fetchErr := fetchLatestRelease()
	if fetchErr != nil {
		d.logger.Warn("auto-update: fetch latest release failed — will retry", "error", fetchErr)
		return
	}
	if latest == nil || latest.TagName == "" {
		return
	}
	target := latest.TagName
	current := d.cfg.CLIVersion
	if !isNewerVersion(target, current) {
		return
	}

	// Take ownership of the updating flag atomically so a concurrent
	// server-triggered handleUpdate arriving on a heartbeat tick cannot
	// double-fire.
	if !d.updating.CompareAndSwap(false, true) {
		d.logger.Debug("auto-update: skip — update already in progress (raced)")
		return
	}

	// handedOff flips to true only right before triggerRestart. Every other
	// exit path leaves it false, so the deferred cleanups below undo whatever
	// this call acquired.
	handedOff := false
	defer func() {
		if handedOff {
			return
		}
		d.updating.Store(false)
	}()

	// Strict barrier. The release fetch above can take anywhere from tens of
	// milliseconds to seconds, which is plenty of time for a poller to claim
	// a fresh task. trySetClaimBarrier checks claimsInFlight + activeTasks
	// under claimMu and only sets pauseClaims when both are zero, so a true
	// result means triggerRestart will not cancel an in-flight task.
	if !d.trySetClaimBarrier() {
		d.logger.Info("auto-update: deferring — task or claim in flight at barrier check")
		return
	}
	// Registered after the flag cleanup, so on a failed upgrade the barrier
	// is lifted first and the updating flag is cleared second.
	defer func() {
		if handedOff {
			return
		}
		d.releaseClaimBarrier()
	}()

	d.logger.Info("auto-update: newer release available, upgrading",
		"current", current, "target", target)

	out, upgradeErr := d.runUpdateFn(target)
	if upgradeErr != nil {
		d.logger.Warn("auto-update: upgrade failed — will retry", "error", upgradeErr, "output", out)
		return
	}
	d.logger.Info("auto-update: upgrade completed, restarting", "target", target, "output", out)

	// triggerRestart cancels the root context; Run() then returns and the
	// parent (cmd_daemon.go) re-execs the new binary. Keep the updating flag
	// and the claim barrier held: the process is about to exit, and clearing
	// either would open a window for new claims or a second auto-update tick
	// mid-shutdown.
	handedOff = true
	d.triggerRestart()
}