Compare commits

...

3 Commits

Author SHA1 Message Date
Jiang Bohan
814a2e2ac3 fix(cli): stop daemon via HTTP /shutdown instead of console ctrl events
With DETACHED_PROCESS the Windows daemon shares no console with the
stop caller, so `GenerateConsoleCtrlEvent(CTRL_BREAK_EVENT, pid)`
silently never reaches it — the old code would report "stop sent"
while the daemon kept running. Replace the platform-specific
stopDaemonProcess with a cross-platform POST to the daemon's HTTP
/shutdown endpoint, which cancels the same top-level context the
self-restart path already uses. Fall back to `process.Kill()` if
the HTTP call fails.

Also drops the now-unused stopDaemonProcess / CTRL_BREAK_EVENT
wiring, adds handler tests, and updates the DETACHED_PROCESS comment.
2026-04-21 12:41:48 +08:00
Jiang Bohan
c570941bc9 fix(cli): make multica update work while the binary is running on Windows
On Windows, a running .exe is opened without FILE_SHARE_WRITE, so the
previous os.Rename(tmp, exe) always failed with "Access is denied" —
every `multica update` on Windows hit this, because the CLI is
updating its own running binary.

Windows does allow renaming the running .exe (just not overwriting
it), so the new Windows-only replaceBinary moves the running binary
to `.old` first, installs the new one, and restores the original if
installation fails. A best-effort CleanupStaleUpdateArtifacts runs
at CLI/daemon startup to reclaim the leftover `.old` file once the
old process has exited.

Unix keeps the plain rename-over semantics (the old inode stays valid
for the running process).
2026-04-21 12:13:34 +08:00
Jiang Bohan
bfb775fe14 fix(cli): detach daemon from parent console on Windows
CREATE_NEW_PROCESS_GROUP alone leaves the daemon attached to the
parent console, so closing the launching cmd/PowerShell window fires
CTRL_CLOSE_EVENT down the inherited console and takes the daemon
with it. Add DETACHED_PROCESS so the child has no console at all;
stdout/stderr are already redirected to the log file before spawn.
2026-04-21 12:13:14 +08:00
9 changed files with 205 additions and 35 deletions

View File

@@ -360,17 +360,19 @@ func runDaemonRestart(cmd *cobra.Command, args []string) error {
if health["status"] == "running" {
pid, _ := health["pid"].(float64)
if pid > 0 {
if p, err := os.FindProcess(int(pid)); err == nil {
fmt.Fprintf(os.Stderr, "Stopping daemon (pid %d)...\n", int(pid))
_ = stopDaemonProcess(p)
for i := 0; i < 10; i++ {
time.Sleep(500 * time.Millisecond)
sctx, scancel := context.WithTimeout(context.Background(), 1*time.Second)
h := checkDaemonHealthOnPort(sctx, healthPort)
scancel()
if h["status"] != "running" {
break
}
fmt.Fprintf(os.Stderr, "Stopping daemon (pid %d)...\n", int(pid))
if err := requestDaemonShutdown(healthPort); err != nil {
if p, perr := os.FindProcess(int(pid)); perr == nil {
_ = p.Kill()
}
}
for i := 0; i < 10; i++ {
time.Sleep(500 * time.Millisecond)
sctx, scancel := context.WithTimeout(context.Background(), 1*time.Second)
h := checkDaemonHealthOnPort(sctx, healthPort)
scancel()
if h["status"] != "running" {
break
}
}
}
@@ -409,8 +411,17 @@ func runDaemonStop(cmd *cobra.Command, _ []string) error {
return fmt.Errorf("find process %d: %w", int(pid), err)
}
if err := stopDaemonProcess(process); err != nil {
return fmt.Errorf("stop daemon (pid %d): %w", int(pid), err)
// Request graceful shutdown via the daemon's HTTP /shutdown endpoint
// rather than an OS signal. On Windows the daemon is spawned with
// DETACHED_PROCESS so it shares no console with us, which means
// GenerateConsoleCtrlEvent can't reach it; HTTP works on both
// platforms and triggers the same context-cancel path the daemon
// already uses for self-restart.
if err := requestDaemonShutdown(healthPort); err != nil {
fmt.Fprintf(os.Stderr, "Graceful shutdown request failed: %v — falling back to forced kill.\n", err)
if kerr := process.Kill(); kerr != nil {
return fmt.Errorf("kill daemon (pid %d): %w", int(pid), kerr)
}
}
fmt.Fprintf(os.Stderr, "Stopping daemon (pid %d)...\n", int(pid))
@@ -432,6 +443,27 @@ func runDaemonStop(cmd *cobra.Command, _ []string) error {
return nil
}
// requestDaemonShutdown POSTs to the daemon's /shutdown endpoint to ask it
// to exit gracefully. Returns an error if the request could not be delivered
// (network error, non-2xx status, or the endpoint predates this change).
func requestDaemonShutdown(healthPort int) error {
url := fmt.Sprintf("http://127.0.0.1:%d/shutdown", healthPort)
req, err := http.NewRequest(http.MethodPost, url, nil)
if err != nil {
return err
}
client := &http.Client{Timeout: 2 * time.Second}
resp, err := client.Do(req)
if err != nil {
return err
}
defer resp.Body.Close()
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
return fmt.Errorf("unexpected status %d", resp.StatusCode)
}
return nil
}
// --- daemon status ---
func runDaemonStatus(cmd *cobra.Command, _ []string) error {

View File

@@ -15,10 +15,6 @@ func daemonSysProcAttr() *syscall.SysProcAttr {
return &syscall.SysProcAttr{Setsid: true}
}
func stopDaemonProcess(process *os.Process) error {
return process.Signal(syscall.SIGTERM)
}
func notifyShutdownContext(parent context.Context) (context.Context, context.CancelFunc) {
return signal.NotifyContext(parent, syscall.SIGINT, syscall.SIGTERM)
}

View File

@@ -12,30 +12,25 @@ import (
)
const (
createNewProcessGroup = 0x00000200
ctrlBreakEvent = 1
sigBreak = syscall.Signal(0x15)
detachedProcess = 0x00000008
sigBreak = syscall.Signal(0x15)
)
// daemonSysProcAttr returns the attributes used when spawning the background
// daemon. DETACHED_PROCESS severs the inherited console so closing the parent
// cmd/PowerShell window no longer propagates CTRL_CLOSE_EVENT to the daemon.
// Because the detached daemon shares no console with the stop caller,
// `daemon stop` talks to it via the HTTP /shutdown endpoint rather than
// GenerateConsoleCtrlEvent. The daemon's stdout/stderr are already
// redirected to the log file before Start() is called, so losing the
// console is safe.
func daemonSysProcAttr() *syscall.SysProcAttr {
return &syscall.SysProcAttr{
HideWindow: true,
CreationFlags: createNewProcessGroup,
CreationFlags: detachedProcess,
}
}
func stopDaemonProcess(process *os.Process) error {
// Try graceful shutdown via CTRL_BREAK_EVENT first.
// The daemon's process group ID matches its PID (CREATE_NEW_PROCESS_GROUP).
dll := syscall.NewLazyDLL("kernel32.dll")
generateCtrlEvent := dll.NewProc("GenerateConsoleCtrlEvent")
ret, _, _ := generateCtrlEvent.Call(uintptr(ctrlBreakEvent), uintptr(process.Pid))
if ret != 0 {
return nil
}
return process.Kill()
}
func notifyShutdownContext(parent context.Context) (context.Context, context.CancelFunc) {
return signal.NotifyContext(parent, os.Interrupt, sigBreak)
}

View File

@@ -6,6 +6,8 @@ import (
"runtime"
"github.com/spf13/cobra"
"github.com/multica-ai/multica/server/internal/cli"
)
var (
@@ -73,6 +75,7 @@ func init() {
}
func main() {
cli.CleanupStaleUpdateArtifacts()
if err := rootCmd.Execute(); err != nil {
if err != errSilent {
fmt.Fprintln(os.Stderr, "Error:", err)

View File

@@ -172,8 +172,9 @@ func UpdateViaDownload(targetVersion string) (string, error) {
return "", fmt.Errorf("chmod temp file: %w", err)
}
// Replace the original binary.
if err := os.Rename(tmpPath, exePath); err != nil {
// Replace the original binary. On Windows this moves the running executable
// aside first; on Unix a plain rename over the running inode is fine.
if err := replaceBinary(tmpPath, exePath); err != nil {
os.Remove(tmpPath)
return "", fmt.Errorf("replace binary: %w", err)
}

View File

@@ -0,0 +1,16 @@
//go:build !windows
package cli
import "os"
// replaceBinary swaps the running executable for the freshly-downloaded one.
// On Unix, the kernel keeps the old inode alive for the running process, so a
// plain rename is safe.
func replaceBinary(tmpPath, exePath string) error {
return os.Rename(tmpPath, exePath)
}
// CleanupStaleUpdateArtifacts is a no-op on Unix — there are no sidecar files
// to reclaim.
func CleanupStaleUpdateArtifacts() {}

View File

@@ -0,0 +1,60 @@
//go:build windows
package cli
import (
"fmt"
"os"
"path/filepath"
)
// oldBinarySuffix is appended to the previous executable while a new one is
// being installed. Windows refuses to overwrite a running .exe but allows
// renaming it, so we shuffle the running binary out of the way first.
const oldBinarySuffix = ".old"
// replaceBinary swaps the running executable for the freshly-downloaded one.
// Windows holds an exclusive handle on a running .exe, so the rename-over
// pattern used on Unix fails with "Access is denied". Instead:
// 1. Clear any stale leftover from a previous update.
// 2. Move the running executable aside to exePath+".old".
// 3. Rename the new binary into place.
// 4. If step 3 fails, restore the original so the user isn't stranded.
//
// The leftover .old file is cleaned up on next startup via
// CleanupStaleUpdateArtifacts.
func replaceBinary(tmpPath, exePath string) error {
oldPath := exePath + oldBinarySuffix
// Best-effort cleanup; if this fails (file still locked) the next Rename
// will surface a useful error.
_ = os.Remove(oldPath)
if err := os.Rename(exePath, oldPath); err != nil {
return fmt.Errorf("move running binary aside: %w", err)
}
if err := os.Rename(tmpPath, exePath); err != nil {
// Restore so the user isn't left without a multica.exe.
if rerr := os.Rename(oldPath, exePath); rerr != nil {
return fmt.Errorf("install new binary: %w (and failed to restore: %v)", err, rerr)
}
return fmt.Errorf("install new binary: %w", err)
}
return nil
}
// CleanupStaleUpdateArtifacts removes leftover `.old` binaries from previous
// updates. Windows can't delete a running .exe, so a prior update may have
// left one behind; once the user restarts, this call reclaims the space.
func CleanupStaleUpdateArtifacts() {
exePath, err := os.Executable()
if err != nil {
return
}
if resolved, err := filepath.EvalSymlinks(exePath); err == nil {
exePath = resolved
}
_ = os.Remove(exePath + oldBinarySuffix)
}

View File

@@ -89,11 +89,34 @@ func (d *Daemon) healthHandler(startedAt time.Time) http.HandlerFunc {
}
}
// shutdownHandler triggers a graceful daemon shutdown by cancelling the
// top-level context. Used by `multica daemon stop` so we don't depend on
// OS-signal delivery, which is unreliable on Windows once the daemon is
// spawned with DETACHED_PROCESS (no shared console with the stop caller).
// The listener is bound to 127.0.0.1 only, so only local processes can hit
// this endpoint.
func (d *Daemon) shutdownHandler() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
if r.Method != http.MethodPost {
http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
return
}
w.Header().Set("Content-Type", "application/json")
json.NewEncoder(w).Encode(map[string]string{"status": "shutting down"})
if d.cancelFunc != nil {
// Cancel asynchronously so the response flushes first; otherwise
// srv.Close() races with the writer.
go d.cancelFunc()
}
}
}
// serveHealth runs the health HTTP server on the given listener.
// Blocks until ctx is cancelled.
func (d *Daemon) serveHealth(ctx context.Context, ln net.Listener, startedAt time.Time) {
mux := http.NewServeMux()
mux.HandleFunc("/health", d.healthHandler(startedAt))
mux.HandleFunc("/shutdown", d.shutdownHandler())
mux.HandleFunc("/repo/checkout", func(w http.ResponseWriter, r *http.Request) {
if r.Method != http.MethodPost {

View File

@@ -1,6 +1,7 @@
package daemon
import (
"context"
"encoding/json"
"log/slog"
"net/http"
@@ -86,6 +87,49 @@ func TestHealthHandlerActiveTaskCountTracksCounter(t *testing.T) {
assertActiveTaskCount(t, handler, 0)
}
func TestShutdownHandlerPostCancelsDaemonContext(t *testing.T) {
t.Parallel()
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
d := &Daemon{cancelFunc: cancel}
rec := httptest.NewRecorder()
req := httptest.NewRequest(http.MethodPost, "/shutdown", nil)
d.shutdownHandler().ServeHTTP(rec, req)
if rec.Code != http.StatusOK {
t.Fatalf("expected 200, got %d", rec.Code)
}
select {
case <-ctx.Done():
case <-time.After(time.Second):
t.Fatal("daemon context was not cancelled after POST /shutdown")
}
}
func TestShutdownHandlerRejectsNonPost(t *testing.T) {
t.Parallel()
cancelled := false
d := &Daemon{cancelFunc: func() { cancelled = true }}
rec := httptest.NewRecorder()
req := httptest.NewRequest(http.MethodGet, "/shutdown", nil)
d.shutdownHandler().ServeHTTP(rec, req)
if rec.Code != http.StatusMethodNotAllowed {
t.Fatalf("expected 405, got %d", rec.Code)
}
// Give the handler's deferred cancel goroutine a moment to fire
// in case a bug causes it to run anyway.
time.Sleep(10 * time.Millisecond)
if cancelled {
t.Fatal("GET request should not trigger cancellation")
}
}
func assertActiveTaskCount(t *testing.T, h http.HandlerFunc, want int64) {
t.Helper()
rec := httptest.NewRecorder()