mirror of
https://github.com/multica-ai/multica.git
synced 2026-06-24 16:09:19 +02:00
Compare commits
3 Commits
v0.3.28
...
agent/j/7e
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
814a2e2ac3 | ||
|
|
c570941bc9 | ||
|
|
bfb775fe14 |
@@ -360,17 +360,19 @@ func runDaemonRestart(cmd *cobra.Command, args []string) error {
|
||||
if health["status"] == "running" {
|
||||
pid, _ := health["pid"].(float64)
|
||||
if pid > 0 {
|
||||
if p, err := os.FindProcess(int(pid)); err == nil {
|
||||
fmt.Fprintf(os.Stderr, "Stopping daemon (pid %d)...\n", int(pid))
|
||||
_ = stopDaemonProcess(p)
|
||||
for i := 0; i < 10; i++ {
|
||||
time.Sleep(500 * time.Millisecond)
|
||||
sctx, scancel := context.WithTimeout(context.Background(), 1*time.Second)
|
||||
h := checkDaemonHealthOnPort(sctx, healthPort)
|
||||
scancel()
|
||||
if h["status"] != "running" {
|
||||
break
|
||||
}
|
||||
fmt.Fprintf(os.Stderr, "Stopping daemon (pid %d)...\n", int(pid))
|
||||
if err := requestDaemonShutdown(healthPort); err != nil {
|
||||
if p, perr := os.FindProcess(int(pid)); perr == nil {
|
||||
_ = p.Kill()
|
||||
}
|
||||
}
|
||||
for i := 0; i < 10; i++ {
|
||||
time.Sleep(500 * time.Millisecond)
|
||||
sctx, scancel := context.WithTimeout(context.Background(), 1*time.Second)
|
||||
h := checkDaemonHealthOnPort(sctx, healthPort)
|
||||
scancel()
|
||||
if h["status"] != "running" {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -409,8 +411,17 @@ func runDaemonStop(cmd *cobra.Command, _ []string) error {
|
||||
return fmt.Errorf("find process %d: %w", int(pid), err)
|
||||
}
|
||||
|
||||
if err := stopDaemonProcess(process); err != nil {
|
||||
return fmt.Errorf("stop daemon (pid %d): %w", int(pid), err)
|
||||
// Request graceful shutdown via the daemon's HTTP /shutdown endpoint
|
||||
// rather than an OS signal. On Windows the daemon is spawned with
|
||||
// DETACHED_PROCESS so it shares no console with us, which means
|
||||
// GenerateConsoleCtrlEvent can't reach it; HTTP works on both
|
||||
// platforms and triggers the same context-cancel path the daemon
|
||||
// already uses for self-restart.
|
||||
if err := requestDaemonShutdown(healthPort); err != nil {
|
||||
fmt.Fprintf(os.Stderr, "Graceful shutdown request failed: %v — falling back to forced kill.\n", err)
|
||||
if kerr := process.Kill(); kerr != nil {
|
||||
return fmt.Errorf("kill daemon (pid %d): %w", int(pid), kerr)
|
||||
}
|
||||
}
|
||||
|
||||
fmt.Fprintf(os.Stderr, "Stopping daemon (pid %d)...\n", int(pid))
|
||||
@@ -432,6 +443,27 @@ func runDaemonStop(cmd *cobra.Command, _ []string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// requestDaemonShutdown POSTs to the daemon's /shutdown endpoint to ask it
|
||||
// to exit gracefully. Returns an error if the request could not be delivered
|
||||
// (network error, non-2xx status, or the endpoint predates this change).
|
||||
func requestDaemonShutdown(healthPort int) error {
|
||||
url := fmt.Sprintf("http://127.0.0.1:%d/shutdown", healthPort)
|
||||
req, err := http.NewRequest(http.MethodPost, url, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
client := &http.Client{Timeout: 2 * time.Second}
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
||||
return fmt.Errorf("unexpected status %d", resp.StatusCode)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// --- daemon status ---
|
||||
|
||||
func runDaemonStatus(cmd *cobra.Command, _ []string) error {
|
||||
|
||||
@@ -15,10 +15,6 @@ func daemonSysProcAttr() *syscall.SysProcAttr {
|
||||
return &syscall.SysProcAttr{Setsid: true}
|
||||
}
|
||||
|
||||
func stopDaemonProcess(process *os.Process) error {
|
||||
return process.Signal(syscall.SIGTERM)
|
||||
}
|
||||
|
||||
func notifyShutdownContext(parent context.Context) (context.Context, context.CancelFunc) {
|
||||
return signal.NotifyContext(parent, syscall.SIGINT, syscall.SIGTERM)
|
||||
}
|
||||
|
||||
@@ -12,30 +12,25 @@ import (
|
||||
)
|
||||
|
||||
const (
|
||||
createNewProcessGroup = 0x00000200
|
||||
ctrlBreakEvent = 1
|
||||
sigBreak = syscall.Signal(0x15)
|
||||
detachedProcess = 0x00000008
|
||||
sigBreak = syscall.Signal(0x15)
|
||||
)
|
||||
|
||||
// daemonSysProcAttr returns the attributes used when spawning the background
|
||||
// daemon. DETACHED_PROCESS severs the inherited console so closing the parent
|
||||
// cmd/PowerShell window no longer propagates CTRL_CLOSE_EVENT to the daemon.
|
||||
// Because the detached daemon shares no console with the stop caller,
|
||||
// `daemon stop` talks to it via the HTTP /shutdown endpoint rather than
|
||||
// GenerateConsoleCtrlEvent. The daemon's stdout/stderr are already
|
||||
// redirected to the log file before Start() is called, so losing the
|
||||
// console is safe.
|
||||
func daemonSysProcAttr() *syscall.SysProcAttr {
|
||||
return &syscall.SysProcAttr{
|
||||
HideWindow: true,
|
||||
CreationFlags: createNewProcessGroup,
|
||||
CreationFlags: detachedProcess,
|
||||
}
|
||||
}
|
||||
|
||||
func stopDaemonProcess(process *os.Process) error {
|
||||
// Try graceful shutdown via CTRL_BREAK_EVENT first.
|
||||
// The daemon's process group ID matches its PID (CREATE_NEW_PROCESS_GROUP).
|
||||
dll := syscall.NewLazyDLL("kernel32.dll")
|
||||
generateCtrlEvent := dll.NewProc("GenerateConsoleCtrlEvent")
|
||||
ret, _, _ := generateCtrlEvent.Call(uintptr(ctrlBreakEvent), uintptr(process.Pid))
|
||||
if ret != 0 {
|
||||
return nil
|
||||
}
|
||||
return process.Kill()
|
||||
}
|
||||
|
||||
func notifyShutdownContext(parent context.Context) (context.Context, context.CancelFunc) {
|
||||
return signal.NotifyContext(parent, os.Interrupt, sigBreak)
|
||||
}
|
||||
|
||||
@@ -6,6 +6,8 @@ import (
|
||||
"runtime"
|
||||
|
||||
"github.com/spf13/cobra"
|
||||
|
||||
"github.com/multica-ai/multica/server/internal/cli"
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -73,6 +75,7 @@ func init() {
|
||||
}
|
||||
|
||||
func main() {
|
||||
cli.CleanupStaleUpdateArtifacts()
|
||||
if err := rootCmd.Execute(); err != nil {
|
||||
if err != errSilent {
|
||||
fmt.Fprintln(os.Stderr, "Error:", err)
|
||||
|
||||
@@ -172,8 +172,9 @@ func UpdateViaDownload(targetVersion string) (string, error) {
|
||||
return "", fmt.Errorf("chmod temp file: %w", err)
|
||||
}
|
||||
|
||||
// Replace the original binary.
|
||||
if err := os.Rename(tmpPath, exePath); err != nil {
|
||||
// Replace the original binary. On Windows this moves the running executable
|
||||
// aside first; on Unix a plain rename over the running inode is fine.
|
||||
if err := replaceBinary(tmpPath, exePath); err != nil {
|
||||
os.Remove(tmpPath)
|
||||
return "", fmt.Errorf("replace binary: %w", err)
|
||||
}
|
||||
|
||||
16
server/internal/cli/update_unix.go
Normal file
16
server/internal/cli/update_unix.go
Normal file
@@ -0,0 +1,16 @@
|
||||
//go:build !windows
|
||||
|
||||
package cli
|
||||
|
||||
import "os"
|
||||
|
||||
// replaceBinary swaps the running executable for the freshly-downloaded one.
|
||||
// On Unix, the kernel keeps the old inode alive for the running process, so a
|
||||
// plain rename is safe.
|
||||
func replaceBinary(tmpPath, exePath string) error {
|
||||
return os.Rename(tmpPath, exePath)
|
||||
}
|
||||
|
||||
// CleanupStaleUpdateArtifacts is a no-op on Unix — there are no sidecar files
|
||||
// to reclaim.
|
||||
func CleanupStaleUpdateArtifacts() {}
|
||||
60
server/internal/cli/update_windows.go
Normal file
60
server/internal/cli/update_windows.go
Normal file
@@ -0,0 +1,60 @@
|
||||
//go:build windows
|
||||
|
||||
package cli
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
)
|
||||
|
||||
// oldBinarySuffix is appended to the previous executable while a new one is
|
||||
// being installed. Windows refuses to overwrite a running .exe but allows
|
||||
// renaming it, so we shuffle the running binary out of the way first.
|
||||
const oldBinarySuffix = ".old"
|
||||
|
||||
// replaceBinary swaps the running executable for the freshly-downloaded one.
|
||||
// Windows holds an exclusive handle on a running .exe, so the rename-over
|
||||
// pattern used on Unix fails with "Access is denied". Instead:
|
||||
// 1. Clear any stale leftover from a previous update.
|
||||
// 2. Move the running executable aside to exePath+".old".
|
||||
// 3. Rename the new binary into place.
|
||||
// 4. If step 3 fails, restore the original so the user isn't stranded.
|
||||
//
|
||||
// The leftover .old file is cleaned up on next startup via
|
||||
// CleanupStaleUpdateArtifacts.
|
||||
func replaceBinary(tmpPath, exePath string) error {
|
||||
oldPath := exePath + oldBinarySuffix
|
||||
|
||||
// Best-effort cleanup; if this fails (file still locked) the next Rename
|
||||
// will surface a useful error.
|
||||
_ = os.Remove(oldPath)
|
||||
|
||||
if err := os.Rename(exePath, oldPath); err != nil {
|
||||
return fmt.Errorf("move running binary aside: %w", err)
|
||||
}
|
||||
|
||||
if err := os.Rename(tmpPath, exePath); err != nil {
|
||||
// Restore so the user isn't left without a multica.exe.
|
||||
if rerr := os.Rename(oldPath, exePath); rerr != nil {
|
||||
return fmt.Errorf("install new binary: %w (and failed to restore: %v)", err, rerr)
|
||||
}
|
||||
return fmt.Errorf("install new binary: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// CleanupStaleUpdateArtifacts removes leftover `.old` binaries from previous
|
||||
// updates. Windows can't delete a running .exe, so a prior update may have
|
||||
// left one behind; once the user restarts, this call reclaims the space.
|
||||
func CleanupStaleUpdateArtifacts() {
|
||||
exePath, err := os.Executable()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
if resolved, err := filepath.EvalSymlinks(exePath); err == nil {
|
||||
exePath = resolved
|
||||
}
|
||||
_ = os.Remove(exePath + oldBinarySuffix)
|
||||
}
|
||||
@@ -89,11 +89,34 @@ func (d *Daemon) healthHandler(startedAt time.Time) http.HandlerFunc {
|
||||
}
|
||||
}
|
||||
|
||||
// shutdownHandler triggers a graceful daemon shutdown by cancelling the
|
||||
// top-level context. Used by `multica daemon stop` so we don't depend on
|
||||
// OS-signal delivery, which is unreliable on Windows once the daemon is
|
||||
// spawned with DETACHED_PROCESS (no shared console with the stop caller).
|
||||
// The listener is bound to 127.0.0.1 only, so only local processes can hit
|
||||
// this endpoint.
|
||||
func (d *Daemon) shutdownHandler() http.HandlerFunc {
|
||||
return func(w http.ResponseWriter, r *http.Request) {
|
||||
if r.Method != http.MethodPost {
|
||||
http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
|
||||
return
|
||||
}
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
json.NewEncoder(w).Encode(map[string]string{"status": "shutting down"})
|
||||
if d.cancelFunc != nil {
|
||||
// Cancel asynchronously so the response flushes first; otherwise
|
||||
// srv.Close() races with the writer.
|
||||
go d.cancelFunc()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// serveHealth runs the health HTTP server on the given listener.
|
||||
// Blocks until ctx is cancelled.
|
||||
func (d *Daemon) serveHealth(ctx context.Context, ln net.Listener, startedAt time.Time) {
|
||||
mux := http.NewServeMux()
|
||||
mux.HandleFunc("/health", d.healthHandler(startedAt))
|
||||
mux.HandleFunc("/shutdown", d.shutdownHandler())
|
||||
|
||||
mux.HandleFunc("/repo/checkout", func(w http.ResponseWriter, r *http.Request) {
|
||||
if r.Method != http.MethodPost {
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package daemon
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"log/slog"
|
||||
"net/http"
|
||||
@@ -86,6 +87,49 @@ func TestHealthHandlerActiveTaskCountTracksCounter(t *testing.T) {
|
||||
assertActiveTaskCount(t, handler, 0)
|
||||
}
|
||||
|
||||
func TestShutdownHandlerPostCancelsDaemonContext(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
d := &Daemon{cancelFunc: cancel}
|
||||
|
||||
rec := httptest.NewRecorder()
|
||||
req := httptest.NewRequest(http.MethodPost, "/shutdown", nil)
|
||||
d.shutdownHandler().ServeHTTP(rec, req)
|
||||
|
||||
if rec.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200, got %d", rec.Code)
|
||||
}
|
||||
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
case <-time.After(time.Second):
|
||||
t.Fatal("daemon context was not cancelled after POST /shutdown")
|
||||
}
|
||||
}
|
||||
|
||||
func TestShutdownHandlerRejectsNonPost(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
cancelled := false
|
||||
d := &Daemon{cancelFunc: func() { cancelled = true }}
|
||||
|
||||
rec := httptest.NewRecorder()
|
||||
req := httptest.NewRequest(http.MethodGet, "/shutdown", nil)
|
||||
d.shutdownHandler().ServeHTTP(rec, req)
|
||||
|
||||
if rec.Code != http.StatusMethodNotAllowed {
|
||||
t.Fatalf("expected 405, got %d", rec.Code)
|
||||
}
|
||||
// Give the handler's deferred cancel goroutine a moment to fire
|
||||
// in case a bug causes it to run anyway.
|
||||
time.Sleep(10 * time.Millisecond)
|
||||
if cancelled {
|
||||
t.Fatal("GET request should not trigger cancellation")
|
||||
}
|
||||
}
|
||||
|
||||
func assertActiveTaskCount(t *testing.T, h http.HandlerFunc, want int64) {
|
||||
t.Helper()
|
||||
rec := httptest.NewRecorder()
|
||||
|
||||
Reference in New Issue
Block a user