From cb70fbe79620ff9f3dcc28ce83caf1a529e9366f Mon Sep 17 00:00:00 2001 From: Jiang Bohan Date: Thu, 23 Apr 2026 17:25:33 +0800 Subject: [PATCH] =?UTF-8?q?fix(daemon/gc):=20address=20review=20=E2=80=94?= =?UTF-8?q?=20404=20safety=20+=20decimal/overflow=20in=20duration=20parser?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two issues flagged in PR review: 1. 404-immediate-clean is unsafe. The /gc-check endpoint returns 404 for both "issue deleted" AND "daemon token has no access to the workspace" (anti-enumeration, see requireDaemonWorkspaceAccess). Clean-on-404 would let a scoped-down daemon token wipe taskDirs whose issues are still live. Restore the mtime gate against GCOrphanTTL. With the new 72h default we still shrink the original 30d window dramatically without the cross-workspace hazard. Lock the behavior in with a new test that asserts a recent 404 is skipped. 2. parseFlexDuration mishandled decimals and swallowed Atoi errors: "0.5d" → 7m12s (regex matched only the "5d"), "1.5d" → 1h7m12s, and 20+ digit day values Atoi-errored silently to 0. Match the full decimal number with `\d*\.\d+|\d+` and parse with ParseFloat so fractional days and oversized inputs both go through time.ParseDuration correctly — fractions as sub-hour durations, overflow as a returned error. --- server/internal/daemon/gc.go | 17 ++++++++++--- server/internal/daemon/gc_test.go | 35 +++++++++++++++++++++++--- server/internal/daemon/helpers.go | 23 ++++++++++++----- server/internal/daemon/helpers_test.go | 13 +++++++++- 4 files changed, 73 insertions(+), 15 deletions(-) diff --git a/server/internal/daemon/gc.go b/server/internal/daemon/gc.go index 6081b5589..a73da6c08 100644 --- a/server/internal/daemon/gc.go +++ b/server/internal/daemon/gc.go @@ -140,10 +140,19 @@ func (d *Daemon) shouldCleanTaskDir(ctx context.Context, taskDir string) gcActio if err != nil { var reqErr *requestError if errors.As(err, &reqErr) && reqErr.StatusCode == http.StatusNotFound { - // Issue deleted — clean immediately. The issue row is gone from the - // server, so there's nothing left to protect with a grace period. - d.logger.Info("gc: orphan directory (issue deleted)", "dir", taskDir, "issue", meta.IssueID) - return gcActionOrphan + // 404 is ambiguous: the server returns it for both "issue deleted" + // and "daemon token has no access to the workspace" (anti-enumeration, + // see requireDaemonWorkspaceAccess). Fall back to the mtime-gated + // orphan cleanup so a scoped-down token can't instantly wipe dirs + // whose issues are still live. + info, statErr := os.Stat(taskDir) + if statErr != nil { + return gcActionSkip + } + if time.Since(info.ModTime()) > d.cfg.GCOrphanTTL { + d.logger.Info("gc: orphan directory (issue not accessible)", "dir", taskDir, "issue", meta.IssueID) + return gcActionOrphan + } } // API error (network, auth, etc.) — skip and retry next cycle. return gcActionSkip diff --git a/server/internal/daemon/gc_test.go b/server/internal/daemon/gc_test.go index 47c403b08..340c3b2b9 100644 --- a/server/internal/daemon/gc_test.go +++ b/server/internal/daemon/gc_test.go @@ -203,7 +203,7 @@ func TestShouldCleanTaskDir_APIErrorSkipped(t *testing.T) { } } -func TestShouldCleanTaskDir_Issue404CleansImmediately(t *testing.T) { +func TestShouldCleanTaskDir_Issue404OldOrphan(t *testing.T) { t.Parallel() issueID := "66666666-6666-6666-6666-666666666666" @@ -214,8 +214,7 @@ func TestShouldCleanTaskDir_Issue404CleansImmediately(t *testing.T) { }) d := newGCTestDaemon(t, mux) - // Keep a long OrphanTTL to prove the 404 branch no longer gates on mtime. - d.cfg.GCOrphanTTL = 30 * 24 * time.Hour + d.cfg.GCOrphanTTL = 0 // treat orphans as immediately eligible taskDir := createTaskDir(t, d.cfg.WorkspacesRoot, "ws1", "task8", &execenv.GCMeta{ IssueID: issueID, WorkspaceID: "ws1", @@ -224,7 +223,35 @@ func TestShouldCleanTaskDir_Issue404CleansImmediately(t *testing.T) { action := d.shouldCleanTaskDir(context.Background(), taskDir) if action != gcActionOrphan { - t.Fatalf("expected gcActionOrphan for deleted issue, got %d", action) + t.Fatalf("expected gcActionOrphan for unreachable issue past TTL, got %d", action) + } +} + +// TestShouldCleanTaskDir_Issue404RecentSkipped locks in the cross-workspace +// safety: the server returns 404 both for deleted issues and for workspaces +// the daemon token can't see, so a recent 404 must NOT trigger immediate +// cleanup — otherwise a token re-scope could wipe dirs whose issues are live. +func TestShouldCleanTaskDir_Issue404RecentSkipped(t *testing.T) { + t.Parallel() + issueID := "66666666-6666-6666-6666-666666666667" + + mux := http.NewServeMux() + mux.HandleFunc(fmt.Sprintf("/api/daemon/issues/%s/gc-check", issueID), func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusNotFound) + w.Write([]byte(`{"error":"not found"}`)) + }) + + d := newGCTestDaemon(t, mux) + // Default production OrphanTTL; taskDir mtime is now, so it's fresh. + taskDir := createTaskDir(t, d.cfg.WorkspacesRoot, "ws1", "fresh-404", &execenv.GCMeta{ + IssueID: issueID, + WorkspaceID: "ws1", + CompletedAt: time.Now(), + }) + + action := d.shouldCleanTaskDir(context.Background(), taskDir) + if action != gcActionSkip { + t.Fatalf("expected gcActionSkip for recent 404 (cross-workspace safety), got %d", action) } } diff --git a/server/internal/daemon/helpers.go b/server/internal/daemon/helpers.go index 05b6a5f25..2e93ed7ed 100644 --- a/server/internal/daemon/helpers.go +++ b/server/internal/daemon/helpers.go @@ -30,17 +30,28 @@ func durationFromEnv(key string, fallback time.Duration) (time.Duration, error) return d, nil } -// dayUnit matches an integer followed by `d` (days), optionally mixed with -// other Go duration components, e.g. "5d", "1d12h", "2d30m". -var dayUnit = regexp.MustCompile(`(\d+)d`) +// dayUnit matches a decimal number (with optional leading digits) followed by +// `d` (days), so both "5d" and "1.5d" are captured whole and expanded to hours. +var dayUnit = regexp.MustCompile(`(\d*\.\d+|\d+)d`) // parseFlexDuration accepts the standard Go time.ParseDuration syntax plus a -// `d` (day) suffix, which the stdlib rejects. "5d" → 120h, "1d12h" → 36h. +// `d` (day) suffix, which the stdlib rejects. "5d" → 120h, "1d12h" → 36h, +// "0.5d" → 12h. Overflow or malformed numbers propagate as errors. func parseFlexDuration(value string) (time.Duration, error) { + var convErr error expanded := dayUnit.ReplaceAllStringFunc(value, func(match string) string { - n, _ := strconv.Atoi(match[:len(match)-1]) - return fmt.Sprintf("%dh", n*24) + days, err := strconv.ParseFloat(match[:len(match)-1], 64) + if err != nil { + convErr = err + return match + } + // time.ParseDuration handles fractional hours natively, and rejects + // overflow on its own. + return strconv.FormatFloat(days*24, 'f', -1, 64) + "h" }) + if convErr != nil { + return 0, convErr + } return time.ParseDuration(expanded) } diff --git a/server/internal/daemon/helpers_test.go b/server/internal/daemon/helpers_test.go index e5db32416..aad618f5b 100644 --- a/server/internal/daemon/helpers_test.go +++ b/server/internal/daemon/helpers_test.go @@ -15,6 +15,9 @@ func TestParseFlexDuration(t *testing.T) { {"1d", 24 * time.Hour}, {"1d12h", 36 * time.Hour}, {"2d30m", 2*24*time.Hour + 30*time.Minute}, + {"0.5d", 12 * time.Hour}, + {"1.5d", 36 * time.Hour}, + {".5d", 12 * time.Hour}, {"120h", 120 * time.Hour}, {"24h", 24 * time.Hour}, {"30m", 30 * time.Minute}, @@ -33,7 +36,15 @@ func TestParseFlexDuration(t *testing.T) { func TestParseFlexDuration_Invalid(t *testing.T) { t.Parallel() - for _, in := range []string{"", "xyz", "5days", "abc5d"} { + for _, in := range []string{ + "", + "xyz", + "5days", + "abc5d", + // Overflow: 30 digits is well past int64/float64 safe range; must error + // rather than silently produce 0h. + "999999999999999999999999999999d", + } { if _, err := parseFlexDuration(in); err == nil { t.Errorf("parseFlexDuration(%q) expected error, got nil", in) }