fix(daemon/gc): address review — 404 safety + decimal/overflow in duration parser

Two issues flagged in PR review:

1. 404-immediate-clean is unsafe. The /gc-check endpoint returns 404 for
   both "issue deleted" AND "daemon token has no access to the workspace"
   (anti-enumeration, see requireDaemonWorkspaceAccess). Clean-on-404
   would let a scoped-down daemon token wipe taskDirs whose issues are
   still live. Restore the mtime gate against GCOrphanTTL. With the new
   72h default we still shrink the original 30d window dramatically
   without the cross-workspace hazard. Lock the behavior in with a new
   test that asserts a recent 404 is skipped.

2. parseFlexDuration mishandled decimals and swallowed Atoi errors:
   "0.5d" → 7m12s (regex matched only the "5d"), "1.5d" → 1h7m12s,
   and 20+ digit day values Atoi-errored silently to 0. Match the full
   decimal number with `\d*\.\d+|\d+` and parse with ParseFloat so
   fractional days and oversized inputs both go through
   time.ParseDuration correctly — fractions as sub-hour durations,
   overflow as a returned error.
This commit is contained in:
Jiang Bohan
2026-04-23 17:25:33 +08:00
parent 74a43f5bec
commit cb70fbe796
4 changed files with 73 additions and 15 deletions

View File

@@ -140,10 +140,19 @@ func (d *Daemon) shouldCleanTaskDir(ctx context.Context, taskDir string) gcActio
if err != nil {
var reqErr *requestError
if errors.As(err, &reqErr) && reqErr.StatusCode == http.StatusNotFound {
// Issue deleted — clean immediately. The issue row is gone from the
// server, so there's nothing left to protect with a grace period.
d.logger.Info("gc: orphan directory (issue deleted)", "dir", taskDir, "issue", meta.IssueID)
return gcActionOrphan
// 404 is ambiguous: the server returns it for both "issue deleted"
// and "daemon token has no access to the workspace" (anti-enumeration,
// see requireDaemonWorkspaceAccess). Fall back to the mtime-gated
// orphan cleanup so a scoped-down token can't instantly wipe dirs
// whose issues are still live.
info, statErr := os.Stat(taskDir)
if statErr != nil {
return gcActionSkip
}
if time.Since(info.ModTime()) > d.cfg.GCOrphanTTL {
d.logger.Info("gc: orphan directory (issue not accessible)", "dir", taskDir, "issue", meta.IssueID)
return gcActionOrphan
}
}
// API error (network, auth, etc.) — skip and retry next cycle.
return gcActionSkip

View File

@@ -203,7 +203,7 @@ func TestShouldCleanTaskDir_APIErrorSkipped(t *testing.T) {
}
}
func TestShouldCleanTaskDir_Issue404CleansImmediately(t *testing.T) {
func TestShouldCleanTaskDir_Issue404OldOrphan(t *testing.T) {
t.Parallel()
issueID := "66666666-6666-6666-6666-666666666666"
@@ -214,8 +214,7 @@ func TestShouldCleanTaskDir_Issue404CleansImmediately(t *testing.T) {
})
d := newGCTestDaemon(t, mux)
// Keep a long OrphanTTL to prove the 404 branch no longer gates on mtime.
d.cfg.GCOrphanTTL = 30 * 24 * time.Hour
d.cfg.GCOrphanTTL = 0 // treat orphans as immediately eligible
taskDir := createTaskDir(t, d.cfg.WorkspacesRoot, "ws1", "task8", &execenv.GCMeta{
IssueID: issueID,
WorkspaceID: "ws1",
@@ -224,7 +223,35 @@ func TestShouldCleanTaskDir_Issue404CleansImmediately(t *testing.T) {
action := d.shouldCleanTaskDir(context.Background(), taskDir)
if action != gcActionOrphan {
t.Fatalf("expected gcActionOrphan for deleted issue, got %d", action)
t.Fatalf("expected gcActionOrphan for unreachable issue past TTL, got %d", action)
}
}
// TestShouldCleanTaskDir_Issue404RecentSkipped locks in the cross-workspace
// safety: the server returns 404 both for deleted issues and for workspaces
// the daemon token can't see, so a recent 404 must NOT trigger immediate
// cleanup — otherwise a token re-scope could wipe dirs whose issues are live.
func TestShouldCleanTaskDir_Issue404RecentSkipped(t *testing.T) {
t.Parallel()
issueID := "66666666-6666-6666-6666-666666666667"
mux := http.NewServeMux()
mux.HandleFunc(fmt.Sprintf("/api/daemon/issues/%s/gc-check", issueID), func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusNotFound)
w.Write([]byte(`{"error":"not found"}`))
})
d := newGCTestDaemon(t, mux)
// Default production OrphanTTL; taskDir mtime is now, so it's fresh.
taskDir := createTaskDir(t, d.cfg.WorkspacesRoot, "ws1", "fresh-404", &execenv.GCMeta{
IssueID: issueID,
WorkspaceID: "ws1",
CompletedAt: time.Now(),
})
action := d.shouldCleanTaskDir(context.Background(), taskDir)
if action != gcActionSkip {
t.Fatalf("expected gcActionSkip for recent 404 (cross-workspace safety), got %d", action)
}
}

View File

@@ -30,17 +30,28 @@ func durationFromEnv(key string, fallback time.Duration) (time.Duration, error)
return d, nil
}
// dayUnit matches an integer followed by `d` (days), optionally mixed with
// other Go duration components, e.g. "5d", "1d12h", "2d30m".
var dayUnit = regexp.MustCompile(`(\d+)d`)
// dayUnit matches a decimal number (with optional leading digits) followed by
// `d` (days), so both "5d" and "1.5d" are captured whole and expanded to hours.
var dayUnit = regexp.MustCompile(`(\d*\.\d+|\d+)d`)
// parseFlexDuration accepts the standard Go time.ParseDuration syntax plus a
// `d` (day) suffix, which the stdlib rejects. "5d" → 120h, "1d12h" → 36h.
// `d` (day) suffix, which the stdlib rejects. "5d" → 120h, "1d12h" → 36h,
// "0.5d" → 12h. Overflow or malformed numbers propagate as errors.
func parseFlexDuration(value string) (time.Duration, error) {
var convErr error
expanded := dayUnit.ReplaceAllStringFunc(value, func(match string) string {
n, _ := strconv.Atoi(match[:len(match)-1])
return fmt.Sprintf("%dh", n*24)
days, err := strconv.ParseFloat(match[:len(match)-1], 64)
if err != nil {
convErr = err
return match
}
// time.ParseDuration handles fractional hours natively, and rejects
// overflow on its own.
return strconv.FormatFloat(days*24, 'f', -1, 64) + "h"
})
if convErr != nil {
return 0, convErr
}
return time.ParseDuration(expanded)
}

View File

@@ -15,6 +15,9 @@ func TestParseFlexDuration(t *testing.T) {
{"1d", 24 * time.Hour},
{"1d12h", 36 * time.Hour},
{"2d30m", 2*24*time.Hour + 30*time.Minute},
{"0.5d", 12 * time.Hour},
{"1.5d", 36 * time.Hour},
{".5d", 12 * time.Hour},
{"120h", 120 * time.Hour},
{"24h", 24 * time.Hour},
{"30m", 30 * time.Minute},
@@ -33,7 +36,15 @@ func TestParseFlexDuration(t *testing.T) {
func TestParseFlexDuration_Invalid(t *testing.T) {
t.Parallel()
for _, in := range []string{"", "xyz", "5days", "abc5d"} {
for _, in := range []string{
"",
"xyz",
"5days",
"abc5d",
// Overflow: 30 digits is well past int64/float64 safe range; must error
// rather than silently produce 0h.
"999999999999999999999999999999d",
} {
if _, err := parseFlexDuration(in); err == nil {
t.Errorf("parseFlexDuration(%q) expected error, got nil", in)
}