diff --git a/server/cmd/multica/cmd_daemon.go b/server/cmd/multica/cmd_daemon.go index 604bb200a..b2af1768a 100644 --- a/server/cmd/multica/cmd_daemon.go +++ b/server/cmd/multica/cmd_daemon.go @@ -5,6 +5,7 @@ import ( "encoding/json" "errors" "fmt" + "io" "net/http" "os" "os/exec" @@ -56,6 +57,18 @@ var daemonLogsCmd = &cobra.Command{ RunE: runDaemonLogs, } +var daemonDiskUsageCmd = &cobra.Command{ + Use: "disk-usage", + Short: "Show daemon workspace disk usage by task or workspace", + Long: "Walks the daemon's workspaces root and reports per-task or per-workspace disk usage.\n" + + "Default view is per-task, sorted by size descending. --by-workspace switches to a per-workspace summary;\n" + + "--top N keeps only the largest N entries.\n\n" + + "Bytes are split into total and the artifact-cleanable subset (node_modules, .next, .turbo by default,\n" + + "overridable via MULTICA_GC_ARTIFACT_PATTERNS) so the report stays in sync with what the GC reclaims.\n" + + "The walk skips .git and never follows symlinks. 
The daemon does not need to be running.", + RunE: runDaemonDiskUsage, +} + func init() { f := daemonStartCmd.Flags() f.Bool("foreground", false, "Run in the foreground instead of background") @@ -85,11 +98,19 @@ func init() { rf.Duration("codex-semantic-inactivity-timeout", 0, "Codex semantic inactivity timeout (env: MULTICA_CODEX_SEMANTIC_INACTIVITY_TIMEOUT)") rf.Int("max-concurrent-tasks", 0, "Max tasks running in parallel (env: MULTICA_DAEMON_MAX_CONCURRENT_TASKS)") + df := daemonDiskUsageCmd.Flags() + df.Bool("by-workspace", false, "Aggregate output by workspace instead of by task") + df.Bool("by-task", false, "Per-task view (default; mutually exclusive with --by-workspace)") + df.Int("top", 0, "Keep only the largest N entries (across all workspaces)") + df.String("output", "table", "Output format: table or json") + df.String("workspaces-root", "", "Override the workspaces root path (default: same as the daemon)") + daemonCmd.AddCommand(daemonStartCmd) daemonCmd.AddCommand(daemonStopCmd) daemonCmd.AddCommand(daemonRestartCmd) daemonCmd.AddCommand(daemonStatusCmd) daemonCmd.AddCommand(daemonLogsCmd) + daemonCmd.AddCommand(daemonDiskUsageCmd) } // daemonDirForProfile returns the state directory for the given profile. 
@@ -586,3 +607,179 @@ func flagString(cmd *cobra.Command, name string) string { val, _ := cmd.Flags().GetString(name) return val } + +// --- daemon disk-usage --- + +func runDaemonDiskUsage(cmd *cobra.Command, _ []string) error { + profile := resolveProfile(cmd) + rootOverride, _ := cmd.Flags().GetString("workspaces-root") + byWorkspace, _ := cmd.Flags().GetBool("by-workspace") + byTask, _ := cmd.Flags().GetBool("by-task") + top, _ := cmd.Flags().GetInt("top") + output, _ := cmd.Flags().GetString("output") + + if byWorkspace && byTask { + return fmt.Errorf("--by-workspace and --by-task are mutually exclusive") + } + if top < 0 { + return fmt.Errorf("--top must be a non-negative integer") + } + + workspacesRoot, err := daemon.ResolveWorkspacesRoot(profile, rootOverride) + if err != nil { + return fmt.Errorf("resolve workspaces root: %w", err) + } + + report, err := daemon.ScanDiskUsage(workspacesRoot, daemon.ArtifactPatternsFromEnv()) + if err != nil { + return err + } + + if top > 0 { + if byWorkspace { + if top < len(report.Workspaces) { + report.Workspaces = report.Workspaces[:top] + } + } else if top < len(report.Tasks) { + report.Tasks = report.Tasks[:top] + } + } + + if output == "json" { + return cli.PrintJSON(os.Stdout, report) + } + + if byWorkspace { + printDiskUsageWorkspaceTable(os.Stdout, report) + return nil + } + printDiskUsageTaskTable(os.Stdout, report) + return nil +} + +func printDiskUsageTaskTable(w io.Writer, report daemon.DiskUsageReport) { + fmt.Fprintf(w, "Workspaces root: %s\n", report.WorkspacesRoot) + if report.TotalTaskCount == 0 { + fmt.Fprintln(w, "(no task directories)") + return + } + rows := make([][]string, 0, len(report.Tasks)) + var displayedSize, displayedArtifact int64 + for _, task := range report.Tasks { + displayedSize += task.SizeBytes + displayedArtifact += task.ArtifactSizeBytes + rows = append(rows, []string{ + task.WorkspaceShort + "/" + task.TaskShort, + task.Kind, + emptyDash(task.ParentStatus), + 
formatAge(task.AgeSeconds), + formatBytes(task.SizeBytes), + formatBytes(task.ArtifactSizeBytes), + }) + } + cli.PrintTable(w, []string{"PATH", "KIND", "STATUS", "AGE", "SIZE", "ARTIFACTS"}, rows) + + if len(report.Tasks) < report.TotalTaskCount { + // Report-wide totals stay anchored to the full scan; the displayed + // row is what the user is currently looking at. Calling these out + // separately keeps `--top N` from misleading at-a-glance triage. + fmt.Fprintf(w, "\nShowing top %d of %d task(s). Displayed: %s (%s artifacts). Scan total: %s (%s artifacts, %.1f%% reclaimable).\n", + len(report.Tasks), report.TotalTaskCount, + formatBytes(displayedSize), formatBytes(displayedArtifact), + formatBytes(report.TotalSizeBytes), formatBytes(report.TotalArtifactSizeBytes), + report.TotalArtifactRatio*100) + return + } + fmt.Fprintf(w, "\nTotal: %s across %d task(s); %s reclaimable as artifacts (%.1f%%).\n", + formatBytes(report.TotalSizeBytes), report.TotalTaskCount, + formatBytes(report.TotalArtifactSizeBytes), report.TotalArtifactRatio*100) +} + +func printDiskUsageWorkspaceTable(w io.Writer, report daemon.DiskUsageReport) { + fmt.Fprintf(w, "Workspaces root: %s\n", report.WorkspacesRoot) + if report.TotalWorkspaceCount == 0 { + fmt.Fprintln(w, "(no workspaces)") + return + } + rows := make([][]string, 0, len(report.Workspaces)) + var displayedSize, displayedArtifact int64 + for _, ws := range report.Workspaces { + displayedSize += ws.SizeBytes + displayedArtifact += ws.ArtifactSizeBytes + rows = append(rows, []string{ + ws.WorkspaceShort, + strconv.Itoa(ws.TaskCount), + formatBytes(ws.SizeBytes), + formatBytes(ws.ArtifactSizeBytes), + formatRatio(ws.ArtifactRatio), + formatAge(ws.OldestAgeSeconds), + }) + } + cli.PrintTable(w, []string{"WORKSPACE", "TASKS", "SIZE", "ARTIFACTS", "ARTIFACT %", "OLDEST"}, rows) + + if len(report.Workspaces) < report.TotalWorkspaceCount { + fmt.Fprintf(w, "\nShowing top %d of %d workspace(s). Displayed: %s (%s artifacts). 
Scan total: %s (%s artifacts, %.1f%% reclaimable).\n", + len(report.Workspaces), report.TotalWorkspaceCount, + formatBytes(displayedSize), formatBytes(displayedArtifact), + formatBytes(report.TotalSizeBytes), formatBytes(report.TotalArtifactSizeBytes), + report.TotalArtifactRatio*100) + return + } + fmt.Fprintf(w, "\nTotal: %s across %d workspace(s); %s reclaimable as artifacts (%.1f%%).\n", + formatBytes(report.TotalSizeBytes), report.TotalWorkspaceCount, + formatBytes(report.TotalArtifactSizeBytes), report.TotalArtifactRatio*100) +} + +// formatRatio renders a 0..1 fraction as a percentage to one decimal. A +// non-finite or negative input collapses to "0.0%" — total=0 workspaces +// shouldn't surface "NaN%". +func formatRatio(r float64) string { + if r != r || r < 0 { // NaN check via inequality + return "0.0%" + } + return fmt.Sprintf("%.1f%%", r*100) +} + +func emptyDash(s string) string { + if s == "" { + return "-" + } + return s +} + +// formatBytes renders a byte count in IEC units (KiB/MiB/GiB) with one decimal +// place above 1 KiB. Kept intentionally compact so the table view stays +// scannable at terminal widths. +func formatBytes(b int64) string { + const unit = 1024 + if b < unit { + return fmt.Sprintf("%d B", b) + } + div, exp := int64(unit), 0 + for n := b / unit; n >= unit; n /= unit { + div *= unit + exp++ + } + prefix := "KMGTPE"[exp] + return fmt.Sprintf("%.1f %ciB", float64(b)/float64(div), prefix) +} + +// formatAge renders an age in the most human-friendly unit that still keeps +// the value above 1. "0s" stands for "less than a second" — matches what the +// GC log lines look like. 
// formatAge renders an age given in whole seconds using the two largest
// applicable units: "Nd Nh", "Nh Nm", "Nm Ns", or "Ns". Zero and negative
// inputs render as "0s", which stands for "less than a second".
//
// The arithmetic stays in integer seconds instead of converting to
// time.Duration, so ages beyond the ~292-year Duration range are still
// formatted correctly rather than overflowing.
func formatAge(seconds int64) string {
	const (
		minute int64 = 60
		hour         = 60 * minute
		day          = 24 * hour
	)
	switch {
	case seconds <= 0:
		return "0s"
	case seconds >= day:
		return fmt.Sprintf("%dd %dh", seconds/day, (seconds%day)/hour)
	case seconds >= hour:
		return fmt.Sprintf("%dh %dm", seconds/hour, (seconds%hour)/minute)
	case seconds >= minute:
		return fmt.Sprintf("%dm %ds", seconds/minute, seconds%minute)
	default:
		return fmt.Sprintf("%ds", seconds)
	}
}
Resolution order: explicit override > +// MULTICA_WORKSPACES_ROOT env > default ($HOME/multica_workspaces, or +// $HOME/multica_workspaces_ for a named profile). Read-only callers +// (e.g. `multica daemon disk-usage`) use this directly so they pick the same +// directory the running daemon would have picked. +func ResolveWorkspacesRoot(profile, override string) (string, error) { + root := strings.TrimSpace(os.Getenv("MULTICA_WORKSPACES_ROOT")) + if override != "" { + root = override + } + if root == "" { + home, err := os.UserHomeDir() + if err != nil { + return "", fmt.Errorf("resolve home directory: %w (set MULTICA_WORKSPACES_ROOT to override)", err) + } + if profile != "" { + root = filepath.Join(home, "multica_workspaces_"+profile) + } else { + root = filepath.Join(home, "multica_workspaces") + } + } + abs, err := filepath.Abs(root) + if err != nil { + return "", fmt.Errorf("resolve absolute workspaces root: %w", err) + } + return abs, nil +} + +// ArtifactPatternsFromEnv returns the configured artifact patternSet — the +// same list the GC loop consults when it runs the artifact-only cleanup. The +// disk-usage CLI uses this to make sure the "artifact size" it reports +// matches what the GC would actually reclaim. +func ArtifactPatternsFromEnv() []string { + return patternsFromEnv("MULTICA_GC_ARTIFACT_PATTERNS", DefaultGCArtifactPatterns) +} + // patternsFromEnv reads a comma-separated list from env. 
package daemon

import (
	"fmt"
	"os"
	"path/filepath"
	"sort"
	"strings"
	"time"

	"github.com/multica-ai/multica/server/internal/daemon/execenv"
)

// TaskDiskUsage describes one task workdir's footprint on disk.
type TaskDiskUsage struct {
	// WorkspaceID is the name of the workspace directory under the root.
	WorkspaceID string `json:"workspace_id"`
	// WorkspaceShort is ShortID(WorkspaceID), used for compact table output.
	WorkspaceShort string `json:"workspace_short"`
	// TaskShort is the task directory's own name inside the workspace.
	TaskShort string `json:"task_short"`
	// Path is the task directory path (workspaces root / workspace / task).
	Path string `json:"path"`
	// Kind is the kind recorded in the task's GC meta, or
	// DiskUsageKindUnknown when the meta cannot be read.
	Kind string `json:"kind"`
	// ParentStatus is rendered as "-" by the CLI when empty.
	// NOTE(review): nothing in this file populates it — confirm whether a
	// later change is expected to fill it in.
	ParentStatus string `json:"parent_status"`
	// AgeSeconds is the time since the meta's CompletedAt, falling back to
	// the task directory's mtime when that is unavailable.
	AgeSeconds int64 `json:"age_seconds"`
	// SizeBytes is the total of regular-file sizes under the task directory,
	// excluding .git and symlinks.
	SizeBytes int64 `json:"size_bytes"`
	// ArtifactSizeBytes is the part of SizeBytes that sits inside directories
	// whose basename matches an artifact pattern.
	ArtifactSizeBytes int64 `json:"artifact_size_bytes"`
}

// WorkspaceDiskUsage aggregates per-workspace footprint across all tasks.
// ArtifactRatio is the fraction (0..1) of SizeBytes that the GC artifact
// cleanup could reclaim — kept here so the JSON consumer doesn't have to
// re-derive it (and so the table view can render the column without dividing
// by zero on empty workspaces).
type WorkspaceDiskUsage struct {
	// WorkspaceID is the name of the workspace directory under the root.
	WorkspaceID string `json:"workspace_id"`
	// WorkspaceShort is ShortID(WorkspaceID).
	WorkspaceShort string `json:"workspace_short"`
	// TaskCount is the number of task directories found in the workspace.
	TaskCount int `json:"task_count"`
	// SizeBytes is the sum of the tasks' SizeBytes.
	SizeBytes int64 `json:"size_bytes"`
	// ArtifactSizeBytes is the sum of the tasks' ArtifactSizeBytes.
	ArtifactSizeBytes int64 `json:"artifact_size_bytes"`
	// ArtifactRatio is ArtifactSizeBytes / SizeBytes, or 0 when SizeBytes is 0.
	ArtifactRatio float64 `json:"artifact_ratio"`
	// OldestAgeSeconds is the largest AgeSeconds among the workspace's tasks.
	OldestAgeSeconds int64 `json:"oldest_age_seconds"`
}
// DiskUsageReport is the full result of a single ScanDiskUsage call. Total*
// fields always reflect the entire scan, never the post-`--top` truncated
// view — consumers that need the displayed subtotals can sum the slice.
type DiskUsageReport struct {
	// WorkspacesRoot is the root path that was scanned, as passed in.
	WorkspacesRoot string `json:"workspaces_root"`
	// GeneratedAt is the UTC time at which the scan started.
	GeneratedAt time.Time `json:"generated_at"`
	// ArtifactPatterns is the sorted, de-duplicated list of basename patterns
	// actually applied (entries containing path separators are dropped).
	ArtifactPatterns []string `json:"artifact_patterns"`
	// Tasks holds one entry per task directory, largest first.
	Tasks []TaskDiskUsage `json:"tasks"`
	// Workspaces holds one entry per workspace with at least one task
	// directory, largest first.
	Workspaces []WorkspaceDiskUsage `json:"workspaces"`
	// TotalTaskCount is the number of task directories found by the scan.
	TotalTaskCount int `json:"total_task_count"`
	// TotalWorkspaceCount is the number of workspaces found by the scan.
	TotalWorkspaceCount int `json:"total_workspace_count"`
	// TotalSizeBytes is the sum of SizeBytes over all scanned tasks.
	TotalSizeBytes int64 `json:"total_size_bytes"`
	// TotalArtifactSizeBytes is the sum of ArtifactSizeBytes over all tasks.
	TotalArtifactSizeBytes int64 `json:"total_artifact_size_bytes"`
	// TotalArtifactRatio is TotalArtifactSizeBytes / TotalSizeBytes, or 0
	// when TotalSizeBytes is 0.
	TotalArtifactRatio float64 `json:"total_artifact_ratio"`
}

// DiskUsageKindUnknown is the kind reported for task directories whose
// .gc_meta.json is missing or unreadable. Mirrors how the GC orphan path
// treats them — present on disk, but no parent record we can lock onto.
const DiskUsageKindUnknown = "unknown"
+func ScanDiskUsage(workspacesRoot string, artifactPatterns []string) (DiskUsageReport, error) { + report := DiskUsageReport{ + WorkspacesRoot: workspacesRoot, + GeneratedAt: time.Now().UTC(), + ArtifactPatterns: nil, + } + if workspacesRoot == "" { + return report, fmt.Errorf("disk-usage: workspaces root is required") + } + + patternSet := buildPatternSet(artifactPatterns) + report.ArtifactPatterns = sortedKeys(patternSet) + + wsEntries, err := os.ReadDir(workspacesRoot) + if err != nil { + if os.IsNotExist(err) { + return report, nil + } + return report, fmt.Errorf("disk-usage: read workspaces root: %w", err) + } + + wsAgg := map[string]*WorkspaceDiskUsage{} + + for _, wsEntry := range wsEntries { + // Skip the bare-repo cache and any non-directory entries; the GC loop + // applies the same exclusions, so the disk-usage report stays in sync + // with what the GC actually walks. + if !wsEntry.IsDir() || wsEntry.Name() == ".repos" { + continue + } + wsID := wsEntry.Name() + wsDir := filepath.Join(workspacesRoot, wsID) + taskEntries, err := os.ReadDir(wsDir) + if err != nil { + continue + } + for _, t := range taskEntries { + if !t.IsDir() { + continue + } + taskDir := filepath.Join(wsDir, t.Name()) + usage := buildTaskUsage(taskDir, wsID, t.Name(), patternSet) + + report.Tasks = append(report.Tasks, usage) + report.TotalSizeBytes += usage.SizeBytes + report.TotalArtifactSizeBytes += usage.ArtifactSizeBytes + + ws, ok := wsAgg[wsID] + if !ok { + ws = &WorkspaceDiskUsage{ + WorkspaceID: wsID, + WorkspaceShort: ShortID(wsID), + } + wsAgg[wsID] = ws + } + ws.TaskCount++ + ws.SizeBytes += usage.SizeBytes + ws.ArtifactSizeBytes += usage.ArtifactSizeBytes + if usage.AgeSeconds > ws.OldestAgeSeconds { + ws.OldestAgeSeconds = usage.AgeSeconds + } + } + } + + sort.Slice(report.Tasks, func(i, j int) bool { + return report.Tasks[i].SizeBytes > report.Tasks[j].SizeBytes + }) + + report.Workspaces = make([]WorkspaceDiskUsage, 0, len(wsAgg)) + for _, ws := range wsAgg { + 
ws.ArtifactRatio = ratio(ws.ArtifactSizeBytes, ws.SizeBytes) + report.Workspaces = append(report.Workspaces, *ws) + } + sort.Slice(report.Workspaces, func(i, j int) bool { + return report.Workspaces[i].SizeBytes > report.Workspaces[j].SizeBytes + }) + + report.TotalTaskCount = len(report.Tasks) + report.TotalWorkspaceCount = len(report.Workspaces) + report.TotalArtifactRatio = ratio(report.TotalArtifactSizeBytes, report.TotalSizeBytes) + + return report, nil +} + +// ratio returns numerator / denominator, mapping 0/0 (and any 0 denominator) +// to 0 instead of NaN. Callers render the result as a percentage so a NaN +// would surface as "NaN%" in the table — guard at the source. +func ratio(numerator, denominator int64) float64 { + if denominator <= 0 { + return 0 + } + return float64(numerator) / float64(denominator) +} + +func buildPatternSet(patterns []string) map[string]struct{} { + set := make(map[string]struct{}, len(patterns)) + for _, p := range patterns { + p = strings.TrimSpace(p) + if p == "" || strings.ContainsAny(p, "/\\") { + continue + } + set[p] = struct{}{} + } + return set +} + +func sortedKeys(set map[string]struct{}) []string { + out := make([]string, 0, len(set)) + for k := range set { + out = append(out, k) + } + sort.Strings(out) + return out +} + +func buildTaskUsage(taskDir, wsID, taskShort string, patternSet map[string]struct{}) TaskDiskUsage { + usage := TaskDiskUsage{ + WorkspaceID: wsID, + WorkspaceShort: ShortID(wsID), + TaskShort: taskShort, + Path: taskDir, + Kind: DiskUsageKindUnknown, + } + + if meta, err := execenv.ReadGCMeta(taskDir); err == nil && meta != nil { + usage.Kind = string(meta.Kind) + if !meta.CompletedAt.IsZero() { + usage.AgeSeconds = int64(time.Since(meta.CompletedAt).Seconds()) + } + } + // Fall back to mtime when meta is missing or didn't carry a completed_at. + // Matches the orphanByMTime path the GC loop takes for the same case. 
+ if usage.AgeSeconds <= 0 { + if info, err := os.Stat(taskDir); err == nil { + usage.AgeSeconds = int64(time.Since(info.ModTime()).Seconds()) + } + } + + usage.SizeBytes, usage.ArtifactSizeBytes = taskSize(taskDir, patternSet) + return usage +} + +// taskSize walks taskDir and returns (totalBytes, artifactBytes). Both honor +// the GC safety contract: never descends into .git, never follows symlinks, +// counts only regular files. A directory whose basename matches patternSet +// is treated as an artifact subtree — its size is added to both totals and +// the walk does not descend further so the size matches what os.RemoveAll +// would reclaim if the GC ran cleanTaskArtifacts on it. +func taskSize(taskDir string, patternSet map[string]struct{}) (totalBytes int64, artifactBytes int64) { + if taskDir == "" { + return + } + absRoot, err := filepath.Abs(taskDir) + if err != nil { + return + } + + _ = filepath.WalkDir(absRoot, func(path string, entry os.DirEntry, err error) error { + if err != nil { + return nil + } + if path == absRoot { + return nil + } + // Symlinks: never followed, never counted. WalkDir already refuses to + // descend through them, but a symlinked file would otherwise show up + // here as a non-dir entry — drop it explicitly so the size stays + // consistent with cleanTaskArtifacts' refusal to touch link targets. + if entry.Type()&os.ModeSymlink != 0 { + return nil + } + if entry.IsDir() { + if entry.Name() == ".git" { + return filepath.SkipDir + } + if _, ok := patternSet[entry.Name()]; ok { + rel, relErr := filepath.Rel(absRoot, path) + if relErr != nil || rel == "" || rel == "." 
|| strings.HasPrefix(rel, "..") { + return filepath.SkipDir + } + size := dirSize(path) + totalBytes += size + artifactBytes += size + return filepath.SkipDir + } + return nil + } + info, infoErr := entry.Info() + if infoErr != nil { + return nil + } + if info.Mode().IsRegular() { + totalBytes += info.Size() + } + return nil + }) + return +} + +// ShortID returns the first 8 chars (dashes stripped) of a UUID, falling back +// to the raw input when shorter. Mirrors execenv.shortID, which lives in an +// internal subpackage and isn't exported. +func ShortID(id string) string { + s := strings.ReplaceAll(id, "-", "") + if len(s) > 8 { + return s[:8] + } + return s +} diff --git a/server/internal/daemon/diskusage_test.go b/server/internal/daemon/diskusage_test.go new file mode 100644 index 000000000..a9890d556 --- /dev/null +++ b/server/internal/daemon/diskusage_test.go @@ -0,0 +1,358 @@ +package daemon + +import ( + "encoding/json" + "os" + "path/filepath" + "runtime" + "strings" + "testing" + "time" + + "github.com/multica-ai/multica/server/internal/daemon/execenv" +) + +func writeFile(t *testing.T, path string, size int) { + t.Helper() + if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil { + t.Fatal(err) + } + buf := make([]byte, size) + for i := range buf { + buf[i] = 'x' + } + if err := os.WriteFile(path, buf, 0o644); err != nil { + t.Fatal(err) + } +} + +// TestScanDiskUsage_AggregatesAndCategorizes verifies the happy-path: each +// task directory is sized, categorized by GC meta kind, and aggregated into +// per-workspace totals matching the per-task totals. 
// TestScanDiskUsage_AggregatesAndCategorizes verifies the happy-path: each
// task directory is sized, categorized by GC meta kind, and aggregated into
// per-workspace totals matching the per-task totals.
//
// Fixture: workspace A holds two tasks with GC meta (one with a node_modules
// subtree), workspace B holds one task without meta.
func TestScanDiskUsage_AggregatesAndCategorizes(t *testing.T) {
	t.Parallel()

	root := t.TempDir()
	wsA := "11111111-1111-1111-1111-111111111111"
	wsB := "22222222-2222-2222-2222-222222222222"

	taskA1 := filepath.Join(root, wsA, "aaaaaaaa")
	writeFile(t, filepath.Join(taskA1, "workdir/main.go"), 1000)
	writeFile(t, filepath.Join(taskA1, "workdir/node_modules/dep/index.js"), 4000)
	mustWriteMeta(t, taskA1, execenv.GCMeta{
		Kind:        execenv.GCKindIssue,
		IssueID:     "issue-1",
		WorkspaceID: wsA,
		CompletedAt: time.Now().Add(-3 * time.Hour),
	})

	taskA2 := filepath.Join(root, wsA, "bbbbbbbb")
	writeFile(t, filepath.Join(taskA2, "workdir/notes.md"), 500)
	mustWriteMeta(t, taskA2, execenv.GCMeta{
		Kind:          execenv.GCKindChat,
		ChatSessionID: "chat-1",
		WorkspaceID:   wsA,
		CompletedAt:   time.Now().Add(-1 * time.Hour),
	})

	taskB1 := filepath.Join(root, wsB, "cccccccc")
	writeFile(t, filepath.Join(taskB1, "workdir/result.txt"), 2000)
	// No meta — exercises the unknown-kind / mtime-fallback path. Backdate
	// the dir mtime so the fallback produces a measurable age (a freshly
	// created dir has mtime=now, which would round to 0 seconds). This must
	// stay after the writes into taskB1: creating entries in the directory
	// would bump its mtime again.
	backdate := time.Now().Add(-2 * time.Hour)
	if err := os.Chtimes(taskB1, backdate, backdate); err != nil {
		t.Fatal(err)
	}

	report, err := ScanDiskUsage(root, []string{"node_modules", ".next", ".turbo"})
	if err != nil {
		t.Fatalf("ScanDiskUsage: %v", err)
	}

	if len(report.Tasks) != 3 {
		t.Fatalf("expected 3 tasks, got %d", len(report.Tasks))
	}

	// Index by task directory name so assertions don't depend on sort order.
	byShort := map[string]TaskDiskUsage{}
	for _, task := range report.Tasks {
		byShort[task.TaskShort] = task
	}

	a1 := byShort["aaaaaaaa"]
	if a1.Kind != string(execenv.GCKindIssue) {
		t.Errorf("task a1 kind = %q, want %q", a1.Kind, execenv.GCKindIssue)
	}
	// Size includes main.go (1000) + node_modules subtree (4000) + the
	// .gc_meta.json control file we wrote. Bound the meta overhead so we
	// don't drift if the meta JSON shape changes.
	if a1.SizeBytes < 5000 || a1.SizeBytes > 5000+1024 {
		t.Errorf("task a1 size = %d, want in [5000, 6024]", a1.SizeBytes)
	}
	if a1.ArtifactSizeBytes != 4000 {
		t.Errorf("task a1 artifact size = %d, want 4000", a1.ArtifactSizeBytes)
	}
	if a1.AgeSeconds < 60 {
		t.Errorf("task a1 age_seconds = %d, want >= 60 (CompletedAt -3h)", a1.AgeSeconds)
	}
	if a1.WorkspaceShort != ShortID(wsA) {
		t.Errorf("task a1 workspace_short = %q, want %q", a1.WorkspaceShort, ShortID(wsA))
	}

	a2 := byShort["bbbbbbbb"]
	if a2.Kind != string(execenv.GCKindChat) {
		t.Errorf("task a2 kind = %q, want chat", a2.Kind)
	}
	if a2.SizeBytes < 500 || a2.SizeBytes > 500+1024 {
		t.Errorf("task a2 size = %d, want in [500, 1524]", a2.SizeBytes)
	}
	if a2.ArtifactSizeBytes != 0 {
		t.Errorf("task a2 artifact size = %d, want 0", a2.ArtifactSizeBytes)
	}

	b1 := byShort["cccccccc"]
	if b1.Kind != DiskUsageKindUnknown {
		t.Errorf("task b1 kind = %q, want %q", b1.Kind, DiskUsageKindUnknown)
	}
	if b1.SizeBytes != 2000 {
		t.Errorf("task b1 size = %d, want 2000 (no meta file)", b1.SizeBytes)
	}
	if b1.AgeSeconds < 60 {
		t.Errorf("task b1 age_seconds = %d, want >= 60 (mtime backdated 2h)", b1.AgeSeconds)
	}

	if report.TotalSizeBytes != a1.SizeBytes+a2.SizeBytes+b1.SizeBytes {
		t.Errorf("total size = %d, want sum of per-task sizes (%d)",
			report.TotalSizeBytes, a1.SizeBytes+a2.SizeBytes+b1.SizeBytes)
	}
	if report.TotalArtifactSizeBytes != 4000 {
		t.Errorf("total artifact size = %d, want 4000", report.TotalArtifactSizeBytes)
	}

	wsByID := map[string]WorkspaceDiskUsage{}
	for _, ws := range report.Workspaces {
		wsByID[ws.WorkspaceID] = ws
	}
	if wsByID[wsA].SizeBytes != a1.SizeBytes+a2.SizeBytes {
		t.Errorf("workspace A size = %d, want %d (a1+a2)",
			wsByID[wsA].SizeBytes, a1.SizeBytes+a2.SizeBytes)
	}
	if wsByID[wsA].ArtifactSizeBytes != 4000 {
		t.Errorf("workspace A artifact size = %d, want 4000", wsByID[wsA].ArtifactSizeBytes)
	}
	if wsByID[wsA].TaskCount != 2 {
		t.Errorf("workspace A task count = %d, want 2", wsByID[wsA].TaskCount)
	}
	if wsByID[wsB].SizeBytes != 2000 {
		t.Errorf("workspace B size = %d, want 2000", wsByID[wsB].SizeBytes)
	}

	// Workspace A's artifact ratio: 4000 reclaimable / a1+a2 size. Match
	// within float tolerance so a small meta-file delta doesn't break it.
	wantARatio := 4000.0 / float64(a1.SizeBytes+a2.SizeBytes)
	if got := wsByID[wsA].ArtifactRatio; got < wantARatio-0.005 || got > wantARatio+0.005 {
		t.Errorf("workspace A artifact_ratio = %f, want ~%f", got, wantARatio)
	}
	// Workspace B has no artifact subtree at all → ratio must be 0, not NaN.
	if got := wsByID[wsB].ArtifactRatio; got != 0 {
		t.Errorf("workspace B artifact_ratio = %f, want 0", got)
	}

	// Scan-wide counts must reflect the full scan, not the (un-truncated
	// here) slice — they're the contract callers rely on once --top kicks in.
	if report.TotalTaskCount != 3 {
		t.Errorf("total_task_count = %d, want 3", report.TotalTaskCount)
	}
	if report.TotalWorkspaceCount != 2 {
		t.Errorf("total_workspace_count = %d, want 2", report.TotalWorkspaceCount)
	}
	if report.TotalArtifactRatio <= 0 || report.TotalArtifactRatio > 1 {
		t.Errorf("total_artifact_ratio = %f, want in (0, 1]", report.TotalArtifactRatio)
	}

	// Tasks must be sorted by size descending — the consumer treats this as
	// a stable contract for `--top N` slicing.
	for i := 1; i < len(report.Tasks); i++ {
		if report.Tasks[i-1].SizeBytes < report.Tasks[i].SizeBytes {
			t.Errorf("tasks not sorted by size desc: %d < %d at idx %d",
				report.Tasks[i-1].SizeBytes, report.Tasks[i].SizeBytes, i)
		}
	}

	// JSON round-trip — guards the field names the issue spec calls out.
	raw, err := json.Marshal(report)
	if err != nil {
		t.Fatalf("marshal report: %v", err)
	}
	for _, want := range []string{
		`"kind"`,
		`"parent_status"`,
		`"age_seconds"`,
		`"size_bytes"`,
		`"artifact_size_bytes"`,
		`"workspace_id"`,
		`"task_short"`,
		`"artifact_ratio"`,
		`"total_task_count"`,
		`"total_workspace_count"`,
		`"total_artifact_ratio"`,
	} {
		if !strings.Contains(string(raw), want) {
			t.Errorf("JSON missing required field %s: %s", want, raw)
		}
	}
}

// TestScanDiskUsage_EmptyWorkspaceArtifactRatio guards the total=0 edge:
// a workspace whose tasks have no measurable bytes (or no files at all) must
// still report ArtifactRatio=0, never NaN. The CLI table renders this column,
// and `NaN%` would surface in the user's terminal otherwise.
func TestScanDiskUsage_EmptyWorkspaceArtifactRatio(t *testing.T) {
	t.Parallel()

	root := t.TempDir()
	wsID := "00000000-0000-0000-0000-000000000000"
	taskDir := filepath.Join(root, wsID, "tttttttt")
	// Only directories, no files: the task exists but has zero bytes.
	if err := os.MkdirAll(filepath.Join(taskDir, "workdir"), 0o755); err != nil {
		t.Fatal(err)
	}

	report, err := ScanDiskUsage(root, []string{"node_modules"})
	if err != nil {
		t.Fatalf("ScanDiskUsage: %v", err)
	}
	if len(report.Workspaces) != 1 {
		t.Fatalf("expected 1 workspace, got %d", len(report.Workspaces))
	}
	if got := report.Workspaces[0].ArtifactRatio; got != 0 {
		t.Errorf("empty workspace artifact_ratio = %f, want 0 (no NaN)", got)
	}
	if got := report.TotalArtifactRatio; got != 0 {
		t.Errorf("empty scan total_artifact_ratio = %f, want 0 (no NaN)", got)
	}
}
+func TestScanDiskUsage_DoesNotEnterGit(t *testing.T) { + t.Parallel() + + root := t.TempDir() + wsID := "wwwwwwww-wwww-wwww-wwww-wwwwwwwwwwww" + taskDir := filepath.Join(root, wsID, "tttttttt") + + writeFile(t, filepath.Join(taskDir, "workdir/.git/objects/pack"), 9999) + writeFile(t, filepath.Join(taskDir, "workdir/.git/node_modules/x"), 5555) + writeFile(t, filepath.Join(taskDir, "workdir/main.go"), 100) + + report, err := ScanDiskUsage(root, []string{"node_modules"}) + if err != nil { + t.Fatalf("ScanDiskUsage: %v", err) + } + + if len(report.Tasks) != 1 { + t.Fatalf("expected 1 task, got %d", len(report.Tasks)) + } + got := report.Tasks[0] + if got.SizeBytes != 100 { + t.Errorf("size_bytes = %d, want 100 (only main.go; .git tree skipped)", got.SizeBytes) + } + if got.ArtifactSizeBytes != 0 { + t.Errorf("artifact_size_bytes = %d, want 0 (node_modules under .git is invisible)", got.ArtifactSizeBytes) + } +} + +// TestScanDiskUsage_DoesNotFollowSymlinks guards the second safety +// constraint. A symlinked artifact directory must not be sized — neither +// the link itself nor its target — because cleanTaskArtifacts won't reclaim +// it either. +func TestScanDiskUsage_DoesNotFollowSymlinks(t *testing.T) { + t.Parallel() + if runtime.GOOS == "windows" { + t.Skip("symlink semantics differ on windows") + } + + root := t.TempDir() + outside := t.TempDir() + writeFile(t, filepath.Join(outside, "huge.bin"), 10000) + + wsID := "ssssssss-ssss-ssss-ssss-ssssssssssss" + taskDir := filepath.Join(root, wsID, "tttttttt") + writeFile(t, filepath.Join(taskDir, "workdir/main.go"), 100) + if err := os.Symlink(outside, filepath.Join(taskDir, "workdir/node_modules")); err != nil { + t.Skipf("symlink not supported: %v", err) + } + // Symlinked regular file too — the link's target lives outside taskDir + // and must not be summed. 
+ if err := os.Symlink(filepath.Join(outside, "huge.bin"), filepath.Join(taskDir, "workdir/big-link")); err != nil { + t.Skipf("symlink not supported: %v", err) + } + + report, err := ScanDiskUsage(root, []string{"node_modules"}) + if err != nil { + t.Fatalf("ScanDiskUsage: %v", err) + } + + if len(report.Tasks) != 1 { + t.Fatalf("expected 1 task, got %d", len(report.Tasks)) + } + got := report.Tasks[0] + if got.SizeBytes != 100 { + t.Errorf("size_bytes = %d, want 100 (only main.go; symlinks ignored)", got.SizeBytes) + } + if got.ArtifactSizeBytes != 0 { + t.Errorf("artifact_size_bytes = %d, want 0 (symlinked node_modules ignored)", got.ArtifactSizeBytes) + } +} + +// TestScanDiskUsage_MissingRoot ensures a daemon that has never run yet +// (workspaces dir doesn't exist) returns an empty report, not an error. +func TestScanDiskUsage_MissingRoot(t *testing.T) { + t.Parallel() + report, err := ScanDiskUsage(filepath.Join(t.TempDir(), "does-not-exist"), nil) + if err != nil { + t.Fatalf("ScanDiskUsage on missing root returned error: %v", err) + } + if len(report.Tasks) != 0 || len(report.Workspaces) != 0 { + t.Errorf("expected empty report, got %+v", report) + } +} + +// TestScanDiskUsage_RejectsPatternsWithSeparators mirrors the GC safety check: +// a pattern containing "/" or "\\" is meaningless for basename matching and +// must be silently dropped, not interpreted as a path. 
+func TestScanDiskUsage_RejectsPatternsWithSeparators(t *testing.T) { + t.Parallel() + + root := t.TempDir() + wsID := "rrrrrrrr-rrrr-rrrr-rrrr-rrrrrrrrrrrr" + taskDir := filepath.Join(root, wsID, "tttttttt") + writeFile(t, filepath.Join(taskDir, "workdir/node_modules/x"), 1000) + + report, err := ScanDiskUsage(root, []string{"workdir/node_modules", "../etc"}) + if err != nil { + t.Fatalf("ScanDiskUsage: %v", err) + } + if got := report.Tasks[0].ArtifactSizeBytes; got != 0 { + t.Errorf("artifact_size_bytes = %d, want 0 (separator-bearing patterns dropped)", got) + } + if got := report.ArtifactPatterns; len(got) != 0 { + t.Errorf("ArtifactPatterns = %v, want empty (all dropped)", got) + } +} + +func mustWriteMeta(t *testing.T, taskDir string, meta execenv.GCMeta) { + t.Helper() + data, err := json.Marshal(meta) + if err != nil { + t.Fatal(err) + } + if err := os.MkdirAll(taskDir, 0o755); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(filepath.Join(taskDir, ".gc_meta.json"), data, 0o644); err != nil { + t.Fatal(err) + } +}