feat(daemon): add disk-usage CLI to surface per-task / per-workspace footprint (#2267)

* feat(daemon): add disk-usage CLI to surface per-task / per-workspace footprint

Adds `multica daemon disk-usage [--by-workspace] [--by-task] [--top N]
[--output json]`, walking the workspaces root to report task and workspace
disk consumption without requiring a running daemon. Sizing reuses the GC
artifact patternSet (basename-only) so the reported "artifact" footprint
matches what `cleanTaskArtifacts` would actually reclaim, and the walk
honors the same safety contract: never enters .git, never follows symlinks,
counts only regular files.

Refactors WorkspacesRoot resolution into an exported `ResolveWorkspacesRoot`
so the read-only CLI picks the same root the running daemon would have.

Co-authored-by: multica-agent <github@multica.ai>

* fix(daemon): distinguish displayed totals from scan totals; add workspace artifact ratio

- Track scan-wide TotalTaskCount / TotalWorkspaceCount on the report so
  `--top N` no longer leaves the table footer claiming the truncated row
  count is the full count. The CLI now prints a "Showing top N of M …
  Displayed: X. Scan total: Y" line whenever truncation happens, and keeps
  the bare "Total: …" footer for the un-truncated case.
- Add ArtifactRatio (0..1) on WorkspaceDiskUsage and TotalArtifactRatio on
  the report. The workspace table renders an `ARTIFACT %` column. ratio()
  guards size=0 so empty workspaces report 0% instead of NaN%.

Co-authored-by: multica-agent <github@multica.ai>

---------

Co-authored-by: multica-agent <github@multica.ai>
This commit is contained in:
Bohan Jiang
2026-05-08 17:14:52 +08:00
committed by GitHub
parent fe8326fa0c
commit 61ce8a8090
4 changed files with 872 additions and 17 deletions

View File

@@ -5,6 +5,7 @@ import (
"encoding/json"
"errors"
"fmt"
"io"
"net/http"
"os"
"os/exec"
@@ -56,6 +57,18 @@ var daemonLogsCmd = &cobra.Command{
RunE: runDaemonLogs,
}
// daemonDiskUsageCmd is the `disk-usage` subcommand. It is attached to
// daemonCmd in init and dispatches to runDaemonDiskUsage; its flags
// (--by-workspace, --by-task, --top, --output, --workspaces-root) are
// registered in init as well.
var daemonDiskUsageCmd = &cobra.Command{
Use: "disk-usage",
Short: "Show daemon workspace disk usage by task or workspace",
// Long is assembled from several literals purely for source readability;
// the pieces are concatenated into one help string.
Long: "Walks the daemon's workspaces root and reports per-task or per-workspace disk usage.\n" +
"Default view is per-task, sorted by size descending. --by-workspace switches to a per-workspace summary;\n" +
"--top N keeps only the largest N entries.\n\n" +
"Bytes are split into total and the artifact-cleanable subset (node_modules, .next, .turbo by default,\n" +
"overridable via MULTICA_GC_ARTIFACT_PATTERNS) so the report stays in sync with what the GC reclaims.\n" +
"The walk skips .git and never follows symlinks. The daemon does not need to be running.",
RunE: runDaemonDiskUsage,
}
func init() {
f := daemonStartCmd.Flags()
f.Bool("foreground", false, "Run in the foreground instead of background")
@@ -85,11 +98,19 @@ func init() {
rf.Duration("codex-semantic-inactivity-timeout", 0, "Codex semantic inactivity timeout (env: MULTICA_CODEX_SEMANTIC_INACTIVITY_TIMEOUT)")
rf.Int("max-concurrent-tasks", 0, "Max tasks running in parallel (env: MULTICA_DAEMON_MAX_CONCURRENT_TASKS)")
df := daemonDiskUsageCmd.Flags()
df.Bool("by-workspace", false, "Aggregate output by workspace instead of by task")
df.Bool("by-task", false, "Per-task view (default; mutually exclusive with --by-workspace)")
df.Int("top", 0, "Keep only the largest N entries (across all workspaces)")
df.String("output", "table", "Output format: table or json")
df.String("workspaces-root", "", "Override the workspaces root path (default: same as the daemon)")
daemonCmd.AddCommand(daemonStartCmd)
daemonCmd.AddCommand(daemonStopCmd)
daemonCmd.AddCommand(daemonRestartCmd)
daemonCmd.AddCommand(daemonStatusCmd)
daemonCmd.AddCommand(daemonLogsCmd)
daemonCmd.AddCommand(daemonDiskUsageCmd)
}
// daemonDirForProfile returns the state directory for the given profile.
@@ -586,3 +607,179 @@ func flagString(cmd *cobra.Command, name string) string {
val, _ := cmd.Flags().GetString(name)
return val
}
// --- daemon disk-usage ---
// runDaemonDiskUsage implements `multica daemon disk-usage`. It validates the
// flags, resolves the workspaces root, scans it, applies --top to the view
// being displayed, and prints the report either as JSON or as a table.
//
// --top truncates only the slice that belongs to the selected view (tasks by
// default, workspaces with --by-workspace); the report's Total* fields are
// left untouched so they keep describing the whole scan.
//
// It returns an error when the flags conflict or are invalid, when the
// workspaces root cannot be resolved, or when the scan itself fails.
func runDaemonDiskUsage(cmd *cobra.Command, _ []string) error {
	profile := resolveProfile(cmd)

	// These flags are registered in init, so lookup errors are not expected
	// and are deliberately dropped.
	flags := cmd.Flags()
	rootOverride, _ := flags.GetString("workspaces-root")
	byWorkspace, _ := flags.GetBool("by-workspace")
	byTask, _ := flags.GetBool("by-task")
	top, _ := flags.GetInt("top")
	output, _ := flags.GetString("output")

	if byWorkspace && byTask {
		return fmt.Errorf("--by-workspace and --by-task are mutually exclusive")
	}
	if top < 0 {
		return fmt.Errorf("--top must be a non-negative integer")
	}
	// Reject unknown formats up front: silently falling back to the table
	// view would hand a script that asked for e.g. "yaml" unparseable output,
	// and failing before the scan avoids a pointless filesystem walk.
	switch output {
	case "table", "json":
	default:
		return fmt.Errorf("unsupported --output %q (expected table or json)", output)
	}

	workspacesRoot, err := daemon.ResolveWorkspacesRoot(profile, rootOverride)
	if err != nil {
		return fmt.Errorf("resolve workspaces root: %w", err)
	}
	report, err := daemon.ScanDiskUsage(workspacesRoot, daemon.ArtifactPatternsFromEnv())
	if err != nil {
		return err
	}

	// Both slices arrive sorted by size descending, so keeping a prefix keeps
	// the largest entries.
	if top > 0 {
		if byWorkspace {
			if top < len(report.Workspaces) {
				report.Workspaces = report.Workspaces[:top]
			}
		} else if top < len(report.Tasks) {
			report.Tasks = report.Tasks[:top]
		}
	}

	if output == "json" {
		return cli.PrintJSON(os.Stdout, report)
	}
	if byWorkspace {
		printDiskUsageWorkspaceTable(os.Stdout, report)
		return nil
	}
	printDiskUsageTaskTable(os.Stdout, report)
	return nil
}
// printDiskUsageTaskTable writes the per-task view of report to w: a header
// line naming the workspaces root, one table row per task in report.Tasks,
// and a footer. When report.Tasks holds fewer rows than the scan found
// (report.TotalTaskCount), the footer reports the displayed subtotal next to
// the scan-wide totals; otherwise it prints the plain scan total.
func printDiskUsageTaskTable(w io.Writer, report daemon.DiskUsageReport) {
	fmt.Fprintf(w, "Workspaces root: %s\n", report.WorkspacesRoot)

	scanned := report.TotalTaskCount
	if scanned == 0 {
		fmt.Fprintln(w, "(no task directories)")
		return
	}

	var (
		shownBytes    int64
		shownArtifact int64
	)
	shown := len(report.Tasks)
	table := make([][]string, 0, shown)
	for i := range report.Tasks {
		entry := &report.Tasks[i]
		shownBytes += entry.SizeBytes
		shownArtifact += entry.ArtifactSizeBytes
		table = append(table, []string{
			entry.WorkspaceShort + "/" + entry.TaskShort,
			entry.Kind,
			emptyDash(entry.ParentStatus),
			formatAge(entry.AgeSeconds),
			formatBytes(entry.SizeBytes),
			formatBytes(entry.ArtifactSizeBytes),
		})
	}
	header := []string{"PATH", "KIND", "STATUS", "AGE", "SIZE", "ARTIFACTS"}
	cli.PrintTable(w, header, table)

	scanSize := formatBytes(report.TotalSizeBytes)
	scanArtifact := formatBytes(report.TotalArtifactSizeBytes)
	reclaimablePct := report.TotalArtifactRatio * 100

	if shown >= scanned {
		// Nothing was truncated, so the scan totals describe exactly what is
		// on screen.
		fmt.Fprintf(w, "\nTotal: %s across %d task(s); %s reclaimable as artifacts (%.1f%%).\n",
			scanSize, scanned, scanArtifact, reclaimablePct)
		return
	}

	// Truncated view: the scan-wide totals stay anchored to the full scan,
	// while the displayed subtotal covers only the rows printed above.
	fmt.Fprintf(w, "\nShowing top %d of %d task(s). Displayed: %s (%s artifacts). Scan total: %s (%s artifacts, %.1f%% reclaimable).\n",
		shown, scanned,
		formatBytes(shownBytes), formatBytes(shownArtifact),
		scanSize, scanArtifact,
		reclaimablePct)
}
// printDiskUsageWorkspaceTable writes the per-workspace view of report to w:
// a header line naming the workspaces root, one table row per entry in
// report.Workspaces, and a footer. When fewer workspaces are displayed than
// the scan found (report.TotalWorkspaceCount), the footer reports the
// displayed subtotal next to the scan-wide totals.
func printDiskUsageWorkspaceTable(w io.Writer, report daemon.DiskUsageReport) {
	fmt.Fprintf(w, "Workspaces root: %s\n", report.WorkspacesRoot)

	scanned := report.TotalWorkspaceCount
	if scanned == 0 {
		fmt.Fprintln(w, "(no workspaces)")
		return
	}

	var (
		shownBytes    int64
		shownArtifact int64
	)
	shown := len(report.Workspaces)
	table := make([][]string, 0, shown)
	for i := range report.Workspaces {
		entry := &report.Workspaces[i]
		shownBytes += entry.SizeBytes
		shownArtifact += entry.ArtifactSizeBytes
		table = append(table, []string{
			entry.WorkspaceShort,
			strconv.Itoa(entry.TaskCount),
			formatBytes(entry.SizeBytes),
			formatBytes(entry.ArtifactSizeBytes),
			formatRatio(entry.ArtifactRatio),
			formatAge(entry.OldestAgeSeconds),
		})
	}
	header := []string{"WORKSPACE", "TASKS", "SIZE", "ARTIFACTS", "ARTIFACT %", "OLDEST"}
	cli.PrintTable(w, header, table)

	scanSize := formatBytes(report.TotalSizeBytes)
	scanArtifact := formatBytes(report.TotalArtifactSizeBytes)
	reclaimablePct := report.TotalArtifactRatio * 100

	if shown >= scanned {
		fmt.Fprintf(w, "\nTotal: %s across %d workspace(s); %s reclaimable as artifacts (%.1f%%).\n",
			scanSize, scanned, scanArtifact, reclaimablePct)
		return
	}

	// Truncated view: keep the displayed subtotal separate from the scan total.
	fmt.Fprintf(w, "\nShowing top %d of %d workspace(s). Displayed: %s (%s artifacts). Scan total: %s (%s artifacts, %.1f%% reclaimable).\n",
		shown, scanned,
		formatBytes(shownBytes), formatBytes(shownArtifact),
		scanSize, scanArtifact,
		reclaimablePct)
}
// formatRatio renders a 0..1 fraction as a percentage with one decimal place
// (0.25 -> "25.0%"). Any non-finite input (NaN, +Inf, -Inf) and any negative
// input collapses to "0.0%", so a degenerate ratio never surfaces in the
// table as "NaN%" or "+Inf%".
func formatRatio(r float64) string {
	// r-r is exactly 0 for every finite value and NaN for NaN and ±Inf, so
	// this one comparison rejects all non-finite inputs.
	if r-r != 0 || r < 0 {
		return "0.0%"
	}
	return fmt.Sprintf("%.1f%%", r*100)
}
// emptyDash returns s unchanged when it has content and "-" when it is the
// empty string, so blank table cells stay visible.
func emptyDash(s string) string {
	if len(s) > 0 {
		return s
	}
	return "-"
}
// formatBytes renders a byte count using IEC (power-of-1024) units. Values
// below 1 KiB — including negative ones — print as a plain integer followed
// by " B"; everything else prints with one decimal place and the largest
// unit (KiB, MiB, GiB, TiB, PiB, EiB) that keeps the value at or above 1.
func formatBytes(b int64) string {
	const (
		unit     = 1024
		prefixes = "KMGTPE"
	)
	if b < unit {
		return fmt.Sprintf("%d B", b)
	}

	// Grow the divisor one unit at a time until the quotient drops below
	// 1024; idx tracks which prefix letter that divisor corresponds to.
	divisor := int64(unit)
	idx := 0
	for b/divisor >= unit {
		divisor *= unit
		idx++
	}
	scaled := float64(b) / float64(divisor)
	return fmt.Sprintf("%.1f %ciB", scaled, prefixes[idx])
}
// formatAge renders an age given in seconds using the two most significant
// units that apply: "Nd Nh" from one day up, "Nh Nm" from one hour up,
// "Nm Ns" from one minute up, and plain "Ns" below that. Zero and negative
// inputs render as "0s".
func formatAge(seconds int64) string {
	if seconds <= 0 {
		return "0s"
	}

	const day = 24 * time.Hour
	age := time.Duration(seconds) * time.Second

	if age >= day {
		return fmt.Sprintf("%dd %dh", int(age/day), int((age%day)/time.Hour))
	}
	if age >= time.Hour {
		return fmt.Sprintf("%dh %dm", int(age/time.Hour), int((age%time.Hour)/time.Minute))
	}
	if age >= time.Minute {
		return fmt.Sprintf("%dm %ds", int(age/time.Minute), int((age%time.Minute)/time.Second))
	}
	return fmt.Sprintf("%ds", seconds)
}

View File

@@ -283,24 +283,9 @@ func LoadConfig(overrides Overrides) (Config, error) {
}
// Workspaces root: override > env > default (~/multica_workspaces or ~/multica_workspaces_<profile>)
workspacesRoot := strings.TrimSpace(os.Getenv("MULTICA_WORKSPACES_ROOT"))
if overrides.WorkspacesRoot != "" {
workspacesRoot = overrides.WorkspacesRoot
}
if workspacesRoot == "" {
home, err := os.UserHomeDir()
if err != nil {
return Config{}, fmt.Errorf("resolve home directory: %w (set MULTICA_WORKSPACES_ROOT to override)", err)
}
if profile != "" {
workspacesRoot = filepath.Join(home, "multica_workspaces_"+profile)
} else {
workspacesRoot = filepath.Join(home, "multica_workspaces")
}
}
workspacesRoot, err = filepath.Abs(workspacesRoot)
workspacesRoot, err := ResolveWorkspacesRoot(profile, overrides.WorkspacesRoot)
if err != nil {
return Config{}, fmt.Errorf("resolve absolute workspaces root: %w", err)
return Config{}, err
}
// Health port: override > default
@@ -386,6 +371,43 @@ func NormalizeServerBaseURL(raw string) (string, error) {
return strings.TrimRight(u.String(), "/"), nil
}
// ResolveWorkspacesRoot returns the absolute workspaces root shared by the
// daemon and the CLI. The first non-empty source wins:
//
//  1. override, used as given;
//  2. the MULTICA_WORKSPACES_ROOT environment variable, whitespace-trimmed;
//  3. $HOME/multica_workspaces, or $HOME/multica_workspaces_<profile> when
//     profile is non-empty.
//
// The chosen path is made absolute with filepath.Abs. An error is returned
// when the home directory is needed but cannot be determined, or when the
// path cannot be made absolute.
func ResolveWorkspacesRoot(profile, override string) (string, error) {
	root := override
	if root == "" {
		root = strings.TrimSpace(os.Getenv("MULTICA_WORKSPACES_ROOT"))
	}

	if root == "" {
		home, err := os.UserHomeDir()
		if err != nil {
			return "", fmt.Errorf("resolve home directory: %w (set MULTICA_WORKSPACES_ROOT to override)", err)
		}
		dirName := "multica_workspaces"
		if profile != "" {
			dirName += "_" + profile
		}
		root = filepath.Join(home, dirName)
	}

	resolved, err := filepath.Abs(root)
	if err != nil {
		return "", fmt.Errorf("resolve absolute workspaces root: %w", err)
	}
	return resolved, nil
}
// ArtifactPatternsFromEnv returns the artifact pattern list configured via
// the MULTICA_GC_ARTIFACT_PATTERNS environment variable, with
// DefaultGCArtifactPatterns passed to patternsFromEnv as the fallback. The
// disk-usage CLI feeds this list to ScanDiskUsage so the reported artifact
// size is computed from the GC's own pattern configuration.
func ArtifactPatternsFromEnv() []string {
	const envKey = "MULTICA_GC_ARTIFACT_PATTERNS"
	return patternsFromEnv(envKey, DefaultGCArtifactPatterns)
}
// patternsFromEnv reads a comma-separated list from env. Patterns containing
// path separators are silently dropped — the GC artifact cleanup only matches
// directory basenames, never paths, so a pattern like "foo/bar" is meaningless

View File

@@ -0,0 +1,278 @@
package daemon
import (
"fmt"
"os"
"path/filepath"
"sort"
"strings"
"time"
"github.com/multica-ai/multica/server/internal/daemon/execenv"
)
// TaskDiskUsage describes one task workdir's footprint on disk. Values are
// filled in by buildTaskUsage during a ScanDiskUsage walk.
type TaskDiskUsage struct {
// WorkspaceID is the name of the workspace directory containing the task.
WorkspaceID string `json:"workspace_id"`
// WorkspaceShort is ShortID(WorkspaceID), used for compact table output.
WorkspaceShort string `json:"workspace_short"`
// TaskShort is the name of the task directory itself.
TaskShort string `json:"task_short"`
// Path is the task directory path (workspaces root / workspace / task).
Path string `json:"path"`
// Kind is the kind read from the task's GC meta, or DiskUsageKindUnknown
// when the meta could not be read.
Kind string `json:"kind"`
// ParentStatus is rendered in the table's STATUS column ("-" when empty).
// NOTE(review): buildTaskUsage does not populate this field, so it is
// always empty in reports produced by ScanDiskUsage — confirm whether that
// is intentional.
ParentStatus string `json:"parent_status"`
// AgeSeconds is the time since the GC meta's CompletedAt, falling back to
// the task directory's mtime when that yields no positive age.
AgeSeconds int64 `json:"age_seconds"`
// SizeBytes is the total byte count returned by taskSize for the task.
SizeBytes int64 `json:"size_bytes"`
// ArtifactSizeBytes is the part of SizeBytes that lies inside directories
// whose basename matches an artifact pattern.
ArtifactSizeBytes int64 `json:"artifact_size_bytes"`
}
// WorkspaceDiskUsage aggregates per-workspace footprint across all tasks.
// ArtifactRatio is the fraction (0..1) of SizeBytes that the GC artifact
// cleanup could reclaim — kept here so the JSON consumer doesn't have to
// re-derive it (and so the table view can render the column without dividing
// by zero on empty workspaces).
type WorkspaceDiskUsage struct {
// WorkspaceID is the name of the workspace directory.
WorkspaceID string `json:"workspace_id"`
// WorkspaceShort is ShortID(WorkspaceID).
WorkspaceShort string `json:"workspace_short"`
// TaskCount is the number of task directories found in the workspace.
TaskCount int `json:"task_count"`
// SizeBytes is the sum of SizeBytes over the workspace's tasks.
SizeBytes int64 `json:"size_bytes"`
// ArtifactSizeBytes is the sum of ArtifactSizeBytes over the workspace's tasks.
ArtifactSizeBytes int64 `json:"artifact_size_bytes"`
// ArtifactRatio is ratio(ArtifactSizeBytes, SizeBytes); 0 when SizeBytes is 0.
ArtifactRatio float64 `json:"artifact_ratio"`
// OldestAgeSeconds is the largest AgeSeconds among the workspace's tasks.
OldestAgeSeconds int64 `json:"oldest_age_seconds"`
}
// DiskUsageReport is the full result of a single ScanDiskUsage call. Total*
// fields always reflect the entire scan, never the post-`--top` truncated
// view — consumers that need the displayed subtotals can sum the slice.
type DiskUsageReport struct {
// WorkspacesRoot is the root path that was passed to ScanDiskUsage.
WorkspacesRoot string `json:"workspaces_root"`
// GeneratedAt is the UTC time at which the scan started.
GeneratedAt time.Time `json:"generated_at"`
// ArtifactPatterns lists, in sorted order, the patterns that survived the
// basename-only filter and were actually used for matching.
ArtifactPatterns []string `json:"artifact_patterns"`
// Tasks holds one entry per task directory, sorted by SizeBytes descending.
Tasks []TaskDiskUsage `json:"tasks"`
// Workspaces holds one entry per workspace that contained at least one
// task directory, sorted by SizeBytes descending.
Workspaces []WorkspaceDiskUsage `json:"workspaces"`
// TotalTaskCount is the number of task directories found by the scan.
TotalTaskCount int `json:"total_task_count"`
// TotalWorkspaceCount is the number of workspaces found by the scan.
TotalWorkspaceCount int `json:"total_workspace_count"`
// TotalSizeBytes is the sum of SizeBytes over every scanned task.
TotalSizeBytes int64 `json:"total_size_bytes"`
// TotalArtifactSizeBytes is the sum of ArtifactSizeBytes over every scanned task.
TotalArtifactSizeBytes int64 `json:"total_artifact_size_bytes"`
// TotalArtifactRatio is ratio(TotalArtifactSizeBytes, TotalSizeBytes); 0
// when nothing was measured.
TotalArtifactRatio float64 `json:"total_artifact_ratio"`
}
// DiskUsageKindUnknown is the kind reported for task directories whose
// .gc_meta.json is missing or unreadable. Mirrors how the GC orphan path
// treats them — present on disk, but no parent record we can lock onto.
// buildTaskUsage starts every entry with this kind and only overwrites it
// when the GC meta is read successfully.
const DiskUsageKindUnknown = "unknown"
// ScanDiskUsage walks workspacesRoot and returns the disk-usage report. The
// walk is read-only and follows the same safety contract as the GC artifact
// cleaner: it never enters .git, never follows symlinks, and counts only
// regular files. artifactPatterns is filtered through the basename-only check
// (patterns containing a path separator are dropped) so the reported
// "artifact" footprint is computed from directory basenames alone.
//
// Layout assumed by the walk: every directory directly under workspacesRoot
// except ".repos" is a workspace, and every directory directly under a
// workspace is a task. Non-directory entries are ignored, and a workspace
// whose directory cannot be listed is skipped without failing the scan.
//
// Tasks and Workspaces are sorted by size descending; equal sizes are ordered
// by path / workspace ID so repeated scans (and `--top N` slicing) are
// deterministic. Both slices are always non-nil so they marshal to JSON as
// `[]` rather than `null` when nothing is found.
//
// A missing root returns an empty report and a nil error — a daemon that has
// never run has no directory to walk. An empty workspacesRoot, or any other
// failure to read the root, returns an error.
func ScanDiskUsage(workspacesRoot string, artifactPatterns []string) (DiskUsageReport, error) {
	report := DiskUsageReport{
		WorkspacesRoot: workspacesRoot,
		GeneratedAt:    time.Now().UTC(),
		// Non-nil from the start so the early "missing root" return still
		// encodes these as empty JSON arrays.
		Tasks:      []TaskDiskUsage{},
		Workspaces: []WorkspaceDiskUsage{},
	}
	if workspacesRoot == "" {
		return report, fmt.Errorf("disk-usage: workspaces root is required")
	}

	patternSet := buildPatternSet(artifactPatterns)
	report.ArtifactPatterns = sortedKeys(patternSet)

	wsEntries, err := os.ReadDir(workspacesRoot)
	if err != nil {
		if os.IsNotExist(err) {
			return report, nil
		}
		return report, fmt.Errorf("disk-usage: read workspaces root: %w", err)
	}

	wsAgg := map[string]*WorkspaceDiskUsage{}
	for _, wsEntry := range wsEntries {
		// Skip the bare-repo cache and any non-directory entries; the GC loop
		// applies the same exclusions, so the disk-usage report stays in sync
		// with what the GC actually walks.
		if !wsEntry.IsDir() || wsEntry.Name() == ".repos" {
			continue
		}
		wsID := wsEntry.Name()
		wsDir := filepath.Join(workspacesRoot, wsID)
		taskEntries, err := os.ReadDir(wsDir)
		if err != nil {
			// Best effort: an unreadable workspace must not abort the scan.
			continue
		}
		for _, t := range taskEntries {
			if !t.IsDir() {
				continue
			}
			taskDir := filepath.Join(wsDir, t.Name())
			usage := buildTaskUsage(taskDir, wsID, t.Name(), patternSet)
			report.Tasks = append(report.Tasks, usage)
			report.TotalSizeBytes += usage.SizeBytes
			report.TotalArtifactSizeBytes += usage.ArtifactSizeBytes

			ws, ok := wsAgg[wsID]
			if !ok {
				ws = &WorkspaceDiskUsage{
					WorkspaceID:    wsID,
					WorkspaceShort: ShortID(wsID),
				}
				wsAgg[wsID] = ws
			}
			ws.TaskCount++
			ws.SizeBytes += usage.SizeBytes
			ws.ArtifactSizeBytes += usage.ArtifactSizeBytes
			if usage.AgeSeconds > ws.OldestAgeSeconds {
				ws.OldestAgeSeconds = usage.AgeSeconds
			}
		}
	}

	// Largest first; the path tie-breaker keeps equal-size rows in a fixed
	// order because sort.Slice is not stable.
	sort.Slice(report.Tasks, func(i, j int) bool {
		a, b := &report.Tasks[i], &report.Tasks[j]
		if a.SizeBytes != b.SizeBytes {
			return a.SizeBytes > b.SizeBytes
		}
		return a.Path < b.Path
	})

	for _, ws := range wsAgg {
		ws.ArtifactRatio = ratio(ws.ArtifactSizeBytes, ws.SizeBytes)
		report.Workspaces = append(report.Workspaces, *ws)
	}
	// Map iteration order is random, so without the ID tie-breaker two
	// equal-size workspaces could swap places between runs.
	sort.Slice(report.Workspaces, func(i, j int) bool {
		a, b := &report.Workspaces[i], &report.Workspaces[j]
		if a.SizeBytes != b.SizeBytes {
			return a.SizeBytes > b.SizeBytes
		}
		return a.WorkspaceID < b.WorkspaceID
	})

	report.TotalTaskCount = len(report.Tasks)
	report.TotalWorkspaceCount = len(report.Workspaces)
	report.TotalArtifactRatio = ratio(report.TotalArtifactSizeBytes, report.TotalSizeBytes)
	return report, nil
}
// ratio divides numerator by denominator as floats. Any denominator that is
// zero or negative yields 0 rather than NaN or ±Inf, so callers can render
// the result as a percentage without special-casing empty inputs.
func ratio(numerator, denominator int64) float64 {
	if denominator > 0 {
		return float64(numerator) / float64(denominator)
	}
	return 0
}
// buildPatternSet turns a list of artifact patterns into a set keyed by
// pattern. Each pattern is whitespace-trimmed first; patterns that end up
// empty or that contain a "/" or "\" are discarded, because matching is done
// against directory basenames only. The returned map is never nil.
func buildPatternSet(patterns []string) map[string]struct{} {
	allowed := make(map[string]struct{}, len(patterns))
	for i := range patterns {
		name := strings.TrimSpace(patterns[i])
		switch {
		case name == "":
			// Blank entry: nothing to match.
		case strings.ContainsAny(name, "/\\"):
			// Path-like entry: meaningless for basename matching.
		default:
			allowed[name] = struct{}{}
		}
	}
	return allowed
}
// sortedKeys returns the keys of set in ascending lexical order. The result
// is a non-nil (possibly empty) slice even when set is nil or empty.
func sortedKeys(set map[string]struct{}) []string {
	keys := make([]string, 0, len(set))
	for name := range set {
		keys = append(keys, name)
	}
	// Map iteration order is random; sorting makes the output stable.
	sort.Strings(keys)
	return keys
}
// buildTaskUsage assembles the TaskDiskUsage record for one task directory.
//
// Kind comes from the task's GC meta when execenv.ReadGCMeta succeeds and is
// DiskUsageKindUnknown otherwise. Age is measured from the meta's CompletedAt
// when set; if that yields no positive age (meta missing, no completion time,
// or less than a second ago) the directory's mtime is used instead. Sizes
// come from taskSize. ParentStatus is left empty.
func buildTaskUsage(taskDir, wsID, taskShort string, patternSet map[string]struct{}) TaskDiskUsage {
	kind := DiskUsageKindUnknown
	var age int64

	meta, metaErr := execenv.ReadGCMeta(taskDir)
	if metaErr == nil && meta != nil {
		kind = string(meta.Kind)
		if !meta.CompletedAt.IsZero() {
			age = int64(time.Since(meta.CompletedAt).Seconds())
		}
	}

	// mtime fallback for tasks without a usable completion timestamp. When
	// the stat fails too, whatever age was computed above is kept.
	if age <= 0 {
		if info, statErr := os.Stat(taskDir); statErr == nil {
			age = int64(time.Since(info.ModTime()).Seconds())
		}
	}

	total, artifacts := taskSize(taskDir, patternSet)
	return TaskDiskUsage{
		WorkspaceID:       wsID,
		WorkspaceShort:    ShortID(wsID),
		TaskShort:         taskShort,
		Path:              taskDir,
		Kind:              kind,
		AgeSeconds:        age,
		SizeBytes:         total,
		ArtifactSizeBytes: artifacts,
	}
}
// taskSize walks taskDir and returns (totalBytes, artifactBytes).
//
// Rules applied to every entry below the root:
//   - entries the walk reports an error for are ignored;
//   - symlinks are neither followed nor counted;
//   - a directory named ".git" is skipped entirely;
//   - a directory whose basename is in patternSet is measured as a whole with
//     dirSize (defined elsewhere in this package), added to both totals, and
//     not descended into;
//   - any other directory is descended into;
//   - of the remaining entries only regular files contribute, to totalBytes.
//
// An empty taskDir, or one that cannot be made absolute, yields (0, 0). Walk
// errors are swallowed on purpose: the report is best effort.
func taskSize(taskDir string, patternSet map[string]struct{}) (totalBytes int64, artifactBytes int64) {
	if taskDir == "" {
		return 0, 0
	}
	root, absErr := filepath.Abs(taskDir)
	if absErr != nil {
		return 0, 0
	}

	visit := func(path string, entry os.DirEntry, walkErr error) error {
		// entry may be nil when walkErr is set, so check the error first.
		if walkErr != nil || path == root {
			return nil
		}
		// Symlinks are dropped explicitly so a link to a file is not counted.
		if entry.Type()&os.ModeSymlink != 0 {
			return nil
		}

		if !entry.IsDir() {
			if info, infoErr := entry.Info(); infoErr == nil && info.Mode().IsRegular() {
				totalBytes += info.Size()
			}
			return nil
		}

		name := entry.Name()
		if name == ".git" {
			return filepath.SkipDir
		}
		if _, isArtifact := patternSet[name]; !isArtifact {
			return nil
		}

		// Artifact subtree: measure it only when it sits strictly inside the
		// root, and never descend into it either way.
		rel, relErr := filepath.Rel(root, path)
		inside := relErr == nil && rel != "" && rel != "." && !strings.HasPrefix(rel, "..")
		if inside {
			size := dirSize(path)
			totalBytes += size
			artifactBytes += size
		}
		return filepath.SkipDir
	}

	_ = filepath.WalkDir(root, visit)
	return totalBytes, artifactBytes
}
// ShortID strips every "-" from id and returns the first 8 bytes of what is
// left. When the stripped string is 8 bytes or shorter it is returned whole
// (still without dashes). Intended for UUID-style identifiers.
func ShortID(id string) string {
	const maxLen = 8
	compact := strings.ReplaceAll(id, "-", "")
	if len(compact) <= maxLen {
		return compact
	}
	return compact[:maxLen]
}

View File

@@ -0,0 +1,358 @@
package daemon
import (
"encoding/json"
"os"
"path/filepath"
"runtime"
"strings"
"testing"
"time"
"github.com/multica-ai/multica/server/internal/daemon/execenv"
)
// writeFile creates path (and any missing parent directories) and fills it
// with size bytes of 'x'. Any filesystem error fails the test immediately.
func writeFile(t *testing.T, path string, size int) {
	t.Helper()

	parent := filepath.Dir(path)
	if mkErr := os.MkdirAll(parent, 0o755); mkErr != nil {
		t.Fatal(mkErr)
	}

	payload := make([]byte, size)
	for i := 0; i < len(payload); i++ {
		payload[i] = 'x'
	}

	if writeErr := os.WriteFile(path, payload, 0o644); writeErr != nil {
		t.Fatal(writeErr)
	}
}
// TestScanDiskUsage_AggregatesAndCategorizes verifies the happy-path: each
// task directory is sized, categorized by GC meta kind, and aggregated into
// per-workspace totals matching the per-task totals.
//
// Fixture: workspace A holds two tasks with GC meta (one containing a
// node_modules artifact subtree), workspace B holds one task without meta.
func TestScanDiskUsage_AggregatesAndCategorizes(t *testing.T) {
t.Parallel()
root := t.TempDir()
wsA := "11111111-1111-1111-1111-111111111111"
wsB := "22222222-2222-2222-2222-222222222222"
// Task a1: issue-kind meta, 1000 source bytes plus a 4000-byte artifact.
taskA1 := filepath.Join(root, wsA, "aaaaaaaa")
writeFile(t, filepath.Join(taskA1, "workdir/main.go"), 1000)
writeFile(t, filepath.Join(taskA1, "workdir/node_modules/dep/index.js"), 4000)
mustWriteMeta(t, taskA1, execenv.GCMeta{
Kind: execenv.GCKindIssue,
IssueID: "issue-1",
WorkspaceID: wsA,
CompletedAt: time.Now().Add(-3 * time.Hour),
})
// Task a2: chat-kind meta, 500 bytes, no artifacts.
taskA2 := filepath.Join(root, wsA, "bbbbbbbb")
writeFile(t, filepath.Join(taskA2, "workdir/notes.md"), 500)
mustWriteMeta(t, taskA2, execenv.GCMeta{
Kind: execenv.GCKindChat,
ChatSessionID: "chat-1",
WorkspaceID: wsA,
CompletedAt: time.Now().Add(-1 * time.Hour),
})
taskB1 := filepath.Join(root, wsB, "cccccccc")
writeFile(t, filepath.Join(taskB1, "workdir/result.txt"), 2000)
// No meta — exercises the unknown-kind / mtime-fallback path. Backdate
// the dir mtime so the fallback produces a measurable age (a freshly
// created dir has mtime=now, which would round to 0 seconds).
backdate := time.Now().Add(-2 * time.Hour)
if err := os.Chtimes(taskB1, backdate, backdate); err != nil {
t.Fatal(err)
}
report, err := ScanDiskUsage(root, []string{"node_modules", ".next", ".turbo"})
if err != nil {
t.Fatalf("ScanDiskUsage: %v", err)
}
if len(report.Tasks) != 3 {
t.Fatalf("expected 3 tasks, got %d", len(report.Tasks))
}
// Index tasks by directory name so assertions don't depend on sort order.
byShort := map[string]TaskDiskUsage{}
for _, task := range report.Tasks {
byShort[task.TaskShort] = task
}
a1 := byShort["aaaaaaaa"]
if a1.Kind != string(execenv.GCKindIssue) {
t.Errorf("task a1 kind = %q, want %q", a1.Kind, execenv.GCKindIssue)
}
// Size includes main.go (1000) + node_modules subtree (4000) + the
// .gc_meta.json control file we wrote. Bound the meta overhead so we
// don't drift if the meta JSON shape changes.
if a1.SizeBytes < 5000 || a1.SizeBytes > 5000+1024 {
t.Errorf("task a1 size = %d, want in [5000, 6024]", a1.SizeBytes)
}
if a1.ArtifactSizeBytes != 4000 {
t.Errorf("task a1 artifact size = %d, want 4000", a1.ArtifactSizeBytes)
}
if a1.AgeSeconds < 60 {
t.Errorf("task a1 age_seconds = %d, want >= 60 (CompletedAt -3h)", a1.AgeSeconds)
}
if a1.WorkspaceShort != ShortID(wsA) {
t.Errorf("task a1 workspace_short = %q, want %q", a1.WorkspaceShort, ShortID(wsA))
}
a2 := byShort["bbbbbbbb"]
if a2.Kind != string(execenv.GCKindChat) {
t.Errorf("task a2 kind = %q, want chat", a2.Kind)
}
if a2.SizeBytes < 500 || a2.SizeBytes > 500+1024 {
t.Errorf("task a2 size = %d, want in [500, 1524]", a2.SizeBytes)
}
if a2.ArtifactSizeBytes != 0 {
t.Errorf("task a2 artifact size = %d, want 0", a2.ArtifactSizeBytes)
}
b1 := byShort["cccccccc"]
if b1.Kind != DiskUsageKindUnknown {
t.Errorf("task b1 kind = %q, want %q", b1.Kind, DiskUsageKindUnknown)
}
if b1.SizeBytes != 2000 {
t.Errorf("task b1 size = %d, want 2000 (no meta file)", b1.SizeBytes)
}
if b1.AgeSeconds < 60 {
t.Errorf("task b1 age_seconds = %d, want >= 60 (mtime backdated 2h)", b1.AgeSeconds)
}
// Report-wide totals must equal the sum of the per-task figures.
if report.TotalSizeBytes != a1.SizeBytes+a2.SizeBytes+b1.SizeBytes {
t.Errorf("total size = %d, want sum of per-task sizes (%d)",
report.TotalSizeBytes, a1.SizeBytes+a2.SizeBytes+b1.SizeBytes)
}
if report.TotalArtifactSizeBytes != 4000 {
t.Errorf("total artifact size = %d, want 4000", report.TotalArtifactSizeBytes)
}
// Per-workspace aggregates, indexed by workspace ID.
wsByID := map[string]WorkspaceDiskUsage{}
for _, ws := range report.Workspaces {
wsByID[ws.WorkspaceID] = ws
}
if wsByID[wsA].SizeBytes != a1.SizeBytes+a2.SizeBytes {
t.Errorf("workspace A size = %d, want %d (a1+a2)",
wsByID[wsA].SizeBytes, a1.SizeBytes+a2.SizeBytes)
}
if wsByID[wsA].ArtifactSizeBytes != 4000 {
t.Errorf("workspace A artifact size = %d, want 4000", wsByID[wsA].ArtifactSizeBytes)
}
if wsByID[wsA].TaskCount != 2 {
t.Errorf("workspace A task count = %d, want 2", wsByID[wsA].TaskCount)
}
if wsByID[wsB].SizeBytes != 2000 {
t.Errorf("workspace B size = %d, want 2000", wsByID[wsB].SizeBytes)
}
// Workspace A's artifact ratio: 4000 reclaimable / a1+a2 size. Match
// within float tolerance so a small meta-file delta doesn't break it.
wantARatio := 4000.0 / float64(a1.SizeBytes+a2.SizeBytes)
if got := wsByID[wsA].ArtifactRatio; got < wantARatio-0.005 || got > wantARatio+0.005 {
t.Errorf("workspace A artifact_ratio = %f, want ~%f", got, wantARatio)
}
// Workspace B has no artifact subtree at all → ratio must be 0, not NaN.
if got := wsByID[wsB].ArtifactRatio; got != 0 {
t.Errorf("workspace B artifact_ratio = %f, want 0", got)
}
// Scan-wide counts must reflect the full scan, not the (un-truncated
// here) slice — they're the contract callers rely on once --top kicks in.
if report.TotalTaskCount != 3 {
t.Errorf("total_task_count = %d, want 3", report.TotalTaskCount)
}
if report.TotalWorkspaceCount != 2 {
t.Errorf("total_workspace_count = %d, want 2", report.TotalWorkspaceCount)
}
if report.TotalArtifactRatio <= 0 || report.TotalArtifactRatio > 1 {
t.Errorf("total_artifact_ratio = %f, want in (0, 1]", report.TotalArtifactRatio)
}
// Tasks must be sorted by size descending — the consumer treats this as
// a stable contract for `--top N` slicing.
for i := 1; i < len(report.Tasks); i++ {
if report.Tasks[i-1].SizeBytes < report.Tasks[i].SizeBytes {
t.Errorf("tasks not sorted by size desc: %d < %d at idx %d",
report.Tasks[i-1].SizeBytes, report.Tasks[i].SizeBytes, i)
}
}
// JSON marshal check — guards the field names the issue spec calls out.
// Only the encoded key names are inspected; nothing is decoded back.
raw, err := json.Marshal(report)
if err != nil {
t.Fatalf("marshal report: %v", err)
}
for _, want := range []string{
`"kind"`,
`"parent_status"`,
`"age_seconds"`,
`"size_bytes"`,
`"artifact_size_bytes"`,
`"workspace_id"`,
`"task_short"`,
`"artifact_ratio"`,
`"total_task_count"`,
`"total_workspace_count"`,
`"total_artifact_ratio"`,
} {
if !strings.Contains(string(raw), want) {
t.Errorf("JSON missing required field %s: %s", want, raw)
}
}
}
// TestScanDiskUsage_EmptyWorkspaceArtifactRatio covers the zero-byte edge
// case: a workspace whose only task contains no files must report an
// ArtifactRatio of exactly 0 (never NaN), and so must the scan-wide ratio,
// because the CLI table renders both as percentages.
func TestScanDiskUsage_EmptyWorkspaceArtifactRatio(t *testing.T) {
	t.Parallel()

	const workspace = "00000000-0000-0000-0000-000000000000"
	scanRoot := t.TempDir()
	emptyWorkdir := filepath.Join(scanRoot, workspace, "tttttttt", "workdir")
	if err := os.MkdirAll(emptyWorkdir, 0o755); err != nil {
		t.Fatal(err)
	}

	report, err := ScanDiskUsage(scanRoot, []string{"node_modules"})
	if err != nil {
		t.Fatalf("ScanDiskUsage: %v", err)
	}
	if count := len(report.Workspaces); count != 1 {
		t.Fatalf("expected 1 workspace, got %d", count)
	}

	if wsRatio := report.Workspaces[0].ArtifactRatio; wsRatio != 0 {
		t.Errorf("empty workspace artifact_ratio = %f, want 0 (no NaN)", wsRatio)
	}
	if totalRatio := report.TotalArtifactRatio; totalRatio != 0 {
		t.Errorf("empty scan total_artifact_ratio = %f, want 0 (no NaN)", totalRatio)
	}
}
// TestScanDiskUsage_DoesNotEnterGit checks the first safety rule of the walk:
// nothing beneath a .git directory is counted — not as total bytes, and not
// as artifact bytes even when a subdirectory's name matches an artifact
// pattern.
func TestScanDiskUsage_DoesNotEnterGit(t *testing.T) {
	t.Parallel()

	scanRoot := t.TempDir()
	taskDir := filepath.Join(scanRoot, "wwwwwwww-wwww-wwww-wwww-wwwwwwwwwwww", "tttttttt")

	fixtures := []struct {
		rel  string
		size int
	}{
		{"workdir/.git/objects/pack", 9999},
		{"workdir/.git/node_modules/x", 5555},
		{"workdir/main.go", 100},
	}
	for _, f := range fixtures {
		writeFile(t, filepath.Join(taskDir, f.rel), f.size)
	}

	report, err := ScanDiskUsage(scanRoot, []string{"node_modules"})
	if err != nil {
		t.Fatalf("ScanDiskUsage: %v", err)
	}
	if count := len(report.Tasks); count != 1 {
		t.Fatalf("expected 1 task, got %d", count)
	}

	task := report.Tasks[0]
	if task.SizeBytes != 100 {
		t.Errorf("size_bytes = %d, want 100 (only main.go; .git tree skipped)", task.SizeBytes)
	}
	if task.ArtifactSizeBytes != 0 {
		t.Errorf("artifact_size_bytes = %d, want 0 (node_modules under .git is invisible)", task.ArtifactSizeBytes)
	}
}
// TestScanDiskUsage_DoesNotFollowSymlinks checks the second safety rule of
// the walk: symlinks contribute nothing. A symlinked directory named like an
// artifact and a symlinked regular file — both pointing outside the task
// directory — must leave total and artifact sizes untouched.
func TestScanDiskUsage_DoesNotFollowSymlinks(t *testing.T) {
	t.Parallel()
	if runtime.GOOS == "windows" {
		t.Skip("symlink semantics differ on windows")
	}

	scanRoot := t.TempDir()
	external := t.TempDir()
	externalFile := filepath.Join(external, "huge.bin")
	writeFile(t, externalFile, 10000)

	taskDir := filepath.Join(scanRoot, "ssssssss-ssss-ssss-ssss-ssssssssssss", "tttttttt")
	writeFile(t, filepath.Join(taskDir, "workdir/main.go"), 100)

	// First a directory link that matches the artifact pattern, then a link
	// to a regular file whose target lives outside taskDir.
	links := []struct {
		target string
		name   string
	}{
		{external, "workdir/node_modules"},
		{externalFile, "workdir/big-link"},
	}
	for _, link := range links {
		if err := os.Symlink(link.target, filepath.Join(taskDir, link.name)); err != nil {
			t.Skipf("symlink not supported: %v", err)
		}
	}

	report, err := ScanDiskUsage(scanRoot, []string{"node_modules"})
	if err != nil {
		t.Fatalf("ScanDiskUsage: %v", err)
	}
	if count := len(report.Tasks); count != 1 {
		t.Fatalf("expected 1 task, got %d", count)
	}

	task := report.Tasks[0]
	if task.SizeBytes != 100 {
		t.Errorf("size_bytes = %d, want 100 (only main.go; symlinks ignored)", task.SizeBytes)
	}
	if task.ArtifactSizeBytes != 0 {
		t.Errorf("artifact_size_bytes = %d, want 0 (symlinked node_modules ignored)", task.ArtifactSizeBytes)
	}
}
// TestScanDiskUsage_MissingRoot checks that scanning a workspaces root that
// does not exist (a daemon that has never run) succeeds and yields a report
// with no tasks and no workspaces.
func TestScanDiskUsage_MissingRoot(t *testing.T) {
	t.Parallel()

	missing := filepath.Join(t.TempDir(), "does-not-exist")
	report, err := ScanDiskUsage(missing, nil)
	if err != nil {
		t.Fatalf("ScanDiskUsage on missing root returned error: %v", err)
	}
	if len(report.Tasks)+len(report.Workspaces) != 0 {
		t.Errorf("expected empty report, got %+v", report)
	}
}
// TestScanDiskUsage_RejectsPatternsWithSeparators mirrors the GC safety check:
// a pattern containing "/" or "\\" is meaningless for basename matching and
// must be silently dropped, not interpreted as a path. With every supplied
// pattern dropped, the node_modules subtree must not be counted as artifact
// bytes and the report must list no active patterns.
func TestScanDiskUsage_RejectsPatternsWithSeparators(t *testing.T) {
	t.Parallel()
	root := t.TempDir()
	wsID := "rrrrrrrr-rrrr-rrrr-rrrr-rrrrrrrrrrrr"
	taskDir := filepath.Join(root, wsID, "tttttttt")
	writeFile(t, filepath.Join(taskDir, "workdir/node_modules/x"), 1000)

	report, err := ScanDiskUsage(root, []string{"workdir/node_modules", "../etc"})
	if err != nil {
		t.Fatalf("ScanDiskUsage: %v", err)
	}
	// Guard the index below: a scan regression should fail the test with a
	// readable message instead of panicking on an empty slice.
	if len(report.Tasks) != 1 {
		t.Fatalf("expected 1 task, got %d", len(report.Tasks))
	}
	if got := report.Tasks[0].ArtifactSizeBytes; got != 0 {
		t.Errorf("artifact_size_bytes = %d, want 0 (separator-bearing patterns dropped)", got)
	}
	if got := report.ArtifactPatterns; len(got) != 0 {
		t.Errorf("ArtifactPatterns = %v, want empty (all dropped)", got)
	}
}
// mustWriteMeta JSON-encodes meta and writes it to <taskDir>/.gc_meta.json,
// creating taskDir first if needed. Any failure aborts the calling test.
func mustWriteMeta(t *testing.T, taskDir string, meta execenv.GCMeta) {
	t.Helper()

	encoded, marshalErr := json.Marshal(meta)
	if marshalErr != nil {
		t.Fatal(marshalErr)
	}
	if mkErr := os.MkdirAll(taskDir, 0o755); mkErr != nil {
		t.Fatal(mkErr)
	}

	target := filepath.Join(taskDir, ".gc_meta.json")
	if writeErr := os.WriteFile(target, encoded, 0o644); writeErr != nil {
		t.Fatal(writeErr)
	}
}