Files
multica/server/pkg/taskfailure/classify_test.go
Multica Eve 10afd1af1b feat(server): introduce pkg/taskfailure classifier and switch in-flight failure_reason writes (MUL-2946) (#3693)
Lift MUL-1949's offline backfill failure_reason taxonomy into a shared
in-flight classifier so the agent_task_queue.failure_reason column is
written with refined values (provider_auth_or_access, context_overflow,
provider_capacity_or_rate_limit, …) at write time rather than waiting on
SQL backfill to re-classify after the fact. PR1 of the Grafana board
plan in MUL-2328 — the upcoming PR2 reuses pkg/taskfailure.AllReasons()
to pre-warm the Prometheus failure_reason label set.

* server/pkg/taskfailure: new package with the canonical 21 Reason
  constants (7 platform-side + 14 agent_error.* sub-reasons),
  AllReasons() returning a defensive copy, IsAgentError() prefix check,
  and Classify(rawError) Reason mirroring the SQL CASE rules from
  MUL-1949 (db-boy's analysis). 100% statement coverage.
* server/internal/daemon/daemon.go: route the 'agent_error' coarse
  fallback paths (StartTask error, runTask early-return error, CompleteTask
  permanent rejection, reportTaskResult default branch) and the
  executeAndDrain default error case (chained after classifyPoisonedError)
  through taskfailure.Classify so blocked / timeout / unknown-status
  results all carry a refined reason on the wire.
* server/internal/service/task.go: FailTask classifies errMsg when the
  daemon-supplied failureReason is empty, eliminating the legacy
  COALESCE(.., 'agent_error') landing.
* server/internal/daemon/poisoned.go: alias FailureReasonIterationLimit
  and FailureReasonAPIInvalidRequest to the canonical taskfailure
  constants. agent_fallback_message and codex_semantic_inactivity are
  pre-existing operational reasons not in the canonical 21 — kept as
  literals for now and revisited in a follow-up PR.

Backfill SQL from MUL-1949 stays as the authoritative offline source of
truth; this PR keeps the in-flight classifier in lock-step with the SQL
CASE expression so historical and future rows share the same taxonomy.
No behavior change for the platform-side reasons (queued_expired,
runtime_offline, runtime_recovery, timeout, etc.) which already align
with the canonical set.

Co-authored-by: Eve <eve@multica-ai.local>
Co-authored-by: multica-agent <github@multica.ai>
2026-06-03 13:52:56 +08:00

251 lines
12 KiB
Go

package taskfailure
import "testing"
// TestClassifyEmptyAndWhitespace locks in how Classify treats blank
// input: the empty string and whitespace-only strings must all map to
// ReasonAgentUnknown. Callers are not expected to pass empty error
// text, but falling back to the catchall reason is the safe outcome if
// they ever do.
func TestClassifyEmptyAndWhitespace(t *testing.T) {
	t.Parallel()

	for _, blank := range []string{"", " ", "\n\t \n"} {
		got := Classify(blank)
		if got == ReasonAgentUnknown {
			continue
		}
		t.Errorf("Classify(%q) = %q, want %q", blank, got, ReasonAgentUnknown)
	}
}
// TestClassifyRules exercises each classifier rule with at least one
// sample error message and checks the Reason that Classify returns.
// The samples come from MUL-1949's db-boy production analysis (the
// most common error prefixes in `agent_task_queue.error` over a 7-day
// window).
//
// Whenever MUL-1949's SQL gains a rule, add a fixture to the table so
// the in-flight classifier and the offline backfill keep agreeing.
// One fixture per rule is the floor; rules with tricky boundaries
// (for example the 5xx status detection) are covered in more depth by
// their own tests.
func TestClassifyRules(t *testing.T) {
	t.Parallel()

	// fixture pairs one raw error message with the Reason it must yield.
	type fixture struct {
		label  string // subtest name
		input  string // raw error text handed to Classify
		expect Reason // reason Classify must return
	}

	fixtures := []fixture{
		// Rule 1: context overflow.
		{"context length exceeded", "Error: context length exceeded for model gpt-4", ReasonAgentContextOverflow},
		{"context_length_exceeded code", `{"error":{"code":"context_length_exceeded"}}`, ReasonAgentContextOverflow},
		{"maximum context", "Maximum context window of 200000 tokens has been exceeded", ReasonAgentContextOverflow},
		{"prompt is too long", "API Error: prompt is too long: 250000 tokens > 200000 maximum", ReasonAgentContextOverflow},
		{"context size has been exceeded", "context size has been exceeded; consider /compact", ReasonAgentContextOverflow},
		{"token limit", "Hit the token limit for this conversation", ReasonAgentContextOverflow},

		// Rule 2: missing configuration.
		{"missing env var", "Missing environment variable: `MIFY_API_KEY`.", ReasonAgentMissingConfig},
		{"missing api_key", "Failed to authenticate: missing api_key in config", ReasonAgentMissingConfig},
		{"api key required", "An api key is required to use this provider", ReasonAgentMissingConfig},
		{"no llm provider configured", "no llm provider configured; set OPENAI_API_KEY", ReasonAgentMissingConfig},
		{"no provider configured", "no provider configured for runtime", ReasonAgentMissingConfig},

		// Rule 3: provider auth / access.
		{"401", "API Error: 401 Unauthorized", ReasonAgentProviderAuthOrAccess},
		{"403", "API Error: 403 Forbidden", ReasonAgentProviderAuthOrAccess},
		{"unauthorized text", "Request unauthorized for this organization", ReasonAgentProviderAuthOrAccess},
		{"login required", "login required: please run /login", ReasonAgentProviderAuthOrAccess},
		{"not logged in", "Not logged in · Please run /login", ReasonAgentProviderAuthOrAccess},
		{"please login again", "Session expired, please login again", ReasonAgentProviderAuthOrAccess},
		{"refresh token", "refresh token has expired", ReasonAgentProviderAuthOrAccess},
		{"invalid api key", "Invalid API key provided", ReasonAgentProviderAuthOrAccess},
		{"access token", "access token has been revoked", ReasonAgentProviderAuthOrAccess},
		{"subscription access", "Your organization has disabled Claude subscription access for Claude Code", ReasonAgentProviderAuthOrAccess},
		{"does not have access", "Your account does not have access to this model", ReasonAgentProviderAuthOrAccess},
		{"may not have access", "you may not have access to claude-3-opus", ReasonAgentProviderAuthOrAccess},

		// Rule 4: provider quota / billing.
		{"402", "API Error: 402 Payment Required", ReasonAgentProviderQuotaLimit},
		{"insufficient_balance", `{"error":{"code":"insufficient_balance"}}`, ReasonAgentProviderQuotaLimit},
		{"balance is too low", "balance is too low to make this request", ReasonAgentProviderQuotaLimit},
		{"monthly usage limit", "You've hit your org's monthly usage limit", ReasonAgentProviderQuotaLimit},
		{"usage limit", "Account exceeded the daily usage limit", ReasonAgentProviderQuotaLimit},
		{"hit your limit ascii", "you've hit your limit; upgrade to continue", ReasonAgentProviderQuotaLimit},
		{"hit your limit curly", "you\u2019ve hit your limit", ReasonAgentProviderQuotaLimit},
		{"credits", "Your account has 0 credits remaining", ReasonAgentProviderQuotaLimit},
		{"quota", "quota exceeded for project foo", ReasonAgentProviderQuotaLimit},

		// Rule 5: capacity / rate limit.
		{"429", "API Error: 429 Too Many Requests", ReasonAgentProviderCapacityOrRateLimit},
		{"529", "Server overloaded: HTTP 529", ReasonAgentProviderCapacityOrRateLimit},
		{"rate limit", "rate limit exceeded for tier 3", ReasonAgentProviderCapacityOrRateLimit},
		{"overloaded", "overloaded_error: please retry", ReasonAgentProviderCapacityOrRateLimit},
		{"no capacity available", "no capacity available; try again later", ReasonAgentProviderCapacityOrRateLimit},

		// Rule 6: provider 5xx / server error.
		{"server had an error", "the server had an error processing your request", ReasonAgentProviderServerError},
		{"provider returned error", "provider returned error: malformed response", ReasonAgentProviderServerError},
		{"internal error", "An internal error occurred while serving the request", ReasonAgentProviderServerError},
		{"500 with delimiter", "API Error: 500 Internal Server Error", ReasonAgentProviderServerError},
		{"503 anywhere", "got HTTP 503 from provider", ReasonAgentProviderServerError},
		{"503 at start", "503 service degraded", ReasonAgentProviderServerError},
		{"504 at end", "upstream returned 504", ReasonAgentProviderServerError},
		{"service unavailable", "service unavailable, retry later", ReasonAgentProviderServerError},
		{"bad gateway", "Bad Gateway: upstream rejected", ReasonAgentProviderServerError},

		// Rule 7: provider network.
		{"stream disconnected", "stream disconnected before completion", ReasonAgentProviderNetwork},
		{"error sending request", "error sending request for url (https://api.example.com/v1)", ReasonAgentProviderNetwork},
		{"unable to connect", "unable to connect to provider", ReasonAgentProviderNetwork},
		{"dial tcp", "dial tcp 1.2.3.4:443: connect: connection refused", ReasonAgentProviderNetwork},
		{"connection refused alone", "connection refused", ReasonAgentProviderNetwork},
		{"connectionrefused single", "ConnectionRefused", ReasonAgentProviderNetwork},
		{"dns", "dns lookup failed", ReasonAgentProviderNetwork},
		{"i/o timeout", "read tcp 1.2.3.4:443: i/o timeout", ReasonAgentProviderNetwork},

		// Rule 8: model not found / unavailable.
		{"model not found", "Error: model claude-3-opus-99 not found", ReasonAgentModelNotFoundOrUnavailable},
		{"model not found phrase", "the model was not found in this account", ReasonAgentModelNotFoundOrUnavailable},
		{"unknown model", "unknown model 'foo-1.0'", ReasonAgentModelNotFoundOrUnavailable},
		{"selected model", "the selected model is no longer supported", ReasonAgentModelNotFoundOrUnavailable},
		{"http 404", "HTTP 404: model endpoint not registered", ReasonAgentModelNotFoundOrUnavailable},
		{"404 page not found", "404 page not found", ReasonAgentModelNotFoundOrUnavailable},

		// Rule 9: empty / unparseable output.
		{"returned empty output", "openclaw returned empty output", ReasonAgentEmptyOrUnparseableOutput},
		{"returned no parseable output", "kimi returned no parseable output", ReasonAgentEmptyOrUnparseableOutput},

		// Rule 10: agent timeout.
		{"timed out after", "claude timed out after 2h0m0s", ReasonAgentTimeout},

		// Rule 11: runtime missing executable.
		{"executable not found", "executable not found in $PATH", ReasonAgentRuntimeMissingExecutable},

		// Rule 12: runtime version unsupported.
		{"below the minimum supported version", "claude CLI 0.1.0 is below the minimum supported version 0.5.0", ReasonAgentRuntimeVersionUnsupported},
		{"requires a newer version", "this protocol requires a newer version of the runtime", ReasonAgentRuntimeVersionUnsupported},

		// Rule 13: process failure.
		{"exit status", "agent exit status 137", ReasonAgentProcessFailure},
		{"signal", "agent terminated by signal: killed", ReasonAgentProcessFailure},
		{"panic", "panic: runtime error: invalid memory address", ReasonAgentProcessFailure},
		{"sigsegv", "fatal error: SIGSEGV", ReasonAgentProcessFailure},
		{"process exited", "process exited with status 1", ReasonAgentProcessFailure},
		{"pipe has been ended", "the pipe has been ended", ReasonAgentProcessFailure},
		{"file already closed", "write |1: file already closed", ReasonAgentProcessFailure},
		{"initialize failed", "initialize failed: backend not ready", ReasonAgentProcessFailure},

		// Rule 14: catchall for text that matches nothing above.
		{"unrecognized", "the agent gave up for reasons unknown", ReasonAgentUnknown},
		{"sentence with no marker", "Hello world.", ReasonAgentUnknown},
	}

	for i := range fixtures {
		fx := fixtures[i]
		t.Run(fx.label, func(t *testing.T) {
			got := Classify(fx.input)
			if got == fx.expect {
				return
			}
			t.Fatalf("Classify(%q) = %q, want %q", fx.input, got, fx.expect)
		})
	}
}
// TestClassifyOrderingPriorities fixes the winner when one message
// carries markers for more than one rule. The offline SQL CASE is
// order-sensitive, so the Go classifier has to resolve these overlaps
// the same way; the inputs below are the overlaps that surfaced during
// MUL-2946 PR1 review.
func TestClassifyOrderingPriorities(t *testing.T) {
	t.Parallel()

	// overlap is one message that carries markers for several rules,
	// together with the reason that must take precedence.
	type overlap struct {
		label  string // subtest name
		input  string // raw error text handed to Classify
		expect Reason // reason that must win
	}

	overlaps := []overlap{
		// A "token limit" message talks about tokens and about a
		// limit. It has to resolve to context overflow instead of
		// being captured by the quota rule's "limit" wording.
		{"token limit beats quota", "you exceeded the token limit", ReasonAgentContextOverflow},

		// A missing api_key that also mentions a 401: the
		// missing-config rule is evaluated ahead of the auth rule so
		// a configuration mistake is not reported as an auth
		// rejection.
		{"missing api key beats 401", "missing api_key for openai (401 returned downstream)", ReasonAgentMissingConfig},

		// "429" together with "rate limit" still belongs in the
		// capacity bucket, not the quota bucket.
		{"429 rate limit", "API Error: 429 rate limit reached", ReasonAgentProviderCapacityOrRateLimit},

		// "exit status" next to a stronger upstream marker: the
		// upstream reason wins because process failure is the last
		// rule consulted.
		{"exit status with 401 upstream", "exit status 1: API Error: 401 Unauthorized", ReasonAgentProviderAuthOrAccess},
	}

	for i := range overlaps {
		ov := overlaps[i]
		t.Run(ov.label, func(t *testing.T) {
			got := Classify(ov.input)
			if got == ov.expect {
				return
			}
			t.Errorf("Classify(%q) = %q, want %q", ov.input, got, ov.expect)
		})
	}
}
// TestClassify5xxRegex covers the edges of HTTP 5xx status detection.
// The SQL classifier matches `(^|[^0-9])5[0-9][0-9]([^0-9]|$)`, i.e. a
// three-digit 5xx number that is not part of a longer digit run, and
// the Go classifier mirrors that via providerHTTP5xxRe.
//
// The digit guards on either side are what keep inputs such as
// "1500ms" or "5000 tokens" from being read as a server error; a
// dotted version string ("1.5.0") is included as a further negative
// sample.
func TestClassify5xxRegex(t *testing.T) {
	t.Parallel()

	// Standalone 5xx codes: alone, padded by whitespace, mid-sentence,
	// and followed by a newline.
	mustMatch := []string{
		"503",
		" 504 ",
		"got 502 from upstream",
		"upstream returned 599\n",
	}
	// Inputs where "5xx"-looking digits are embedded in something else
	// and must not count as an HTTP status.
	mustNotMatch := []string{
		"1500ms latency observed",
		"version 1.5.0 unsupported",
		"5000 tokens generated",
		"agent slept for 1500 seconds",
	}

	for _, msg := range mustMatch {
		got := Classify(msg)
		if got == ReasonAgentProviderServerError {
			continue
		}
		t.Errorf("Classify(%q) = %q, want %q", msg, got, ReasonAgentProviderServerError)
	}

	for _, msg := range mustNotMatch {
		got := Classify(msg)
		if got != ReasonAgentProviderServerError {
			continue
		}
		t.Errorf("Classify(%q) = %q, want NOT provider_server_error", msg, got)
	}
}
// TestClassifyAlwaysReturnsAgentSide spot-checks that the value
// returned by Classify always reports IsAgentError() == true for a
// spread of inputs (empty, unrecognized, and several recognized
// markers).
//
// Platform-side reasons are produced by the sweeper / scheduler /
// poisoned-classifier paths, which do not go through Classify; the
// in-flight classifier only chooses among the 14 agent_error.*
// sub-reasons, with ReasonAgentUnknown as the fallback. If a later
// change made Classify return something like ReasonRuntimeOffline, the
// Prometheus label semantics would break, so the contract is pinned
// here.
func TestClassifyAlwaysReturnsAgentSide(t *testing.T) {
	t.Parallel()

	for _, raw := range []string{
		"",
		"random text",
		"401 Unauthorized",
		"context length exceeded",
		"503 internal server error",
		"timed out after 2h0m0s",
		"exit status 1",
	} {
		reason := Classify(raw)
		if reason.IsAgentError() {
			continue
		}
		t.Errorf("Classify(%q) = %q, must be agent_error.* (in-flight classifier never returns platform-side reasons)", raw, reason)
	}
}