mirror of
https://github.com/multica-ai/multica.git
synced 2026-06-29 02:19:19 +02:00
Compare commits
7 Commits
codex/agen
...
agent/j/e4
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
38713b8c67 | ||
|
|
8b790dfcd8 | ||
|
|
63a3bf334b | ||
|
|
8dcac1bc08 | ||
|
|
824d579f27 | ||
|
|
3bf81edf48 | ||
|
|
51fb71e4c8 |
@@ -130,6 +130,15 @@ export interface Agent {
|
||||
status: AgentStatus;
|
||||
max_concurrent_tasks: number;
|
||||
model: string;
|
||||
/**
|
||||
* Runtime-native reasoning/effort token (e.g. Claude's
|
||||
* `low|medium|high|xhigh|max`, Codex's
|
||||
* `none|minimal|low|medium|high|xhigh`). Empty string means "use the
|
||||
* runtime/model default". The picker is per-runtime per-model — the
|
||||
* API never normalises across providers. Older backends omit this
|
||||
* field entirely; treat undefined as "" (MUL-2339).
|
||||
*/
|
||||
thinking_level?: string;
|
||||
owner_id: string | null;
|
||||
skills: AgentSkillSummary[];
|
||||
created_at: string;
|
||||
@@ -163,6 +172,8 @@ export interface CreateAgentRequest {
|
||||
visibility?: AgentVisibility;
|
||||
max_concurrent_tasks?: number;
|
||||
model?: string;
|
||||
/** Optional runtime-native reasoning/effort token. See `Agent.thinking_level`. */
|
||||
thinking_level?: string;
|
||||
/** Optional template slug used by the onboarding agent picker. Surfaced
|
||||
* as the `template` property on the `agent_created` PostHog event. */
|
||||
template?: string;
|
||||
@@ -251,6 +262,14 @@ export interface UpdateAgentRequest {
|
||||
status?: AgentStatus;
|
||||
max_concurrent_tasks?: number;
|
||||
model?: string;
|
||||
/**
|
||||
* Runtime-native reasoning/effort token. Tri-state semantics (MUL-2339):
|
||||
* - field omitted → no change
|
||||
* - "" → explicit clear (use runtime default)
|
||||
* - non-empty → set; validated server-side against the target
|
||||
* runtime's provider enum, rejected with 400 if not recognised
|
||||
*/
|
||||
thinking_level?: string;
|
||||
}
|
||||
|
||||
// Skills
|
||||
@@ -431,6 +450,31 @@ export interface RuntimeModel {
|
||||
label: string;
|
||||
provider?: string;
|
||||
default?: boolean;
|
||||
/**
|
||||
* Per-model reasoning/effort catalog discovered by the daemon. Currently
|
||||
* populated for claude and codex runtimes only; omitted (or undefined)
|
||||
* for every other provider, which the UI treats as "no thinking-level
|
||||
* picker for this model". See MUL-2339.
|
||||
*/
|
||||
thinking?: RuntimeModelThinking;
|
||||
}
|
||||
|
||||
/**
 * Reasoning/effort catalog for a single runtime model, carried on
 * `RuntimeModel.thinking`.
 */
export interface RuntimeModelThinking {
  /** Levels the user is allowed to pick for this model. */
  supported_levels: RuntimeModelThinkingLevel[];
  /** The level the runtime defaults to when no override is sent. The UI
   * uses this to badge the default and prefill new agents. */
  default_level?: string;
}
|
||||
|
||||
/**
 * One selectable entry in `RuntimeModelThinking.supported_levels`.
 */
export interface RuntimeModelThinkingLevel {
  /** Runtime-native token passed to the CLI; never normalised. */
  value: string;
  /** Display label matching each CLI's own UI (`Low`, `Extra high`, …). */
  label: string;
  /** Optional helper copy lifted from upstream catalog
   * (`codex debug models` emits one per level). */
  description?: string;
}
|
||||
|
||||
export type RuntimeModelListStatus =
|
||||
|
||||
@@ -1337,22 +1337,49 @@ func (d *Daemon) handleModelList(ctx context.Context, rt Runtime, requestID stri
|
||||
}
|
||||
|
||||
// Wire format matches handler.ModelEntry. Use a struct (not
|
||||
// map[string]string) so the Default bool round-trips — without
|
||||
// it the UI loses its "default" badge on the advertised pick.
|
||||
// map[string]string) so the Default bool and the per-model
|
||||
// Thinking catalog round-trip — without it the UI loses its
|
||||
// "default" badge on the advertised pick and the thinking-level
|
||||
// picker for claude/codex (MUL-2339).
|
||||
type thinkingLevelWire struct {
|
||||
Value string `json:"value"`
|
||||
Label string `json:"label"`
|
||||
Description string `json:"description,omitempty"`
|
||||
}
|
||||
type modelThinkingWire struct {
|
||||
SupportedLevels []thinkingLevelWire `json:"supported_levels"`
|
||||
DefaultLevel string `json:"default_level,omitempty"`
|
||||
}
|
||||
type modelWire struct {
|
||||
ID string `json:"id"`
|
||||
Label string `json:"label"`
|
||||
Provider string `json:"provider,omitempty"`
|
||||
Default bool `json:"default,omitempty"`
|
||||
ID string `json:"id"`
|
||||
Label string `json:"label"`
|
||||
Provider string `json:"provider,omitempty"`
|
||||
Default bool `json:"default,omitempty"`
|
||||
Thinking *modelThinkingWire `json:"thinking,omitempty"`
|
||||
}
|
||||
wire := make([]modelWire, 0, len(models))
|
||||
for _, m := range models {
|
||||
wire = append(wire, modelWire{
|
||||
entry := modelWire{
|
||||
ID: m.ID,
|
||||
Label: m.Label,
|
||||
Provider: m.Provider,
|
||||
Default: m.Default,
|
||||
})
|
||||
}
|
||||
if m.Thinking != nil {
|
||||
levels := make([]thinkingLevelWire, 0, len(m.Thinking.SupportedLevels))
|
||||
for _, lvl := range m.Thinking.SupportedLevels {
|
||||
levels = append(levels, thinkingLevelWire{
|
||||
Value: lvl.Value,
|
||||
Label: lvl.Label,
|
||||
Description: lvl.Description,
|
||||
})
|
||||
}
|
||||
entry.Thinking = &modelThinkingWire{
|
||||
SupportedLevels: levels,
|
||||
DefaultLevel: m.Thinking.DefaultLevel,
|
||||
}
|
||||
}
|
||||
wire = append(wire, entry)
|
||||
}
|
||||
d.reportModelListResult(ctx, rt, requestID, map[string]any{
|
||||
"status": "completed",
|
||||
@@ -2408,6 +2435,38 @@ func (d *Daemon) runTask(ctx context.Context, task Task, provider string, slot i
|
||||
if model == "" {
|
||||
model = entry.Model
|
||||
}
|
||||
thinkingLevel := ""
|
||||
if task.Agent != nil {
|
||||
thinkingLevel = task.Agent.ThinkingLevel
|
||||
}
|
||||
// Per-model guard: the server validates the literal token against the
|
||||
// provider's enum, but per-model gaps (Claude's `xhigh` on a non-Opus
|
||||
// model, Codex's per-model `supported_reasoning_levels`) only resolve
|
||||
// here, against the daemon's local CLI catalog. Invalid combinations
|
||||
// log a warning and drop the level rather than failing the task, so a
|
||||
// stale persisted value never blocks execution. Empty model is passed
|
||||
// through unchanged — ValidateThinkingLevel resolves it to the
|
||||
// provider's default model internally so default-model tasks aren't
|
||||
// misjudged. Discovery errors fail open: if we can't list models, we
|
||||
// keep the persisted level and let the CLI surface any objection.
|
||||
if thinkingLevel != "" {
|
||||
ok, err := agent.ValidateThinkingLevel(ctx, provider, entry.Path, model, thinkingLevel)
|
||||
if err != nil {
|
||||
taskLog.Warn("thinking_level: catalog lookup failed; passing through",
|
||||
"provider", provider,
|
||||
"model", model,
|
||||
"thinking_level", thinkingLevel,
|
||||
"error", err,
|
||||
)
|
||||
} else if !ok {
|
||||
taskLog.Warn("thinking_level: not valid for this (provider, model); skipping injection",
|
||||
"provider", provider,
|
||||
"model", model,
|
||||
"thinking_level", thinkingLevel,
|
||||
)
|
||||
thinkingLevel = ""
|
||||
}
|
||||
}
|
||||
execOpts := agent.ExecOptions{
|
||||
Cwd: env.WorkDir,
|
||||
Model: model,
|
||||
@@ -2417,6 +2476,7 @@ func (d *Daemon) runTask(ctx context.Context, task Task, provider string, slot i
|
||||
ExtraArgs: extraArgs,
|
||||
CustomArgs: customArgs,
|
||||
McpConfig: mcpConfig,
|
||||
ThinkingLevel: thinkingLevel,
|
||||
}
|
||||
// Some providers do not reliably load the per-task runtime config files we
|
||||
// write into the task workdir:
|
||||
|
||||
@@ -76,14 +76,15 @@ type ChatAttachmentMeta struct {
|
||||
|
||||
// AgentData holds agent details returned by the claim endpoint.
|
||||
type AgentData struct {
|
||||
ID string `json:"id"`
|
||||
Name string `json:"name"`
|
||||
Instructions string `json:"instructions"`
|
||||
Skills []SkillData `json:"skills"`
|
||||
CustomEnv map[string]string `json:"custom_env,omitempty"`
|
||||
CustomArgs []string `json:"custom_args,omitempty"`
|
||||
McpConfig json.RawMessage `json:"mcp_config,omitempty"`
|
||||
Model string `json:"model,omitempty"`
|
||||
ID string `json:"id"`
|
||||
Name string `json:"name"`
|
||||
Instructions string `json:"instructions"`
|
||||
Skills []SkillData `json:"skills"`
|
||||
CustomEnv map[string]string `json:"custom_env,omitempty"`
|
||||
CustomArgs []string `json:"custom_args,omitempty"`
|
||||
McpConfig json.RawMessage `json:"mcp_config,omitempty"`
|
||||
Model string `json:"model,omitempty"`
|
||||
ThinkingLevel string `json:"thinking_level,omitempty"`
|
||||
}
|
||||
|
||||
// SkillData represents a structured skill for task execution.
|
||||
|
||||
@@ -16,6 +16,7 @@ import (
|
||||
"github.com/multica-ai/multica/server/internal/analytics"
|
||||
"github.com/multica-ai/multica/server/internal/logger"
|
||||
"github.com/multica-ai/multica/server/internal/service"
|
||||
"github.com/multica-ai/multica/server/pkg/agent"
|
||||
db "github.com/multica-ai/multica/server/pkg/db/generated"
|
||||
"github.com/multica-ai/multica/server/pkg/protocol"
|
||||
)
|
||||
@@ -45,12 +46,16 @@ type AgentResponse struct {
|
||||
Status string `json:"status"`
|
||||
MaxConcurrentTasks int32 `json:"max_concurrent_tasks"`
|
||||
Model string `json:"model"`
|
||||
OwnerID *string `json:"owner_id"`
|
||||
Skills []AgentSkillSummary `json:"skills"`
|
||||
CreatedAt string `json:"created_at"`
|
||||
UpdatedAt string `json:"updated_at"`
|
||||
ArchivedAt *string `json:"archived_at"`
|
||||
ArchivedBy *string `json:"archived_by"`
|
||||
// ThinkingLevel is the runtime-native reasoning/effort token persisted
|
||||
// for this agent (empty = use runtime default). The picker is per-runtime
|
||||
// per-model; the API never normalizes across providers. See MUL-2339.
|
||||
ThinkingLevel string `json:"thinking_level"`
|
||||
OwnerID *string `json:"owner_id"`
|
||||
Skills []AgentSkillSummary `json:"skills"`
|
||||
CreatedAt string `json:"created_at"`
|
||||
UpdatedAt string `json:"updated_at"`
|
||||
ArchivedAt *string `json:"archived_at"`
|
||||
ArchivedBy *string `json:"archived_by"`
|
||||
}
|
||||
|
||||
func agentToResponse(a db.Agent) AgentResponse {
|
||||
@@ -104,6 +109,7 @@ func agentToResponse(a db.Agent) AgentResponse {
|
||||
Status: a.Status,
|
||||
MaxConcurrentTasks: a.MaxConcurrentTasks,
|
||||
Model: a.Model.String,
|
||||
ThinkingLevel: a.ThinkingLevel.String,
|
||||
OwnerID: uuidToPtr(a.OwnerID),
|
||||
Skills: []AgentSkillSummary{},
|
||||
CreatedAt: timestampToString(a.CreatedAt),
|
||||
@@ -194,14 +200,15 @@ type ChatAttachmentMeta struct {
|
||||
// TaskAgentData holds agent info included in claim responses so the daemon
|
||||
// can set up the execution environment (branch naming, skill files, instructions).
|
||||
type TaskAgentData struct {
|
||||
ID string `json:"id"`
|
||||
Name string `json:"name"`
|
||||
Instructions string `json:"instructions"`
|
||||
Skills []service.AgentSkillData `json:"skills,omitempty"`
|
||||
CustomEnv map[string]string `json:"custom_env,omitempty"`
|
||||
CustomArgs []string `json:"custom_args,omitempty"`
|
||||
McpConfig json.RawMessage `json:"mcp_config,omitempty"`
|
||||
Model string `json:"model,omitempty"`
|
||||
ID string `json:"id"`
|
||||
Name string `json:"name"`
|
||||
Instructions string `json:"instructions"`
|
||||
Skills []service.AgentSkillData `json:"skills,omitempty"`
|
||||
CustomEnv map[string]string `json:"custom_env,omitempty"`
|
||||
CustomArgs []string `json:"custom_args,omitempty"`
|
||||
McpConfig json.RawMessage `json:"mcp_config,omitempty"`
|
||||
Model string `json:"model,omitempty"`
|
||||
ThinkingLevel string `json:"thinking_level,omitempty"`
|
||||
}
|
||||
|
||||
func taskToResponse(t db.AgentTaskQueue) AgentTaskResponse {
|
||||
@@ -392,6 +399,7 @@ type CreateAgentRequest struct {
|
||||
Visibility string `json:"visibility"`
|
||||
MaxConcurrentTasks int32 `json:"max_concurrent_tasks"`
|
||||
Model string `json:"model"`
|
||||
ThinkingLevel string `json:"thinking_level"`
|
||||
// Template records which template slug was used to seed this agent
|
||||
// (e.g. "coding" / "planning" / "writing" / "assistant"). Empty when
|
||||
// the caller didn't come from a template picker — the `agent_created`
|
||||
@@ -482,6 +490,15 @@ func (h *Handler) CreateAgent(w http.ResponseWriter, r *http.Request) {
|
||||
return
|
||||
}
|
||||
|
||||
// thinking_level validation: provider-level enum only. Per-model gaps
|
||||
// are enforced by the daemon at execution time (MUL-2339, Trump's
|
||||
// review note — keep API behaviour consistent: literal-invalid →
|
||||
// always 400; combination-invalid → daemon-side task error).
|
||||
if !agent.IsKnownThinkingValue(runtime.Provider, req.ThinkingLevel) {
|
||||
writeError(w, http.StatusBadRequest, fmt.Sprintf("thinking_level %q is not a recognised value for runtime %q", req.ThinkingLevel, runtime.Provider))
|
||||
return
|
||||
}
|
||||
|
||||
// Probe workspace agent count BEFORE the insert so the funnel has a
|
||||
// clean "first agent ever in this workspace" signal — Step 4 of
|
||||
// onboarding always lands in this branch. A non-fatal read: if the
|
||||
@@ -512,7 +529,7 @@ func (h *Handler) CreateAgent(w http.ResponseWriter, r *http.Request) {
|
||||
mc = append([]byte(nil), rawMcpConfig...)
|
||||
}
|
||||
|
||||
agent, err := h.Queries.CreateAgent(r.Context(), db.CreateAgentParams{
|
||||
created, err := h.Queries.CreateAgent(r.Context(), db.CreateAgentParams{
|
||||
WorkspaceID: wsUUID,
|
||||
Name: req.Name,
|
||||
Description: req.Description,
|
||||
@@ -528,6 +545,7 @@ func (h *Handler) CreateAgent(w http.ResponseWriter, r *http.Request) {
|
||||
CustomArgs: ca,
|
||||
McpConfig: mc,
|
||||
Model: pgtype.Text{String: req.Model, Valid: req.Model != ""},
|
||||
ThinkingLevel: pgtype.Text{String: req.ThinkingLevel, Valid: req.ThinkingLevel != ""},
|
||||
})
|
||||
if err != nil {
|
||||
// Unique constraint on (workspace_id, name) — return a clear conflict error
|
||||
@@ -541,21 +559,21 @@ func (h *Handler) CreateAgent(w http.ResponseWriter, r *http.Request) {
|
||||
writeError(w, http.StatusInternalServerError, "failed to create agent: "+err.Error())
|
||||
return
|
||||
}
|
||||
slog.Info("agent created", append(logger.RequestAttrs(r), "agent_id", uuidToString(agent.ID), "name", agent.Name, "workspace_id", workspaceID)...)
|
||||
slog.Info("agent created", append(logger.RequestAttrs(r), "agent_id", uuidToString(created.ID), "name", created.Name, "workspace_id", workspaceID)...)
|
||||
|
||||
if runtime.Status == "online" {
|
||||
h.TaskService.ReconcileAgentStatus(r.Context(), agent.ID)
|
||||
agent, _ = h.Queries.GetAgent(r.Context(), agent.ID)
|
||||
h.TaskService.ReconcileAgentStatus(r.Context(), created.ID)
|
||||
created, _ = h.Queries.GetAgent(r.Context(), created.ID)
|
||||
}
|
||||
|
||||
resp := agentToResponse(agent)
|
||||
resp := agentToResponse(created)
|
||||
actorType, actorID := h.resolveActor(r, ownerID, workspaceID)
|
||||
h.publish(protocol.EventAgentCreated, workspaceID, actorType, actorID, map[string]any{"agent": resp})
|
||||
|
||||
h.Analytics.Capture(analytics.AgentCreated(
|
||||
ownerID,
|
||||
workspaceID,
|
||||
uuidToString(agent.ID),
|
||||
uuidToString(created.ID),
|
||||
runtime.Provider,
|
||||
runtime.RuntimeMode,
|
||||
req.Template,
|
||||
@@ -579,6 +597,13 @@ type UpdateAgentRequest struct {
|
||||
Status *string `json:"status"`
|
||||
MaxConcurrentTasks *int32 `json:"max_concurrent_tasks"`
|
||||
Model *string `json:"model"`
|
||||
// ThinkingLevel is treated as a tri-state per-MUL-2339:
|
||||
// - field omitted → no change (leave existing value alone)
|
||||
// - field present with "" → explicit clear (use runtime default)
|
||||
// - field present with non-empty value → set (validated server-side)
|
||||
// Distinguishing those modes is why this is a pointer; the raw-fields
|
||||
// map captured at decode time tells us whether the key was sent.
|
||||
ThinkingLevel *string `json:"thinking_level"`
|
||||
}
|
||||
|
||||
// canViewAgentEnv checks whether the requesting user is allowed to see the
|
||||
@@ -633,11 +658,11 @@ func (h *Handler) canManageAgent(w http.ResponseWriter, r *http.Request, agent d
|
||||
|
||||
func (h *Handler) UpdateAgent(w http.ResponseWriter, r *http.Request) {
|
||||
id := chi.URLParam(r, "id")
|
||||
agent, ok := h.loadAgentForUser(w, r, id)
|
||||
existing, ok := h.loadAgentForUser(w, r, id)
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
if !h.canManageAgent(w, r, agent) {
|
||||
if !h.canManageAgent(w, r, existing) {
|
||||
return
|
||||
}
|
||||
|
||||
@@ -649,7 +674,7 @@ func (h *Handler) UpdateAgent(w http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
|
||||
params := db.UpdateAgentParams{
|
||||
ID: agent.ID,
|
||||
ID: existing.ID,
|
||||
}
|
||||
if req.Name != nil {
|
||||
params.Name = pgtype.Text{String: *req.Name, Valid: true}
|
||||
@@ -684,6 +709,12 @@ func (h *Handler) UpdateAgent(w http.ResponseWriter, r *http.Request) {
|
||||
if hasMcpConfig && !shouldClearMcpConfig {
|
||||
params.McpConfig = append([]byte(nil), rawMcpConfig...)
|
||||
}
|
||||
|
||||
// Resolve the runtime that will be in force after this update so the
|
||||
// thinking_level validation hits the right provider enum. When the
|
||||
// request doesn't move the agent, we still need to load the *current*
|
||||
// runtime to validate a thinking_level change. Resolve once and reuse.
|
||||
targetRuntimeID := existing.RuntimeID
|
||||
if req.RuntimeID != nil {
|
||||
runtimeUUID, ok := parseUUIDOrBadRequest(w, *req.RuntimeID, "runtime_id")
|
||||
if !ok {
|
||||
@@ -691,7 +722,7 @@ func (h *Handler) UpdateAgent(w http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
runtime, err := h.Queries.GetAgentRuntimeForWorkspace(r.Context(), db.GetAgentRuntimeForWorkspaceParams{
|
||||
ID: runtimeUUID,
|
||||
WorkspaceID: agent.WorkspaceID,
|
||||
WorkspaceID: existing.WorkspaceID,
|
||||
})
|
||||
if err != nil {
|
||||
writeError(w, http.StatusBadRequest, "invalid runtime_id")
|
||||
@@ -700,7 +731,7 @@ func (h *Handler) UpdateAgent(w http.ResponseWriter, r *http.Request) {
|
||||
// Same gate as CreateAgent — prevents UpdateAgent from being used to
|
||||
// re-bind an agent onto someone else's private runtime, which would
|
||||
// otherwise be a quiet end-run around the CreateAgent check.
|
||||
member, ok := h.workspaceMember(w, r, uuidToString(agent.WorkspaceID))
|
||||
member, ok := h.workspaceMember(w, r, uuidToString(existing.WorkspaceID))
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
@@ -710,6 +741,7 @@ func (h *Handler) UpdateAgent(w http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
params.RuntimeID = runtime.ID
|
||||
params.RuntimeMode = pgtype.Text{String: runtime.RuntimeMode, Valid: true}
|
||||
targetRuntimeID = runtime.ID
|
||||
}
|
||||
if req.Visibility != nil {
|
||||
params.Visibility = pgtype.Text{String: *req.Visibility, Valid: true}
|
||||
@@ -724,32 +756,110 @@ func (h *Handler) UpdateAgent(w http.ResponseWriter, r *http.Request) {
|
||||
params.Model = pgtype.Text{String: *req.Model, Valid: true}
|
||||
}
|
||||
|
||||
agent, err = h.Queries.UpdateAgent(r.Context(), params)
|
||||
// thinking_level handling (MUL-2339). Tri-state semantics:
|
||||
// - field omitted → leave column alone (COALESCE narg), but if a
|
||||
// runtime change in this same request would make the *existing*
|
||||
// value literal-invalid for the new provider, reject 400. This
|
||||
// closes the gap Elon's review flagged: previously, switching a
|
||||
// Claude agent storing `max` to a Codex runtime would silently
|
||||
// keep `max` and forward it to the daemon.
|
||||
// - field set to "" → explicit clear (run ClearAgentThinkingLevel post-update)
|
||||
// - field set to value → validate against the target runtime's provider
|
||||
// enum; reject literal-invalid with 400. Per-model combination checks
|
||||
// run in the daemon at execution time, not here — see Trump's review
|
||||
// constraint that API behaviour stays consistent across change paths.
|
||||
shouldClearThinkingLevel := false
|
||||
if req.ThinkingLevel != nil {
|
||||
value := *req.ThinkingLevel
|
||||
if value == "" {
|
||||
shouldClearThinkingLevel = true
|
||||
} else {
|
||||
// Need the target runtime's provider to validate. resolveAgentProvider
|
||||
// looks the runtime up by targetRuntimeID: the new runtime when this
|
||||
// request changes runtime_id, otherwise the agent's existing runtime.
|
||||
provider, ok := h.resolveAgentProvider(r, existing.WorkspaceID, targetRuntimeID)
|
||||
if !ok {
|
||||
writeError(w, http.StatusInternalServerError, "failed to resolve runtime for thinking_level validation")
|
||||
return
|
||||
}
|
||||
if !agent.IsKnownThinkingValue(provider, value) {
|
||||
writeError(w, http.StatusBadRequest, fmt.Sprintf("thinking_level %q is not a recognised value for runtime %q", value, provider))
|
||||
return
|
||||
}
|
||||
params.ThinkingLevel = pgtype.Text{String: value, Valid: true}
|
||||
}
|
||||
} else if req.RuntimeID != nil && existing.ThinkingLevel.Valid && existing.ThinkingLevel.String != "" {
|
||||
// Runtime is changing but the caller didn't touch thinking_level.
|
||||
// If the existing value is not in the new provider's enum at all,
|
||||
// preserving it would smuggle a literal-invalid token to the daemon.
|
||||
// Hold the same line as the explicit-set path: always 400 on
|
||||
// literal-invalid, never silently coerce. The caller can either
|
||||
// pass `thinking_level: ""` to clear or pick a value valid for the
|
||||
// new runtime.
|
||||
provider, ok := h.resolveAgentProvider(r, existing.WorkspaceID, targetRuntimeID)
|
||||
if !ok {
|
||||
writeError(w, http.StatusInternalServerError, "failed to resolve runtime for thinking_level validation")
|
||||
return
|
||||
}
|
||||
if !agent.IsKnownThinkingValue(provider, existing.ThinkingLevel.String) {
|
||||
writeError(w, http.StatusBadRequest, fmt.Sprintf(
|
||||
"existing thinking_level %q is not valid for runtime %q; pass thinking_level=\"\" to clear or set a value valid for the new runtime",
|
||||
existing.ThinkingLevel.String, provider,
|
||||
))
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
updated, err := h.Queries.UpdateAgent(r.Context(), params)
|
||||
if err != nil {
|
||||
slog.Warn("update agent failed", append(logger.RequestAttrs(r), "error", err, "agent_id", id)...)
|
||||
writeError(w, http.StatusInternalServerError, "failed to update agent: "+err.Error())
|
||||
return
|
||||
}
|
||||
|
||||
// mcp_config: null in the request means explicitly clear the field.
|
||||
// COALESCE in UpdateAgent cannot set a column to NULL, so we use a dedicated query.
|
||||
// mcp_config / thinking_level: null/empty in the request means explicitly
|
||||
// clear the field. COALESCE in UpdateAgent cannot set a column to NULL,
|
||||
// so we use dedicated clear queries.
|
||||
if shouldClearMcpConfig {
|
||||
agent, err = h.Queries.ClearAgentMcpConfig(r.Context(), agent.ID)
|
||||
updated, err = h.Queries.ClearAgentMcpConfig(r.Context(), updated.ID)
|
||||
if err != nil {
|
||||
slog.Warn("clear agent mcp_config failed", append(logger.RequestAttrs(r), "error", err, "agent_id", id)...)
|
||||
writeError(w, http.StatusInternalServerError, "failed to clear mcp_config: "+err.Error())
|
||||
return
|
||||
}
|
||||
}
|
||||
if shouldClearThinkingLevel {
|
||||
updated, err = h.Queries.ClearAgentThinkingLevel(r.Context(), updated.ID)
|
||||
if err != nil {
|
||||
slog.Warn("clear agent thinking_level failed", append(logger.RequestAttrs(r), "error", err, "agent_id", id)...)
|
||||
writeError(w, http.StatusInternalServerError, "failed to clear thinking_level: "+err.Error())
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
resp := agentToResponse(agent)
|
||||
slog.Info("agent updated", append(logger.RequestAttrs(r), "agent_id", id, "workspace_id", uuidToString(agent.WorkspaceID))...)
|
||||
resp := agentToResponse(updated)
|
||||
slog.Info("agent updated", append(logger.RequestAttrs(r), "agent_id", id, "workspace_id", uuidToString(updated.WorkspaceID))...)
|
||||
userID := requestUserID(r)
|
||||
actorType, actorID := h.resolveActor(r, userID, uuidToString(agent.WorkspaceID))
|
||||
h.publish(protocol.EventAgentStatus, uuidToString(agent.WorkspaceID), actorType, actorID, map[string]any{"agent": resp})
|
||||
actorType, actorID := h.resolveActor(r, userID, uuidToString(updated.WorkspaceID))
|
||||
h.publish(protocol.EventAgentStatus, uuidToString(updated.WorkspaceID), actorType, actorID, map[string]any{"agent": resp})
|
||||
writeJSON(w, http.StatusOK, resp)
|
||||
}
|
||||
|
||||
// resolveAgentProvider returns the provider name for the runtime that
|
||||
// will own this agent after the in-flight update applies. Used by the
|
||||
// thinking_level validator so a runtime/model swap and a level swap
|
||||
// validated in the same request both consult the same provider.
|
||||
func (h *Handler) resolveAgentProvider(r *http.Request, workspaceID pgtype.UUID, runtimeID pgtype.UUID) (string, bool) {
|
||||
rt, err := h.Queries.GetAgentRuntimeForWorkspace(r.Context(), db.GetAgentRuntimeForWorkspaceParams{
|
||||
ID: runtimeID,
|
||||
WorkspaceID: workspaceID,
|
||||
})
|
||||
if err != nil {
|
||||
return "", false
|
||||
}
|
||||
return rt.Provider, true
|
||||
}
|
||||
|
||||
func (h *Handler) ArchiveAgent(w http.ResponseWriter, r *http.Request) {
|
||||
id := chi.URLParam(r, "id")
|
||||
agent, ok := h.loadAgentForUser(w, r, id)
|
||||
|
||||
372
server/internal/handler/agent_thinking_test.go
Normal file
372
server/internal/handler/agent_thinking_test.go
Normal file
@@ -0,0 +1,372 @@
|
||||
package handler
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestCreateAgent_ThinkingLevel_ValidationConsistency exercises the
|
||||
// MUL-2339 invariant: when an HTTP caller sends a literal-invalid
|
||||
// thinking_level the API MUST return 400, regardless of which other
|
||||
// field combination the same request mutates. The constraint comes
|
||||
// from Trump's PR1 review: "invalid value 的 API 行为请保持一致,
|
||||
// 不要同一类变更有时 400、有时静默清空".
|
||||
func TestCreateAgent_ThinkingLevel_ValidationConsistency(t *testing.T) {
|
||||
if testHandler == nil {
|
||||
t.Skip("database not available")
|
||||
}
|
||||
|
||||
ctx := context.Background()
|
||||
claudeRuntimeID := createClaudeProviderRuntime(t)
|
||||
|
||||
t.Cleanup(func() {
|
||||
testPool.Exec(ctx,
|
||||
`DELETE FROM agent WHERE workspace_id = $1 AND name LIKE 'thinking-test-%'`,
|
||||
testWorkspaceID,
|
||||
)
|
||||
})
|
||||
|
||||
t.Run("empty value succeeds", func(t *testing.T) {
|
||||
body := map[string]any{
|
||||
"name": "thinking-test-empty",
|
||||
"runtime_id": claudeRuntimeID,
|
||||
"visibility": "private",
|
||||
"max_concurrent_tasks": 1,
|
||||
"thinking_level": "",
|
||||
}
|
||||
w := httptest.NewRecorder()
|
||||
testHandler.CreateAgent(w, newRequest(http.MethodPost, "/api/agents", body))
|
||||
if w.Code != http.StatusCreated {
|
||||
t.Fatalf("empty thinking_level: expected 201, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("known claude value succeeds", func(t *testing.T) {
|
||||
body := map[string]any{
|
||||
"name": "thinking-test-known",
|
||||
"runtime_id": claudeRuntimeID,
|
||||
"visibility": "private",
|
||||
"max_concurrent_tasks": 1,
|
||||
"thinking_level": "high",
|
||||
}
|
||||
w := httptest.NewRecorder()
|
||||
testHandler.CreateAgent(w, newRequest(http.MethodPost, "/api/agents", body))
|
||||
if w.Code != http.StatusCreated {
|
||||
t.Fatalf("thinking_level=high: expected 201, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
var resp map[string]any
|
||||
_ = json.NewDecoder(w.Body).Decode(&resp)
|
||||
if resp["thinking_level"] != "high" {
|
||||
t.Errorf("expected thinking_level=high in response, got %v", resp["thinking_level"])
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("codex-only token rejected for claude runtime", func(t *testing.T) {
|
||||
// `none` is a valid Codex token but NOT a Claude token. The
|
||||
// gate must always 400 regardless of which other fields the
|
||||
// request also tried to change.
|
||||
body := map[string]any{
|
||||
"name": "thinking-test-codex-only",
|
||||
"runtime_id": claudeRuntimeID,
|
||||
"visibility": "private",
|
||||
"max_concurrent_tasks": 1,
|
||||
"thinking_level": "none",
|
||||
}
|
||||
w := httptest.NewRecorder()
|
||||
testHandler.CreateAgent(w, newRequest(http.MethodPost, "/api/agents", body))
|
||||
if w.Code != http.StatusBadRequest {
|
||||
t.Fatalf("codex-only thinking_level on claude runtime: expected 400, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("garbage value rejected", func(t *testing.T) {
|
||||
body := map[string]any{
|
||||
"name": "thinking-test-garbage",
|
||||
"runtime_id": claudeRuntimeID,
|
||||
"visibility": "private",
|
||||
"max_concurrent_tasks": 1,
|
||||
"thinking_level": "supersonic",
|
||||
}
|
||||
w := httptest.NewRecorder()
|
||||
testHandler.CreateAgent(w, newRequest(http.MethodPost, "/api/agents", body))
|
||||
if w.Code != http.StatusBadRequest {
|
||||
t.Fatalf("garbage thinking_level: expected 400, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// TestUpdateAgent_ThinkingLevel_TriState covers the three modes of
|
||||
// the field on PATCH:
|
||||
// - field omitted → leave the existing value alone (the silent-clear
|
||||
// anti-pattern flagged by Trump's review must NOT happen here)
|
||||
// - explicit "" → clear back to NULL
|
||||
// - non-empty → validate against the CURRENT runtime's provider enum
|
||||
//
|
||||
// All three branches share the same 400 / 200 outcome rule: validation
|
||||
// failures are always 400, never auto-clear.
|
||||
func TestUpdateAgent_ThinkingLevel_TriState(t *testing.T) {
|
||||
if testHandler == nil {
|
||||
t.Skip("database not available")
|
||||
}
|
||||
|
||||
ctx := context.Background()
|
||||
claudeRuntimeID := createClaudeProviderRuntime(t)
|
||||
agentID := createAgentOnRuntime(t, "thinking-update-test", claudeRuntimeID, "high")
|
||||
|
||||
t.Cleanup(func() {
|
||||
testPool.Exec(ctx, `DELETE FROM agent WHERE id = $1`, agentID)
|
||||
})
|
||||
|
||||
// 1. Omitted field — name-only update must NOT touch thinking_level.
|
||||
t.Run("omitted field leaves value alone", func(t *testing.T) {
|
||||
body := map[string]any{
|
||||
"name": "thinking-update-test-renamed",
|
||||
}
|
||||
w := httptest.NewRecorder()
|
||||
req := withURLParam(newRequest(http.MethodPatch, "/api/agents/"+agentID, body), "id", agentID)
|
||||
testHandler.UpdateAgent(w, req)
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("name-only update: expected 200, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
var resp map[string]any
|
||||
_ = json.NewDecoder(w.Body).Decode(&resp)
|
||||
if resp["thinking_level"] != "high" {
|
||||
t.Errorf("name-only update silently changed thinking_level: got %v, want high", resp["thinking_level"])
|
||||
}
|
||||
})
|
||||
|
||||
// 2. Explicit "" — must clear.
|
||||
t.Run("empty string clears", func(t *testing.T) {
|
||||
body := map[string]any{
|
||||
"thinking_level": "",
|
||||
}
|
||||
w := httptest.NewRecorder()
|
||||
req := withURLParam(newRequest(http.MethodPatch, "/api/agents/"+agentID, body), "id", agentID)
|
||||
testHandler.UpdateAgent(w, req)
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("clear update: expected 200, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
var resp map[string]any
|
||||
_ = json.NewDecoder(w.Body).Decode(&resp)
|
||||
if resp["thinking_level"] != "" {
|
||||
t.Errorf("empty thinking_level should clear: got %v", resp["thinking_level"])
|
||||
}
|
||||
})
|
||||
|
||||
// 3. Garbage value — always 400, never silently clear.
|
||||
t.Run("garbage value is always 400", func(t *testing.T) {
|
||||
body := map[string]any{
|
||||
"thinking_level": "warp-speed",
|
||||
}
|
||||
w := httptest.NewRecorder()
|
||||
req := withURLParam(newRequest(http.MethodPatch, "/api/agents/"+agentID, body), "id", agentID)
|
||||
testHandler.UpdateAgent(w, req)
|
||||
if w.Code != http.StatusBadRequest {
|
||||
t.Fatalf("garbage thinking_level: expected 400, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
})
|
||||
|
||||
// 4. Codex-only token while bound to a Claude runtime → 400. This
|
||||
// is the "consistency" case from Trump's review: the API does
|
||||
// NOT auto-clear or coerce; the same token that's valid for a
|
||||
// Codex runtime is rejected here.
|
||||
t.Run("codex token on claude runtime is 400, not silent clear", func(t *testing.T) {
|
||||
body := map[string]any{
|
||||
"thinking_level": "minimal",
|
||||
}
|
||||
w := httptest.NewRecorder()
|
||||
req := withURLParam(newRequest(http.MethodPatch, "/api/agents/"+agentID, body), "id", agentID)
|
||||
testHandler.UpdateAgent(w, req)
|
||||
if w.Code != http.StatusBadRequest {
|
||||
t.Fatalf("codex token on claude runtime: expected 400, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// TestUpdateAgent_RuntimeSwitch_PreservesValidValueRejectsInvalid covers
|
||||
// the gap Elon flagged in PR1 review: a PATCH that switches `runtime_id`
|
||||
// without explicitly touching `thinking_level` used to silently keep
|
||||
// the existing value, so a Claude agent storing `max` could land on a
|
||||
// Codex runtime where `max` is not a recognised token at all, and the
|
||||
// daemon would receive a literal-invalid level.
|
||||
//
|
||||
// The contract the test pins, matching the existing "always 400 on
|
||||
// literal-invalid" rule:
|
||||
//
|
||||
// - existing value still valid for the new runtime → 200, value kept
|
||||
// - existing value invalid for the new runtime → 400, never silent
|
||||
// clear or coerce
|
||||
// - caller can recover by re-sending with `thinking_level: ""` to clear
|
||||
// in the same PATCH
|
||||
func TestUpdateAgent_RuntimeSwitch_PreservesValidValueRejectsInvalid(t *testing.T) {
|
||||
if testHandler == nil {
|
||||
t.Skip("database not available")
|
||||
}
|
||||
|
||||
ctx := context.Background()
|
||||
claudeRuntimeID := createClaudeProviderRuntime(t)
|
||||
codexRuntimeID := createCodexProviderRuntime(t)
|
||||
|
||||
t.Cleanup(func() {
|
||||
testPool.Exec(ctx, `DELETE FROM agent WHERE workspace_id = $1 AND name LIKE 'runtime-switch-%'`, testWorkspaceID)
|
||||
})
|
||||
|
||||
t.Run("existing value still valid for new runtime is kept", func(t *testing.T) {
|
||||
// `high` is valid for both Claude and Codex enums — switching
|
||||
// runtime without touching thinking_level should keep it.
|
||||
agentID := createAgentOnRuntime(t, "runtime-switch-keep", claudeRuntimeID, "high")
|
||||
body := map[string]any{
|
||||
"runtime_id": codexRuntimeID,
|
||||
}
|
||||
w := httptest.NewRecorder()
|
||||
req := withURLParam(newRequest(http.MethodPatch, "/api/agents/"+agentID, body), "id", agentID)
|
||||
testHandler.UpdateAgent(w, req)
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200 when existing value is still valid, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
var resp map[string]any
|
||||
_ = json.NewDecoder(w.Body).Decode(&resp)
|
||||
if resp["thinking_level"] != "high" {
|
||||
t.Errorf("expected thinking_level=high preserved across runtime switch, got %v", resp["thinking_level"])
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("existing value invalid for new runtime is 400, not silent", func(t *testing.T) {
|
||||
// `max` is Claude-only; switching to Codex must NOT silently
|
||||
// keep it. Behaviour stays consistent with the explicit-set
|
||||
// path: always 400 on literal-invalid.
|
||||
agentID := createAgentOnRuntime(t, "runtime-switch-reject", claudeRuntimeID, "max")
|
||||
body := map[string]any{
|
||||
"runtime_id": codexRuntimeID,
|
||||
}
|
||||
w := httptest.NewRecorder()
|
||||
req := withURLParam(newRequest(http.MethodPatch, "/api/agents/"+agentID, body), "id", agentID)
|
||||
testHandler.UpdateAgent(w, req)
|
||||
if w.Code != http.StatusBadRequest {
|
||||
t.Fatalf("expected 400 when existing value is invalid for new runtime, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("simultaneous explicit clear lets the switch through", func(t *testing.T) {
|
||||
// The 400 above is recoverable: pass `thinking_level: ""` in
|
||||
// the same PATCH and the switch goes through with a cleared
|
||||
// value. This is the documented escape hatch in the error
|
||||
// message; the test pins it so the contract holds.
|
||||
agentID := createAgentOnRuntime(t, "runtime-switch-clear", claudeRuntimeID, "max")
|
||||
body := map[string]any{
|
||||
"runtime_id": codexRuntimeID,
|
||||
"thinking_level": "",
|
||||
}
|
||||
w := httptest.NewRecorder()
|
||||
req := withURLParam(newRequest(http.MethodPatch, "/api/agents/"+agentID, body), "id", agentID)
|
||||
testHandler.UpdateAgent(w, req)
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200 with simultaneous clear, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
var resp map[string]any
|
||||
_ = json.NewDecoder(w.Body).Decode(&resp)
|
||||
if resp["thinking_level"] != "" {
|
||||
t.Errorf("expected thinking_level cleared, got %v", resp["thinking_level"])
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("simultaneous explicit set to valid value lets the switch through", func(t *testing.T) {
|
||||
// The other recovery: caller picks a value valid for the new
|
||||
// runtime. Same PATCH, no need for a separate roundtrip.
|
||||
agentID := createAgentOnRuntime(t, "runtime-switch-replace", claudeRuntimeID, "max")
|
||||
body := map[string]any{
|
||||
"runtime_id": codexRuntimeID,
|
||||
"thinking_level": "minimal",
|
||||
}
|
||||
w := httptest.NewRecorder()
|
||||
req := withURLParam(newRequest(http.MethodPatch, "/api/agents/"+agentID, body), "id", agentID)
|
||||
testHandler.UpdateAgent(w, req)
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200 with simultaneous set, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
var resp map[string]any
|
||||
_ = json.NewDecoder(w.Body).Decode(&resp)
|
||||
if resp["thinking_level"] != "minimal" {
|
||||
t.Errorf("expected thinking_level=minimal, got %v", resp["thinking_level"])
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// createCodexProviderRuntime mirrors createClaudeProviderRuntime but for
|
||||
// the codex provider, so runtime-switch tests can exercise a real
|
||||
// cross-provider transition.
|
||||
func createCodexProviderRuntime(t *testing.T) string {
|
||||
t.Helper()
|
||||
var runtimeID string
|
||||
err := testPool.QueryRow(context.Background(), `
|
||||
INSERT INTO agent_runtime (
|
||||
workspace_id, daemon_id, name, runtime_mode, provider, status,
|
||||
device_info, metadata, last_seen_at, owner_id
|
||||
)
|
||||
VALUES ($1, NULL, $2, 'cloud', 'codex', 'online', $3, '{}'::jsonb, now(), $4)
|
||||
RETURNING id
|
||||
`, testWorkspaceID, "Codex Thinking Runtime", "Codex thinking-level test runtime", testUserID).Scan(&runtimeID)
|
||||
if err != nil {
|
||||
t.Fatalf("create codex runtime: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
testPool.Exec(context.Background(), `DELETE FROM agent_runtime WHERE id = $1`, runtimeID)
|
||||
})
|
||||
return runtimeID
|
||||
}
|
||||
|
||||
// createClaudeProviderRuntime stands up a runtime row with provider
|
||||
// "claude" so the thinking_level gate runs against the real Claude
|
||||
// enum (the default test runtime uses a fake provider). The runtime
|
||||
// is workspace-private but visible to the test owner.
|
||||
func createClaudeProviderRuntime(t *testing.T) string {
|
||||
t.Helper()
|
||||
var runtimeID string
|
||||
err := testPool.QueryRow(context.Background(), `
|
||||
INSERT INTO agent_runtime (
|
||||
workspace_id, daemon_id, name, runtime_mode, provider, status,
|
||||
device_info, metadata, last_seen_at, owner_id
|
||||
)
|
||||
VALUES ($1, NULL, $2, 'cloud', 'claude', 'online', $3, '{}'::jsonb, now(), $4)
|
||||
RETURNING id
|
||||
`, testWorkspaceID, "Claude Thinking Runtime", "Claude thinking-level test runtime", testUserID).Scan(&runtimeID)
|
||||
if err != nil {
|
||||
t.Fatalf("create claude runtime: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
testPool.Exec(context.Background(), `DELETE FROM agent_runtime WHERE id = $1`, runtimeID)
|
||||
})
|
||||
return runtimeID
|
||||
}
|
||||
|
||||
// createAgentOnRuntime seeds an agent row bound to the given runtime
|
||||
// with the given initial thinking_level (empty for NULL).
|
||||
func createAgentOnRuntime(t *testing.T, name, runtimeID, level string) string {
|
||||
t.Helper()
|
||||
var agentID string
|
||||
var levelArg any
|
||||
if level == "" {
|
||||
levelArg = nil
|
||||
} else {
|
||||
levelArg = level
|
||||
}
|
||||
err := testPool.QueryRow(context.Background(), `
|
||||
INSERT INTO agent (
|
||||
workspace_id, name, description, runtime_mode, runtime_config,
|
||||
runtime_id, visibility, max_concurrent_tasks, owner_id,
|
||||
instructions, custom_env, custom_args, thinking_level
|
||||
)
|
||||
VALUES ($1, $2, '', 'cloud', '{}'::jsonb, $3, 'private', 1, $4, '', '{}'::jsonb, '[]'::jsonb, $5)
|
||||
RETURNING id
|
||||
`, testWorkspaceID, name, runtimeID, testUserID, levelArg).Scan(&agentID)
|
||||
if err != nil {
|
||||
t.Fatalf("create agent on runtime %s: %v", runtimeID, err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
testPool.Exec(context.Background(), `DELETE FROM agent WHERE id = $1`, agentID)
|
||||
})
|
||||
return agentID
|
||||
}
|
||||
@@ -1145,14 +1145,15 @@ func (h *Handler) ClaimTaskByRuntime(w http.ResponseWriter, r *http.Request) {
|
||||
mcpConfig = json.RawMessage(agent.McpConfig)
|
||||
}
|
||||
resp.Agent = &TaskAgentData{
|
||||
ID: uuidToString(agent.ID),
|
||||
Name: agent.Name,
|
||||
Instructions: agent.Instructions,
|
||||
Skills: skills,
|
||||
CustomEnv: customEnv,
|
||||
CustomArgs: customArgs,
|
||||
McpConfig: mcpConfig,
|
||||
Model: agent.Model.String,
|
||||
ID: uuidToString(agent.ID),
|
||||
Name: agent.Name,
|
||||
Instructions: agent.Instructions,
|
||||
Skills: skills,
|
||||
CustomEnv: customEnv,
|
||||
CustomArgs: customArgs,
|
||||
McpConfig: mcpConfig,
|
||||
Model: agent.Model.String,
|
||||
ThinkingLevel: agent.ThinkingLevel.String,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -66,11 +66,38 @@ type ModelListRequest struct {
|
||||
// model the runtime advertises as its preferred pick (e.g. Claude
|
||||
// Code's shipped default, or hermes' currentModelId) so the UI can
|
||||
// badge it — don't drop it when marshalling.
|
||||
//
|
||||
// `Thinking` carries the per-model reasoning-effort catalog discovered
|
||||
// by the daemon for runtimes that support it (claude, codex — see
|
||||
// MUL-2339). nil means "no picker for this model"; the UI hides the
|
||||
// thinking_level selector. Older daemons (pre-2026-05) won't send this
|
||||
// field, which is fine: the UI hides the selector and the agent runs
|
||||
// with the runtime default.
|
||||
type ModelEntry struct {
|
||||
ID string `json:"id"`
|
||||
Label string `json:"label"`
|
||||
Provider string `json:"provider,omitempty"`
|
||||
Default bool `json:"default,omitempty"`
|
||||
ID string `json:"id"`
|
||||
Label string `json:"label"`
|
||||
Provider string `json:"provider,omitempty"`
|
||||
Default bool `json:"default,omitempty"`
|
||||
Thinking *ModelThinking `json:"thinking,omitempty"`
|
||||
}
|
||||
|
||||
// ThinkingLevel is the wire form of one entry in a model's
// reasoning-effort catalog.
type ThinkingLevel struct {
	// Value is the literal token the daemon passes to the CLI.
	Value string `json:"value"`
	// Label is the human-readable display string.
	Label string `json:"label"`
	// Description is optional helper copy; omitted from JSON when empty.
	Description string `json:"description,omitempty"`
}

// ModelThinking is the wire form of a model's thinking catalog. Its
// JSON field names match agent.ModelThinking so the daemon's report
// can pass through without remapping.
type ModelThinking struct {
	// SupportedLevels lists the levels offered for the model.
	SupportedLevels []ThinkingLevel `json:"supported_levels"`
	// DefaultLevel is omitted from JSON when empty.
	DefaultLevel string `json:"default_level,omitempty"`
}
|
||||
|
||||
const (
|
||||
|
||||
1
server/migrations/095_agent_thinking_level.down.sql
Normal file
1
server/migrations/095_agent_thinking_level.down.sql
Normal file
@@ -0,0 +1 @@
|
||||
ALTER TABLE agent DROP COLUMN IF EXISTS thinking_level;
|
||||
8
server/migrations/095_agent_thinking_level.up.sql
Normal file
8
server/migrations/095_agent_thinking_level.up.sql
Normal file
@@ -0,0 +1,8 @@
|
||||
-- Per-agent thinking / reasoning effort setting. Stored as the
|
||||
-- runtime-native string (e.g. Claude's "low|medium|high|xhigh|max",
|
||||
-- Codex's "none|minimal|low|medium|high|xhigh") rather than a
|
||||
-- cross-runtime abstraction, so the user-visible value matches what
|
||||
-- each CLI's own UI advertises (see MUL-2339). NULL means "use the
|
||||
-- runtime/model default" — every backend treats this as "do not
|
||||
-- inject --effort / reasoning_effort" and lets the CLI pick.
|
||||
ALTER TABLE agent ADD COLUMN thinking_level TEXT;
|
||||
@@ -35,6 +35,15 @@ type ExecOptions struct {
|
||||
ExtraArgs []string // daemon-wide default CLI arguments appended before CustomArgs; currently read by claude and codex backends only
|
||||
CustomArgs []string // per-agent CLI arguments appended after ExtraArgs
|
||||
McpConfig json.RawMessage // if non-nil, MCP server config to pass via --mcp-config
|
||||
// ThinkingLevel is the runtime-native reasoning/effort value (e.g.
|
||||
// Claude's "low|medium|high|xhigh|max", Codex's "none|minimal|low|
|
||||
// medium|high|xhigh"). Empty means "use the runtime/model default" —
|
||||
// every backend that consumes this skips its --effort / reasoning_effort
|
||||
// injection so the upstream CLI's own default applies. Currently honoured
|
||||
// by the claude and codex backends only; other backends ignore the
|
||||
// field rather than fail (so MUL-2339 can grow runtime support
|
||||
// incrementally without breaking unrelated agents).
|
||||
ThinkingLevel string
|
||||
}
|
||||
|
||||
// Session represents a running agent execution.
|
||||
|
||||
@@ -411,6 +411,13 @@ var claudeBlockedArgs = map[string]blockedArgMode{
|
||||
"--input-format": blockedWithValue, // stream-json protocol
|
||||
"--permission-mode": blockedWithValue, // bypassPermissions for autonomous operation
|
||||
"--mcp-config": blockedWithValue, // set by daemon from agent.mcp_config
|
||||
// `--effort` is owned by the per-agent thinking_level picker so a
|
||||
// user-supplied custom_arg cannot silently outvote it. The daemon
|
||||
// injects --effort only when opts.ThinkingLevel is set; if a user
|
||||
// nevertheless writes it in custom_args we drop the duplicate and
|
||||
// log a warning rather than letting the CLI receive two conflicting
|
||||
// --effort values.
|
||||
"--effort": blockedWithValue,
|
||||
}
|
||||
|
||||
func buildClaudeArgs(opts ExecOptions, logger *slog.Logger) []string {
|
||||
@@ -432,6 +439,13 @@ func buildClaudeArgs(opts ExecOptions, logger *slog.Logger) []string {
|
||||
if opts.Model != "" {
|
||||
args = append(args, "--model", opts.Model)
|
||||
}
|
||||
if opts.ThinkingLevel != "" {
|
||||
// Slotted right after --model so the per-session effort runs
|
||||
// against the same model selection the args advertise; the CLI
|
||||
// itself accepts the flag in any order but this ordering makes
|
||||
// the launch line readable in `agent command` logs.
|
||||
args = append(args, "--effort", opts.ThinkingLevel)
|
||||
}
|
||||
if opts.MaxTurns > 0 {
|
||||
args = append(args, "--max-turns", fmt.Sprintf("%d", opts.MaxTurns))
|
||||
}
|
||||
|
||||
@@ -214,12 +214,19 @@ func (b *codexBackend) Execute(ctx context.Context, prompt string, opts ExecOpti
|
||||
}
|
||||
|
||||
// 3. Send turn and wait for completion
|
||||
_, err = c.request(runCtx, "turn/start", map[string]any{
|
||||
turnParams := map[string]any{
|
||||
"threadId": threadID,
|
||||
"input": []map[string]any{
|
||||
{"type": "text", "text": prompt},
|
||||
},
|
||||
})
|
||||
}
|
||||
// Per-turn reasoning override. Mirrors the per-thread injection in
|
||||
// startOrResumeThread; keeping both in sync is enforced by the
|
||||
// shared `codexReasoningInjection` fixture in codex_test.go (see
|
||||
// MUL-2339 — Trump's constraint that the three injection points
|
||||
// must not drift independently).
|
||||
applyCodexReasoningEffort(turnParams, opts.ThinkingLevel)
|
||||
_, err = c.request(runCtx, "turn/start", turnParams)
|
||||
if err != nil {
|
||||
drainAndWait() // flush os/exec stderr goroutine before sampling Tail
|
||||
finalStatus = "failed"
|
||||
@@ -342,12 +349,19 @@ func (c *codexClient) startOrResumeThread(ctx context.Context, opts ExecOptions,
|
||||
if priorThreadID := opts.ResumeSessionID; priorThreadID != "" {
|
||||
// thread/resume reuses the thread's persisted model and reasoning
|
||||
// effort; only override fields the daemon actually cares about.
|
||||
resumeResult, err := c.request(ctx, "thread/resume", map[string]any{
|
||||
resumeParams := map[string]any{
|
||||
"threadId": priorThreadID,
|
||||
"cwd": opts.Cwd,
|
||||
"model": nilIfEmpty(opts.Model),
|
||||
"developerInstructions": nilIfEmpty(opts.SystemPrompt),
|
||||
})
|
||||
}
|
||||
// Explicit override of the persisted reasoning effort: without
|
||||
// this, a Codex resume silently reuses whatever level the prior
|
||||
// session was created with, even when the user has flipped the
|
||||
// agent's thinking_level since. See MUL-2339 — Elon flagged that
|
||||
// resume must honour the live config, not the stored one.
|
||||
applyCodexReasoningEffort(resumeParams, opts.ThinkingLevel)
|
||||
resumeResult, err := c.request(ctx, "thread/resume", resumeParams)
|
||||
if err == nil {
|
||||
if threadID := extractThreadID(resumeResult); threadID != "" {
|
||||
return threadID, true, nil
|
||||
@@ -358,7 +372,7 @@ func (c *codexClient) startOrResumeThread(ctx context.Context, opts ExecOptions,
|
||||
}
|
||||
}
|
||||
|
||||
startResult, err := c.request(ctx, "thread/start", map[string]any{
|
||||
startParams := map[string]any{
|
||||
"model": nilIfEmpty(opts.Model),
|
||||
"modelProvider": nil,
|
||||
"profile": nil,
|
||||
@@ -372,7 +386,9 @@ func (c *codexClient) startOrResumeThread(ctx context.Context, opts ExecOptions,
|
||||
"includeApplyPatchTool": nil,
|
||||
"experimentalRawEvents": false,
|
||||
"persistExtendedHistory": true,
|
||||
})
|
||||
}
|
||||
applyCodexReasoningEffort(startParams, opts.ThinkingLevel)
|
||||
startResult, err := c.request(ctx, "thread/start", startParams)
|
||||
if err != nil {
|
||||
return "", false, fmt.Errorf("codex thread/start failed: %w", err)
|
||||
}
|
||||
@@ -383,6 +399,39 @@ func (c *codexClient) startOrResumeThread(ctx context.Context, opts ExecOptions,
|
||||
return threadID, false, nil
|
||||
}
|
||||
|
||||
// applyCodexReasoningEffort injects the per-agent thinking_level into
// the params of a Codex app-server request, mutating params in place.
// thread/start, thread/resume and turn/start all go through this one
// helper so a protocol or key change is made in a single place.
//
// The request shape is inferred from the params keys:
//   - params carrying an `input` key (turn/start) receive the level as
//     the top-level `effort` field;
//   - any other params (thread/start, thread/resume) receive it as
//     `config.model_reasoning_effort`, reusing an existing config map
//     when there is one and creating it otherwise.
//
// A nil params map or an empty level is a no-op: no key is written, so
// the upstream defaults stay in charge and an empty effort value is
// never sent.
func applyCodexReasoningEffort(params map[string]any, level string) {
	if level == "" || params == nil {
		return
	}

	if _, hasInput := params["input"]; hasInput {
		params["effort"] = level
		return
	}

	// A missing, nil, or non-map `config` value is replaced by a fresh map.
	nested, ok := params["config"].(map[string]any)
	if !ok || nested == nil {
		nested = make(map[string]any)
	}
	nested["model_reasoning_effort"] = level
	params["config"] = nested
}
|
||||
|
||||
func resetTimer(timer *time.Timer, d time.Duration) {
|
||||
if !timer.Stop() {
|
||||
select {
|
||||
|
||||
@@ -29,6 +29,37 @@ type Model struct {
|
||||
Label string `json:"label"`
|
||||
Provider string `json:"provider,omitempty"`
|
||||
Default bool `json:"default,omitempty"`
|
||||
// Thinking advertises the runtime's reasoning/effort catalog for this
|
||||
// model. nil means the runtime/model has no thinking-level control
|
||||
// (or the daemon couldn't discover one); the UI hides its picker. The
|
||||
// catalog is per-model because Codex's `codex debug models` is itself
|
||||
// per-model and Claude's `--effort` superset has known per-model gaps
|
||||
// (`xhigh` is Opus-only, `max` is session-only). See MUL-2339.
|
||||
Thinking *ModelThinking `json:"thinking,omitempty"`
|
||||
}
|
||||
|
||||
// ThinkingLevel is a single entry of ModelThinking.SupportedLevels.
type ThinkingLevel struct {
	// Value is the literal token handed to the CLI (Claude
	// `--effort <value>` or Codex `model_reasoning_effort=<value>`).
	Value string `json:"value"`
	// Label is the display string.
	Label string `json:"label"`
	// Description is optional helper copy taken from the upstream
	// catalog when available; omitted from JSON when empty.
	Description string `json:"description,omitempty"`
}

// ModelThinking is the per-model reasoning/effort catalog surfaced by
// an agent runtime. Values are runtime-native: Codex emits
// "none|minimal|low|medium|high|xhigh", Claude emits
// "low|medium|high|xhigh|max". The frontend renders SupportedLevels
// as-is so what users see matches each CLI's own UI.
type ModelThinking struct {
	SupportedLevels []ThinkingLevel `json:"supported_levels"`
	// DefaultLevel is the value the runtime picks when no override is
	// given. Empty means the runtime decides and the default is not
	// known here; the UI then shows a generic "Default" option.
	DefaultLevel string `json:"default_level,omitempty"`
}
|
||||
|
||||
// modelCache memoizes dynamic discovery calls so repeated UI loads
|
||||
@@ -51,14 +82,24 @@ const modelCacheTTL = 60 * time.Second
|
||||
// openclaw) it shells out with caching and falls back to the static
|
||||
// list on failure.
|
||||
//
|
||||
// For claude and codex, the static catalog is augmented with per-model
|
||||
// thinking-level options discovered from the local CLI (see
|
||||
// discoverClaudeThinking / discoverCodexThinking). Discovery failures
|
||||
// silently leave Thinking == nil on each entry, which the UI treats
|
||||
// as "no picker for this model" rather than blocking model selection.
|
||||
//
|
||||
// executablePath lets the caller point at a non-default binary; pass
|
||||
// "" to use the provider's default name on PATH.
|
||||
func ListModels(ctx context.Context, providerType, executablePath string) ([]Model, error) {
|
||||
switch providerType {
|
||||
case "claude":
|
||||
return claudeStaticModels(), nil
|
||||
models := claudeStaticModels()
|
||||
annotateClaudeThinking(ctx, models, executablePath)
|
||||
return models, nil
|
||||
case "codex":
|
||||
return codexStaticModels(), nil
|
||||
models := codexStaticModels()
|
||||
annotateCodexThinking(ctx, models, executablePath)
|
||||
return models, nil
|
||||
case "gemini":
|
||||
return geminiStaticModels(), nil
|
||||
case "cursor":
|
||||
|
||||
467
server/pkg/agent/thinking.go
Normal file
467
server/pkg/agent/thinking.go
Normal file
@@ -0,0 +1,467 @@
|
||||
package agent
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"os/exec"
|
||||
"regexp"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
// thinking.go discovers per-model reasoning/effort catalogs for the
|
||||
// claude and codex backends so the daemon can advertise them to the
|
||||
// UI without hard-coding (and getting wrong) what's installed locally.
|
||||
//
|
||||
// MUL-2339: we deliberately do not flatten Claude's `low|medium|high|
|
||||
// xhigh|max` and Codex's `none|minimal|low|medium|high|xhigh` onto a
|
||||
// shared enum — what users pick must round-trip exactly through each
|
||||
// CLI's own value vocabulary.
|
||||
|
||||
// ── Cache ────────────────────────────────────────────────────────────
|
||||
//
|
||||
// Discovery is keyed on (provider, executablePath, cliVersion). Bumping
|
||||
// the local CLI invalidates entries that referenced the older version's
|
||||
// help/`debug models` output, which is exactly the failure mode we hit
|
||||
// when Anthropic / OpenAI add or remove a level (Elon's review note).
|
||||
|
||||
type thinkingCacheKey struct {
|
||||
provider string
|
||||
executablePath string
|
||||
cliVersion string
|
||||
}
|
||||
|
||||
type thinkingCacheEntry struct {
|
||||
value map[string]*ModelThinking // keyed by model ID
|
||||
expiresAt time.Time
|
||||
}
|
||||
|
||||
const thinkingDiscoveryTTL = 10 * time.Minute
|
||||
|
||||
var (
|
||||
thinkingCacheMu sync.Mutex
|
||||
thinkingCache = map[thinkingCacheKey]thinkingCacheEntry{}
|
||||
)
|
||||
|
||||
func thinkingCacheGet(key thinkingCacheKey) (map[string]*ModelThinking, bool) {
|
||||
thinkingCacheMu.Lock()
|
||||
defer thinkingCacheMu.Unlock()
|
||||
entry, ok := thinkingCache[key]
|
||||
if !ok || time.Now().After(entry.expiresAt) {
|
||||
return nil, false
|
||||
}
|
||||
return entry.value, true
|
||||
}
|
||||
|
||||
func thinkingCachePut(key thinkingCacheKey, value map[string]*ModelThinking) {
|
||||
thinkingCacheMu.Lock()
|
||||
defer thinkingCacheMu.Unlock()
|
||||
thinkingCache[key] = thinkingCacheEntry{value: value, expiresAt: time.Now().Add(thinkingDiscoveryTTL)}
|
||||
}
|
||||
|
||||
// resetThinkingCacheForTests is exposed for tests only; production code
|
||||
// must rely on the TTL or process restart for invalidation.
|
||||
func resetThinkingCacheForTests() {
|
||||
thinkingCacheMu.Lock()
|
||||
thinkingCache = map[thinkingCacheKey]thinkingCacheEntry{}
|
||||
thinkingCacheMu.Unlock()
|
||||
}
|
||||
|
||||
// ── Claude ───────────────────────────────────────────────────────────
|
||||
//
|
||||
// `claude --help` advertises `--effort <level>` with the full superset
|
||||
// in parentheses; we parse that line to learn which levels the CLI
|
||||
// version on this host accepts. Per-model gaps (Opus-only `xhigh`,
|
||||
// session-only `max`) come from a hand-maintained table because the
|
||||
// CLI does not expose model→effort mappings programmatically.
|
||||
|
||||
// claudeEffortRe matches the help line emitted by `claude --help`:
//
//	--effort <level>  Effort level for the current session (low, medium, high, xhigh, max)
//
// and captures the parenthesised, comma-separated value list as group 1.
//
// It is anchored on `--effort`. Between the flag and the list it allows,
// in order and all optional: any run of `=` and whitespace, a
// `<placeholder>`, more whitespace, and descriptive text starting with
// "Effort level". This covers both `--effort <level>` and
// `--effort=<level>`, as well as indented or re-wrapped help blocks.
var claudeEffortRe = regexp.MustCompile(`--effort[=\s]*(?:<[^>]+>)?\s*(?:Effort level[^(]*)?\(([^)]+)\)`)
|
||||
|
||||
// claudeEffortLabel maps Claude's raw level token to the display label
|
||||
// the UI should render. Title-case matches Anthropic's own slash UI.
|
||||
var claudeEffortLabel = map[string]string{
|
||||
"low": "Low",
|
||||
"medium": "Medium",
|
||||
"high": "High",
|
||||
"xhigh": "Extra high",
|
||||
"max": "Max",
|
||||
}
|
||||
|
||||
// claudeModelEffortAllow restricts the level set per model where the
|
||||
// upstream documentation says only some are valid. Empty / missing
|
||||
// model → use the parsed superset as-is (current Claude Code default).
|
||||
// Update this map when Anthropic publishes a new model that does not
|
||||
// support `xhigh` / `max`.
|
||||
var claudeModelEffortAllow = map[string]map[string]bool{
|
||||
// Opus is the only model that publicly supports xhigh; the help
|
||||
// list still includes it for Sonnet / Haiku so we filter here.
|
||||
"claude-opus-4-7": {"low": true, "medium": true, "high": true, "xhigh": true, "max": true},
|
||||
"claude-opus-4-6": {"low": true, "medium": true, "high": true, "xhigh": true, "max": true},
|
||||
"claude-sonnet-4-6": {"low": true, "medium": true, "high": true, "max": true},
|
||||
"claude-sonnet-4-5": {"low": true, "medium": true, "high": true, "max": true},
|
||||
"claude-haiku-4-5-20251001": {"low": true, "medium": true, "high": true},
|
||||
}
|
||||
|
||||
// claudeStaticEffortFallback is the conservative subset used when
|
||||
// parsing the `--effort` help line fails (binary missing, output drift,
|
||||
// etc.). Picked from the lowest-common-denominator across recent
|
||||
// Claude Code releases.
|
||||
var claudeStaticEffortFallback = []string{"low", "medium", "high"}
|
||||
|
||||
// claudeStaticEffortFullSuperset is what `claude --help` listed on
|
||||
// 2.1.121. Used as the catalog superset when a model isn't in the
|
||||
// per-model allow-list — we'd rather over-offer and let the CLI
|
||||
// reject than artificially block valid combinations.
|
||||
var claudeStaticEffortFullSuperset = []string{"low", "medium", "high", "xhigh", "max"}
|
||||
|
||||
// annotateClaudeThinking populates each entry's Thinking field by
|
||||
// running `claude --help` once and projecting the parsed superset
|
||||
// through claudeModelEffortAllow. Errors are silently absorbed so a
|
||||
// missing CLI doesn't break model listing — the UI just hides the
|
||||
// picker for that model.
|
||||
func annotateClaudeThinking(ctx context.Context, models []Model, executablePath string) {
|
||||
mapping := loadClaudeThinkingByModel(ctx, executablePath)
|
||||
for i := range models {
|
||||
if t, ok := mapping[models[i].ID]; ok && t != nil {
|
||||
models[i].Thinking = t
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func loadClaudeThinkingByModel(ctx context.Context, executablePath string) map[string]*ModelThinking {
|
||||
if executablePath == "" {
|
||||
executablePath = "claude"
|
||||
}
|
||||
version, _ := DetectVersion(ctx, executablePath)
|
||||
key := thinkingCacheKey{provider: "claude", executablePath: executablePath, cliVersion: version}
|
||||
if cached, ok := thinkingCacheGet(key); ok {
|
||||
return cached
|
||||
}
|
||||
|
||||
superset := claudeEffortSuperset(ctx, executablePath)
|
||||
result := map[string]*ModelThinking{}
|
||||
for _, m := range claudeStaticModels() {
|
||||
allow := claudeModelEffortAllow[m.ID]
|
||||
levels := projectClaudeLevels(superset, allow)
|
||||
if len(levels) == 0 {
|
||||
continue
|
||||
}
|
||||
result[m.ID] = &ModelThinking{
|
||||
SupportedLevels: levels,
|
||||
DefaultLevel: "medium",
|
||||
}
|
||||
}
|
||||
thinkingCachePut(key, result)
|
||||
return result
|
||||
}
|
||||
|
||||
// claudeEffortSuperset returns the parsed `--effort` value list. When
|
||||
// parsing fails it returns the static fallback rather than nothing so
|
||||
// callers can still render a usable picker.
|
||||
func claudeEffortSuperset(ctx context.Context, executablePath string) []string {
|
||||
cmd := exec.CommandContext(ctx, executablePath, "--help")
|
||||
hideAgentWindow(cmd)
|
||||
out, err := cmd.CombinedOutput()
|
||||
if err != nil {
|
||||
return append([]string(nil), claudeStaticEffortFallback...)
|
||||
}
|
||||
parsed := parseClaudeEffortHelp(string(out))
|
||||
if len(parsed) == 0 {
|
||||
// Help format drifted — fall back to the last known good
|
||||
// superset rather than the conservative subset, so newer
|
||||
// levels are still offered until we hand-edit the fallback.
|
||||
return append([]string(nil), claudeStaticEffortFullSuperset...)
|
||||
}
|
||||
return parsed
|
||||
}
|
||||
|
||||
// parseClaudeEffortHelp extracts the comma-separated value list from a
|
||||
// `--effort` help line. Returns nil if the line is missing or the
|
||||
// captured group is empty so callers can pick a fallback path.
|
||||
func parseClaudeEffortHelp(helpText string) []string {
|
||||
match := claudeEffortRe.FindStringSubmatch(helpText)
|
||||
if len(match) < 2 {
|
||||
return nil
|
||||
}
|
||||
var out []string
|
||||
for _, raw := range strings.Split(match[1], ",") {
|
||||
token := strings.TrimSpace(raw)
|
||||
if token == "" {
|
||||
continue
|
||||
}
|
||||
out = append(out, token)
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func projectClaudeLevels(superset []string, allow map[string]bool) []ThinkingLevel {
|
||||
out := make([]ThinkingLevel, 0, len(superset))
|
||||
for _, value := range superset {
|
||||
if allow != nil && !allow[value] {
|
||||
continue
|
||||
}
|
||||
label, ok := claudeEffortLabel[value]
|
||||
if !ok {
|
||||
// New value the daemon hasn't been taught yet — surface
|
||||
// it raw so power users can still pick it.
|
||||
label = strings.Title(value) //nolint:staticcheck
|
||||
}
|
||||
out = append(out, ThinkingLevel{Value: value, Label: label})
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// ── Codex ────────────────────────────────────────────────────────────
|
||||
//
|
||||
// `codex debug models` is the structured discovery hook Elon's review
|
||||
// flagged. It returns the per-model reasoning catalog directly,
|
||||
// including the model's documented default. We prefer this over the
|
||||
// older config-error probe trick because:
|
||||
// 1. It gives us per-model subsets without hand-maintained tables.
|
||||
// 2. The schema is stable across CLI versions (Codex 0.131.0+).
|
||||
// 3. It doesn't pollute stderr with an intentional misconfiguration.
|
||||
//
|
||||
// The subcommand emits JSON on stdout by default — there is no
|
||||
// `--output json` flag (a prior version of this code passed one and
|
||||
// silently failed on 0.131.0). We add `--bundled` to skip the network
|
||||
// refresh: discovery runs on every daemon poll and a network hop here
|
||||
// would block the picker behind whatever the user's connection allows.
|
||||
// The bundled catalog is what determines which `model_reasoning_effort`
|
||||
// tokens the local binary actually accepts, which is the only thing we
|
||||
// need for validation.
|
||||
//
|
||||
// On older Codex versions / failures, the picker just disappears for
|
||||
// that model rather than offering a wrong list.
|
||||
|
||||
// codexEffortLabel maps each known Codex effort token to the label shown
// in the picker. The strings follow Codex's own TUI wording (`Extra high`,
// `Minimal`, …) so the picker and `codex /model` read the same.
var codexEffortLabel = map[string]string{
	"xhigh":   "Extra high",
	"high":    "High",
	"medium":  "Medium",
	"low":     "Low",
	"minimal": "Minimal",
	"none":    "None",
}
|
||||
|
||||
// codexDebugModelsResponse is the decode target for the JSON printed by
// `codex debug models` (Codex 0.131.0+). Only the consumed fields are
// declared; any other keys in the payload are ignored by the decoder.
type codexDebugModelsResponse struct {
	// Models holds one entry per model in the catalog.
	Models []struct {
		// Slug is the model identifier.
		Slug string `json:"slug"`
		// DefaultReasoningLevel is the effort token the catalog reports
		// as this model's default.
		DefaultReasoningLevel string `json:"default_reasoning_level"`
		// SupportedReasoningLevel lists the effort tokens reported for
		// this model, each with its description.
		SupportedReasoningLevel []struct {
			Effort      string `json:"effort"`
			Description string `json:"description"`
		} `json:"supported_reasoning_levels"`
	} `json:"models"`
}
|
||||
|
||||
// annotateCodexThinking decorates each model entry with its reasoning
|
||||
// catalog. Models the CLI doesn't know about (older codex install,
|
||||
// brand-new ID we haven't shipped) get Thinking=nil — the UI hides
|
||||
// the picker for those rows rather than guessing.
|
||||
func annotateCodexThinking(ctx context.Context, models []Model, executablePath string) {
|
||||
mapping := loadCodexThinkingByModel(ctx, executablePath)
|
||||
for i := range models {
|
||||
if t, ok := mapping[models[i].ID]; ok && t != nil {
|
||||
models[i].Thinking = t
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func loadCodexThinkingByModel(ctx context.Context, executablePath string) map[string]*ModelThinking {
|
||||
if executablePath == "" {
|
||||
executablePath = "codex"
|
||||
}
|
||||
version, _ := DetectVersion(ctx, executablePath)
|
||||
key := thinkingCacheKey{provider: "codex", executablePath: executablePath, cliVersion: version}
|
||||
if cached, ok := thinkingCacheGet(key); ok {
|
||||
return cached
|
||||
}
|
||||
|
||||
raw, err := runCodexDebugModels(ctx, executablePath)
|
||||
if err != nil {
|
||||
// Cache the empty result so repeated UI polls don't re-shell
|
||||
// the missing binary; TTL eventually retries.
|
||||
thinkingCachePut(key, map[string]*ModelThinking{})
|
||||
return map[string]*ModelThinking{}
|
||||
}
|
||||
parsed := parseCodexDebugModels(raw)
|
||||
thinkingCachePut(key, parsed)
|
||||
return parsed
|
||||
}
|
||||
|
||||
// codexDebugModelsArgs holds the arguments passed to the Codex binary for
// catalog discovery. It lives at package level, rather than inline at the
// call site, so tests can pin the exact argv a real `codex` process
// receives; that pinning test is in thinking_test.go.
var codexDebugModelsArgs = []string{
	"debug",
	"models",
	"--bundled",
}
|
||||
|
||||
func runCodexDebugModels(ctx context.Context, executablePath string) ([]byte, error) {
|
||||
cmd := exec.CommandContext(ctx, executablePath, codexDebugModelsArgs...)
|
||||
hideAgentWindow(cmd)
|
||||
return cmd.Output()
|
||||
}
|
||||
|
||||
// parseCodexDebugModels takes the JSON payload from `codex debug
|
||||
// models` and projects it into a per-model thinking catalog.
|
||||
// Returns an empty map (never nil) so callers can compose safely
|
||||
// without nil-checking the result.
|
||||
func parseCodexDebugModels(raw []byte) map[string]*ModelThinking {
|
||||
out := map[string]*ModelThinking{}
|
||||
var resp codexDebugModelsResponse
|
||||
if err := json.Unmarshal(raw, &resp); err != nil {
|
||||
return out
|
||||
}
|
||||
for _, m := range resp.Models {
|
||||
if m.Slug == "" || len(m.SupportedReasoningLevel) == 0 {
|
||||
continue
|
||||
}
|
||||
levels := make([]ThinkingLevel, 0, len(m.SupportedReasoningLevel))
|
||||
for _, lvl := range m.SupportedReasoningLevel {
|
||||
if lvl.Effort == "" {
|
||||
continue
|
||||
}
|
||||
label, ok := codexEffortLabel[lvl.Effort]
|
||||
if !ok {
|
||||
label = strings.Title(lvl.Effort) //nolint:staticcheck
|
||||
}
|
||||
levels = append(levels, ThinkingLevel{
|
||||
Value: lvl.Effort,
|
||||
Label: label,
|
||||
Description: lvl.Description,
|
||||
})
|
||||
}
|
||||
if len(levels) == 0 {
|
||||
continue
|
||||
}
|
||||
out[m.Slug] = &ModelThinking{
|
||||
SupportedLevels: levels,
|
||||
DefaultLevel: m.DefaultReasoningLevel,
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// ── Shared validation ────────────────────────────────────────────────
|
||||
|
||||
// ValidateThinkingLevel reports whether `value` is in the supported
|
||||
// catalog for the given (provider, model) pair. Empty value is always
|
||||
// valid — it means "use the runtime default".
|
||||
//
|
||||
// Empty model is treated as "use the provider's default model"; we
|
||||
// resolve it through ListModels so the daemon's pre-execution guard
|
||||
// behaves the same whether the agent picked an explicit model or
|
||||
// inherited the runtime default. Without this, a default-model task
|
||||
// with a valid thinking_level would be rejected on the grounds that
|
||||
// the empty string is not in the catalog — exactly the misjudgement
|
||||
// Elon flagged in the PR1 review.
|
||||
//
|
||||
// The lookup goes through ListModels so it sees the *current* CLI
|
||||
// catalog (including dynamic discovery for codex), not just a static
|
||||
// map. The function is intentionally pure of HTTP concerns so the
|
||||
// daemon's pre-execution guard and the server's UpdateAgent gate can
|
||||
// share the same source of truth.
|
||||
func ValidateThinkingLevel(ctx context.Context, providerType, executablePath, model, value string) (bool, error) {
|
||||
if value == "" {
|
||||
return true, nil
|
||||
}
|
||||
models, err := ListModels(ctx, providerType, executablePath)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
target := model
|
||||
if target == "" {
|
||||
// Default model = the entry the catalog marks as Default. If no
|
||||
// entry is flagged, fall through to the no-match return; that
|
||||
// matches the existing semantics where an unknown model fails
|
||||
// closed rather than guessing.
|
||||
for _, m := range models {
|
||||
if m.Default {
|
||||
target = m.ID
|
||||
break
|
||||
}
|
||||
}
|
||||
if target == "" {
|
||||
return false, nil
|
||||
}
|
||||
}
|
||||
for _, m := range models {
|
||||
if m.ID != target {
|
||||
continue
|
||||
}
|
||||
if m.Thinking == nil {
|
||||
return false, nil
|
||||
}
|
||||
for _, lvl := range m.Thinking.SupportedLevels {
|
||||
if lvl.Value == value {
|
||||
return true, nil
|
||||
}
|
||||
}
|
||||
return false, nil
|
||||
}
|
||||
return false, nil
|
||||
}
|
||||
|
||||
// providerThinkingEnums is the per-provider accept-list of reasoning-effort
// tokens used by the server-side gate. The server has no local CLI
// binaries, so it cannot discover per-model subsets; it can only reject
// tokens that are not part of the provider's vocabulary at all.
//
// Per-model mismatches (e.g. `xhigh` on a model that tops out at `high`)
// are deliberately not caught here: they surface as a daemon-side task
// failure, which keeps the API behaviour uniform (400 only for tokens that
// are invalid outright). See the MUL-2339 review notes.
//
// Keep the lists permissive. When a provider adds a level upstream, add it
// here too so users can persist it before the next discovery refresh.
var providerThinkingEnums = map[string]map[string]bool{
	"codex": {
		"none":    true,
		"minimal": true,
		"low":     true,
		"medium":  true,
		"high":    true,
		"xhigh":   true,
	},
	"claude": {
		"low":    true,
		"medium": true,
		"high":   true,
		"xhigh":  true,
		"max":    true,
	},
}
|
||||
|
||||
// IsKnownThinkingValue reports whether `value` is a recognised effort
|
||||
// token for the given provider. Empty string is always accepted (means
|
||||
// "use runtime default"). Unknown providers (no thinking concept) accept
|
||||
// only empty.
|
||||
//
|
||||
// This is the cheap synchronous gate the server uses on CreateAgent /
|
||||
// UpdateAgent. Unlike ValidateThinkingLevel it does NOT consult the live
|
||||
// catalog or per-model subset.
|
||||
func IsKnownThinkingValue(providerType, value string) bool {
|
||||
if value == "" {
|
||||
return true
|
||||
}
|
||||
enum, ok := providerThinkingEnums[providerType]
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
return enum[value]
|
||||
}
|
||||
650
server/pkg/agent/thinking_test.go
Normal file
650
server/pkg/agent/thinking_test.go
Normal file
@@ -0,0 +1,650 @@
|
||||
package agent
|
||||
|
||||
import (
|
||||
"context"
|
||||
"log/slog"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"reflect"
|
||||
"runtime"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// ── Claude help parsing ──────────────────────────────────────────────
|
||||
|
||||
func TestParseClaudeEffortHelp_OldFormat(t *testing.T) {
|
||||
t.Parallel()
|
||||
// claude 2.1.109 — the older help omits xhigh.
|
||||
help := `Usage: claude [options]
|
||||
|
||||
Options:
|
||||
--model <model> Model to use
|
||||
--effort <level> Effort level for the current session (low, medium, high, max)
|
||||
--verbose
|
||||
`
|
||||
got := parseClaudeEffortHelp(help)
|
||||
want := []string{"low", "medium", "high", "max"}
|
||||
if !reflect.DeepEqual(got, want) {
|
||||
t.Fatalf("parseClaudeEffortHelp: got %v, want %v", got, want)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseClaudeEffortHelp_NewFormat(t *testing.T) {
|
||||
t.Parallel()
|
||||
// claude 2.1.121 — the newer help adds xhigh.
|
||||
help := `Usage: claude [options]
|
||||
|
||||
Options:
|
||||
--effort <level> Effort level for the current session (low, medium, high, xhigh, max)
|
||||
`
|
||||
got := parseClaudeEffortHelp(help)
|
||||
want := []string{"low", "medium", "high", "xhigh", "max"}
|
||||
if !reflect.DeepEqual(got, want) {
|
||||
t.Fatalf("parseClaudeEffortHelp: got %v, want %v", got, want)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseClaudeEffortHelp_Missing(t *testing.T) {
|
||||
t.Parallel()
|
||||
help := `Usage: claude [options]
|
||||
|
||||
Options:
|
||||
--model <model> Model to use
|
||||
--verbose
|
||||
`
|
||||
got := parseClaudeEffortHelp(help)
|
||||
if got != nil {
|
||||
t.Fatalf("parseClaudeEffortHelp: expected nil, got %v", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestProjectClaudeLevels_PerModelSubset(t *testing.T) {
|
||||
t.Parallel()
|
||||
superset := []string{"low", "medium", "high", "xhigh", "max"}
|
||||
// Sonnet should drop xhigh per claudeModelEffortAllow.
|
||||
got := projectClaudeLevels(superset, claudeModelEffortAllow["claude-sonnet-4-6"])
|
||||
values := make([]string, 0, len(got))
|
||||
for _, lvl := range got {
|
||||
values = append(values, lvl.Value)
|
||||
}
|
||||
want := []string{"low", "medium", "high", "max"}
|
||||
if !reflect.DeepEqual(values, want) {
|
||||
t.Fatalf("projectClaudeLevels: got %v, want %v", values, want)
|
||||
}
|
||||
// Opus keeps xhigh.
|
||||
got = projectClaudeLevels(superset, claudeModelEffortAllow["claude-opus-4-7"])
|
||||
values = values[:0]
|
||||
for _, lvl := range got {
|
||||
values = append(values, lvl.Value)
|
||||
}
|
||||
if !reflect.DeepEqual(values, superset) {
|
||||
t.Fatalf("projectClaudeLevels for Opus: got %v, want %v", values, superset)
|
||||
}
|
||||
}
|
||||
|
||||
// ── Codex discovery argv ────────────────────────────────────────────
|
||||
//
|
||||
// Elon's PR1 review found that `codex debug models --output json` is
|
||||
// rejected by codex-cli 0.131.0 — there is no `--output` flag on the
|
||||
// subcommand. The fix was to drop the flag and add `--bundled` (which
|
||||
// just skips network refresh). These two tests pin the contract:
|
||||
//
|
||||
// - TestCodexDebugModelsArgs_Pinned asserts the literal argv we pass
|
||||
// so a future "let's add a flag" refactor breaks loudly instead of
|
||||
// silently swallowing the discovery output.
|
||||
// - TestRunCodexDebugModels_ArgvSeenByBinary plugs a fake `codex`
|
||||
// binary on PATH and verifies that what *actually* reaches the
|
||||
// process matches the pinned argv, not just what the var holds.
|
||||
|
||||
func TestCodexDebugModelsArgs_Pinned(t *testing.T) {
|
||||
t.Parallel()
|
||||
want := []string{"debug", "models", "--bundled"}
|
||||
if !reflect.DeepEqual(codexDebugModelsArgs, want) {
|
||||
t.Fatalf("codexDebugModelsArgs drifted: got %v, want %v", codexDebugModelsArgs, want)
|
||||
}
|
||||
for _, arg := range codexDebugModelsArgs {
|
||||
if arg == "--output" || arg == "-o" {
|
||||
t.Errorf("--output / -o leaked back into argv (codex CLI does not accept it): %v", codexDebugModelsArgs)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestRunCodexDebugModels_ArgvSeenByBinary executes runCodexDebugModels
|
||||
// against a shell-script stand-in for `codex` that records its argv to
|
||||
// a file and prints a minimal valid JSON payload. The check is on what
|
||||
// the binary actually received (one argument per element, no merging
|
||||
// or splitting), not just the package var — the original bug surfaced
|
||||
// because a real codex saw `--output json` as two extra unknown args.
|
||||
func TestRunCodexDebugModels_ArgvSeenByBinary(t *testing.T) {
|
||||
if runtime.GOOS == "windows" {
|
||||
t.Skip("shell-script fake binary requires a POSIX shell")
|
||||
}
|
||||
t.Parallel()
|
||||
|
||||
dir := t.TempDir()
|
||||
argvFile := filepath.Join(dir, "argv.txt")
|
||||
fake := filepath.Join(dir, "codex")
|
||||
script := "#!/bin/sh\n" +
|
||||
"printf '%s\\n' \"$@\" > '" + argvFile + "'\n" +
|
||||
"echo '{\"models\":[]}'\n"
|
||||
if err := os.WriteFile(fake, []byte(script), 0o755); err != nil {
|
||||
t.Fatalf("write fake codex: %v", err)
|
||||
}
|
||||
|
||||
raw, err := runCodexDebugModels(context.Background(), fake)
|
||||
if err != nil {
|
||||
t.Fatalf("runCodexDebugModels: %v (output=%q)", err, raw)
|
||||
}
|
||||
|
||||
data, err := os.ReadFile(argvFile)
|
||||
if err != nil {
|
||||
t.Fatalf("read argv file: %v", err)
|
||||
}
|
||||
got := splitNonEmptyLines(string(data))
|
||||
want := []string{"debug", "models", "--bundled"}
|
||||
if !reflect.DeepEqual(got, want) {
|
||||
t.Fatalf("fake codex received argv %v, want %v", got, want)
|
||||
}
|
||||
}
|
||||
|
||||
// splitNonEmptyLines splits s on '\n' and returns the non-empty segments
// in order. A trailing segment without a final newline is included. The
// result is nil when s contains no non-empty segment.
func splitNonEmptyLines(s string) []string {
	var lines []string
	lineStart := 0
	// Walk one position past the end so the final, unterminated segment is
	// handled by the same code path as newline-terminated ones.
	for pos := 0; pos <= len(s); pos++ {
		if pos < len(s) && s[pos] != '\n' {
			continue
		}
		if pos > lineStart {
			lines = append(lines, s[lineStart:pos])
		}
		lineStart = pos + 1
	}
	return lines
}
|
||||
|
||||
// ── Codex debug models JSON parsing ──────────────────────────────────
|
||||
|
||||
func TestParseCodexDebugModels(t *testing.T) {
|
||||
t.Parallel()
|
||||
raw := []byte(`{
|
||||
"models": [
|
||||
{
|
||||
"slug": "gpt-5.5",
|
||||
"default_reasoning_level": "medium",
|
||||
"supported_reasoning_levels": [
|
||||
{"effort": "low", "description": "Fast"},
|
||||
{"effort": "medium", "description": "Balanced"},
|
||||
{"effort": "high", "description": "Deeper"},
|
||||
{"effort": "xhigh", "description": "Maximum"}
|
||||
]
|
||||
},
|
||||
{
|
||||
"slug": "gpt-5",
|
||||
"default_reasoning_level": "low",
|
||||
"supported_reasoning_levels": [
|
||||
{"effort": "minimal", "description": "Quick"},
|
||||
{"effort": "low", "description": "Fast"}
|
||||
]
|
||||
},
|
||||
{
|
||||
"slug": "no-reasoning",
|
||||
"supported_reasoning_levels": []
|
||||
}
|
||||
]
|
||||
}`)
|
||||
got := parseCodexDebugModels(raw)
|
||||
|
||||
gpt55, ok := got["gpt-5.5"]
|
||||
if !ok || gpt55 == nil {
|
||||
t.Fatalf("missing gpt-5.5 entry: %+v", got)
|
||||
}
|
||||
if gpt55.DefaultLevel != "medium" {
|
||||
t.Errorf("gpt-5.5 default: got %q, want medium", gpt55.DefaultLevel)
|
||||
}
|
||||
if len(gpt55.SupportedLevels) != 4 {
|
||||
t.Errorf("gpt-5.5 supported count: got %d, want 4", len(gpt55.SupportedLevels))
|
||||
}
|
||||
// Labels should come from codexEffortLabel mapping, not from raw effort.
|
||||
for _, lvl := range gpt55.SupportedLevels {
|
||||
if lvl.Value == "xhigh" && lvl.Label != "Extra high" {
|
||||
t.Errorf("xhigh label: got %q, want Extra high", lvl.Label)
|
||||
}
|
||||
}
|
||||
|
||||
gpt5, ok := got["gpt-5"]
|
||||
if !ok || gpt5 == nil {
|
||||
t.Fatalf("missing gpt-5 entry: %+v", got)
|
||||
}
|
||||
if gpt5.DefaultLevel != "low" {
|
||||
t.Errorf("gpt-5 default: got %q, want low", gpt5.DefaultLevel)
|
||||
}
|
||||
|
||||
// Models with empty supported_reasoning_levels should be omitted to
|
||||
// keep the wire payload small and avoid rendering empty pickers.
|
||||
if _, ok := got["no-reasoning"]; ok {
|
||||
t.Errorf("no-reasoning should be omitted, got %+v", got["no-reasoning"])
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseCodexDebugModels_Malformed(t *testing.T) {
|
||||
t.Parallel()
|
||||
got := parseCodexDebugModels([]byte("not json"))
|
||||
if len(got) != 0 {
|
||||
t.Fatalf("expected empty map on malformed input, got %+v", got)
|
||||
}
|
||||
}
|
||||
|
||||
// ── IsKnownThinkingValue (server-side enum gate) ─────────────────────
|
||||
|
||||
func TestIsKnownThinkingValue(t *testing.T) {
|
||||
t.Parallel()
|
||||
tests := []struct {
|
||||
provider string
|
||||
value string
|
||||
want bool
|
||||
}{
|
||||
{"claude", "", true},
|
||||
{"claude", "low", true},
|
||||
{"claude", "xhigh", true},
|
||||
{"claude", "max", true},
|
||||
{"claude", "none", false}, // Codex-only token rejected for Claude
|
||||
{"codex", "", true},
|
||||
{"codex", "none", true},
|
||||
{"codex", "minimal", true},
|
||||
{"codex", "xhigh", true},
|
||||
{"codex", "max", false}, // Claude-only token rejected for Codex
|
||||
{"hermes", "", true},
|
||||
{"hermes", "low", false}, // hermes has no thinking concept
|
||||
}
|
||||
for _, tc := range tests {
|
||||
if got := IsKnownThinkingValue(tc.provider, tc.value); got != tc.want {
|
||||
t.Errorf("IsKnownThinkingValue(%q, %q) = %v, want %v",
|
||||
tc.provider, tc.value, got, tc.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ── ValidateThinkingLevel default-model handling ─────────────────────
|
||||
//
|
||||
// Elon's PR1 review called out that an empty model on a default-model
|
||||
// task must not be misjudged as "unknown model → reject". The fix is to
|
||||
// resolve empty model to the catalog's default entry inside the
|
||||
// validator. Both the daemon's per-model guard and the server's API
|
||||
// layer call this; if it gets default-model wrong, any agent without an
|
||||
// explicit model set would have its thinking_level dropped silently.
|
||||
|
||||
func TestValidateThinkingLevel_EmptyModelResolvesToDefault(t *testing.T) {
|
||||
if runtime.GOOS == "windows" {
|
||||
t.Skip("shell-script fake binary requires a POSIX shell")
|
||||
}
|
||||
t.Parallel()
|
||||
|
||||
// We need a `claude` whose --help advertises the full superset
|
||||
// (low/medium/high/xhigh/max) so per-model projection actually has
|
||||
// something to filter. A non-existent path falls back to a conservative
|
||||
// [low,medium,high] which would hide the per-model behaviour we're
|
||||
// trying to verify.
|
||||
fakeClaude := writeFakeClaudeHelpBinary(t)
|
||||
resetThinkingCacheForTests()
|
||||
defer resetThinkingCacheForTests()
|
||||
|
||||
ctx := context.Background()
|
||||
|
||||
t.Run("valid level on default model passes", func(t *testing.T) {
|
||||
// Claude's catalog flags Sonnet 4.6 as Default. Sonnet supports
|
||||
// low/medium/high/max (no xhigh) per claudeModelEffortAllow, so
|
||||
// "high" must round-trip when model is left empty.
|
||||
ok, err := ValidateThinkingLevel(ctx, "claude", fakeClaude, "", "high")
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected err: %v", err)
|
||||
}
|
||||
if !ok {
|
||||
t.Errorf("default-model high should be valid for claude; got false")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("invalid level on default model fails", func(t *testing.T) {
|
||||
// "xhigh" is opus-only; resolving "" to default (sonnet 4.6)
|
||||
// should reject it, not silently accept.
|
||||
ok, err := ValidateThinkingLevel(ctx, "claude", fakeClaude, "", "xhigh")
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected err: %v", err)
|
||||
}
|
||||
if ok {
|
||||
t.Errorf("xhigh should be invalid on sonnet (the default model); got true")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("empty value always valid", func(t *testing.T) {
|
||||
// Empty value means "use runtime default" — should pass
|
||||
// regardless of model resolution.
|
||||
ok, err := ValidateThinkingLevel(ctx, "claude", fakeClaude, "", "")
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected err: %v", err)
|
||||
}
|
||||
if !ok {
|
||||
t.Errorf("empty value must always be valid")
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func TestValidateThinkingLevel_ExplicitModel(t *testing.T) {
|
||||
if runtime.GOOS == "windows" {
|
||||
t.Skip("shell-script fake binary requires a POSIX shell")
|
||||
}
|
||||
t.Parallel()
|
||||
fakeClaude := writeFakeClaudeHelpBinary(t)
|
||||
resetThinkingCacheForTests()
|
||||
defer resetThinkingCacheForTests()
|
||||
|
||||
ctx := context.Background()
|
||||
|
||||
// xhigh IS valid on Opus 4.7.
|
||||
ok, err := ValidateThinkingLevel(ctx, "claude", fakeClaude, "claude-opus-4-7", "xhigh")
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected err: %v", err)
|
||||
}
|
||||
if !ok {
|
||||
t.Errorf("xhigh should be valid on opus-4-7; got false")
|
||||
}
|
||||
|
||||
// xhigh is NOT valid on Sonnet — should fail.
|
||||
ok, err = ValidateThinkingLevel(ctx, "claude", fakeClaude, "claude-sonnet-4-6", "xhigh")
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected err: %v", err)
|
||||
}
|
||||
if ok {
|
||||
t.Errorf("xhigh must not be valid on sonnet-4-6; got true")
|
||||
}
|
||||
|
||||
// An unknown model with a valid token still fails closed (no guess).
|
||||
ok, err = ValidateThinkingLevel(ctx, "claude", fakeClaude, "claude-nonexistent", "high")
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected err: %v", err)
|
||||
}
|
||||
if ok {
|
||||
t.Errorf("unknown model must fail closed; got true")
|
||||
}
|
||||
}
|
||||
|
||||
// writeFakeClaudeHelpBinary creates an executable shell script named
// "claude" inside a fresh t.TempDir(). Whatever arguments it is given, the
// script prints a short help text whose --effort line lists the full
// superset (low, medium, high, xhigh, max), so per-model projection has
// something to filter. It returns the script's path and fails the test if
// the file cannot be written.
func writeFakeClaudeHelpBinary(t *testing.T) string {
	t.Helper()

	const script = `#!/bin/sh
cat <<'EOF'
Usage: claude [options]

Options:
 --model <model> Model to use
 --effort <level> Effort level for the current session (low, medium, high, xhigh, max)
EOF
`
	target := filepath.Join(t.TempDir(), "claude")
	if writeErr := os.WriteFile(target, []byte(script), 0o755); writeErr != nil {
		t.Fatalf("write fake claude: %v", writeErr)
	}
	return target
}
|
||||
|
||||
// ── Cache key invalidation ───────────────────────────────────────────
|
||||
|
||||
func TestThinkingCacheKeyDistinct(t *testing.T) {
|
||||
t.Parallel()
|
||||
resetThinkingCacheForTests()
|
||||
defer resetThinkingCacheForTests()
|
||||
|
||||
a := thinkingCacheKey{provider: "claude", executablePath: "/bin/claude", cliVersion: "2.1.121"}
|
||||
b := thinkingCacheKey{provider: "claude", executablePath: "/bin/claude", cliVersion: "2.1.122"}
|
||||
c := thinkingCacheKey{provider: "claude", executablePath: "/opt/claude", cliVersion: "2.1.121"}
|
||||
|
||||
thinkingCachePut(a, map[string]*ModelThinking{"x": {DefaultLevel: "a"}})
|
||||
thinkingCachePut(b, map[string]*ModelThinking{"x": {DefaultLevel: "b"}})
|
||||
thinkingCachePut(c, map[string]*ModelThinking{"x": {DefaultLevel: "c"}})
|
||||
|
||||
if got, _ := thinkingCacheGet(a); got["x"].DefaultLevel != "a" {
|
||||
t.Errorf("cache key A: got %q, want a", got["x"].DefaultLevel)
|
||||
}
|
||||
if got, _ := thinkingCacheGet(b); got["x"].DefaultLevel != "b" {
|
||||
t.Errorf("cache key B: got %q, want b", got["x"].DefaultLevel)
|
||||
}
|
||||
if got, _ := thinkingCacheGet(c); got["x"].DefaultLevel != "c" {
|
||||
t.Errorf("cache key C: got %q, want c", got["x"].DefaultLevel)
|
||||
}
|
||||
}
|
||||
|
||||
// ── Shared injection fixture (Trump's MUL-2339 constraint) ───────────
|
||||
//
|
||||
// The three Codex injection points (thread/start.config,
|
||||
// thread/resume.config, turn/start.effort) must encode the same
|
||||
// thinking_level value, in the same shape per call type, with no
|
||||
// drift. This fixture defines the expected payload once and asserts
|
||||
// it across all three sites so a future refactor of any one site
|
||||
// breaks the test if the other two aren't kept in sync.
|
||||
|
||||
// codexReasoningCase is one scenario for the Codex injection tests: a
// subtest name and the thinking level handed to the injection helper.
type codexReasoningCase struct {
	name  string
	level string
}

// codexReasoningCases is the shared scenario table run against every
// Codex injection point. It includes the empty level (expected to be a
// no-op) and the Codex-only "none" token.
var codexReasoningCases = []codexReasoningCase{
	{name: "empty-level-is-noop", level: ""},
	{name: "low", level: "low"},
	{name: "medium", level: "medium"},
	{name: "high", level: "high"},
	{name: "xhigh", level: "xhigh"},
	{name: "none-codex-only", level: "none"},
}
|
||||
|
||||
func TestApplyCodexReasoningEffort_ThreePoints(t *testing.T) {
|
||||
t.Parallel()
|
||||
for _, tc := range codexReasoningCases {
|
||||
tc := tc
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
// 1. thread/start params shape.
|
||||
startParams := map[string]any{
|
||||
"model": "gpt-5.5",
|
||||
"cwd": "/work",
|
||||
}
|
||||
applyCodexReasoningEffort(startParams, tc.level)
|
||||
assertCodexThreadConfigEffort(t, "thread/start", startParams, tc.level)
|
||||
|
||||
// 2. thread/resume params shape.
|
||||
resumeParams := map[string]any{
|
||||
"threadId": "thr_prior",
|
||||
"cwd": "/work",
|
||||
"model": "gpt-5.5",
|
||||
}
|
||||
applyCodexReasoningEffort(resumeParams, tc.level)
|
||||
assertCodexThreadConfigEffort(t, "thread/resume", resumeParams, tc.level)
|
||||
|
||||
// 3. turn/start params shape.
|
||||
turnParams := map[string]any{
|
||||
"threadId": "thr_x",
|
||||
"input": []map[string]any{{"type": "text", "text": "hi"}},
|
||||
}
|
||||
applyCodexReasoningEffort(turnParams, tc.level)
|
||||
assertCodexTurnEffort(t, "turn/start", turnParams, tc.level)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// assertCodexThreadConfigEffort checks the nested
// `config.model_reasoning_effort` encoding expected on thread/start and
// thread/resume params.
//
// With an empty want, the key must be absent; a config block carrying
// other keys is tolerated. With a non-empty want, config must exist, be a
// map[string]any, and carry exactly want under the key, and the top-level
// `effort` key (the turn/start encoding) must not be present. method is
// used only to prefix failure messages.
func assertCodexThreadConfigEffort(t *testing.T, method string, params map[string]any, want string) {
	t.Helper()

	rawConfig, present := params["config"]

	if want == "" {
		if present {
			// A failed assertion leaves config nil; lookups on a nil map
			// are safe and report the key as absent.
			config, _ := rawConfig.(map[string]any)
			if effort, emitted := config["model_reasoning_effort"]; emitted {
				t.Errorf("%s: empty level must not emit model_reasoning_effort, got %v", method, effort)
			}
		}
		return
	}

	if !present {
		t.Fatalf("%s: expected config block when level=%q", method, want)
	}
	config, isMap := rawConfig.(map[string]any)
	if !isMap {
		t.Fatalf("%s: config has wrong type %T", method, rawConfig)
	}
	effort, emitted := config["model_reasoning_effort"]
	if !emitted {
		t.Fatalf("%s: missing config.model_reasoning_effort for level=%q (params=%+v)", method, want, params)
	}
	if effort != want {
		t.Errorf("%s: config.model_reasoning_effort = %v, want %q", method, effort, want)
	}
	// `effort` (turn/start key) must NOT leak into a thread call.
	if _, leaked := params["effort"]; leaked {
		t.Errorf("%s: top-level effort key leaked into thread params: %+v", method, params)
	}
}
|
||||
|
||||
// assertCodexTurnEffort checks the top-level `effort` encoding expected on
// turn/start params.
//
// With an empty want, neither `effort` nor a `config` block may be
// present. With a non-empty want, `effort` must equal want, and any config
// block that exists must not carry `model_reasoning_effort` (the thread
// encoding). method is used only to prefix failure messages.
func assertCodexTurnEffort(t *testing.T, method string, params map[string]any, want string) {
	t.Helper()

	effort, emitted := params["effort"]
	rawConfig, hasConfig := params["config"]

	if want == "" {
		if emitted {
			t.Errorf("%s: empty level must not emit effort, got %v", method, effort)
		}
		// Nested config must also stay empty for the turn/start shape.
		if hasConfig {
			t.Errorf("%s: turn-shape params must not gain a config block, got %v", method, rawConfig)
		}
		return
	}

	if !emitted {
		t.Fatalf("%s: missing top-level effort for level=%q (params=%+v)", method, want, params)
	}
	if effort != want {
		t.Errorf("%s: effort = %v, want %q", method, effort, want)
	}

	// `config.model_reasoning_effort` must NOT leak into a turn call.
	if !hasConfig {
		return
	}
	config, _ := rawConfig.(map[string]any)
	if _, leaked := config["model_reasoning_effort"]; leaked {
		t.Errorf("%s: config.model_reasoning_effort leaked into turn params: %+v", method, params)
	}
}
|
||||
|
||||
func TestApplyCodexReasoningEffort_NilParamsSafe(t *testing.T) {
|
||||
t.Parallel()
|
||||
// Must not panic — defensive against future call sites passing nil.
|
||||
applyCodexReasoningEffort(nil, "high")
|
||||
}
|
||||
|
||||
func TestApplyCodexReasoningEffort_PreservesPreExistingConfig(t *testing.T) {
|
||||
t.Parallel()
|
||||
// thread/start may already have other config keys (e.g. future Codex
|
||||
// fields). Reasoning effort must be additive, not destructive.
|
||||
startParams := map[string]any{
|
||||
"model": "gpt-5.5",
|
||||
"config": map[string]any{
|
||||
"some_future_key": "preserve_me",
|
||||
},
|
||||
}
|
||||
applyCodexReasoningEffort(startParams, "high")
|
||||
cfg, _ := startParams["config"].(map[string]any)
|
||||
if cfg["some_future_key"] != "preserve_me" {
|
||||
t.Errorf("pre-existing config key was clobbered: %+v", cfg)
|
||||
}
|
||||
if cfg["model_reasoning_effort"] != "high" {
|
||||
t.Errorf("reasoning effort not injected: %+v", cfg)
|
||||
}
|
||||
}
|
||||
|
||||
// ── End-to-end: build*Args + thinking_level wiring ───────────────────
|
||||
|
||||
func TestBuildClaudeArgs_InjectsEffort(t *testing.T) {
|
||||
t.Parallel()
|
||||
args := buildClaudeArgs(ExecOptions{Model: "claude-opus-4-7", ThinkingLevel: "xhigh"}, slog.Default())
|
||||
if !containsAdjacent(args, "--effort", "xhigh") {
|
||||
t.Errorf("expected --effort xhigh in args: %v", args)
|
||||
}
|
||||
// Must appear after --model (cosmetic but enforced for log readability).
|
||||
modelIdx := argIndexOf(args, "--model")
|
||||
effortIdx := argIndexOf(args, "--effort")
|
||||
if modelIdx < 0 || effortIdx < 0 || modelIdx > effortIdx {
|
||||
t.Errorf("expected --model before --effort: %v", args)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildClaudeArgs_OmitsEffortWhenEmpty(t *testing.T) {
|
||||
t.Parallel()
|
||||
args := buildClaudeArgs(ExecOptions{Model: "claude-sonnet-4-6"}, slog.Default())
|
||||
if argIndexOf(args, "--effort") >= 0 {
|
||||
t.Errorf("expected no --effort when level empty: %v", args)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildClaudeArgs_BlocksUserEffortOverride(t *testing.T) {
|
||||
t.Parallel()
|
||||
args := buildClaudeArgs(ExecOptions{
|
||||
Model: "claude-opus-4-7",
|
||||
ThinkingLevel: "high",
|
||||
CustomArgs: []string{"--effort", "max", "--keep-me"},
|
||||
}, slog.Default())
|
||||
// Daemon-injected --effort survives.
|
||||
if !containsAdjacent(args, "--effort", "high") {
|
||||
t.Errorf("daemon-injected --effort high should remain: %v", args)
|
||||
}
|
||||
// User attempt to override is filtered out: no second --effort,
|
||||
// no `max` token.
|
||||
count := 0
|
||||
for _, a := range args {
|
||||
if a == "--effort" {
|
||||
count++
|
||||
}
|
||||
}
|
||||
if count != 1 {
|
||||
t.Errorf("expected exactly one --effort, got %d: %v", count, args)
|
||||
}
|
||||
if argIndexOf(args, "max") >= 0 {
|
||||
t.Errorf("filtered user --effort value still appears: %v", args)
|
||||
}
|
||||
// Other custom args pass through.
|
||||
if argIndexOf(args, "--keep-me") < 0 {
|
||||
t.Errorf("non-blocked custom arg was dropped: %v", args)
|
||||
}
|
||||
}
|
||||
|
||||
// ── Helpers ──────────────────────────────────────────────────────────
|
||||
|
||||
// containsAdjacent reports whether haystack contains a immediately
// followed by b. Slices with fewer than two elements never match.
func containsAdjacent(haystack []string, a, b string) bool {
	// Walk by the second element of each pair so the bound is simply len.
	for next := 1; next < len(haystack); next++ {
		if haystack[next-1] == a && haystack[next] == b {
			return true
		}
	}
	return false
}
|
||||
|
||||
// argIndexOf returns the position of the first element of slice equal to
// target, or -1 when no element matches.
func argIndexOf(slice []string, target string) int {
	for pos := 0; pos < len(slice); pos++ {
		if slice[pos] == target {
			return pos
		}
	}
	return -1
}
|
||||
|
||||
@@ -14,7 +14,7 @@ import (
|
||||
const archiveAgent = `-- name: ArchiveAgent :one
|
||||
UPDATE agent SET archived_at = now(), archived_by = $2, updated_at = now()
|
||||
WHERE id = $1
|
||||
RETURNING id, workspace_id, name, avatar_url, runtime_mode, runtime_config, visibility, status, max_concurrent_tasks, owner_id, created_at, updated_at, description, runtime_id, instructions, archived_at, archived_by, custom_env, custom_args, mcp_config, model
|
||||
RETURNING id, workspace_id, name, avatar_url, runtime_mode, runtime_config, visibility, status, max_concurrent_tasks, owner_id, created_at, updated_at, description, runtime_id, instructions, archived_at, archived_by, custom_env, custom_args, mcp_config, model, thinking_level
|
||||
`
|
||||
|
||||
type ArchiveAgentParams struct {
|
||||
@@ -47,6 +47,7 @@ func (q *Queries) ArchiveAgent(ctx context.Context, arg ArchiveAgentParams) (Age
|
||||
&i.CustomArgs,
|
||||
&i.McpConfig,
|
||||
&i.Model,
|
||||
&i.ThinkingLevel,
|
||||
)
|
||||
return i, err
|
||||
}
|
||||
@@ -55,7 +56,7 @@ const archiveAgentsByRuntime = `-- name: ArchiveAgentsByRuntime :many
|
||||
UPDATE agent
|
||||
SET archived_at = now(), archived_by = $1, updated_at = now()
|
||||
WHERE runtime_id = ANY($2::uuid[]) AND archived_at IS NULL
|
||||
RETURNING id, workspace_id, name, avatar_url, runtime_mode, runtime_config, visibility, status, max_concurrent_tasks, owner_id, created_at, updated_at, description, runtime_id, instructions, archived_at, archived_by, custom_env, custom_args, mcp_config, model
|
||||
RETURNING id, workspace_id, name, avatar_url, runtime_mode, runtime_config, visibility, status, max_concurrent_tasks, owner_id, created_at, updated_at, description, runtime_id, instructions, archived_at, archived_by, custom_env, custom_args, mcp_config, model, thinking_level
|
||||
`
|
||||
|
||||
type ArchiveAgentsByRuntimeParams struct {
|
||||
@@ -98,6 +99,7 @@ func (q *Queries) ArchiveAgentsByRuntime(ctx context.Context, arg ArchiveAgentsB
|
||||
&i.CustomArgs,
|
||||
&i.McpConfig,
|
||||
&i.Model,
|
||||
&i.ThinkingLevel,
|
||||
); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -518,7 +520,7 @@ func (q *Queries) ClaimAgentTask(ctx context.Context, agentID pgtype.UUID) (Agen
|
||||
const clearAgentMcpConfig = `-- name: ClearAgentMcpConfig :one
|
||||
UPDATE agent SET mcp_config = NULL, updated_at = now()
|
||||
WHERE id = $1
|
||||
RETURNING id, workspace_id, name, avatar_url, runtime_mode, runtime_config, visibility, status, max_concurrent_tasks, owner_id, created_at, updated_at, description, runtime_id, instructions, archived_at, archived_by, custom_env, custom_args, mcp_config, model
|
||||
RETURNING id, workspace_id, name, avatar_url, runtime_mode, runtime_config, visibility, status, max_concurrent_tasks, owner_id, created_at, updated_at, description, runtime_id, instructions, archived_at, archived_by, custom_env, custom_args, mcp_config, model, thinking_level
|
||||
`
|
||||
|
||||
func (q *Queries) ClearAgentMcpConfig(ctx context.Context, id pgtype.UUID) (Agent, error) {
|
||||
@@ -546,6 +548,46 @@ func (q *Queries) ClearAgentMcpConfig(ctx context.Context, id pgtype.UUID) (Agen
|
||||
&i.CustomArgs,
|
||||
&i.McpConfig,
|
||||
&i.Model,
|
||||
&i.ThinkingLevel,
|
||||
)
|
||||
return i, err
|
||||
}
|
||||
|
||||
const clearAgentThinkingLevel = `-- name: ClearAgentThinkingLevel :one
|
||||
UPDATE agent SET thinking_level = NULL, updated_at = now()
|
||||
WHERE id = $1
|
||||
RETURNING id, workspace_id, name, avatar_url, runtime_mode, runtime_config, visibility, status, max_concurrent_tasks, owner_id, created_at, updated_at, description, runtime_id, instructions, archived_at, archived_by, custom_env, custom_args, mcp_config, model, thinking_level
|
||||
`
|
||||
|
||||
// Explicit NULL-clear for thinking_level. COALESCE-based UpdateAgent cannot
|
||||
// set the column back to NULL, so the API layer routes "user picked Default"
|
||||
// through this dedicated query.
|
||||
func (q *Queries) ClearAgentThinkingLevel(ctx context.Context, id pgtype.UUID) (Agent, error) {
|
||||
row := q.db.QueryRow(ctx, clearAgentThinkingLevel, id)
|
||||
var i Agent
|
||||
err := row.Scan(
|
||||
&i.ID,
|
||||
&i.WorkspaceID,
|
||||
&i.Name,
|
||||
&i.AvatarUrl,
|
||||
&i.RuntimeMode,
|
||||
&i.RuntimeConfig,
|
||||
&i.Visibility,
|
||||
&i.Status,
|
||||
&i.MaxConcurrentTasks,
|
||||
&i.OwnerID,
|
||||
&i.CreatedAt,
|
||||
&i.UpdatedAt,
|
||||
&i.Description,
|
||||
&i.RuntimeID,
|
||||
&i.Instructions,
|
||||
&i.ArchivedAt,
|
||||
&i.ArchivedBy,
|
||||
&i.CustomEnv,
|
||||
&i.CustomArgs,
|
||||
&i.McpConfig,
|
||||
&i.Model,
|
||||
&i.ThinkingLevel,
|
||||
)
|
||||
return i, err
|
||||
}
|
||||
@@ -618,9 +660,9 @@ const createAgent = `-- name: CreateAgent :one
|
||||
INSERT INTO agent (
|
||||
workspace_id, name, description, avatar_url, runtime_mode,
|
||||
runtime_config, runtime_id, visibility, max_concurrent_tasks, owner_id,
|
||||
instructions, custom_env, custom_args, mcp_config, model
|
||||
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15)
|
||||
RETURNING id, workspace_id, name, avatar_url, runtime_mode, runtime_config, visibility, status, max_concurrent_tasks, owner_id, created_at, updated_at, description, runtime_id, instructions, archived_at, archived_by, custom_env, custom_args, mcp_config, model
|
||||
instructions, custom_env, custom_args, mcp_config, model, thinking_level
|
||||
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16)
|
||||
RETURNING id, workspace_id, name, avatar_url, runtime_mode, runtime_config, visibility, status, max_concurrent_tasks, owner_id, created_at, updated_at, description, runtime_id, instructions, archived_at, archived_by, custom_env, custom_args, mcp_config, model, thinking_level
|
||||
`
|
||||
|
||||
type CreateAgentParams struct {
|
||||
@@ -639,6 +681,7 @@ type CreateAgentParams struct {
|
||||
CustomArgs []byte `json:"custom_args"`
|
||||
McpConfig []byte `json:"mcp_config"`
|
||||
Model pgtype.Text `json:"model"`
|
||||
ThinkingLevel pgtype.Text `json:"thinking_level"`
|
||||
}
|
||||
|
||||
func (q *Queries) CreateAgent(ctx context.Context, arg CreateAgentParams) (Agent, error) {
|
||||
@@ -658,6 +701,7 @@ func (q *Queries) CreateAgent(ctx context.Context, arg CreateAgentParams) (Agent
|
||||
arg.CustomArgs,
|
||||
arg.McpConfig,
|
||||
arg.Model,
|
||||
arg.ThinkingLevel,
|
||||
)
|
||||
var i Agent
|
||||
err := row.Scan(
|
||||
@@ -682,6 +726,7 @@ func (q *Queries) CreateAgent(ctx context.Context, arg CreateAgentParams) (Agent
|
||||
&i.CustomArgs,
|
||||
&i.McpConfig,
|
||||
&i.Model,
|
||||
&i.ThinkingLevel,
|
||||
)
|
||||
return i, err
|
||||
}
|
||||
@@ -1091,7 +1136,7 @@ func (q *Queries) FailStaleTasks(ctx context.Context, arg FailStaleTasksParams)
|
||||
}
|
||||
|
||||
const getAgent = `-- name: GetAgent :one
|
||||
SELECT id, workspace_id, name, avatar_url, runtime_mode, runtime_config, visibility, status, max_concurrent_tasks, owner_id, created_at, updated_at, description, runtime_id, instructions, archived_at, archived_by, custom_env, custom_args, mcp_config, model FROM agent
|
||||
SELECT id, workspace_id, name, avatar_url, runtime_mode, runtime_config, visibility, status, max_concurrent_tasks, owner_id, created_at, updated_at, description, runtime_id, instructions, archived_at, archived_by, custom_env, custom_args, mcp_config, model, thinking_level FROM agent
|
||||
WHERE id = $1
|
||||
`
|
||||
|
||||
@@ -1120,12 +1165,13 @@ func (q *Queries) GetAgent(ctx context.Context, id pgtype.UUID) (Agent, error) {
|
||||
&i.CustomArgs,
|
||||
&i.McpConfig,
|
||||
&i.Model,
|
||||
&i.ThinkingLevel,
|
||||
)
|
||||
return i, err
|
||||
}
|
||||
|
||||
const getAgentInWorkspace = `-- name: GetAgentInWorkspace :one
|
||||
SELECT id, workspace_id, name, avatar_url, runtime_mode, runtime_config, visibility, status, max_concurrent_tasks, owner_id, created_at, updated_at, description, runtime_id, instructions, archived_at, archived_by, custom_env, custom_args, mcp_config, model FROM agent
|
||||
SELECT id, workspace_id, name, avatar_url, runtime_mode, runtime_config, visibility, status, max_concurrent_tasks, owner_id, created_at, updated_at, description, runtime_id, instructions, archived_at, archived_by, custom_env, custom_args, mcp_config, model, thinking_level FROM agent
|
||||
WHERE id = $1 AND workspace_id = $2
|
||||
`
|
||||
|
||||
@@ -1159,6 +1205,7 @@ func (q *Queries) GetAgentInWorkspace(ctx context.Context, arg GetAgentInWorkspa
|
||||
&i.CustomArgs,
|
||||
&i.McpConfig,
|
||||
&i.Model,
|
||||
&i.ThinkingLevel,
|
||||
)
|
||||
return i, err
|
||||
}
|
||||
@@ -1570,7 +1617,7 @@ func (q *Queries) ListAgentTasks(ctx context.Context, agentID pgtype.UUID) ([]Ag
|
||||
}
|
||||
|
||||
const listAgents = `-- name: ListAgents :many
|
||||
SELECT id, workspace_id, name, avatar_url, runtime_mode, runtime_config, visibility, status, max_concurrent_tasks, owner_id, created_at, updated_at, description, runtime_id, instructions, archived_at, archived_by, custom_env, custom_args, mcp_config, model FROM agent
|
||||
SELECT id, workspace_id, name, avatar_url, runtime_mode, runtime_config, visibility, status, max_concurrent_tasks, owner_id, created_at, updated_at, description, runtime_id, instructions, archived_at, archived_by, custom_env, custom_args, mcp_config, model, thinking_level FROM agent
|
||||
WHERE workspace_id = $1 AND archived_at IS NULL
|
||||
ORDER BY created_at ASC
|
||||
`
|
||||
@@ -1606,6 +1653,7 @@ func (q *Queries) ListAgents(ctx context.Context, workspaceID pgtype.UUID) ([]Ag
|
||||
&i.CustomArgs,
|
||||
&i.McpConfig,
|
||||
&i.Model,
|
||||
&i.ThinkingLevel,
|
||||
); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -1618,7 +1666,7 @@ func (q *Queries) ListAgents(ctx context.Context, workspaceID pgtype.UUID) ([]Ag
|
||||
}
|
||||
|
||||
const listAllAgents = `-- name: ListAllAgents :many
|
||||
SELECT id, workspace_id, name, avatar_url, runtime_mode, runtime_config, visibility, status, max_concurrent_tasks, owner_id, created_at, updated_at, description, runtime_id, instructions, archived_at, archived_by, custom_env, custom_args, mcp_config, model FROM agent
|
||||
SELECT id, workspace_id, name, avatar_url, runtime_mode, runtime_config, visibility, status, max_concurrent_tasks, owner_id, created_at, updated_at, description, runtime_id, instructions, archived_at, archived_by, custom_env, custom_args, mcp_config, model, thinking_level FROM agent
|
||||
WHERE workspace_id = $1
|
||||
ORDER BY created_at ASC
|
||||
`
|
||||
@@ -1654,6 +1702,7 @@ func (q *Queries) ListAllAgents(ctx context.Context, workspaceID pgtype.UUID) ([
|
||||
&i.CustomArgs,
|
||||
&i.McpConfig,
|
||||
&i.Model,
|
||||
&i.ThinkingLevel,
|
||||
); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -1979,7 +2028,7 @@ SET status = CASE WHEN EXISTS (
|
||||
) THEN 'working' ELSE 'idle' END,
|
||||
updated_at = now()
|
||||
WHERE a.id = $1
|
||||
RETURNING id, workspace_id, name, avatar_url, runtime_mode, runtime_config, visibility, status, max_concurrent_tasks, owner_id, created_at, updated_at, description, runtime_id, instructions, archived_at, archived_by, custom_env, custom_args, mcp_config, model
|
||||
RETURNING id, workspace_id, name, avatar_url, runtime_mode, runtime_config, visibility, status, max_concurrent_tasks, owner_id, created_at, updated_at, description, runtime_id, instructions, archived_at, archived_by, custom_env, custom_args, mcp_config, model, thinking_level
|
||||
`
|
||||
|
||||
func (q *Queries) RefreshAgentStatusFromTasks(ctx context.Context, id pgtype.UUID) (Agent, error) {
|
||||
@@ -2007,6 +2056,7 @@ func (q *Queries) RefreshAgentStatusFromTasks(ctx context.Context, id pgtype.UUI
|
||||
&i.CustomArgs,
|
||||
&i.McpConfig,
|
||||
&i.Model,
|
||||
&i.ThinkingLevel,
|
||||
)
|
||||
return i, err
|
||||
}
|
||||
@@ -2014,7 +2064,7 @@ func (q *Queries) RefreshAgentStatusFromTasks(ctx context.Context, id pgtype.UUI
|
||||
const restoreAgent = `-- name: RestoreAgent :one
|
||||
UPDATE agent SET archived_at = NULL, archived_by = NULL, updated_at = now()
|
||||
WHERE id = $1
|
||||
RETURNING id, workspace_id, name, avatar_url, runtime_mode, runtime_config, visibility, status, max_concurrent_tasks, owner_id, created_at, updated_at, description, runtime_id, instructions, archived_at, archived_by, custom_env, custom_args, mcp_config, model
|
||||
RETURNING id, workspace_id, name, avatar_url, runtime_mode, runtime_config, visibility, status, max_concurrent_tasks, owner_id, created_at, updated_at, description, runtime_id, instructions, archived_at, archived_by, custom_env, custom_args, mcp_config, model, thinking_level
|
||||
`
|
||||
|
||||
func (q *Queries) RestoreAgent(ctx context.Context, id pgtype.UUID) (Agent, error) {
|
||||
@@ -2042,6 +2092,7 @@ func (q *Queries) RestoreAgent(ctx context.Context, id pgtype.UUID) (Agent, erro
|
||||
&i.CustomArgs,
|
||||
&i.McpConfig,
|
||||
&i.Model,
|
||||
&i.ThinkingLevel,
|
||||
)
|
||||
return i, err
|
||||
}
|
||||
@@ -2102,9 +2153,10 @@ UPDATE agent SET
|
||||
custom_args = COALESCE($13, custom_args),
|
||||
mcp_config = COALESCE($14, mcp_config),
|
||||
model = COALESCE($15, model),
|
||||
thinking_level = COALESCE($16, thinking_level),
|
||||
updated_at = now()
|
||||
WHERE id = $1
|
||||
RETURNING id, workspace_id, name, avatar_url, runtime_mode, runtime_config, visibility, status, max_concurrent_tasks, owner_id, created_at, updated_at, description, runtime_id, instructions, archived_at, archived_by, custom_env, custom_args, mcp_config, model
|
||||
RETURNING id, workspace_id, name, avatar_url, runtime_mode, runtime_config, visibility, status, max_concurrent_tasks, owner_id, created_at, updated_at, description, runtime_id, instructions, archived_at, archived_by, custom_env, custom_args, mcp_config, model, thinking_level
|
||||
`
|
||||
|
||||
type UpdateAgentParams struct {
|
||||
@@ -2123,6 +2175,7 @@ type UpdateAgentParams struct {
|
||||
CustomArgs []byte `json:"custom_args"`
|
||||
McpConfig []byte `json:"mcp_config"`
|
||||
Model pgtype.Text `json:"model"`
|
||||
ThinkingLevel pgtype.Text `json:"thinking_level"`
|
||||
}
|
||||
|
||||
func (q *Queries) UpdateAgent(ctx context.Context, arg UpdateAgentParams) (Agent, error) {
|
||||
@@ -2142,6 +2195,7 @@ func (q *Queries) UpdateAgent(ctx context.Context, arg UpdateAgentParams) (Agent
|
||||
arg.CustomArgs,
|
||||
arg.McpConfig,
|
||||
arg.Model,
|
||||
arg.ThinkingLevel,
|
||||
)
|
||||
var i Agent
|
||||
err := row.Scan(
|
||||
@@ -2166,6 +2220,7 @@ func (q *Queries) UpdateAgent(ctx context.Context, arg UpdateAgentParams) (Agent
|
||||
&i.CustomArgs,
|
||||
&i.McpConfig,
|
||||
&i.Model,
|
||||
&i.ThinkingLevel,
|
||||
)
|
||||
return i, err
|
||||
}
|
||||
@@ -2173,7 +2228,7 @@ func (q *Queries) UpdateAgent(ctx context.Context, arg UpdateAgentParams) (Agent
|
||||
const updateAgentStatus = `-- name: UpdateAgentStatus :one
|
||||
UPDATE agent SET status = $2, updated_at = now()
|
||||
WHERE id = $1
|
||||
RETURNING id, workspace_id, name, avatar_url, runtime_mode, runtime_config, visibility, status, max_concurrent_tasks, owner_id, created_at, updated_at, description, runtime_id, instructions, archived_at, archived_by, custom_env, custom_args, mcp_config, model
|
||||
RETURNING id, workspace_id, name, avatar_url, runtime_mode, runtime_config, visibility, status, max_concurrent_tasks, owner_id, created_at, updated_at, description, runtime_id, instructions, archived_at, archived_by, custom_env, custom_args, mcp_config, model, thinking_level
|
||||
`
|
||||
|
||||
type UpdateAgentStatusParams struct {
|
||||
@@ -2206,6 +2261,7 @@ func (q *Queries) UpdateAgentStatus(ctx context.Context, arg UpdateAgentStatusPa
|
||||
&i.CustomArgs,
|
||||
&i.McpConfig,
|
||||
&i.Model,
|
||||
&i.ThinkingLevel,
|
||||
)
|
||||
return i, err
|
||||
}
|
||||
|
||||
@@ -41,6 +41,7 @@ type Agent struct {
|
||||
CustomArgs []byte `json:"custom_args"`
|
||||
McpConfig []byte `json:"mcp_config"`
|
||||
Model pgtype.Text `json:"model"`
|
||||
ThinkingLevel pgtype.Text `json:"thinking_level"`
|
||||
}
|
||||
|
||||
type AgentRuntime struct {
|
||||
|
||||
@@ -20,8 +20,8 @@ WHERE id = $1 AND workspace_id = $2;
|
||||
INSERT INTO agent (
|
||||
workspace_id, name, description, avatar_url, runtime_mode,
|
||||
runtime_config, runtime_id, visibility, max_concurrent_tasks, owner_id,
|
||||
instructions, custom_env, custom_args, mcp_config, model
|
||||
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15)
|
||||
instructions, custom_env, custom_args, mcp_config, model, thinking_level
|
||||
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16)
|
||||
RETURNING *;
|
||||
|
||||
-- name: UpdateAgent :one
|
||||
@@ -40,10 +40,19 @@ UPDATE agent SET
|
||||
custom_args = COALESCE(sqlc.narg('custom_args'), custom_args),
|
||||
mcp_config = COALESCE(sqlc.narg('mcp_config'), mcp_config),
|
||||
model = COALESCE(sqlc.narg('model'), model),
|
||||
thinking_level = COALESCE(sqlc.narg('thinking_level'), thinking_level),
|
||||
updated_at = now()
|
||||
WHERE id = $1
|
||||
RETURNING *;
|
||||
|
||||
-- name: ClearAgentThinkingLevel :one
|
||||
-- Explicit NULL-clear for thinking_level. COALESCE-based UpdateAgent cannot
|
||||
-- set the column back to NULL, so the API layer routes "user picked Default"
|
||||
-- through this dedicated query.
|
||||
UPDATE agent SET thinking_level = NULL, updated_at = now()
|
||||
WHERE id = $1
|
||||
RETURNING *;
|
||||
|
||||
-- name: ClearAgentMcpConfig :one
|
||||
UPDATE agent SET mcp_config = NULL, updated_at = now()
|
||||
WHERE id = $1
|
||||
|
||||
Reference in New Issue
Block a user