feat(agent): inspector picker for thinking_level (MUL-2339) (#2912)

* feat(agent): inspector picker for thinking_level (MUL-2339)

PR1 (#2865) shipped the backend — column, daemon-side discovery,
Claude/Codex injection, API validation — but the agent detail inspector
had no UI to set the value. Users could only configure thinking_level
via custom_env / API. This wires up the picker so it lives next to
Runtime and Model where everything else editable already lives.

Picker is per-(runtime, model): it reuses the same `runtimeModelsOptions`
query the Model picker already runs (60s cache, no extra round-trip)
and reads the active model's `thinking.supported_levels`. When the list
is empty — every provider except Claude/Codex today, or a Claude model
that doesn't expose `--effort` — the entire PropRow is hidden, not just
rendered inert. The picker never gets to invent value/label pairs
itself; they come verbatim from each CLI's own catalog (`Low`,
`Extra high`, …) so the user sees exactly what `claude --effort` /
`/effort` and Codex's TUI show.

The `default_level` from the catalog is badged inside the popover so
the user knows which value `""` (the persisted "use model default"
sentinel) maps to. The clear footer sends `""` explicitly, which the
backend already understands as the tri-state "explicit clear" branch
of UpdateAgent. Invalid combinations (e.g. picking a value not in the
target provider's enum after a runtime swap in the same PATCH) hit
the existing 400 path on the server and surface as a toast via the
inspector's standard `onUpdate` error handler — no extra client-side
guard needed.

Exports `RuntimeModelThinking` and `RuntimeModelThinkingLevel` from
`@multica/core/types` so views consumers can refer to them by name.
i18n keys added in EN and zh-Hans (parity test green).

Co-authored-by: multica-agent <github@multica.ai>

* fix(agent): preserve unknown thinking_level in picker label

Stale persisted values (model swap, CLI catalog shrink) used to render
as 'Default' even though the backend would still ship the orphaned
token. Fall back to the raw value when no entry matches so the user
sees what's actually saved and can clear it.

Co-authored-by: multica-agent <github@multica.ai>

* test(agent): unit tests for thinking-picker label + clear flow

Covers the default-vs-set trigger label, the unknown-token preservation
path added in 3452fae3f, the read-only display, picking and re-picking
into onChange, and the clear footer's empty-string emission.

Co-authored-by: multica-agent <github@multica.ai>

* fix(agent): keep Thinking row visible when value is stale (MUL-2339)

Inspector was hiding the row whenever the active model had no
supported_levels, which also hid persisted orphan tokens (model swap
into a non-thinking runtime, or a CLI catalog that shrank). PR1's
per-model invalid behavior is daemon-side warn/drop, not a synchronous
DB clear, so the frontend has to surface the raw value and let the
user explicit-clear it via the picker footer.

Hide the row only when levels are empty AND value is empty; otherwise
render it. Extract ThinkingPropRow into its own file so the row-level
logic is unit-testable.

Co-authored-by: multica-agent <github@multica.ai>

---------

Co-authored-by: multica-agent <github@multica.ai>
This commit is contained in:
Bohan Jiang
2026-05-20 13:47:19 +08:00
committed by GitHub
parent 2bec2221d2
commit 9d3b6e2241
8 changed files with 521 additions and 2 deletions

View File

@@ -36,6 +36,8 @@ export type {
RuntimeUpdate,
RuntimeUpdateStatus,
RuntimeModel,
RuntimeModelThinking,
RuntimeModelThinkingLevel,
RuntimeModelListRequest,
RuntimeModelListStatus,
RuntimeModelsResult,

View File

@@ -43,6 +43,7 @@ import { ConcurrencyPicker } from "./inspector/concurrency-picker";
import { ModelPicker } from "./inspector/model-picker";
import { RuntimePicker } from "./inspector/runtime-picker";
import { SkillAttach } from "./inspector/skill-attach";
import { ThinkingPropRow } from "./inspector/thinking-prop-row";
import { VisibilityPicker } from "./inspector/visibility-picker";
interface InspectorProps {
@@ -130,6 +131,14 @@ export function AgentDetailInspector({
onChange={(m) => update({ model: m })}
/>
</PropRow>
<ThinkingPropRow
runtimeId={agent.runtime_id}
runtimeOnline={!!isOnline}
model={agent.model ?? ""}
value={agent.thinking_level ?? ""}
canEdit={canEdit}
onChange={(v) => update({ thinking_level: v })}
/>
<PropRow label={t(($) => $.inspector.prop_visibility)} interactive={false}>
<VisibilityPicker
value={agent.visibility}

View File

@@ -0,0 +1,111 @@
// @vitest-environment jsdom
import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
import { render, screen, fireEvent, cleanup } from "@testing-library/react";
import type { RuntimeModelThinkingLevel } from "@multica/core/types";
import { I18nProvider } from "@multica/core/i18n/react";
import enCommon from "../../../locales/en/common.json";
import enAgents from "../../../locales/en/agents.json";
import enIssues from "../../../locales/en/issues.json";
import { ThinkingPicker } from "./thinking-picker";
// English locale bundles handed to I18nProvider so assertions can match the
// real translated copy ("Default", "Clear and fall back…").
const TEST_RESOURCES = {
en: { common: enCommon, agents: enAgents, issues: enIssues },
};
// Sample level catalog used as the default `levels` prop. Only "minimal"
// carries a description, and "xhigh" is absent — the stale-token case below
// relies on that.
const CODEX_LEVELS: RuntimeModelThinkingLevel[] = [
{ value: "minimal", label: "Minimal", description: "Fast, light reasoning" },
{ value: "low", label: "Low" },
{ value: "medium", label: "Medium" },
{ value: "high", label: "High" },
];
/**
 * Mounts ThinkingPicker inside the English i18n provider.
 *
 * Defaults describe an editable picker with no persisted value, the sample
 * catalog, and "medium" as the runtime default; `props` override any of them.
 * Returns the Testing Library utilities plus the `onChange` spy.
 */
function renderPicker(props: Partial<React.ComponentProps<typeof ThinkingPicker>> = {}) {
  const onChange = vi.fn();
  // Build the element first so the provider wrapper below stays readable.
  const picker = (
    <ThinkingPicker
      value=""
      levels={CODEX_LEVELS}
      defaultLevel="medium"
      canEdit
      onChange={onChange}
      {...props}
    />
  );
  const utils = render(
    <I18nProvider locale="en" resources={TEST_RESOURCES}>
      {picker}
    </I18nProvider>,
  );
  return { ...utils, onChange };
}
// Behavioural suite for ThinkingPicker: how the trigger label is resolved,
// the read-only rendering, and the onChange contract for picking a level
// and for clearing via the footer.
describe("ThinkingPicker", () => {
// Unmount anything still rendered both before and after every case so DOM
// queries only ever see the picker mounted by the current test.
beforeEach(() => {
cleanup();
});
afterEach(() => {
cleanup();
});
it('renders "Default" when value is empty', () => {
renderPicker({ value: "" });
// The trigger and the tooltip both carry the label.
expect(screen.getAllByText("Default").length).toBeGreaterThan(0);
});
it("renders the matching level label when value is set", () => {
renderPicker({ value: "high" });
expect(screen.getAllByText("High").length).toBeGreaterThan(0);
});
it("renders the raw token when the saved value is no longer in the catalog", () => {
// Simulates a model swap that dropped the option the user previously
// picked — we still surface what's persisted so the user can clear it,
// rather than silently showing "Default".
renderPicker({ value: "xhigh", levels: CODEX_LEVELS });
expect(screen.getAllByText("xhigh").length).toBeGreaterThan(0);
});
it("renders a static read-only display when canEdit=false and exposes no popover trigger", () => {
renderPicker({ value: "low", canEdit: false });
expect(screen.getByText("Low")).toBeInTheDocument();
expect(screen.queryByRole("button")).toBeNull();
});
it("calls onChange with the picked value and skips when the user re-picks the current value", () => {
const { onChange } = renderPicker({ value: "low" });
// With the popover closed the trigger is the only button in the DOM.
fireEvent.click(screen.getByRole("button"));
// Picking a new level fires onChange with the runtime-native value.
fireEvent.click(screen.getByText("High"));
expect(onChange).toHaveBeenCalledWith("high");
// Re-opening and clicking the already-selected value is a no-op so we
// don't enqueue a redundant PATCH. The trigger also reads "Low", so
// there are two matches in the DOM — target the listbox item by
// selecting the option button explicitly.
// The `value` prop is never re-rendered here, so "low" is still current.
onChange.mockClear();
fireEvent.click(screen.getByRole("button"));
const lowOption = screen
.getAllByRole("button")
.find((b) => b.getAttribute("data-picker-item") !== null && b.textContent?.includes("Low"));
expect(lowOption).toBeDefined();
fireEvent.click(lowOption!);
expect(onChange).not.toHaveBeenCalled();
});
it("clears to empty string via the footer button when a value is set", () => {
const { onChange } = renderPicker({ value: "high" });
fireEvent.click(screen.getByRole("button"));
// Footer copy resolves through i18n — match a substring so we don't
// pin to the exact translated wording.
const clearButton = screen.getByTitle(/Clear and fall back/i);
fireEvent.click(clearButton);
expect(onChange).toHaveBeenCalledWith("");
});
it("does not render the clear button when value is already empty", () => {
renderPicker({ value: "" });
fireEvent.click(screen.getByRole("button"));
expect(screen.queryByTitle(/Clear and fall back/i)).toBeNull();
});
});

View File

@@ -0,0 +1,131 @@
"use client";
import { useState } from "react";
import type { RuntimeModelThinkingLevel } from "@multica/core/types";
import {
PickerItem,
PropertyPicker,
} from "../../../issues/components/pickers";
import { CHIP_CLASS } from "./chip";
import { useT } from "../../../i18n";
/**
 * Per-agent reasoning/effort picker (MUL-2339).
 *
 * `levels` is the daemon-discovered catalog for the active model and is
 * rendered verbatim, so the value/label pairs match each CLI's own UI
 * (`Low`, `Extra high`, …); nothing is normalised across providers.
 *
 * `levels` may be empty: the inspector row still mounts this picker when a
 * stale `value` is persisted for a model that advertises no levels. In that
 * case the popover contains only the clear footer.
 *
 * Chip label resolution:
 *   - `value` matches a level → that level's label
 *   - `value` is set but unknown → the raw token, so the user sees what is
 *     actually persisted and can clear it
 *   - `value` is "" (the "use model default" sentinel) → translated "Default"
 *
 * The catalog's `defaultLevel`, when it matches a level, is badged inside
 * the popover so the user can see what clearing falls back to.
 */
export function ThinkingPicker({
  value,
  levels,
  defaultLevel,
  canEdit = true,
  onChange,
}: {
  /** Persisted thinking_level — "" means "use model default". */
  value: string;
  /** Supported levels for the current (runtime, model) pair. May be empty
   *  when the picker is mounted only to expose and clear a stale `value`. */
  levels: RuntimeModelThinkingLevel[];
  /** Level the runtime uses when no override is sent. Surfaced as a badge
   *  in the popover. */
  defaultLevel?: string;
  /** When false, render a static read-only display and skip the popover. */
  canEdit?: boolean;
  /** Receives the picked level value, or "" when the user clears. */
  onChange: (next: string) => Promise<void> | void;
}) {
  const { t } = useT("agents");
  const [open, setOpen] = useState(false);

  // Only look up the catalog when something is persisted.
  const matchedLevel = value
    ? levels.find((level) => level.value === value)
    : undefined;

  // Known level → its label; unknown-but-set → raw token; empty → "Default".
  const resolveTriggerLabel = () => {
    if (matchedLevel) return matchedLevel.label;
    if (value) return value;
    return t(($) => $.pickers.thinking_default);
  };
  const triggerLabel = resolveTriggerLabel();
  const triggerTitle = t(($) => $.pickers.thinking_tooltip, {
    value: triggerLabel,
  });

  // Close the popover first, then report the change — unless the user
  // re-picked the value that is already persisted.
  const commit = async (next: string) => {
    setOpen(false);
    if (next === value) return;
    await onChange(next);
  };

  // Tooltip for one option: its description when present, otherwise
  // "label · value" (or just the value when the two are identical).
  const describeLevel = (level: RuntimeModelThinkingLevel) => {
    if (level.description) return level.description;
    return level.label !== level.value
      ? `${level.label} · ${level.value}`
      : level.value;
  };

  if (!canEdit) {
    // Read-only viewers get plain text with the same tooltip, no popover.
    return (
      <span
        className="min-w-0 truncate px-1.5 py-0.5 font-mono text-[11px] text-muted-foreground"
        title={triggerTitle}
      >
        {triggerLabel}
      </span>
    );
  }

  return (
    <PropertyPicker
      open={open}
      onOpenChange={setOpen}
      width="w-auto min-w-[14rem] max-w-md"
      align="start"
      tooltip={triggerTitle}
      triggerRender={
        <button
          type="button"
          className={CHIP_CLASS}
          aria-label={triggerTitle}
        />
      }
      trigger={
        <span className="min-w-0 truncate font-mono text-[11px]">
          {triggerLabel}
        </span>
      }
    >
      {levels.map((level) => {
        const isCurrent = level.value === value;
        const isRuntimeDefault = level.value === defaultLevel;
        return (
          <PickerItem
            key={level.value}
            selected={isCurrent}
            onClick={() => void commit(level.value)}
            tooltip={describeLevel(level)}
          >
            <div className="min-w-0 flex-1">
              <div className="flex items-center gap-1.5">
                <span className="truncate font-medium">{level.label}</span>
                {isRuntimeDefault && (
                  <span className="shrink-0 rounded bg-primary/10 px-1 text-[10px] font-medium text-primary">
                    {t(($) => $.pickers.thinking_default_badge)}
                  </span>
                )}
              </div>
              {level.description && (
                <div className="truncate text-[10px] text-muted-foreground">
                  {level.description}
                </div>
              )}
            </div>
          </PickerItem>
        );
      })}
      {/* Clear footer: only offered when there is something to clear. */}
      {value && (
        <button
          type="button"
          onClick={() => void commit("")}
          className="mt-1 flex w-full items-center border-t px-3 py-2 text-left text-xs text-muted-foreground transition-colors hover:bg-accent/50"
          title={t(($) => $.pickers.thinking_clear_title)}
        >
          {t(($) => $.pickers.thinking_clear)}
        </button>
      )}
    </PropertyPicker>
  );
}

View File

@@ -0,0 +1,185 @@
// @vitest-environment jsdom
import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
import { QueryClient, QueryClientProvider } from "@tanstack/react-query";
import {
cleanup,
fireEvent,
render,
screen,
waitFor,
} from "@testing-library/react";
import type {
RuntimeModel,
RuntimeModelListRequest,
} from "@multica/core/types";
import { I18nProvider } from "@multica/core/i18n/react";
import enCommon from "../../../locales/en/common.json";
import enAgents from "../../../locales/en/agents.json";
import enIssues from "../../../locales/en/issues.json";
// English locale bundles handed to I18nProvider so assertions can match the
// real translated copy ("Thinking", "Default", "Clear and fall back…").
const TEST_RESOURCES = {
en: { common: enCommon, agents: enAgents, issues: enIssues },
};
// The spies are created through vi.hoisted so they already exist when the
// vi.mock factory below runs.
const mockInitiateListModels = vi.hoisted(() => vi.fn());
const mockGetListModelsResult = vi.hoisted(() => vi.fn());
// Replace the API module with thin wrappers that forward to the spies above,
// so each test can decide what the model-list calls resolve to.
// NOTE(review): presumably these are the two calls the shared
// runtime-models query makes — confirm against runtimeModelsOptions.
vi.mock("@multica/core/api", () => ({
api: {
initiateListModels: (...args: unknown[]) =>
mockInitiateListModels(...args),
getListModelsResult: (...args: unknown[]) =>
mockGetListModelsResult(...args),
},
}));
import { ThinkingPropRow } from "./thinking-prop-row";
// Model that advertises thinking metadata: four supported levels with
// "medium" as the catalog default. "xhigh" is not among them, so the tests
// can use it as an orphaned token.
const CLAUDE_MODEL: RuntimeModel = {
id: "claude-sonnet-4-6",
label: "Claude Sonnet 4.6",
default: true,
thinking: {
supported_levels: [
{ value: "none", label: "None" },
{ value: "low", label: "Low" },
{ value: "medium", label: "Medium" },
{ value: "high", label: "High" },
],
default_level: "medium",
},
};
// Model without thinking metadata — what the row sees when the agent's
// model swap landed on a non-thinking runtime, or when the daemon catalog
// shrank and stopped emitting `thinking` for this id.
const NO_THINKING_MODEL: RuntimeModel = {
id: "gemini-2.5-pro",
label: "Gemini 2.5 Pro",
default: true,
};
/**
 * Builds a finished model-list response wrapping `models`.
 *
 * Every other field is a fixed placeholder: request "req-1" on
 * "runtime-1", status "completed", `supported: true`, and one shared
 * timestamp for both `created_at` and `updated_at`.
 */
function listResult(models: RuntimeModel[]): RuntimeModelListRequest {
  const timestamp = "2026-05-20T00:00:00Z";
  const completed: RuntimeModelListRequest = {
    id: "req-1",
    runtime_id: "runtime-1",
    status: "completed",
    models,
    supported: true,
    created_at: timestamp,
    updated_at: timestamp,
  };
  return completed;
}
/**
 * Mounts ThinkingPropRow with a fresh QueryClient and the English i18n
 * provider.
 *
 * Defaults describe an online "runtime-1" on model "claude-sonnet-4-6"
 * with nothing persisted and editing allowed; `props` override any of them.
 * Returns the Testing Library utilities, the `onChange` spy, and the
 * QueryClient so a test can inspect query state.
 */
function renderRow(
  props: Partial<React.ComponentProps<typeof ThinkingPropRow>> = {},
) {
  const onChange = vi.fn();
  // One client per render keeps the query cache from leaking between tests;
  // retries are disabled in the default query options.
  const queryClient = new QueryClient({
    defaultOptions: { queries: { retry: false } },
  });
  // PropRow uses CSS subgrid, so wrap with the same column tracks the
  // inspector parent declares — otherwise the row mounts without a
  // grid context and the column layout warns. Behaviour we care about
  // (visibility + clear flow) is independent of layout.
  const gridWrappedRow = (
    <div className="grid grid-cols-[auto_1fr] gap-x-2 gap-y-0.5">
      <ThinkingPropRow
        runtimeId="runtime-1"
        runtimeOnline
        model="claude-sonnet-4-6"
        value=""
        canEdit
        onChange={onChange}
        {...props}
      />
    </div>
  );
  const utils = render(
    <I18nProvider locale="en" resources={TEST_RESOURCES}>
      <QueryClientProvider client={queryClient}>
        {gridWrappedRow}
      </QueryClientProvider>
    </I18nProvider>,
  );
  return { ...utils, onChange, queryClient };
}
// Row-level suite for ThinkingPropRow: when the row is hidden, when it is
// kept visible for a stale persisted value, and that the clear footer
// reaches onChange through the row.
describe("ThinkingPropRow", () => {
  beforeEach(() => {
    vi.clearAllMocks();
    // Default catalog: a single model that advertises thinking levels.
    mockInitiateListModels.mockResolvedValue(listResult([CLAUDE_MODEL]));
    mockGetListModelsResult.mockResolvedValue(listResult([CLAUDE_MODEL]));
  });

  afterEach(() => {
    cleanup();
  });

  it("hides the row when the active model has no thinking levels and nothing is persisted", async () => {
    // Point both API mocks at the non-thinking catalog so the outcome does
    // not depend on which of the two calls ends up supplying the data.
    const noThinking = listResult([NO_THINKING_MODEL]);
    mockInitiateListModels.mockResolvedValue(noThinking);
    mockGetListModelsResult.mockResolvedValue(noThinking);
    const { queryClient } = renderRow({ model: "gemini-2.5-pro", value: "" });
    // The row is also hidden while the query is still loading, so asserting
    // straight after the request is issued would pass vacuously. Wait until
    // the request has been made AND no query is in flight any more, then
    // check that the row is absent with the catalog available.
    await waitFor(() => {
      expect(mockInitiateListModels).toHaveBeenCalled();
      expect(queryClient.isFetching()).toBe(0);
    });
    expect(screen.queryByText("Thinking")).toBeNull();
  });

  it("hides the row while the runtime is offline (no query fires)", () => {
    renderRow({ runtimeOnline: false, value: "" });
    // Query disabled when runtimeOnline=false, so no models, levels stay
    // empty, value is empty → row stays hidden.
    expect(screen.queryByText("Thinking")).toBeNull();
    expect(mockInitiateListModels).not.toHaveBeenCalled();
  });

  it("renders the row with the persisted raw token when levels are empty but value is set (stale orphan)", async () => {
    // The agent persisted `thinking_level=xhigh` while it was on a
    // thinking-capable model, then was swapped to gemini (or the CLI
    // catalog shrank). PR1's behavior is daemon-side warn/drop, not a
    // synchronous DB clear, so the frontend must surface the orphan
    // token and let the user clear it explicitly.
    mockInitiateListModels.mockResolvedValue(listResult([NO_THINKING_MODEL]));
    renderRow({ model: "gemini-2.5-pro", value: "xhigh" });
    await screen.findByText("Thinking");
    // The picker chip carries the raw value when it's not in the catalog.
    expect(await screen.findByText("xhigh")).toBeInTheDocument();
  });

  it("clears the orphan value via the picker footer, emitting onChange(\"\")", async () => {
    mockInitiateListModels.mockResolvedValue(listResult([NO_THINKING_MODEL]));
    const { onChange } = renderRow({
      model: "gemini-2.5-pro",
      value: "xhigh",
    });
    // Wait until the row mounts with the orphan value, then open the
    // popover and fire the clear footer. The footer is the only target
    // matching the i18n `thinking_clear_title` copy.
    await screen.findByText("xhigh");
    fireEvent.click(screen.getByRole("button"));
    const clearButton = await screen.findByTitle(/Clear and fall back/i);
    fireEvent.click(clearButton);
    expect(onChange).toHaveBeenCalledWith("");
  });

  it("renders the row with the matched label when the model still advertises the value", async () => {
    renderRow({ value: "high" });
    await screen.findByText("Thinking");
    // Both the chip and the tooltip carry "High".
    expect((await screen.findAllByText("High")).length).toBeGreaterThan(0);
  });

  it("renders the row with \"Default\" when value is empty and the model exposes levels", async () => {
    renderRow({ value: "" });
    await screen.findByText("Thinking");
    expect((await screen.findAllByText("Default")).length).toBeGreaterThan(0);
  });
});

View File

@@ -0,0 +1,69 @@
"use client";
import { useQuery } from "@tanstack/react-query";
import type { RuntimeModel } from "@multica/core/types";
import { runtimeModelsOptions } from "@multica/core/runtimes";
import { PropRow } from "../../../common/prop-row";
import { useT } from "../../../i18n";
import { ThinkingPicker } from "./thinking-picker";
/**
 * "Thinking" row of the agent inspector.
 *
 * Visibility rule: the row renders nothing only when the active model
 * advertises no `supported_levels` AND no `thinking_level` is persisted,
 * so providers without reasoning controls never show an empty row. If a
 * value IS persisted while the level list is empty (model swap into a
 * non-thinking runtime, or the daemon / CLI catalog shrank and dropped the
 * entry), the row stays so the user can see the orphan token and
 * explicit-clear it via the picker's "Use model default" footer. PR1's
 * per-model invalid behavior is daemon-side warn/drop, not a synchronous
 * DB clear, so the frontend has to surface the persisted state honestly.
 *
 * The model catalog comes from the shared runtime-models query, so this
 * row hits the same 60s cache as the model picker and adds no extra
 * round-trip on the inspector's hot path.
 */
export function ThinkingPropRow({
  runtimeId,
  runtimeOnline,
  model,
  value,
  canEdit,
  onChange,
}: {
  runtimeId: string | null;
  runtimeOnline: boolean;
  model: string;
  value: string;
  canEdit: boolean;
  onChange: (next: string) => Promise<void> | void;
}) {
  const { t } = useT("agents");

  // An offline runtime is queried with a null id instead of its real one.
  const queriedRuntimeId = runtimeOnline ? runtimeId : null;
  const { data } = useQuery(runtimeModelsOptions(queriedRuntimeId));

  // Resolve the catalog entry for the agent's model, then its thinking info.
  const catalog = data?.models ?? [];
  const activeEntry = pickModelEntry(catalog, model);
  const thinking = activeEntry?.thinking;
  const levels = thinking?.supported_levels ?? [];

  // Nothing to pick from and nothing to clear → no row at all.
  const nothingToShow = levels.length === 0 && !value;
  if (nothingToShow) return null;

  return (
    <PropRow label={t(($) => $.inspector.prop_thinking)} interactive={false}>
      <ThinkingPicker
        value={value}
        levels={levels}
        defaultLevel={thinking?.default_level}
        canEdit={canEdit}
        onChange={onChange}
      />
    </PropRow>
  );
}
/**
 * Finds the catalog entry that describes the agent's active model.
 *
 * @param models - Catalog entries to search.
 * @param model - The agent's model id; "" means no explicit model is set.
 * @returns With an explicit id, the entry whose `id` matches exactly, or
 *   `undefined` when the catalog does not list it. With an empty id, the
 *   first entry flagged `default`, else the first entry, else `undefined`
 *   for an empty catalog.
 */
function pickModelEntry(
  models: RuntimeModel[],
  model: string,
): RuntimeModel | undefined {
  if (!model) {
    const flaggedDefault = models.find((candidate) => candidate.default);
    return flaggedDefault ?? models[0];
  }
  return models.find((candidate) => candidate.id === model);
}

View File

@@ -130,6 +130,7 @@
"section_skills": "Skills",
"prop_runtime": "Runtime",
"prop_model": "Model",
"prop_thinking": "Thinking",
"prop_visibility": "Visibility",
"prop_concurrency": "Concurrency",
"prop_owner": "Owner",
@@ -172,7 +173,12 @@
"model_custom_tooltip": "Use \"{{value}}\" as a custom model id",
"model_custom_use": "Use \"{{value}}\"",
"model_clear": "Clear (use provider default)",
"model_clear_title": "Clear and fall back to the runtime's provider default"
"model_clear_title": "Clear and fall back to the runtime's provider default",
"thinking_default": "Default",
"thinking_tooltip": "Thinking · {{value}}",
"thinking_default_badge": "default",
"thinking_clear": "Use model default",
"thinking_clear_title": "Clear and fall back to this model's default reasoning level"
},
"model_dropdown": {
"label": "Model",

View File

@@ -126,6 +126,7 @@
"section_skills": "skill",
"prop_runtime": "运行时",
"prop_model": "模型",
"prop_thinking": "思考",
"prop_visibility": "可见性",
"prop_concurrency": "并发",
"prop_owner": "所有者",
@@ -168,7 +169,12 @@
"model_custom_tooltip": "使用\"{{value}}\"作为自定义模型 ID",
"model_custom_use": "使用\"{{value}}\"",
"model_clear": "清除(使用提供方默认)",
"model_clear_title": "清除并回退到运行时的提供方默认"
"model_clear_title": "清除并回退到运行时的提供方默认",
"thinking_default": "默认",
"thinking_tooltip": "思考 · {{value}}",
"thinking_default_badge": "默认",
"thinking_clear": "使用模型默认",
"thinking_clear_title": "清除并回退到该模型的默认推理级别"
},
"model_dropdown": {
"label": "模型",