mirror of
https://github.com/multica-ai/multica.git
synced 2026-07-05 13:29:44 +02:00
fix(runtimes): price OpenAI Codex / GPT models so cost stops showing $0 (#2334)
* fix(runtimes): price OpenAI Codex / GPT models so cost stops showing $0 The runtime detail / usage charts compute cost client-side from MODEL_PRICING, but the table only had Claude entries. Codex CLI sessions report models like gpt-5-codex / gpt-5, so estimateCost() returned 0 for every Codex runtime — the dashboard read $0 even on runtimes with billions of tokens consumed. Add pricing rows for the GPT-5 family (incl. -codex/-mini/-nano), the o-series reasoning models, and GPT-4o, ordered so the startsWith() fallback resolves the more-specific variants first. Cover the new entries with a small unit test for utils.ts. Co-authored-by: multica-agent <github@multica.ai> * fix(runtimes): require explicit price rows for catalog SKUs (no startsWith fallback) Per review: the previous startsWith() fallback let `gpt-5.5*` / `gpt-5.4*` inherit the lower-tier `gpt-5` price. Address by: - Add explicit rows for every dotted Codex catalog SKU listed in server/pkg/agent/models.go: gpt-5.5, gpt-5.4, gpt-5.4-mini, gpt-5.3-codex. - Drop the startsWith fallback in resolvePricing entirely. Anything not exactly matching a row (after date-snapshot stripping) is now reported as unmapped — the diagnostic surfaces it rather than silently absorbing it into a near-named relative. - Extend the date-strip regex to also handle `2025-08-07`-style dashes (OpenAI snapshot format) in addition to the `20250929` Anthropic format. - Tests cover dotted SKUs at their own tier, gpt-5-2025-08-07 stripping, and explicitly assert that gpt-5.5-mini (catalog SKU without a published OpenAI price) is unmapped instead of borrowing gpt-5.5's row. Co-authored-by: multica-agent <github@multica.ai> --------- Co-authored-by: multica-agent <github@multica.ai>
This commit is contained in:
132
packages/views/runtimes/utils.test.ts
Normal file
132
packages/views/runtimes/utils.test.ts
Normal file
@@ -0,0 +1,132 @@
|
||||
import { describe, it, expect } from "vitest";
|
||||
|
||||
import { collectUnmappedModels, estimateCost, isModelPriced } from "./utils";
|
||||
|
||||
// Baseline usage row with every token counter at zero. Tests spread it and
// override only the counters (and the model name) they want to exercise.
const zeroUsage = {
  input_tokens: 0,
  output_tokens: 0,
  cache_read_tokens: 0,
  cache_write_tokens: 0,
};
|
||||
|
||||
// estimateCost: models with a pricing row are charged at that row's
// per-million-token rates; models without an exact row (after stripping a
// trailing date snapshot) are expected to cost 0 so the UI can flag them.
describe("estimateCost", () => {
  it("prices the canonical Anthropic Sonnet 4.6 SKU", () => {
    const cost = estimateCost({
      ...zeroUsage,
      model: "claude-sonnet-4-6",
      input_tokens: 1_000_000,
      output_tokens: 1_000_000,
    });
    // 1M × $3 input + 1M × $15 output = $18.
    expect(cost).toBeCloseTo(18, 5);
  });

  it("prices a Codex CLI session reporting gpt-5-codex", () => {
    const cost = estimateCost({
      ...zeroUsage,
      model: "gpt-5-codex",
      input_tokens: 1_000_000,
      output_tokens: 1_000_000,
      cache_read_tokens: 2_000_000,
    });
    // 1M × $1.25 + 1M × $10 + 2M × $0.125 = $11.50.
    expect(cost).toBeCloseTo(11.5, 5);
  });

  it("strips dated snapshots before resolving (gpt-5-2025-08-07 → gpt-5)", () => {
    const cost = estimateCost({
      ...zeroUsage,
      model: "gpt-5-2025-08-07",
      input_tokens: 1_000_000,
    });
    // Resolves to the `gpt-5` row: 1M × $1.25 input = $1.25.
    expect(cost).toBeCloseTo(1.25, 5);
  });

  it("prices each dotted Codex catalog SKU at its own tier, not gpt-5", () => {
    // Every dotted minor version is priced independently. The resolver does
    // exact-match-after-date-strip (no startsWith fallback), so each row
    // must exist on its own.

    // gpt-5.5: 1M × $5 input.
    expect(
      estimateCost({ ...zeroUsage, model: "gpt-5.5", input_tokens: 1_000_000 }),
    ).toBeCloseTo(5, 5);

    // gpt-5.4: 1M × $15 output.
    expect(
      estimateCost({ ...zeroUsage, model: "gpt-5.4", output_tokens: 1_000_000 }),
    ).toBeCloseTo(15, 5);

    // gpt-5.4-mini: 1M × $0.75 input + 1M × $4.50 output.
    expect(
      estimateCost({
        ...zeroUsage,
        model: "gpt-5.4-mini",
        input_tokens: 1_000_000,
        output_tokens: 1_000_000,
      }),
    ).toBeCloseTo(0.75 + 4.5, 5);

    // gpt-5.3-codex: 1M × $1.75 input + 1M × $14 output.
    expect(
      estimateCost({
        ...zeroUsage,
        model: "gpt-5.3-codex",
        input_tokens: 1_000_000,
        output_tokens: 1_000_000,
      }),
    ).toBeCloseTo(1.75 + 14, 5);
  });

  it("flags catalog SKUs without a published price (gpt-5.5-mini) as unmapped", () => {
    // `gpt-5.5-mini` is in the Codex catalog but OpenAI hasn't published a
    // public rate. We refuse to absorb it into `gpt-5.5` — the diagnostic
    // surfaces it instead so the team knows to add an explicit row.
    expect(isModelPriced("gpt-5.5-mini")).toBe(false);
    expect(
      estimateCost({
        ...zeroUsage,
        model: "gpt-5.5-mini",
        input_tokens: 1_000_000,
      }),
    ).toBe(0);
  });

  it("flags hypothetical future variants as unmapped instead of inheriting a relative's price", () => {
    // No exact match → unmapped. Covers both dotted families (`gpt-5.99-codex`)
    // and unknown sub-variants (`gpt-5-foo`); both must miss rather than
    // silently inherit `gpt-5` pricing.
    expect(isModelPriced("gpt-5.99-codex")).toBe(false);
    expect(isModelPriced("gpt-5-foo")).toBe(false);
    expect(
      estimateCost({
        ...zeroUsage,
        model: "gpt-5.99-codex",
        input_tokens: 1_000_000,
      }),
    ).toBe(0);
  });

  it("returns 0 for a genuinely unknown model so the UI can flag it", () => {
    expect(
      estimateCost({
        ...zeroUsage,
        model: "totally-made-up-model",
        input_tokens: 1_000_000,
      }),
    ).toBe(0);
  });
});
|
||||
|
||||
// isModelPriced: true only for model names that have a pricing row.
describe("isModelPriced", () => {
  it("recognises both Claude and Codex/GPT families", () => {
    expect(isModelPriced("claude-sonnet-4-6")).toBe(true);
    expect(isModelPriced("gpt-5-codex")).toBe(true);
    expect(isModelPriced("gpt-5-mini")).toBe(true);
    expect(isModelPriced("o3")).toBe(true);
    expect(isModelPriced("totally-made-up-model")).toBe(false);
  });
});
|
||||
|
||||
// collectUnmappedModels: given usage rows, reports only the model names that
// have no pricing row.
describe("collectUnmappedModels", () => {
  it("only surfaces names that miss every pricing tier", () => {
    const rows = [
      { ...zeroUsage, model: "claude-sonnet-4-6" },
      { ...zeroUsage, model: "gpt-5-codex" },
      { ...zeroUsage, model: "fictional-model-x" },
    ];
    expect(collectUnmappedModels(rows)).toEqual(["fictional-model-x"]);
  });
});
|
||||
@@ -114,21 +114,29 @@ export function formatTokens(n: number): string {
|
||||
// Cost estimation
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Pricing per million tokens (USD). Sourced from
|
||||
// https://platform.claude.com/docs/en/about-claude/pricing — keep in sync
|
||||
// when Anthropic releases new models or adjusts prices. cacheWrite reflects
|
||||
// the 5-minute cache TTL (1.25× input); the daemon reports
|
||||
// cache_creation_input_tokens without TTL metadata, so 5m is the safest /
|
||||
// cheapest assumption (matches the API default).
|
||||
// Pricing per million tokens (USD). Anthropic figures sourced from
|
||||
// https://platform.claude.com/docs/en/about-claude/pricing; OpenAI figures
|
||||
// from https://openai.com/api/pricing — keep in sync when providers release
|
||||
// new models or adjust prices.
|
||||
//
|
||||
// Key order is NOT significant: the resolver does exact-key lookups only
|
||||
// (no startsWith() walk), so a SHORTER key (e.g. claude-sonnet-4) can never
|
||||
// shadow a MORE SPECIFIC key (e.g. claude-sonnet-4-5) of the same family.
|
||||
// Anthropic's cacheWrite reflects the 5-minute cache TTL (1.25× input); the
|
||||
// daemon reports cache_creation_input_tokens without TTL metadata, so 5m is
|
||||
// the safest / cheapest assumption (matches the API default). OpenAI does
|
||||
// not bill cache writes separately (cached input is just discounted on
|
||||
// subsequent reads), so cacheWrite mirrors input there.
|
||||
//
|
||||
// The resolver matches exact keys after stripping a trailing date snapshot
|
||||
// (see `resolvePricing` below). It deliberately does NOT do startsWith
|
||||
// fallbacks: every catalog SKU needs its own row. That keeps unfamiliar
|
||||
// variants (`gpt-5.5-mini`, hypothetical `gpt-5.4-foo`) from silently
|
||||
// inheriting the price of a near-named relative; they surface in the
|
||||
// unmapped diagnostic instead. Mirror new entries in
|
||||
// `server/pkg/agent/models.go` so the catalog and pricing stay in sync.
|
||||
const MODEL_PRICING: Record<
|
||||
string,
|
||||
{ input: number; output: number; cacheRead: number; cacheWrite: number }
|
||||
> = {
|
||||
// -- Current generation (4.5+ — Opus dropped from 15/75 to 5/25 here) --
|
||||
// -- Anthropic: current generation (4.5+ — Opus dropped from 15/75 to 5/25 here) --
|
||||
"claude-haiku-4-5": { input: 1, output: 5, cacheRead: 0.10, cacheWrite: 1.25 },
|
||||
"claude-sonnet-4-5": { input: 3, output: 15, cacheRead: 0.30, cacheWrite: 3.75 },
|
||||
"claude-sonnet-4-6": { input: 3, output: 15, cacheRead: 0.30, cacheWrite: 3.75 },
|
||||
@@ -136,36 +144,55 @@ const MODEL_PRICING: Record<
|
||||
"claude-opus-4-6": { input: 5, output: 25, cacheRead: 0.50, cacheWrite: 6.25 },
|
||||
"claude-opus-4-7": { input: 5, output: 25, cacheRead: 0.50, cacheWrite: 6.25 },
|
||||
|
||||
// -- Pre-4.5 Opus (legacy, still served at original price tier) --
|
||||
// -- Anthropic: pre-4.5 Opus (legacy, still served at original price tier) --
|
||||
"claude-opus-4-1": { input: 15, output: 75, cacheRead: 1.50, cacheWrite: 18.75 },
|
||||
"claude-opus-4": { input: 15, output: 75, cacheRead: 1.50, cacheWrite: 18.75 },
|
||||
|
||||
// -- Sonnet 4.0 (deprecated; same price as the 4.x family) --
|
||||
// -- Anthropic: Sonnet 4.0 (deprecated; same price as the 4.x family) --
|
||||
"claude-sonnet-4": { input: 3, output: 15, cacheRead: 0.30, cacheWrite: 3.75 },
|
||||
|
||||
// -- Older Haiku tier (defensive entry for the rare runtime still on it) --
|
||||
// -- Anthropic: older Haiku tier (defensive entry for the rare runtime still on it) --
|
||||
"claude-haiku-3-5": { input: 0.80, output: 4, cacheRead: 0.08, cacheWrite: 1.00 },
|
||||
|
||||
// -- OpenAI: dotted-minor Codex catalog SKUs. Each generation is priced
|
||||
// independently — no fallback to `gpt-5`. Entries track
|
||||
// `server/pkg/agent/models.go` (Codex provider list).
|
||||
"gpt-5.5": { input: 5, output: 30, cacheRead: 0.50, cacheWrite: 5 },
|
||||
"gpt-5.4-mini": { input: 0.75, output: 4.50, cacheRead: 0.075, cacheWrite: 0.75 },
|
||||
"gpt-5.4": { input: 2.50, output: 15, cacheRead: 0.25, cacheWrite: 2.50 },
|
||||
"gpt-5.3-codex": { input: 1.75, output: 14, cacheRead: 0.175, cacheWrite: 1.75 },
|
||||
|
||||
// -- OpenAI: GPT-5 family (Codex CLI's default is gpt-5-codex; -codex/-mini/-nano variants priced per OpenAI tiers) --
|
||||
"gpt-5-codex": { input: 1.25, output: 10, cacheRead: 0.125, cacheWrite: 1.25 },
|
||||
"gpt-5-mini": { input: 0.25, output: 2, cacheRead: 0.025, cacheWrite: 0.25 },
|
||||
"gpt-5-nano": { input: 0.05, output: 0.40, cacheRead: 0.005, cacheWrite: 0.05 },
|
||||
"gpt-5": { input: 1.25, output: 10, cacheRead: 0.125, cacheWrite: 1.25 },
|
||||
|
||||
// -- OpenAI: o-series reasoning models --
|
||||
"o3-mini": { input: 1.10, output: 4.40, cacheRead: 0.55, cacheWrite: 1.10 },
|
||||
"o3": { input: 2, output: 8, cacheRead: 0.50, cacheWrite: 2 },
|
||||
"o4-mini": { input: 1.10, output: 4.40, cacheRead: 0.275, cacheWrite: 1.10 },
|
||||
|
||||
// -- OpenAI: GPT-4o family (legacy, kept for runtimes still configured against it) --
|
||||
"gpt-4o-mini": { input: 0.15, output: 0.60, cacheRead: 0.075, cacheWrite: 0.15 },
|
||||
"gpt-4o": { input: 2.50, output: 10, cacheRead: 1.25, cacheWrite: 2.50 },
|
||||
};
|
||||
|
||||
// Resolve a model string to its pricing tier. Exact match, with one
// tolerance: providers ship dated snapshots (`claude-sonnet-4-5-20250929`,
// `gpt-5-2025-08-07`) where the family is what we price and the date is
// volatile, so we strip a trailing date / "latest" tag and try again.
// Anything still unmapped after that is genuinely unknown; return
// undefined so callers can distinguish "$0 spend" from "spent but model
// not priced". No startsWith fallback: variants like `gpt-5.5-mini` must
// have their own row to be priced (otherwise they'd inherit `gpt-5.5`).
//
// @param model  Model name as reported by the runtime; may be empty.
// @returns      The matching MODEL_PRICING row, or undefined when the name
//               (raw and date-stripped) has no row of its own.
function resolvePricing(model: string) {
  if (!model) return undefined;

  // Own-property lookup only: a plain `MODEL_PRICING[name]` would also find
  // inherited Object.prototype members (e.g. "constructor", "toString") and
  // hand back something that is not a pricing row.
  const rowFor = (name: string) =>
    Object.prototype.hasOwnProperty.call(MODEL_PRICING, name)
      ? MODEL_PRICING[name]
      : undefined;

  const exact = rowFor(model);
  if (exact) return exact;

  // Trailing snapshot tags: `-2025-08-07` (OpenAI), `-20250929` (Anthropic),
  // or `-latest`.
  const stripped = model.replace(/-(20\d{2}-\d{2}-\d{2}|20\d{6}|latest)$/, "");
  if (stripped === model) return undefined;

  return rowFor(stripped);
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user