Compare commits

...

1 Commits

Author SHA1 Message Date
Jiang Bohan
8d42153560 feat(pricing): add DeepSeek, Kimi K2.6, and Zhipu GLM cost tracking (MUL-2606)
Adds rows to MODEL_PRICING for the Chinese-model SKUs listed on each
provider's official pricing page, so opencode / OpenRouter-routed
runtimes stop showing $0.00 in the dashboard for these models.

Sources (now cited inline above the table):

- DeepSeek: https://api-docs.deepseek.com/quick_start/pricing
- Moonshot: https://www.kimi.com/resources/kimi-k2-6-pricing
- Zhipu z.ai: https://docs.z.ai/guides/overview/pricing

Notes vs the closed PR #3170:

- Only SKUs that exist on the official pages are added. glm-z1*,
  deepseek-v4-pro at $0.55/$2.19, kimi-k2.6 at K2's tier were all
  hallucinated and are NOT included.
- deepseek-chat / deepseek-reasoner are routed by DeepSeek to
  deepseek-v4-flash, so they share the v4-flash rate.
- deepseek-v4-pro is priced at the post-promo standard rate
  ($1.74 / $3.48), not the 75%-off promo that ends 2026-05-31. Brief
  over-estimate beats a sudden 4x jump on June 1.
- glm-*-flash are priced at $0 because z.ai's price sheet lists the
  flash tiers as Free; $0 is the published price, not a placeholder.

Co-authored-by: multica-agent <github@multica.ai>
2026-05-25 15:13:32 +08:00
2 changed files with 139 additions and 7 deletions

View File

@@ -250,6 +250,98 @@ describe("estimateCost", () => {
}),
).toBe(0);
});
// The Chinese-model rates below are spot-checked against the literal
// numbers on the three official price sheets cited in MODEL_PRICING's
// header comment. Pinning them in tests is what catches a future edit
// that copies a price from a near-named neighbour by accident — the
// mistake the previous attempt (PR #3170, closed) made.
it("prices deepseek-v4-flash at the official $0.14/$0.28 with ~50× cache-hit discount", () => {
  // With exactly one million tokens in each bucket, the expected total is the
  // plain sum of the per-million rates:
  //   input $0.14 + output $0.28 + cache read $0.0028 = $0.4228.
  const expectedUsd = 0.14 + 0.28 + 0.0028;
  const actualUsd = estimateCost({
    ...zeroUsage,
    model: "deepseek-v4-flash",
    input_tokens: 1_000_000,
    output_tokens: 1_000_000,
    cache_read_tokens: 1_000_000,
  });
  expect(actualUsd).toBeCloseTo(expectedUsd, 5);
});
it("prices the deepseek-chat / deepseek-reasoner aliases at the same rate as deepseek-v4-flash", () => {
  // The DeepSeek docs explicitly route both legacy names to v4-flash —
  // they must hit the same numbers, not the older $0.27/$1.10 tier.
  //
  // Every billed bucket this suite exercises (input, output, cache read) is
  // loaded with 1M tokens so that a mis-copied output or cacheRead rate on an
  // alias row is caught too, not just a wrong input rate.
  const costFor = (model: string) =>
    estimateCost({
      ...zeroUsage,
      model,
      input_tokens: 1_000_000,
      output_tokens: 1_000_000,
      cache_read_tokens: 1_000_000,
    });

  const flash = costFor("deepseek-v4-flash");
  // Guard against a vacuous pass: if none of the names resolved to a paid
  // row, all three costs could be equal without any pricing being applied.
  expect(flash).toBeGreaterThan(0);

  for (const alias of ["deepseek-chat", "deepseek-reasoner"]) {
    expect(costFor(alias)).toBeCloseTo(flash, 5);
  }
});
it("prices kimi-k2.6 at the official $0.95 / $4.00 tier (not the K2 tier)", () => {
  // K2.6 has its own published rate and must not inherit K2's numbers.
  // Expected: 1M input × $0.95 + 1M output × $4.00 = $4.95.
  const expectedUsd = 4.95;
  const actualUsd = estimateCost({
    ...zeroUsage,
    model: "kimi-k2.6",
    input_tokens: 1_000_000,
    output_tokens: 1_000_000,
  });
  expect(actualUsd).toBeCloseTo(expectedUsd, 5);
});
it("prices glm-5.1 at the official $1.4 / $4.4 tier", () => {
  // 1M input × $1.4 + 1M output × $4.4; the sum is left unevaluated so the
  // two published rates stay readable in the assertion.
  const expectedUsd = 1.4 + 4.4;
  const actualUsd = estimateCost({
    ...zeroUsage,
    model: "glm-5.1",
    input_tokens: 1_000_000,
    output_tokens: 1_000_000,
  });
  expect(actualUsd).toBeCloseTo(expectedUsd, 5);
});
it("prices glm-4.5-flash at the official Free tier ($0)", () => {
  // z.ai currently ships Free tiers for the *-flash family; $0 is the
  // literal price on the page, not a placeholder. Anything non-zero
  // here would mean we mis-copied a paid SKU's number into the row.
  //
  // Both free SKUs are checked for an actual $0 cost (not only for being
  // priced): glm-4.7-flash sits next to the paid glm-4.7-flashx row, which is
  // exactly the near-named-neighbour mix-up these tests exist to catch.
  // Cache-read tokens are included so a stray non-zero cacheRead is caught.
  for (const model of ["glm-4.5-flash", "glm-4.7-flash"]) {
    expect(isModelPriced(model)).toBe(true);
    expect(
      estimateCost({
        ...zeroUsage,
        model,
        input_tokens: 1_000_000,
        output_tokens: 1_000_000,
        cache_read_tokens: 1_000_000,
      }),
    ).toBe(0);
  }
});
it("recognises the provider-prefixed forms emitted by OpenRouter-style runtimes", () => {
  // opencode + OpenRouter pass model IDs through as `<provider>/<model>`.
  // Each ID below must resolve to a priced row; per the original note,
  // canonicalCandidates is what strips the provider prefix. If that did not
  // happen, only bare IDs would match and the dashboard would keep showing
  // $0.00 for the very runtime that motivated these rows.
  const prefixedIds = [
    "deepseek/deepseek-v4-flash",
    "moonshotai/kimi-k2.6",
    "zhipuai/glm-5.1",
    "zhipuai/glm-4.5-air",
  ];
  for (const id of prefixedIds) {
    expect(isModelPriced(id)).toBe(true);
  }
});
});
describe("isModelPriced", () => {

View File

@@ -123,16 +123,22 @@ export function formatTokens(n: number): string {
// Cost estimation
// ---------------------------------------------------------------------------
// Pricing per million tokens (USD). Anthropic figures sourced from
// https://platform.claude.com/docs/en/about-claude/pricing; OpenAI figures
// from https://openai.com/api/pricing — keep in sync when providers release
// new models or adjust prices.
// Pricing per million tokens (USD). Sources, each authoritative for the
// rows tagged under it — keep in sync when providers release new models
// or adjust prices.
//
// Anthropic: https://platform.claude.com/docs/en/about-claude/pricing
// OpenAI: https://openai.com/api/pricing
// DeepSeek: https://api-docs.deepseek.com/quick_start/pricing
// Moonshot: https://www.kimi.com/resources/kimi-k2-6-pricing
// Zhipu: https://docs.z.ai/guides/overview/pricing
//
// Anthropic's cacheWrite reflects the 5-minute cache TTL (1.25× input); the
// daemon reports cache_creation_input_tokens without TTL metadata, so 5m is
// the safest / cheapest assumption (matches the API default). OpenAI does
// not bill cache writes separately (cached input is just discounted on
// subsequent reads), so cacheWrite mirrors input there.
// the safest / cheapest assumption (matches the API default). OpenAI,
// DeepSeek, Moonshot and Zhipu do not bill cache writes separately (cached
// input is just discounted on subsequent reads), so cacheWrite mirrors
// input there.
//
// The resolver matches exact keys after stripping a trailing date snapshot
// (see `resolvePricing` below). It deliberately does NOT do startsWith
@@ -185,6 +191,40 @@ const MODEL_PRICING: Record<
// -- OpenAI: GPT-4o family (legacy, kept for runtimes still configured against it) --
"gpt-4o-mini": { input: 0.15, output: 0.60, cacheRead: 0.075, cacheWrite: 0.15 },
"gpt-4o": { input: 2.50, output: 10, cacheRead: 1.25, cacheWrite: 2.50 },
// -- DeepSeek (api-docs.deepseek.com/quick_start/pricing).
// The official catalog lists exactly two current SKUs; `deepseek-chat`
// and `deepseek-reasoner` are aliases that route to `deepseek-v4-flash`
// (non-thinking and thinking mode respectively) per the same page.
// `deepseek-v4-pro` is currently under a 75%-off promo that ends
// 2026-05-31 15:59 UTC; we price at the post-promo standard rate
// ($1.74/$3.48) so the dashboard does not jump 4× on June 1 — accept
// a brief over-estimate during the promo over a sudden cliff after it. --
"deepseek-v4-flash": { input: 0.14, output: 0.28, cacheRead: 0.0028, cacheWrite: 0.14 },
"deepseek-v4-pro": { input: 1.74, output: 3.48, cacheRead: 0.0145, cacheWrite: 1.74 },
"deepseek-chat": { input: 0.14, output: 0.28, cacheRead: 0.0028, cacheWrite: 0.14 },
"deepseek-reasoner": { input: 0.14, output: 0.28, cacheRead: 0.0028, cacheWrite: 0.14 },
// -- Moonshot Kimi (kimi.com/resources/kimi-k2-6-pricing).
// Only K2.6 is on the official price sheet today; earlier K2 variants
// are intentionally omitted until Moonshot publishes their rates. --
"kimi-k2.6": { input: 0.95, output: 4.00, cacheRead: 0.16, cacheWrite: 0.95 },
// -- Zhipu z.ai (docs.z.ai/guides/overview/pricing). Free flash tiers
// are priced at 0 so they resolve cleanly instead of falling through
// to the "unmapped" diagnostic. --
"glm-5.1": { input: 1.4, output: 4.4, cacheRead: 0.26, cacheWrite: 1.4 },
"glm-5": { input: 1.0, output: 3.2, cacheRead: 0.2, cacheWrite: 1.0 },
"glm-5-turbo": { input: 1.2, output: 4.0, cacheRead: 0.24, cacheWrite: 1.2 },
"glm-4.7": { input: 0.6, output: 2.2, cacheRead: 0.11, cacheWrite: 0.6 },
"glm-4.7-flashx": { input: 0.07, output: 0.4, cacheRead: 0.01, cacheWrite: 0.07 },
"glm-4.7-flash": { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
"glm-4.6": { input: 0.6, output: 2.2, cacheRead: 0.11, cacheWrite: 0.6 },
"glm-4.5": { input: 0.6, output: 2.2, cacheRead: 0.11, cacheWrite: 0.6 },
"glm-4.5-x": { input: 2.2, output: 8.9, cacheRead: 0.45, cacheWrite: 2.2 },
"glm-4.5-air": { input: 0.2, output: 1.1, cacheRead: 0.03, cacheWrite: 0.2 },
"glm-4.5-airx": { input: 1.1, output: 4.5, cacheRead: 0.22, cacheWrite: 1.1 },
"glm-4.5-flash": { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
};
// Resolve a model string to its pricing tier. Exact match, with four