feat(pricing): add DeepSeek, Kimi K2.6, and Zhipu GLM cost tracking (MUL-2606)

Adds rows to MODEL_PRICING for the Chinese-model SKUs listed on each
provider's official pricing page, so opencode / OpenRouter-routed
runtimes stop showing $0.00 in the dashboard for these models.

Sources (now cited inline above the table):
- DeepSeek: https://api-docs.deepseek.com/quick_start/pricing
- Moonshot: https://www.kimi.com/resources/kimi-k2-6-pricing
- Zhipu z.ai: https://docs.z.ai/guides/overview/pricing

Notes vs the closed PR #3170:
- Only SKUs that exist on the official pages are added. glm-z1*,
  deepseek-v4-pro at $0.55/$2.19, kimi-k2.6 at K2's tier were all
  hallucinated and are NOT included.
- deepseek-chat / deepseek-reasoner are routed by DeepSeek to
  deepseek-v4-flash, so they share the v4-flash rate.
- deepseek-v4-pro is priced at the post-promo standard rate
  ($1.74 / $3.48), not the 75%-off promo that ends 2026-05-31. Brief
  over-estimate beats a sudden 4x jump on June 1.
- glm-*-flash are priced at $0 because z.ai's free tiers are the
  literal published price.

Co-authored-by: multica-agent <github@multica.ai>
2026-06-24 16:09:19 +02:00 · 2026-05-25 15:13:32 +08:00
2 changed files with 139 additions and 7 deletions
--- a/packages/views/runtimes/utils.test.ts
+++ b/packages/views/runtimes/utils.test.ts
@@ -250,6 +250,98 @@ describe("estimateCost", () => {
      }),
    ).toBe(0);
  });
+
+  // The Chinese-model rates below are spot-checked against the literal
+  // numbers on the three official price sheets cited in MODEL_PRICING's
+  // header comment. Pinning them in tests is what catches a future edit
+  // that copies a price from a near-named neighbour by accident — the
+  // mistake the previous attempt (PR #3170, closed) made.
+  it("prices deepseek-v4-flash at the official $0.14/$0.28 with ~50× cache-hit discount", () => {
+    // 1M input × $0.14 + 1M output × $0.28 + 1M cache read × $0.0028 = $0.4228.
+    const cost = estimateCost({
+      ...zeroUsage,
+      model: "deepseek-v4-flash",
+      input_tokens: 1_000_000,
+      output_tokens: 1_000_000,
+      cache_read_tokens: 1_000_000,
+    });
+    expect(cost).toBeCloseTo(0.14 + 0.28 + 0.0028, 5);
+  });
+
+  it("prices the deepseek-chat / deepseek-reasoner aliases at the same rate as deepseek-v4-flash", () => {
+    // The DeepSeek docs explicitly route both legacy names to v4-flash —
+    // they must hit the same numbers, not the older $0.27/$1.10 tier.
+    const flash = estimateCost({
+      ...zeroUsage,
+      model: "deepseek-v4-flash",
+      input_tokens: 1_000_000,
+    });
+    expect(
+      estimateCost({
+        ...zeroUsage,
+        model: "deepseek-chat",
+        input_tokens: 1_000_000,
+      }),
+    ).toBeCloseTo(flash, 5);
+    expect(
+      estimateCost({
+        ...zeroUsage,
+        model: "deepseek-reasoner",
+        input_tokens: 1_000_000,
+      }),
+    ).toBeCloseTo(flash, 5);
+  });
+
+  it("prices kimi-k2.6 at the official $0.95 / $4.00 tier (not the K2 tier)", () => {
+    // Moonshot's K2.6 page is the only authoritative source today; K2.6 is
+    // explicitly NOT priced like K2. 1M input × $0.95 + 1M output × $4.00 = $4.95.
+    expect(
+      estimateCost({
+        ...zeroUsage,
+        model: "kimi-k2.6",
+        input_tokens: 1_000_000,
+        output_tokens: 1_000_000,
+      }),
+    ).toBeCloseTo(4.95, 5);
+  });
+
+  it("prices glm-5.1 at the official $1.4 / $4.4 tier", () => {
+    expect(
+      estimateCost({
+        ...zeroUsage,
+        model: "glm-5.1",
+        input_tokens: 1_000_000,
+        output_tokens: 1_000_000,
+      }),
+    ).toBeCloseTo(1.4 + 4.4, 5);
+  });
+
+  it("prices glm-4.5-flash at the official Free tier ($0)", () => {
+    // z.ai currently ships Free tiers for the *-flash family; $0 is the
+    // literal price on the page, not a placeholder. Anything non-zero
+    // here would mean we mis-copied a paid SKU's number into the row.
+    expect(isModelPriced("glm-4.5-flash")).toBe(true);
+    expect(isModelPriced("glm-4.7-flash")).toBe(true);
+    expect(
+      estimateCost({
+        ...zeroUsage,
+        model: "glm-4.5-flash",
+        input_tokens: 1_000_000,
+        output_tokens: 1_000_000,
+      }),
+    ).toBe(0);
+  });
+
+  it("recognises the provider-prefixed forms emitted by OpenRouter-style runtimes", () => {
+    // opencode + OpenRouter pass IDs through as `<provider>/<model>`.
+    // canonicalCandidates strips the prefix; without that stripping the
+    // rows above would only match bare IDs and the dashboard would still
+    // show $0.00 for the runtime that actually triggered this work.
+    expect(isModelPriced("deepseek/deepseek-v4-flash")).toBe(true);
+    expect(isModelPriced("moonshotai/kimi-k2.6")).toBe(true);
+    expect(isModelPriced("zhipuai/glm-5.1")).toBe(true);
+    expect(isModelPriced("zhipuai/glm-4.5-air")).toBe(true);
+  });
 });

 describe("isModelPriced", () => {
--- a/packages/views/runtimes/utils.ts
+++ b/packages/views/runtimes/utils.ts
@@ -123,16 +123,22 @@ export function formatTokens(n: number): string {
 // Cost estimation
 // ---------------------------------------------------------------------------

-// Pricing per million tokens (USD). Anthropic figures sourced from
-// https://platform.claude.com/docs/en/about-claude/pricing; OpenAI figures
-// from https://openai.com/api/pricing — keep in sync when providers release
-// new models or adjust prices.
+// Pricing per million tokens (USD). Sources, each authoritative for the
+// rows tagged under it — keep in sync when providers release new models
+// or adjust prices.
+//
+//   Anthropic: https://platform.claude.com/docs/en/about-claude/pricing
+//   OpenAI:    https://openai.com/api/pricing
+//   DeepSeek:  https://api-docs.deepseek.com/quick_start/pricing
+//   Moonshot:  https://www.kimi.com/resources/kimi-k2-6-pricing
+//   Zhipu:     https://docs.z.ai/guides/overview/pricing
 //
 // Anthropic's cacheWrite reflects the 5-minute cache TTL (1.25× input); the
 // daemon reports cache_creation_input_tokens without TTL metadata, so 5m is
-// the safest / cheapest assumption (matches the API default). OpenAI does
-// not bill cache writes separately (cached input is just discounted on
-// subsequent reads), so cacheWrite mirrors input there.
+// the safest / cheapest assumption (matches the API default). OpenAI,
+// DeepSeek, Moonshot and Zhipu do not bill cache writes separately (cached
+// input is just discounted on subsequent reads), so cacheWrite mirrors
+// input there.
 //
 // The resolver matches exact keys after stripping a trailing date snapshot
 // (see `resolvePricing` below). It deliberately does NOT do startsWith
@@ -185,6 +191,40 @@ const MODEL_PRICING: Record<
  // -- OpenAI: GPT-4o family (legacy, kept for runtimes still configured against it) --
  "gpt-4o-mini":        { input: 0.15, output: 0.60, cacheRead: 0.075, cacheWrite: 0.15 },
  "gpt-4o":             { input: 2.50, output: 10,   cacheRead: 1.25,  cacheWrite: 2.50 },
+
+  // -- DeepSeek (api-docs.deepseek.com/quick_start/pricing).
+  //    The official catalog lists exactly two current SKUs; `deepseek-chat`
+  //    and `deepseek-reasoner` are aliases that route to `deepseek-v4-flash`
+  //    (non-thinking and thinking mode respectively) per the same page.
+  //    `deepseek-v4-pro` is currently under a 75%-off promo that ends
+  //    2026-05-31 15:59 UTC; we price at the post-promo standard rate
+  //    ($1.74/$3.48) so the dashboard does not jump 4× on June 1 — accept
+  //    a brief over-estimate during the promo over a sudden cliff after it. --
+  "deepseek-v4-flash":  { input: 0.14, output: 0.28, cacheRead: 0.0028, cacheWrite: 0.14 },
+  "deepseek-v4-pro":    { input: 1.74, output: 3.48, cacheRead: 0.0145, cacheWrite: 1.74 },
+  "deepseek-chat":      { input: 0.14, output: 0.28, cacheRead: 0.0028, cacheWrite: 0.14 },
+  "deepseek-reasoner":  { input: 0.14, output: 0.28, cacheRead: 0.0028, cacheWrite: 0.14 },
+
+  // -- Moonshot Kimi (kimi.com/resources/kimi-k2-6-pricing).
+  //    Only K2.6 is on the official price sheet today; earlier K2 variants
+  //    are intentionally omitted until Moonshot publishes their rates. --
+  "kimi-k2.6":          { input: 0.95, output: 4.00, cacheRead: 0.16,   cacheWrite: 0.95 },
+
+  // -- Zhipu z.ai (docs.z.ai/guides/overview/pricing). Free flash tiers
+  //    are priced at 0 so they resolve cleanly instead of falling through
+  //    to the "unmapped" diagnostic. --
+  "glm-5.1":            { input: 1.4,  output: 4.4,  cacheRead: 0.26,   cacheWrite: 1.4 },
+  "glm-5":              { input: 1.0,  output: 3.2,  cacheRead: 0.2,    cacheWrite: 1.0 },
+  "glm-5-turbo":        { input: 1.2,  output: 4.0,  cacheRead: 0.24,   cacheWrite: 1.2 },
+  "glm-4.7":            { input: 0.6,  output: 2.2,  cacheRead: 0.11,   cacheWrite: 0.6 },
+  "glm-4.7-flashx":     { input: 0.07, output: 0.4,  cacheRead: 0.01,   cacheWrite: 0.07 },
+  "glm-4.7-flash":      { input: 0,    output: 0,    cacheRead: 0,      cacheWrite: 0 },
+  "glm-4.6":            { input: 0.6,  output: 2.2,  cacheRead: 0.11,   cacheWrite: 0.6 },
+  "glm-4.5":            { input: 0.6,  output: 2.2,  cacheRead: 0.11,   cacheWrite: 0.6 },
+  "glm-4.5-x":          { input: 2.2,  output: 8.9,  cacheRead: 0.45,   cacheWrite: 2.2 },
+  "glm-4.5-air":        { input: 0.2,  output: 1.1,  cacheRead: 0.03,   cacheWrite: 0.2 },
+  "glm-4.5-airx":       { input: 1.1,  output: 4.5,  cacheRead: 0.22,   cacheWrite: 1.1 },
+  "glm-4.5-flash":      { input: 0,    output: 0,    cacheRead: 0,      cacheWrite: 0 },
 };

 // Resolve a model string to its pricing tier. Exact match, with four