From ea07b841aaa48f1c3774431687d34bdabb37a10a Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Sat, 31 Jan 2026 15:07:46 +0000
Subject: [PATCH] fix: capture actual model and cost from API response

The model was showing "auto" instead of the actual model (e.g.,
"anthropic/claude-opus-4.5") because we were storing the requested
modelId rather than the model from the API response.

Now properly captures:
- chunk.model: the actual model that generated the response (falls
  back to the requested modelId if the response never reports one)
- chunk.usage.cost or chunk.usage.cost_details.upstream_inference_cost:
  API-provided cost (falls back to calculated cost if not available)

https://claude.ai/code/session_01HqtD9R33oqfB14Gu1V5wHC
---
 src/services/llm/provider-manager.ts | 28 ++++++++++++++++++++++++++--
 src/services/llm/session-manager.ts  | 25 +++++++++++++++++--------
 src/types/llm.ts                     |  4 ++++
 3 files changed, 47 insertions(+), 10 deletions(-)

diff --git a/src/services/llm/provider-manager.ts b/src/services/llm/provider-manager.ts
index 7e058df..97cdd89 100644
--- a/src/services/llm/provider-manager.ts
+++ b/src/services/llm/provider-manager.ts
@@ -377,8 +377,15 @@ class AIProviderManager {
 
     let usage: ChatStreamChunk["usage"] | undefined;
     let finishReason: ChatStreamChunk["finish_reason"] = null;
+    let actualModel: string | undefined;
+    let cost: number | undefined;
 
     for await (const chunk of stream) {
+      // Capture model from first chunk that has it (actual model may differ from requested)
+      if (chunk.model && !actualModel) {
+        actualModel = chunk.model;
+      }
+
       const choice = chunk.choices[0];
       if (!choice) continue;
 
@@ -421,16 +428,33 @@ class AIProviderManager {
         finishReason = choice.finish_reason as ChatStreamChunk["finish_reason"];
       }
 
-      // Capture usage from final chunk
+      // Capture usage and cost from final chunk
       if (chunk.usage) {
         usage = {
           promptTokens: chunk.usage.prompt_tokens,
           completionTokens: chunk.usage.completion_tokens,
         };
+
+        // Extract cost from API response (OpenRouter/PPQ provide this)
+        // eslint-disable-next-line @typescript-eslint/no-explicit-any
+        const usageAny = chunk.usage as any;
+        if (typeof usageAny.cost === "number") {
+          cost = usageAny.cost;
+        } else if (
+          usageAny.cost_details?.upstream_inference_cost !== undefined
+        ) {
+          cost = usageAny.cost_details.upstream_inference_cost;
+        }
       }
     }
 
-    yield { type: "done", usage, finish_reason: finishReason };
+    yield {
+      type: "done",
+      usage,
+      finish_reason: finishReason,
+      model: actualModel,
+      cost,
+    };
     return { success: true };
   }
 
diff --git a/src/services/llm/session-manager.ts b/src/services/llm/session-manager.ts
index c398057..9a4e43e 100644
--- a/src/services/llm/session-manager.ts
+++ b/src/services/llm/session-manager.ts
@@ -535,6 +535,8 @@ class ChatSessionManager {
     const toolCallsMap = new Map<number, StreamingToolCall>();
     let usage: ChatSessionState["usage"];
     let finishReason: ChatSessionState["finishReason"] = "stop";
+    let actualModel: string | undefined;
+    let apiCost: number | undefined;
 
     // Build messages array with system prompt prepended if present
     const messagesForAPI: LLMMessage[] = conversation.systemPrompt
@@ -654,6 +656,13 @@ class ChatSessionManager {
         if (chunk.finish_reason) {
           finishReason = chunk.finish_reason;
         }
+        // Capture actual model and API-provided cost
+        if (chunk.model) {
+          actualModel = chunk.model;
+        }
+        if (chunk.cost !== undefined) {
+          apiCost = chunk.cost;
+        }
         // Clear retry state on success
         const currentSession = this.getSession(conversationId);
         if (currentSession?.retryState) {
@@ -679,12 +688,12 @@ class ChatSessionManager {
       }
     }
 
-    // Calculate cost before creating message (so we can include it)
-    let cost = 0;
-    if (usage) {
+    // Use API-provided cost if available, otherwise calculate from pricing
+    let cost = apiCost;
+    if (cost === undefined && usage) {
       cost = await this.calculateCost(
         providerInstanceId,
-        modelId,
+        actualModel || modelId,
         usage.promptTokens,
         usage.completionTokens,
       );
@@ -696,10 +705,10 @@ class ChatSessionManager {
       role: "assistant",
       content: streaming.content,
       timestamp: Date.now(),
-      // Local-only fields for cost display
-      model: modelId,
+      // Local-only fields for cost display (actual model from API, else the requested model)
+      model: actualModel || modelId,
       usage,
-      cost: cost > 0 ? cost : undefined,
+      cost: cost !== undefined && cost > 0 ? cost : undefined,
     };
 
     // Add optional fields if present
@@ -726,7 +735,7 @@ class ChatSessionManager {
       finishReason,
       toolCalls: streaming.tool_calls,
       usage,
-      cost,
+      cost: cost ?? 0,
     };
   }
 
diff --git a/src/types/llm.ts b/src/types/llm.ts
index 00fe454..5b3b50d 100644
--- a/src/types/llm.ts
+++ b/src/types/llm.ts
@@ -81,10 +81,14 @@ export interface ChatStreamChunk {
   /** Finish reason from the API */
   finish_reason?: "stop" | "length" | "tool_calls" | null;
   error?: string;
+  /** Actual model that generated the response (may differ from requested) */
+  model?: string;
   usage?: {
     promptTokens: number;
     completionTokens: number;
   };
+  /** Cost from API response (USD) - preferred over calculated cost */
+  cost?: number;
   /** Retry information for error recovery */
   retry?: {
     /** Current attempt number (1-based) */