mirror of
https://github.com/purrgrammer/grimoire.git
synced 2026-04-12 08:27:27 +02:00
fix: capture actual model and cost from API response
The model was showing "auto" instead of the actual model (e.g., "anthropic/claude-opus-4.5") because we were storing the requested modelId rather than the model from the API response.

Now properly captures:
- chunk.model: the actual model that generated the response
- chunk.usage.cost or chunk.usage.cost_details.upstream_inference_cost: the API-provided cost (falls back to the calculated cost if not available)

https://claude.ai/code/session_01HqtD9R33oqfB14Gu1V5wHC
This commit is contained in:
@@ -377,8 +377,15 @@ class AIProviderManager {
|
||||
|
||||
let usage: ChatStreamChunk["usage"] | undefined;
|
||||
let finishReason: ChatStreamChunk["finish_reason"] = null;
|
||||
let actualModel: string | undefined;
|
||||
let cost: number | undefined;
|
||||
|
||||
for await (const chunk of stream) {
|
||||
// Capture model from first chunk that has it (actual model may differ from requested)
|
||||
if (chunk.model && !actualModel) {
|
||||
actualModel = chunk.model;
|
||||
}
|
||||
|
||||
const choice = chunk.choices[0];
|
||||
if (!choice) continue;
|
||||
|
||||
@@ -421,16 +428,33 @@ class AIProviderManager {
|
||||
finishReason = choice.finish_reason as ChatStreamChunk["finish_reason"];
|
||||
}
|
||||
|
||||
// Capture usage from final chunk
|
||||
// Capture usage and cost from final chunk
|
||||
if (chunk.usage) {
|
||||
usage = {
|
||||
promptTokens: chunk.usage.prompt_tokens,
|
||||
completionTokens: chunk.usage.completion_tokens,
|
||||
};
|
||||
|
||||
// Extract cost from API response (OpenRouter/PPQ provide this)
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
const usageAny = chunk.usage as any;
|
||||
if (typeof usageAny.cost === "number") {
|
||||
cost = usageAny.cost;
|
||||
} else if (
|
||||
usageAny.cost_details?.upstream_inference_cost !== undefined
|
||||
) {
|
||||
cost = usageAny.cost_details.upstream_inference_cost;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
yield { type: "done", usage, finish_reason: finishReason };
|
||||
yield {
|
||||
type: "done",
|
||||
usage,
|
||||
finish_reason: finishReason,
|
||||
model: actualModel,
|
||||
cost,
|
||||
};
|
||||
return { success: true };
|
||||
}
|
||||
|
||||
|
||||
@@ -535,6 +535,8 @@ class ChatSessionManager {
|
||||
const toolCallsMap = new Map<number, StreamingToolCall>();
|
||||
let usage: ChatSessionState["usage"];
|
||||
let finishReason: ChatSessionState["finishReason"] = "stop";
|
||||
let actualModel: string | undefined;
|
||||
let apiCost: number | undefined;
|
||||
|
||||
// Build messages array with system prompt prepended if present
|
||||
const messagesForAPI: LLMMessage[] = conversation.systemPrompt
|
||||
@@ -654,6 +656,13 @@ class ChatSessionManager {
|
||||
if (chunk.finish_reason) {
|
||||
finishReason = chunk.finish_reason;
|
||||
}
|
||||
// Capture actual model and API-provided cost
|
||||
if (chunk.model) {
|
||||
actualModel = chunk.model;
|
||||
}
|
||||
if (chunk.cost !== undefined) {
|
||||
apiCost = chunk.cost;
|
||||
}
|
||||
// Clear retry state on success
|
||||
const currentSession = this.getSession(conversationId);
|
||||
if (currentSession?.retryState) {
|
||||
@@ -679,12 +688,12 @@ class ChatSessionManager {
|
||||
}
|
||||
}
|
||||
|
||||
// Calculate cost before creating message (so we can include it)
|
||||
let cost = 0;
|
||||
if (usage) {
|
||||
// Use API-provided cost if available, otherwise calculate from pricing
|
||||
let cost = apiCost;
|
||||
if (cost === undefined && usage) {
|
||||
cost = await this.calculateCost(
|
||||
providerInstanceId,
|
||||
modelId,
|
||||
actualModel || modelId,
|
||||
usage.promptTokens,
|
||||
usage.completionTokens,
|
||||
);
|
||||
@@ -696,10 +705,10 @@ class ChatSessionManager {
|
||||
role: "assistant",
|
||||
content: streaming.content,
|
||||
timestamp: Date.now(),
|
||||
// Local-only fields for cost display
|
||||
model: modelId,
|
||||
// Local-only fields for cost display (actual model from API, not requested model)
|
||||
model: actualModel || modelId,
|
||||
usage,
|
||||
cost: cost > 0 ? cost : undefined,
|
||||
cost: cost !== undefined && cost > 0 ? cost : undefined,
|
||||
};
|
||||
|
||||
// Add optional fields if present
|
||||
@@ -726,7 +735,7 @@ class ChatSessionManager {
|
||||
finishReason,
|
||||
toolCalls: streaming.tool_calls,
|
||||
usage,
|
||||
cost,
|
||||
cost: cost ?? 0,
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@@ -81,10 +81,14 @@ export interface ChatStreamChunk {
|
||||
/** Finish reason from the API */
|
||||
finish_reason?: "stop" | "length" | "tool_calls" | null;
|
||||
error?: string;
|
||||
/** Actual model that generated the response (may differ from requested) */
|
||||
model?: string;
|
||||
usage?: {
|
||||
promptTokens: number;
|
||||
completionTokens: number;
|
||||
};
|
||||
/** Cost from API response (USD) - preferred over calculated cost */
|
||||
cost?: number;
|
||||
/** Retry information for error recovery */
|
||||
retry?: {
|
||||
/** Current attempt number (1-based) */
|
||||
|
||||
Reference in New Issue
Block a user