From ea07b841aaa48f1c3774431687d34bdabb37a10a Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 31 Jan 2026 15:07:46 +0000 Subject: [PATCH] fix: capture actual model and cost from API response The model was showing "auto" instead of the actual model (e.g., "anthropic/claude-opus-4.5") because we were storing the requested modelId rather than the model from the API response. Now properly captures: - chunk.model: the actual model that generated the response - chunk.usage.cost or chunk.usage.cost_details.upstream_inference_cost: API-provided cost (falls back to calculated cost if not available) https://claude.ai/code/session_01HqtD9R33oqfB14Gu1V5wHC --- src/services/llm/provider-manager.ts | 28 ++++++++++++++++++++++++++-- src/services/llm/session-manager.ts | 25 +++++++++++++++++-------- src/types/llm.ts | 4 ++++ 3 files changed, 47 insertions(+), 10 deletions(-) diff --git a/src/services/llm/provider-manager.ts b/src/services/llm/provider-manager.ts index 7e058df..97cdd89 100644 --- a/src/services/llm/provider-manager.ts +++ b/src/services/llm/provider-manager.ts @@ -377,8 +377,15 @@ class AIProviderManager { let usage: ChatStreamChunk["usage"] | undefined; let finishReason: ChatStreamChunk["finish_reason"] = null; + let actualModel: string | undefined; + let cost: number | undefined; for await (const chunk of stream) { + // Capture model from first chunk that has it (actual model may differ from requested) + if (chunk.model && !actualModel) { + actualModel = chunk.model; + } + const choice = chunk.choices[0]; if (!choice) continue; @@ -421,16 +428,33 @@ class AIProviderManager { finishReason = choice.finish_reason as ChatStreamChunk["finish_reason"]; } - // Capture usage from final chunk + // Capture usage and cost from final chunk if (chunk.usage) { usage = { promptTokens: chunk.usage.prompt_tokens, completionTokens: chunk.usage.completion_tokens, }; + + // Extract cost from API response (OpenRouter/PPQ provide this) + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const usageAny = chunk.usage as any; + if (typeof usageAny.cost === "number") { + cost = usageAny.cost; + } else if ( + usageAny.cost_details?.upstream_inference_cost !== undefined + ) { + cost = usageAny.cost_details.upstream_inference_cost; + } } } - yield { type: "done", usage, finish_reason: finishReason }; + yield { + type: "done", + usage, + finish_reason: finishReason, + model: actualModel, + cost, + }; return { success: true }; } diff --git a/src/services/llm/session-manager.ts b/src/services/llm/session-manager.ts index c398057..9a4e43e 100644 --- a/src/services/llm/session-manager.ts +++ b/src/services/llm/session-manager.ts @@ -535,6 +535,8 @@ class ChatSessionManager { const toolCallsMap = new Map(); let usage: ChatSessionState["usage"]; let finishReason: ChatSessionState["finishReason"] = "stop"; + let actualModel: string | undefined; + let apiCost: number | undefined; // Build messages array with system prompt prepended if present const messagesForAPI: LLMMessage[] = conversation.systemPrompt @@ -654,6 +656,13 @@ class ChatSessionManager { if (chunk.finish_reason) { finishReason = chunk.finish_reason; } + // Capture actual model and API-provided cost + if (chunk.model) { + actualModel = chunk.model; + } + if (chunk.cost !== undefined) { + apiCost = chunk.cost; + } // Clear retry state on success const currentSession = this.getSession(conversationId); if (currentSession?.retryState) { @@ -679,12 +688,12 @@ class ChatSessionManager { } } - // Calculate cost before creating message (so we can include it) - let cost = 0; - if (usage) { + // Use API-provided cost if available, otherwise calculate from pricing + let cost = apiCost; + if (cost === undefined && usage) { cost = await this.calculateCost( providerInstanceId, - modelId, + actualModel || modelId, usage.promptTokens, usage.completionTokens, ); @@ -696,10 +705,10 @@ class ChatSessionManager { role: "assistant", content: streaming.content, timestamp: Date.now(), - // Local-only fields for cost display - model: modelId, + // Local-only fields for cost display (actual model from API, not requested model) + model: actualModel || modelId, usage, - cost: cost > 0 ? cost : undefined, + cost: cost !== undefined && cost > 0 ? cost : undefined, }; // Add optional fields if present @@ -726,7 +735,7 @@ class ChatSessionManager { finishReason, toolCalls: streaming.tool_calls, usage, - cost, + cost: cost ?? 0, }; } diff --git a/src/types/llm.ts b/src/types/llm.ts index 00fe454..5b3b50d 100644 --- a/src/types/llm.ts +++ b/src/types/llm.ts @@ -81,10 +81,14 @@ export interface ChatStreamChunk { /** Finish reason from the API */ finish_reason?: "stop" | "length" | "tool_calls" | null; error?: string; + /** Actual model that generated the response (may differ from requested) */ + model?: string; usage?: { promptTokens: number; completionTokens: number; }; + /** Cost from API response (USD) - preferred over calculated cost */ + cost?: number; /** Retry information for error recovery */ retry?: { /** Current attempt number (1-based) */