fix: capture actual model and cost from API response

The model was showing "auto" instead of the actual model (e.g.,
"anthropic/claude-opus-4.5") because we were storing the requested
modelId rather than the model from the API response.

Now properly captures:
- chunk.model: the actual model that generated the response
- chunk.usage.cost, or chunk.usage.cost_details.upstream_inference_cost:
  the API-provided cost (falling back to the locally calculated cost
  when the API does not provide one)

https://claude.ai/code/session_01HqtD9R33oqfB14Gu1V5wHC
This commit is contained in:
Claude
2026-01-31 15:07:46 +00:00
parent 15d353e6e6
commit ea07b841aa
3 changed files with 47 additions and 10 deletions

View File

@@ -377,8 +377,15 @@ class AIProviderManager {
let usage: ChatStreamChunk["usage"] | undefined;
let finishReason: ChatStreamChunk["finish_reason"] = null;
let actualModel: string | undefined;
let cost: number | undefined;
for await (const chunk of stream) {
// Capture model from first chunk that has it (actual model may differ from requested)
if (chunk.model && !actualModel) {
actualModel = chunk.model;
}
const choice = chunk.choices[0];
if (!choice) continue;
@@ -421,16 +428,33 @@ class AIProviderManager {
finishReason = choice.finish_reason as ChatStreamChunk["finish_reason"];
}
// Capture usage from final chunk
// Capture usage and cost from final chunk
if (chunk.usage) {
usage = {
promptTokens: chunk.usage.prompt_tokens,
completionTokens: chunk.usage.completion_tokens,
};
// Extract cost from API response (OpenRouter/PPQ provide this)
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const usageAny = chunk.usage as any;
if (typeof usageAny.cost === "number") {
cost = usageAny.cost;
} else if (
usageAny.cost_details?.upstream_inference_cost !== undefined
) {
cost = usageAny.cost_details.upstream_inference_cost;
}
}
}
yield { type: "done", usage, finish_reason: finishReason };
yield {
type: "done",
usage,
finish_reason: finishReason,
model: actualModel,
cost,
};
return { success: true };
}

View File

@@ -535,6 +535,8 @@ class ChatSessionManager {
const toolCallsMap = new Map<number, StreamingToolCall>();
let usage: ChatSessionState["usage"];
let finishReason: ChatSessionState["finishReason"] = "stop";
let actualModel: string | undefined;
let apiCost: number | undefined;
// Build messages array with system prompt prepended if present
const messagesForAPI: LLMMessage[] = conversation.systemPrompt
@@ -654,6 +656,13 @@ class ChatSessionManager {
if (chunk.finish_reason) {
finishReason = chunk.finish_reason;
}
// Capture actual model and API-provided cost
if (chunk.model) {
actualModel = chunk.model;
}
if (chunk.cost !== undefined) {
apiCost = chunk.cost;
}
// Clear retry state on success
const currentSession = this.getSession(conversationId);
if (currentSession?.retryState) {
@@ -679,12 +688,12 @@ class ChatSessionManager {
}
}
// Calculate cost before creating message (so we can include it)
let cost = 0;
if (usage) {
// Use API-provided cost if available, otherwise calculate from pricing
let cost = apiCost;
if (cost === undefined && usage) {
cost = await this.calculateCost(
providerInstanceId,
modelId,
actualModel || modelId,
usage.promptTokens,
usage.completionTokens,
);
@@ -696,10 +705,10 @@ class ChatSessionManager {
role: "assistant",
content: streaming.content,
timestamp: Date.now(),
// Local-only fields for cost display
model: modelId,
// Local-only fields for cost display (actual model from API, not requested model)
model: actualModel || modelId,
usage,
cost: cost > 0 ? cost : undefined,
cost: cost !== undefined && cost > 0 ? cost : undefined,
};
// Add optional fields if present
@@ -726,7 +735,7 @@ class ChatSessionManager {
finishReason,
toolCalls: streaming.tool_calls,
usage,
cost,
cost: cost ?? 0,
};
}

View File

@@ -81,10 +81,14 @@ export interface ChatStreamChunk {
/** Finish reason from the API */
finish_reason?: "stop" | "length" | "tool_calls" | null;
error?: string;
/** Actual model that generated the response (may differ from requested) */
model?: string;
usage?: {
promptTokens: number;
completionTokens: number;
};
/** Cost from API response (USD) - preferred over calculated cost */
cost?: number;
/** Retry information for error recovery */
retry?: {
/** Current attempt number (1-based) */