mirror of
https://github.com/purrgrammer/grimoire.git
synced 2026-04-12 08:27:27 +02:00
fix: capture actual model and cost from API response
The model was showing "auto" instead of the actual model (e.g., "anthropic/claude-opus-4.5") because we were storing the requested modelId rather than the model from the API response.

Now properly captures:
- chunk.model: the actual model that generated the response
- chunk.usage.cost or chunk.usage.cost_details.upstream_inference_cost: the API-provided cost (falls back to the calculated cost if not available)

https://claude.ai/code/session_01HqtD9R33oqfB14Gu1V5wHC
This commit is contained in:
@@ -377,8 +377,15 @@ class AIProviderManager {
|
||||
|
||||
let usage: ChatStreamChunk["usage"] | undefined;
|
||||
let finishReason: ChatStreamChunk["finish_reason"] = null;
|
||||
let actualModel: string | undefined;
|
||||
let cost: number | undefined;
|
||||
|
||||
for await (const chunk of stream) {
|
||||
// Capture model from first chunk that has it (actual model may differ from requested)
|
||||
if (chunk.model && !actualModel) {
|
||||
actualModel = chunk.model;
|
||||
}
|
||||
|
||||
const choice = chunk.choices[0];
|
||||
if (!choice) continue;
|
||||
|
||||
@@ -421,16 +428,33 @@ class AIProviderManager {
|
||||
finishReason = choice.finish_reason as ChatStreamChunk["finish_reason"];
|
||||
}
|
||||
|
||||
// Capture usage from final chunk
|
||||
// Capture usage and cost from final chunk
|
||||
if (chunk.usage) {
|
||||
usage = {
|
||||
promptTokens: chunk.usage.prompt_tokens,
|
||||
completionTokens: chunk.usage.completion_tokens,
|
||||
};
|
||||
|
||||
// Extract cost from API response (OpenRouter/PPQ provide this)
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
const usageAny = chunk.usage as any;
|
||||
if (typeof usageAny.cost === "number") {
|
||||
cost = usageAny.cost;
|
||||
} else if (
|
||||
usageAny.cost_details?.upstream_inference_cost !== undefined
|
||||
) {
|
||||
cost = usageAny.cost_details.upstream_inference_cost;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
yield { type: "done", usage, finish_reason: finishReason };
|
||||
yield {
|
||||
type: "done",
|
||||
usage,
|
||||
finish_reason: finishReason,
|
||||
model: actualModel,
|
||||
cost,
|
||||
};
|
||||
return { success: true };
|
||||
}
|
||||
|
||||
|
||||
@@ -535,6 +535,8 @@ class ChatSessionManager {
|
||||
const toolCallsMap = new Map<number, StreamingToolCall>();
|
||||
let usage: ChatSessionState["usage"];
|
||||
let finishReason: ChatSessionState["finishReason"] = "stop";
|
||||
let actualModel: string | undefined;
|
||||
let apiCost: number | undefined;
|
||||
|
||||
// Build messages array with system prompt prepended if present
|
||||
const messagesForAPI: LLMMessage[] = conversation.systemPrompt
|
||||
@@ -654,6 +656,13 @@ class ChatSessionManager {
|
||||
if (chunk.finish_reason) {
|
||||
finishReason = chunk.finish_reason;
|
||||
}
|
||||
// Capture actual model and API-provided cost
|
||||
if (chunk.model) {
|
||||
actualModel = chunk.model;
|
||||
}
|
||||
if (chunk.cost !== undefined) {
|
||||
apiCost = chunk.cost;
|
||||
}
|
||||
// Clear retry state on success
|
||||
const currentSession = this.getSession(conversationId);
|
||||
if (currentSession?.retryState) {
|
||||
@@ -679,12 +688,12 @@ class ChatSessionManager {
|
||||
}
|
||||
}
|
||||
|
||||
// Calculate cost before creating message (so we can include it)
|
||||
let cost = 0;
|
||||
if (usage) {
|
||||
// Use API-provided cost if available, otherwise calculate from pricing
|
||||
let cost = apiCost;
|
||||
if (cost === undefined && usage) {
|
||||
cost = await this.calculateCost(
|
||||
providerInstanceId,
|
||||
modelId,
|
||||
actualModel || modelId,
|
||||
usage.promptTokens,
|
||||
usage.completionTokens,
|
||||
);
|
||||
@@ -696,10 +705,10 @@ class ChatSessionManager {
|
||||
role: "assistant",
|
||||
content: streaming.content,
|
||||
timestamp: Date.now(),
|
||||
// Local-only fields for cost display
|
||||
model: modelId,
|
||||
// Local-only fields for cost display (actual model from API, not requested model)
|
||||
model: actualModel || modelId,
|
||||
usage,
|
||||
cost: cost > 0 ? cost : undefined,
|
||||
cost: cost !== undefined && cost > 0 ? cost : undefined,
|
||||
};
|
||||
|
||||
// Add optional fields if present
|
||||
@@ -726,7 +735,7 @@ class ChatSessionManager {
|
||||
finishReason,
|
||||
toolCalls: streaming.tool_calls,
|
||||
usage,
|
||||
cost,
|
||||
cost: cost ?? 0,
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@@ -81,10 +81,14 @@ export interface ChatStreamChunk {
|
||||
/** Finish reason from the API */
|
||||
finish_reason?: "stop" | "length" | "tool_calls" | null;
|
||||
error?: string;
|
||||
/** Actual model that generated the response (may differ from requested) */
|
||||
model?: string;
|
||||
usage?: {
|
||||
promptTokens: number;
|
||||
completionTokens: number;
|
||||
};
|
||||
/** Cost from API response (USD) - preferred over calculated cost */
|
||||
cost?: number;
|
||||
/** Retry information for error recovery */
|
||||
retry?: {
|
||||
/** Current attempt number (1-based) */
|
||||
|
||||
Reference in New Issue
Block a user