mirror of
https://github.com/purrgrammer/grimoire.git
synced 2026-04-12 00:17:02 +02:00
feat: add production-ready error handling with retry logic
- Add error-handling.ts with centralized error parsing and categorization
- Implement exponential backoff with jitter for transient errors
- Handle rate limits (429) respecting Retry-After header
- Add retry state to ChatSessionState and ChatStreamChunk types
- Update provider-manager with automatic retry loop for:
  - Rate limit errors (429)
  - Server errors (5xx)
  - Network errors
  - Timeouts
- Add retry indicator UI in AIViewer showing retry progress
- Show detailed error messages with attempt count on failure

Retry configuration:
- Max 3 retries (4 total attempts)
- Exponential backoff: 1s, 2s, 4s (capped at 30s)
- 20% jitter to prevent thundering herd
- Respects abort signal during wait

https://claude.ai/code/session_01HqtD9R33oqfB14Gu1V5wHC
This commit is contained in:
@@ -20,6 +20,8 @@ import {
|
||||
RefreshCw,
|
||||
Play,
|
||||
Sparkles,
|
||||
AlertCircle,
|
||||
RotateCw,
|
||||
} from "lucide-react";
|
||||
import { cn } from "@/lib/utils";
|
||||
import { Button } from "@/components/ui/button";
|
||||
@@ -256,8 +258,14 @@ function ChatPanel({
|
||||
onConversationCreated: (id: string) => void;
|
||||
}) {
|
||||
// Session manager hooks
|
||||
const { messages, isLoading, streamingContent, error, canResume } =
|
||||
useChatSession(conversationId, { providerInstanceId, modelId });
|
||||
const {
|
||||
messages,
|
||||
isLoading,
|
||||
streamingContent,
|
||||
error,
|
||||
canResume,
|
||||
retryState,
|
||||
} = useChatSession(conversationId, { providerInstanceId, modelId });
|
||||
|
||||
const { sendMessage, createConversation, stopGeneration, resumeGeneration } =
|
||||
useChatActions();
|
||||
@@ -444,10 +452,31 @@ function ChatPanel({
|
||||
</>
|
||||
)}
|
||||
|
||||
{/* Error display */}
|
||||
{error && !isLoading && (
|
||||
<div className="text-sm text-destructive bg-destructive/10 rounded-lg px-3 py-2">
|
||||
{error}
|
||||
{/* Retry indicator - shown during automatic retry */}
|
||||
{retryState?.isRetrying && (
|
||||
<div className="flex items-center gap-2 text-sm text-muted-foreground bg-muted/50 rounded-lg px-3 py-2">
|
||||
<RotateCw className="h-4 w-4 animate-spin" />
|
||||
<span>
|
||||
Retrying ({retryState.attempt}/{retryState.maxAttempts})...
|
||||
</span>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Error display - shown for non-retryable errors */}
|
||||
{error && !isLoading && !retryState?.isRetrying && (
|
||||
<div className="flex items-start gap-2 text-sm bg-destructive/10 rounded-lg px-3 py-2">
|
||||
<AlertCircle className="h-4 w-4 text-destructive flex-shrink-0 mt-0.5" />
|
||||
<div className="flex-1">
|
||||
<span className="text-destructive">{error}</span>
|
||||
{retryState &&
|
||||
!retryState.isRetrying &&
|
||||
retryState.attempt > 0 && (
|
||||
<span className="text-muted-foreground text-xs block mt-1">
|
||||
Failed after {retryState.attempt} attempt
|
||||
{retryState.attempt > 1 ? "s" : ""}
|
||||
</span>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
|
||||
@@ -43,6 +43,9 @@ interface UseChatSessionResult {
|
||||
// Resume state
|
||||
canResume: boolean;
|
||||
finishReason: ChatSessionState["finishReason"];
|
||||
|
||||
// Retry state
|
||||
retryState: ChatSessionState["retryState"];
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -118,6 +121,9 @@ export function useChatSession(
|
||||
(session.finishReason === null && session.streamingContent)),
|
||||
),
|
||||
finishReason: session?.finishReason,
|
||||
|
||||
// Retry state
|
||||
retryState: session?.retryState,
|
||||
}),
|
||||
[conversation, session],
|
||||
);
|
||||
|
||||
299
src/services/llm/error-handling.ts
Normal file
299
src/services/llm/error-handling.ts
Normal file
@@ -0,0 +1,299 @@
|
||||
/**
|
||||
* LLM Error Handling
|
||||
*
|
||||
* Centralized error handling with retry logic for LLM API calls.
|
||||
* Supports exponential backoff, rate limit handling, and transient error recovery.
|
||||
*/
|
||||
|
||||
// ─────────────────────────────────────────────────────────────
|
||||
// Error Types
|
||||
// ─────────────────────────────────────────────────────────────
|
||||
|
||||
export interface LLMError {
|
||||
/** Human-readable error message */
|
||||
message: string;
|
||||
/** HTTP status code if applicable */
|
||||
status?: number;
|
||||
/** Whether this error can be retried */
|
||||
retryable: boolean;
|
||||
/** Suggested wait time before retry (ms) */
|
||||
retryAfter?: number;
|
||||
/** Original error for debugging */
|
||||
originalError?: unknown;
|
||||
}
|
||||
|
||||
export type ErrorCategory =
|
||||
| "auth" // 401, 403 - API key issues
|
||||
| "billing" // 402 - Payment required
|
||||
| "not_found" // 404 - Model not found
|
||||
| "rate_limit" // 429 - Rate limited
|
||||
| "server" // 5xx - Server errors
|
||||
| "network" // Connection issues
|
||||
| "timeout" // Request timeout
|
||||
| "cancelled" // User cancelled
|
||||
| "unknown"; // Catch-all
|
||||
|
||||
// ─────────────────────────────────────────────────────────────
|
||||
// Error Detection
|
||||
// ─────────────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* Categorize an error for handling.
|
||||
*/
|
||||
export function categorizeError(error: unknown): ErrorCategory {
|
||||
|
||||
/**
|
||||
* Check if an error is from the OpenAI API (duck typing).
|
||||
*/
|
||||
function isAPIError(
|
||||
error: unknown,
|
||||
): error is { status: number; message?: string; headers?: Headers } {
|
||||
return (
|
||||
error !== null &&
|
||||
typeof error === "object" &&
|
||||
"status" in error &&
|
||||
typeof (error as { status: unknown }).status === "number"
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if an error category is retryable.
|
||||
*/
|
||||
export function isRetryable(category: ErrorCategory): boolean {
|
||||
return (
|
||||
category === "rate_limit" ||
|
||||
category === "server" ||
|
||||
category === "network" ||
|
||||
category === "timeout"
|
||||
);
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────
|
||||
// Error Parsing
|
||||
// ─────────────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* Parse an error into a structured LLMError.
|
||||
*/
|
||||
export function parseError(error: unknown): LLMError {
|
||||
const category = categorizeError(error);
|
||||
const retryable = isRetryable(category);
|
||||
|
||||
// Extract retry-after from rate limit response
|
||||
let retryAfter: number | undefined;
|
||||
if (category === "rate_limit" && isAPIError(error)) {
|
||||
retryAfter = extractRetryAfter(error);
|
||||
}
|
||||
|
||||
// Default backoff for retryable errors without explicit retry-after
|
||||
if (retryable && !retryAfter) {
|
||||
retryAfter = getDefaultBackoff(category);
|
||||
}
|
||||
|
||||
return {
|
||||
message: getErrorMessage(error, category),
|
||||
status: isAPIError(error) ? error.status : undefined,
|
||||
retryable,
|
||||
retryAfter,
|
||||
originalError: error,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract retry-after header from API error.
|
||||
*/
|
||||
function extractRetryAfter(error: { headers?: Headers }): number | undefined {
|
||||
if (!error.headers) return undefined;
|
||||
|
||||
const retryAfter = error.headers.get?.("retry-after");
|
||||
if (!retryAfter) return undefined;
|
||||
|
||||
// Can be seconds or HTTP date
|
||||
const seconds = parseInt(retryAfter, 10);
|
||||
if (!isNaN(seconds)) {
|
||||
return seconds * 1000;
|
||||
}
|
||||
|
||||
// Try parsing as date
|
||||
const date = Date.parse(retryAfter);
|
||||
if (!isNaN(date)) {
|
||||
return Math.max(0, date - Date.now());
|
||||
}
|
||||
|
||||
return undefined;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get default backoff time for a category.
|
||||
*/
|
||||
function getDefaultBackoff(category: ErrorCategory): number {
|
||||
switch (category) {
|
||||
case "rate_limit":
|
||||
return 5000; // 5 seconds
|
||||
case "server":
|
||||
return 2000; // 2 seconds
|
||||
case "network":
|
||||
case "timeout":
|
||||
return 1000; // 1 second
|
||||
default:
|
||||
return 1000;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get user-friendly error message.
|
||||
*/
|
||||
function getErrorMessage(error: unknown, category: ErrorCategory): string {
|
||||
switch (category) {
|
||||
case "auth":
|
||||
return "Invalid API key. Please check your credentials.";
|
||||
case "billing":
|
||||
return "Insufficient balance. Please top up your account.";
|
||||
case "not_found":
|
||||
return "Model not found. Please select a different model.";
|
||||
case "rate_limit":
|
||||
return "Rate limit exceeded. Retrying automatically...";
|
||||
case "server":
|
||||
return "Provider service is temporarily unavailable. Retrying...";
|
||||
case "network":
|
||||
return "Network error. Please check your connection.";
|
||||
case "timeout":
|
||||
return "Request timed out. Retrying...";
|
||||
case "cancelled":
|
||||
return "Request was cancelled.";
|
||||
default:
|
||||
if (error instanceof Error) {
|
||||
return error.message;
|
||||
}
|
||||
return "An unknown error occurred.";
|
||||
}
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────
|
||||
// Retry Logic
|
||||
// ─────────────────────────────────────────────────────────────
|
||||
|
||||
export interface RetryConfig {
|
||||
/** Maximum number of retry attempts */
|
||||
maxRetries: number;
|
||||
/** Base delay in milliseconds */
|
||||
baseDelay: number;
|
||||
/** Maximum delay in milliseconds */
|
||||
maxDelay: number;
|
||||
/** Jitter factor (0-1) to add randomness */
|
||||
jitter: number;
|
||||
}
|
||||
|
||||
/** Defaults: 3 retries (4 total attempts), 1s base doubling to a 30s cap, ±20% jitter. */
export const DEFAULT_RETRY_CONFIG: RetryConfig = {
  maxRetries: 3,
  baseDelay: 1000,
  maxDelay: 30000,
  jitter: 0.2,
};
|
||||
|
||||
/**
|
||||
* Calculate backoff delay with exponential increase and jitter.
|
||||
*/
|
||||
export function calculateBackoff(
|
||||
attempt: number,
|
||||
suggestedDelay?: number,
|
||||
config: RetryConfig = DEFAULT_RETRY_CONFIG,
|
||||
): number {
|
||||
// Use suggested delay if provided (e.g., from Retry-After header)
|
||||
if (suggestedDelay && suggestedDelay > 0) {
|
||||
return Math.min(suggestedDelay, config.maxDelay);
|
||||
}
|
||||
|
||||
// Exponential backoff: baseDelay * 2^attempt
|
||||
const exponentialDelay = config.baseDelay * Math.pow(2, attempt);
|
||||
|
||||
// Add jitter to prevent thundering herd
|
||||
const jitterRange = exponentialDelay * config.jitter;
|
||||
const jitter = Math.random() * jitterRange * 2 - jitterRange;
|
||||
|
||||
return Math.min(exponentialDelay + jitter, config.maxDelay);
|
||||
}
|
||||
|
||||
/**
|
||||
* Wait for the specified duration.
|
||||
*/
|
||||
export function sleep(ms: number, signal?: AbortSignal): Promise<void> {
|
||||
return new Promise((resolve, reject) => {
|
||||
if (signal?.aborted) {
|
||||
reject(new DOMException("Aborted", "AbortError"));
|
||||
return;
|
||||
}
|
||||
|
||||
const timeout = setTimeout(resolve, ms);
|
||||
|
||||
signal?.addEventListener("abort", () => {
|
||||
clearTimeout(timeout);
|
||||
reject(new DOMException("Aborted", "AbortError"));
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────
|
||||
// Retry State for UI
|
||||
// ─────────────────────────────────────────────────────────────
|
||||
|
||||
export interface RetryState {
|
||||
/** Current retry attempt (0 = initial, 1+ = retry) */
|
||||
attempt: number;
|
||||
/** Maximum attempts allowed */
|
||||
maxAttempts: number;
|
||||
/** Whether currently waiting before retry */
|
||||
waiting: boolean;
|
||||
/** Time remaining until next retry (ms) */
|
||||
waitTimeRemaining: number;
|
||||
/** Last error that triggered retry */
|
||||
lastError?: LLMError;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a retry state tracker for UI updates.
|
||||
*/
|
||||
export function createRetryState(
|
||||
config: RetryConfig = DEFAULT_RETRY_CONFIG,
|
||||
): RetryState {
|
||||
return {
|
||||
attempt: 0,
|
||||
maxAttempts: config.maxRetries + 1, // +1 for initial attempt
|
||||
waiting: false,
|
||||
waitTimeRemaining: 0,
|
||||
};
|
||||
}
|
||||
@@ -15,11 +15,14 @@ import type {
|
||||
ChatStreamChunk,
|
||||
ChatOptions,
|
||||
} from "@/types/llm";
|
||||
import { createOpenAIClient, formatModelName } from "./openai-client";
|
||||
import {
|
||||
createOpenAIClient,
|
||||
formatModelName,
|
||||
parseAPIError,
|
||||
} from "./openai-client";
|
||||
parseError,
|
||||
calculateBackoff,
|
||||
sleep,
|
||||
DEFAULT_RETRY_CONFIG,
|
||||
type RetryConfig,
|
||||
} from "./error-handling";
|
||||
import { AI_PROVIDER_PRESETS } from "@/lib/ai-provider-presets";
|
||||
|
||||
const MODEL_CACHE_TTL = 1000 * 60 * 5; // 5 minutes
|
||||
@@ -237,12 +240,13 @@ class AIProviderManager {
|
||||
|
||||
/**
|
||||
* Chat with a model (streaming).
|
||||
* Includes automatic retry logic for transient errors.
|
||||
*/
|
||||
async *chat(
|
||||
instanceId: string,
|
||||
modelId: string,
|
||||
messages: LLMMessage[],
|
||||
options: Omit<ChatOptions, "model">,
|
||||
options: Omit<ChatOptions, "model"> & { retryConfig?: RetryConfig },
|
||||
): AsyncGenerator<ChatStreamChunk> {
|
||||
const instance = await db.llmProviders.get(instanceId);
|
||||
if (!instance) {
|
||||
@@ -255,108 +259,179 @@ class AIProviderManager {
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
const client = this.getClient(instance);
|
||||
const retryConfig = options.retryConfig ?? DEFAULT_RETRY_CONFIG;
|
||||
const maxAttempts = retryConfig.maxRetries + 1;
|
||||
|
||||
// Format messages for OpenAI API
|
||||
const formattedMessages = this.formatMessages(messages);
|
||||
for (let attempt = 0; attempt < maxAttempts; attempt++) {
|
||||
try {
|
||||
// Yield all chunks from the stream attempt
|
||||
const streamResult = yield* this.streamChat(
|
||||
instance,
|
||||
modelId,
|
||||
messages,
|
||||
options,
|
||||
);
|
||||
|
||||
// Build request params
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
const params: any = {
|
||||
model: modelId,
|
||||
messages: formattedMessages,
|
||||
stream: true,
|
||||
stream_options: { include_usage: true },
|
||||
temperature: options.temperature ?? 0.7,
|
||||
max_tokens: options.maxTokens,
|
||||
};
|
||||
// If we got here successfully, update usage tracking and return
|
||||
if (streamResult.success) {
|
||||
await db.llmProviders.update(instanceId, {
|
||||
lastUsed: Date.now(),
|
||||
lastModelId: modelId,
|
||||
});
|
||||
this.addRecentModel(instance.providerId, modelId);
|
||||
return;
|
||||
}
|
||||
} catch (error) {
|
||||
// Handle abort - don't retry
|
||||
if (error instanceof DOMException && error.name === "AbortError") {
|
||||
yield { type: "done" };
|
||||
return;
|
||||
}
|
||||
|
||||
// Add tools if provided
|
||||
if (options.tools && options.tools.length > 0) {
|
||||
params.tools = options.tools;
|
||||
if (options.tool_choice) {
|
||||
params.tool_choice = options.tool_choice;
|
||||
// Parse the error
|
||||
const llmError = parseError(error);
|
||||
|
||||
// If not retryable or last attempt, yield error and stop
|
||||
if (!llmError.retryable || attempt >= maxAttempts - 1) {
|
||||
yield {
|
||||
type: "error",
|
||||
error: llmError.message,
|
||||
retry: {
|
||||
attempt: attempt + 1,
|
||||
maxAttempts,
|
||||
delayMs: 0,
|
||||
retryable: false,
|
||||
},
|
||||
};
|
||||
return;
|
||||
}
|
||||
|
||||
// Calculate backoff delay
|
||||
const delayMs = calculateBackoff(
|
||||
attempt,
|
||||
llmError.retryAfter,
|
||||
retryConfig,
|
||||
);
|
||||
|
||||
// Yield retry event so UI can show progress
|
||||
yield {
|
||||
type: "retry",
|
||||
error: llmError.message,
|
||||
retry: {
|
||||
attempt: attempt + 1,
|
||||
maxAttempts,
|
||||
delayMs,
|
||||
retryable: true,
|
||||
},
|
||||
};
|
||||
|
||||
// Wait before retrying (respects abort signal)
|
||||
try {
|
||||
await sleep(delayMs, options.signal);
|
||||
} catch {
|
||||
// Aborted during wait
|
||||
yield { type: "done" };
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const stream = (await client.chat.completions.create(params, {
|
||||
signal: options.signal,
|
||||
})) as unknown as AsyncIterable<OpenAI.Chat.Completions.ChatCompletionChunk>;
|
||||
/**
|
||||
* Internal: Stream a single chat completion attempt.
|
||||
* Returns a result indicating success/failure.
|
||||
*/
|
||||
private async *streamChat(
|
||||
instance: LLMProviderInstance,
|
||||
modelId: string,
|
||||
messages: LLMMessage[],
|
||||
options: Omit<ChatOptions, "model">,
|
||||
): AsyncGenerator<ChatStreamChunk, { success: boolean }> {
|
||||
const client = this.getClient(instance);
|
||||
|
||||
let usage: ChatStreamChunk["usage"] | undefined;
|
||||
let finishReason: ChatStreamChunk["finish_reason"] = null;
|
||||
// Format messages for OpenAI API
|
||||
const formattedMessages = this.formatMessages(messages);
|
||||
|
||||
for await (const chunk of stream) {
|
||||
const choice = chunk.choices[0];
|
||||
if (!choice) continue;
|
||||
// Build request params
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
const params: any = {
|
||||
model: modelId,
|
||||
messages: formattedMessages,
|
||||
stream: true,
|
||||
stream_options: { include_usage: true },
|
||||
temperature: options.temperature ?? 0.7,
|
||||
max_tokens: options.maxTokens,
|
||||
};
|
||||
|
||||
const delta = choice.delta;
|
||||
// Add tools if provided
|
||||
if (options.tools && options.tools.length > 0) {
|
||||
params.tools = options.tools;
|
||||
if (options.tool_choice) {
|
||||
params.tool_choice = options.tool_choice;
|
||||
}
|
||||
}
|
||||
|
||||
// Regular content
|
||||
if (delta?.content) {
|
||||
yield { type: "token", content: delta.content };
|
||||
}
|
||||
const stream = (await client.chat.completions.create(params, {
|
||||
signal: options.signal,
|
||||
})) as unknown as AsyncIterable<OpenAI.Chat.Completions.ChatCompletionChunk>;
|
||||
|
||||
// Extended thinking / reasoning (Claude, DeepSeek, etc.)
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
const reasoning = (delta as any)?.reasoning_content;
|
||||
if (reasoning) {
|
||||
yield { type: "reasoning", content: reasoning };
|
||||
}
|
||||
let usage: ChatStreamChunk["usage"] | undefined;
|
||||
let finishReason: ChatStreamChunk["finish_reason"] = null;
|
||||
|
||||
// Tool calls (streamed incrementally)
|
||||
if (delta?.tool_calls) {
|
||||
for (const tc of delta.tool_calls) {
|
||||
yield {
|
||||
type: "tool_call",
|
||||
tool_call: {
|
||||
index: tc.index,
|
||||
id: tc.id,
|
||||
type: tc.type,
|
||||
function: tc.function
|
||||
? {
|
||||
name: tc.function.name,
|
||||
arguments: tc.function.arguments,
|
||||
}
|
||||
: undefined,
|
||||
},
|
||||
};
|
||||
}
|
||||
}
|
||||
for await (const chunk of stream) {
|
||||
const choice = chunk.choices[0];
|
||||
if (!choice) continue;
|
||||
|
||||
// Capture finish reason
|
||||
if (choice.finish_reason) {
|
||||
finishReason =
|
||||
choice.finish_reason as ChatStreamChunk["finish_reason"];
|
||||
}
|
||||
const delta = choice.delta;
|
||||
|
||||
// Capture usage from final chunk
|
||||
if (chunk.usage) {
|
||||
usage = {
|
||||
promptTokens: chunk.usage.prompt_tokens,
|
||||
completionTokens: chunk.usage.completion_tokens,
|
||||
// Regular content
|
||||
if (delta?.content) {
|
||||
yield { type: "token", content: delta.content };
|
||||
}
|
||||
|
||||
// Extended thinking / reasoning (Claude, DeepSeek, etc.)
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
const reasoning = (delta as any)?.reasoning_content;
|
||||
if (reasoning) {
|
||||
yield { type: "reasoning", content: reasoning };
|
||||
}
|
||||
|
||||
// Tool calls (streamed incrementally)
|
||||
if (delta?.tool_calls) {
|
||||
for (const tc of delta.tool_calls) {
|
||||
yield {
|
||||
type: "tool_call",
|
||||
tool_call: {
|
||||
index: tc.index,
|
||||
id: tc.id,
|
||||
type: tc.type,
|
||||
function: tc.function
|
||||
? {
|
||||
name: tc.function.name,
|
||||
arguments: tc.function.arguments,
|
||||
}
|
||||
: undefined,
|
||||
},
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
yield { type: "done", usage, finish_reason: finishReason };
|
||||
|
||||
// Update lastUsed and add to recent models
|
||||
await db.llmProviders.update(instanceId, {
|
||||
lastUsed: Date.now(),
|
||||
lastModelId: modelId,
|
||||
});
|
||||
this.addRecentModel(instance.providerId, modelId);
|
||||
} catch (error) {
|
||||
if (error instanceof DOMException && error.name === "AbortError") {
|
||||
yield { type: "done" };
|
||||
return;
|
||||
// Capture finish reason
|
||||
if (choice.finish_reason) {
|
||||
finishReason = choice.finish_reason as ChatStreamChunk["finish_reason"];
|
||||
}
|
||||
|
||||
const message = parseAPIError(error);
|
||||
yield { type: "error", error: message };
|
||||
// Capture usage from final chunk
|
||||
if (chunk.usage) {
|
||||
usage = {
|
||||
promptTokens: chunk.usage.prompt_tokens,
|
||||
completionTokens: chunk.usage.completion_tokens,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
yield { type: "done", usage, finish_reason: finishReason };
|
||||
return { success: true };
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -410,7 +485,8 @@ class AIProviderManager {
|
||||
await client.models.list({ signal: AbortSignal.timeout(5000) });
|
||||
return { success: true };
|
||||
} catch (error) {
|
||||
return { success: false, error: parseAPIError(error) };
|
||||
const llmError = parseError(error);
|
||||
return { success: false, error: llmError.message };
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -628,12 +628,53 @@ class ChatSessionManager {
|
||||
streamingMessage: { ...streaming },
|
||||
lastActivity: Date.now(),
|
||||
});
|
||||
} else if (chunk.type === "retry" && chunk.retry) {
|
||||
// Transient error - retrying automatically
|
||||
this.updateSession(conversationId, {
|
||||
...session,
|
||||
retryState: {
|
||||
attempt: chunk.retry.attempt,
|
||||
maxAttempts: chunk.retry.maxAttempts,
|
||||
isRetrying: true,
|
||||
retryDelayMs: chunk.retry.delayMs,
|
||||
},
|
||||
lastError: chunk.error,
|
||||
lastActivity: Date.now(),
|
||||
});
|
||||
|
||||
// Emit error event for UI awareness (but we're handling it)
|
||||
if (chunk.error) {
|
||||
this.error$.next({
|
||||
conversationId,
|
||||
error: `${chunk.error} (retry ${chunk.retry.attempt}/${chunk.retry.maxAttempts})`,
|
||||
});
|
||||
}
|
||||
} else if (chunk.type === "done") {
|
||||
usage = chunk.usage;
|
||||
if (chunk.finish_reason) {
|
||||
finishReason = chunk.finish_reason;
|
||||
}
|
||||
// Clear retry state on success
|
||||
const currentSession = this.getSession(conversationId);
|
||||
if (currentSession?.retryState) {
|
||||
this.updateSession(conversationId, {
|
||||
...currentSession,
|
||||
retryState: undefined,
|
||||
});
|
||||
}
|
||||
} else if (chunk.type === "error") {
|
||||
// Check if this is a non-retryable error with retry info
|
||||
if (chunk.retry && !chunk.retry.retryable) {
|
||||
this.updateSession(conversationId, {
|
||||
...session,
|
||||
retryState: {
|
||||
attempt: chunk.retry.attempt,
|
||||
maxAttempts: chunk.retry.maxAttempts,
|
||||
isRetrying: false,
|
||||
retryDelayMs: 0,
|
||||
},
|
||||
});
|
||||
}
|
||||
throw new Error(chunk.error || "Unknown error");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -74,7 +74,7 @@ export interface AISettings {
|
||||
// ─────────────────────────────────────────────────────────────
|
||||
|
||||
export interface ChatStreamChunk {
|
||||
type: "token" | "reasoning" | "tool_call" | "done" | "error";
|
||||
type: "token" | "reasoning" | "tool_call" | "done" | "error" | "retry";
|
||||
content?: string;
|
||||
/** Streaming tool call delta */
|
||||
tool_call?: StreamingToolCall;
|
||||
@@ -85,6 +85,17 @@ export interface ChatStreamChunk {
|
||||
promptTokens: number;
|
||||
completionTokens: number;
|
||||
};
|
||||
/** Retry information for error recovery */
|
||||
retry?: {
|
||||
/** Current attempt number (1-based) */
|
||||
attempt: number;
|
||||
/** Maximum attempts allowed */
|
||||
maxAttempts: number;
|
||||
/** Delay before next retry (ms) */
|
||||
delayMs: number;
|
||||
/** Whether this error is retryable */
|
||||
retryable: boolean;
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -154,6 +165,18 @@ export interface ChatSessionState {
|
||||
finishReason?: "stop" | "length" | "tool_calls" | "error" | null;
|
||||
lastError?: string;
|
||||
|
||||
// Retry state for transient errors
|
||||
retryState?: {
|
||||
/** Current retry attempt (1-based) */
|
||||
attempt: number;
|
||||
/** Maximum attempts allowed */
|
||||
maxAttempts: number;
|
||||
/** Whether currently waiting before retry */
|
||||
isRetrying: boolean;
|
||||
/** Time remaining until next retry (ms) */
|
||||
retryDelayMs: number;
|
||||
};
|
||||
|
||||
// Reference counting - how many windows have this session open
|
||||
subscriberCount: number;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user