diff --git a/src/components/AIViewer.tsx b/src/components/AIViewer.tsx index 6dc0ce6..b1a6923 100644 --- a/src/components/AIViewer.tsx +++ b/src/components/AIViewer.tsx @@ -20,6 +20,8 @@ import { RefreshCw, Play, Sparkles, + AlertCircle, + RotateCw, } from "lucide-react"; import { cn } from "@/lib/utils"; import { Button } from "@/components/ui/button"; @@ -256,8 +258,14 @@ function ChatPanel({ onConversationCreated: (id: string) => void; }) { // Session manager hooks - const { messages, isLoading, streamingContent, error, canResume } = - useChatSession(conversationId, { providerInstanceId, modelId }); + const { + messages, + isLoading, + streamingContent, + error, + canResume, + retryState, + } = useChatSession(conversationId, { providerInstanceId, modelId }); const { sendMessage, createConversation, stopGeneration, resumeGeneration } = useChatActions(); @@ -444,10 +452,31 @@ function ChatPanel({ )} - {/* Error display */} - {error && !isLoading && ( -
- {error} + {/* Retry indicator - shown during automatic retry */} + {retryState?.isRetrying && ( +
+ + + Retrying ({retryState.attempt}/{retryState.maxAttempts})... + +
+ )} + + {/* Error display - shown for non-retryable errors */} + {error && !isLoading && !retryState?.isRetrying && ( +
+ +
+ {error} + {retryState && + !retryState.isRetrying && + retryState.attempt > 0 && ( + + Failed after {retryState.attempt} attempt + {retryState.attempt > 1 ? "s" : ""} + + )} +
)} diff --git a/src/hooks/useChatSession.ts b/src/hooks/useChatSession.ts index 6d25044..ee6a564 100644 --- a/src/hooks/useChatSession.ts +++ b/src/hooks/useChatSession.ts @@ -43,6 +43,9 @@ interface UseChatSessionResult { // Resume state canResume: boolean; finishReason: ChatSessionState["finishReason"]; + + // Retry state + retryState: ChatSessionState["retryState"]; } /** @@ -118,6 +121,9 @@ export function useChatSession( (session.finishReason === null && session.streamingContent)), ), finishReason: session?.finishReason, + + // Retry state + retryState: session?.retryState, }), [conversation, session], ); diff --git a/src/services/llm/error-handling.ts b/src/services/llm/error-handling.ts new file mode 100644 index 0000000..49db97e --- /dev/null +++ b/src/services/llm/error-handling.ts @@ -0,0 +1,299 @@ +/** + * LLM Error Handling + * + * Centralized error handling with retry logic for LLM API calls. + * Supports exponential backoff, rate limit handling, and transient error recovery. 
// ─────────────────────────────────────────────────────────────
// Error Types
// ─────────────────────────────────────────────────────────────

/** Structured description of a failed LLM API call. */
export interface LLMError {
  /** Human-readable error message */
  message: string;
  /** HTTP status code if applicable */
  status?: number;
  /** Whether this error can be retried */
  retryable: boolean;
  /** Suggested wait time before retry (ms) */
  retryAfter?: number;
  /** Original error for debugging */
  originalError?: unknown;
}

/** Coarse classification that drives both the retry decision and the message. */
export type ErrorCategory =
  | "auth" // 401, 403 - API key issues
  | "billing" // 402 - Payment required
  | "not_found" // 404 - Model not found
  | "rate_limit" // 429 - Rate limited
  | "server" // 5xx - Server errors
  | "network" // Connection issues
  | "timeout" // Request timeout (incl. HTTP 408)
  | "cancelled" // User cancelled
  | "unknown"; // Catch-all

// ─────────────────────────────────────────────────────────────
// Error Detection
// ─────────────────────────────────────────────────────────────

/**
 * Matches the word "rate" only when it is not the tail of a longer word,
 * so "generate", "moderate" or "accurate" do not look like rate limiting.
 * Applied to an already lower-cased message.
 */
const RATE_WORD = /(^|[^a-z])rate/;

/**
 * Read the `name` of an error-like value without assuming its class.
 * Lets abort/timeout detection work for `DOMException` as well as for plain
 * `Error` objects that only set `name`.
 */
function getErrorName(error: unknown): string | undefined {
  if (error === null || typeof error !== "object") return undefined;
  const name = (error as { name?: unknown }).name;
  return typeof name === "string" ? name : undefined;
}

/**
 * Categorize an error for handling.
 *
 * Checks run from most to least specific: error name (abort / timeout),
 * HTTP status, fetch `TypeError`, and finally message heuristics.
 *
 * @param error - Any thrown value.
 * @returns The matching category, or `"unknown"` when nothing matches.
 */
export function categorizeError(error: unknown): ErrorCategory {
  const name = getErrorName(error);

  // Aborted requests
  if (name === "AbortError") return "cancelled";

  // Timeout (e.g. AbortSignal.timeout)
  if (name === "TimeoutError") return "timeout";

  // Check for HTTP status codes (OpenAI SDK error shape)
  if (isAPIError(error)) {
    const { status } = error;
    if (status === 401 || status === 403) return "auth";
    if (status === 402) return "billing";
    if (status === 404) return "not_found";
    if (status === 408) return "timeout";
    if (status === 429) return "rate_limit";
    if (status >= 500 && status < 600) return "server";
  }

  // Network errors
  if (error instanceof TypeError && error.message.includes("fetch")) {
    return "network";
  }

  if (error instanceof Error) {
    const msg = error.message.toLowerCase();
    if (msg.includes("network") || msg.includes("connection")) return "network";
    // "timed out" covers messages such as "Request timed out."
    if (msg.includes("timeout") || msg.includes("timed out")) return "timeout";
    if (RATE_WORD.test(msg) && msg.includes("limit")) return "rate_limit";
  }

  return "unknown";
}

/**
 * Check if an error is from the OpenAI API (duck typing).
 * Only the numeric `status` is verified; `headers` is left untyped because it
 * may be a fetch `Headers` instance or a plain object.
 */
function isAPIError(
  error: unknown,
): error is { status: number; message?: string; headers?: unknown } {
  return (
    error !== null &&
    typeof error === "object" &&
    "status" in error &&
    typeof (error as { status: unknown }).status === "number"
  );
}

/**
 * Check if an error category is retryable.
 *
 * @returns `true` for rate limits, server errors, network errors and timeouts.
 */
export function isRetryable(category: ErrorCategory): boolean {
  return (
    category === "rate_limit" ||
    category === "server" ||
    category === "network" ||
    category === "timeout"
  );
}

// ─────────────────────────────────────────────────────────────
// Error Parsing
// ─────────────────────────────────────────────────────────────

/**
 * Parse an error into a structured LLMError.
 *
 * For retryable errors `retryAfter` is always populated: from the response's
 * retry headers when present, otherwise from a per-category default.
 *
 * @param error - Any thrown value.
 */
export function parseError(error: unknown): LLMError {
  const category = categorizeError(error);
  const retryable = isRetryable(category);
  const apiError = isAPIError(error) ? error : undefined;

  // Honour the server's retry hint on any retryable API response
  // (429 and 503 are the usual carriers of Retry-After).
  let retryAfter: number | undefined;
  if (retryable && apiError) {
    retryAfter = extractRetryAfter(apiError);
  }

  // Default backoff for retryable errors without explicit retry-after
  if (retryable && !retryAfter) {
    retryAfter = getDefaultBackoff(category);
  }

  return {
    message: getErrorMessage(error, category),
    status: apiError?.status,
    retryable,
    retryAfter,
    originalError: error,
  };
}

/**
 * Look up a header by lower-case name on either a fetch-style `Headers`
 * object (anything exposing `get`) or a plain key/value object.
 */
function readHeader(headers: unknown, name: string): string | undefined {
  if (headers === null || typeof headers !== "object") return undefined;

  const getter = (headers as { get?: unknown }).get;
  if (typeof getter === "function") {
    const value: unknown = getter.call(headers, name);
    return typeof value === "string" ? value : undefined;
  }

  // Plain object: header names are case-insensitive, so compare lower-cased.
  for (const [key, value] of Object.entries(headers)) {
    if (key.toLowerCase() === name && typeof value === "string") return value;
  }
  return undefined;
}

/**
 * Extract the server-suggested retry delay from an API error.
 *
 * Reads `retry-after-ms` (milliseconds) first, then `retry-after`, which may
 * be a number of seconds or an HTTP date.
 *
 * @returns Delay in milliseconds, or `undefined` when no usable header exists.
 */
function extractRetryAfter(error: { headers?: unknown }): number | undefined {
  const numeric = /^\d+(\.\d+)?$/;

  const rawMs = readHeader(error.headers, "retry-after-ms")?.trim();
  if (rawMs && numeric.test(rawMs)) {
    return Math.round(Number(rawMs));
  }

  const raw = readHeader(error.headers, "retry-after")?.trim();
  if (!raw) return undefined;

  // Strict numeric match: values like "12abc" are rejected instead of
  // being truncated to 12.
  if (numeric.test(raw)) {
    return Math.round(Number(raw) * 1000);
  }

  // Try parsing as date; a date in the past means "retry now".
  const date = Date.parse(raw);
  if (!Number.isNaN(date)) {
    return Math.max(0, date - Date.now());
  }

  return undefined;
}

/**
 * Get default backoff time (ms) for a category.
 */
function getDefaultBackoff(category: ErrorCategory): number {
  switch (category) {
    case "rate_limit":
      return 5000; // 5 seconds
    case "server":
      return 2000; // 2 seconds
    case "network":
    case "timeout":
      return 1000; // 1 second
    default:
      return 1000;
  }
}

/**
 * Get user-friendly error message.
 *
 * Messages are deliberately neutral about retrying: the same text is shown
 * both while a retry is pending and after the final attempt has failed.
 */
function getErrorMessage(error: unknown, category: ErrorCategory): string {
  switch (category) {
    case "auth":
      return "Invalid API key. Please check your credentials.";
    case "billing":
      return "Insufficient balance. Please top up your account.";
    case "not_found":
      return "Model not found. Please select a different model.";
    case "rate_limit":
      return "Rate limit exceeded. Please try again in a moment.";
    case "server":
      return "Provider service is temporarily unavailable. Please try again later.";
    case "network":
      return "Network error. Please check your connection.";
    case "timeout":
      return "Request timed out. Please try again.";
    case "cancelled":
      return "Request was cancelled.";
    default:
      if (error instanceof Error) {
        return error.message;
      }
      return "An unknown error occurred.";
  }
}

// ─────────────────────────────────────────────────────────────
// Retry Logic
// ─────────────────────────────────────────────────────────────

/** Tunables for the retry loop. */
export interface RetryConfig {
  /** Maximum number of retry attempts */
  maxRetries: number;
  /** Base delay in milliseconds */
  baseDelay: number;
  /** Maximum delay in milliseconds */
  maxDelay: number;
  /** Jitter factor (0-1) to add randomness */
  jitter: number;
}

export const DEFAULT_RETRY_CONFIG: RetryConfig = {
  maxRetries: 3,
  baseDelay: 1000,
  maxDelay: 30000,
  jitter: 0.2,
};

/**
 * Calculate backoff delay with exponential increase and jitter.
 *
 * The suggested delay is a lower bound, not a replacement. Because
 * `parseError` always supplies one for retryable errors, returning it
 * verbatim would pin every retry to the same delay.
 *
 * @param attempt - Zero-based index of the attempt that just failed.
 * @param suggestedDelay - Minimum delay in ms (e.g. from a Retry-After header).
 * @param config - Retry tunables; the result never exceeds `config.maxDelay`.
 * @returns Delay in milliseconds, in the range `[0, config.maxDelay]`.
 */
export function calculateBackoff(
  attempt: number,
  suggestedDelay?: number,
  config: RetryConfig = DEFAULT_RETRY_CONFIG,
): number {
  // Exponential backoff: baseDelay * 2^attempt
  const exponentialDelay = config.baseDelay * Math.pow(2, attempt);

  // Add jitter to prevent thundering herd
  const jitterRange = exponentialDelay * config.jitter;
  const jitter = Math.random() * jitterRange * 2 - jitterRange;

  // Never retry sooner than the suggested delay.
  const floor =
    suggestedDelay !== undefined && suggestedDelay > 0 ? suggestedDelay : 0;
  const delay = Math.max(exponentialDelay + jitter, floor);

  return Math.max(0, Math.min(delay, config.maxDelay));
}
+ */ +export function sleep(ms: number, signal?: AbortSignal): Promise { + return new Promise((resolve, reject) => { + if (signal?.aborted) { + reject(new DOMException("Aborted", "AbortError")); + return; + } + + const timeout = setTimeout(resolve, ms); + + signal?.addEventListener("abort", () => { + clearTimeout(timeout); + reject(new DOMException("Aborted", "AbortError")); + }); + }); +} + +// ───────────────────────────────────────────────────────────── +// Retry State for UI +// ───────────────────────────────────────────────────────────── + +export interface RetryState { + /** Current retry attempt (0 = initial, 1+ = retry) */ + attempt: number; + /** Maximum attempts allowed */ + maxAttempts: number; + /** Whether currently waiting before retry */ + waiting: boolean; + /** Time remaining until next retry (ms) */ + waitTimeRemaining: number; + /** Last error that triggered retry */ + lastError?: LLMError; +} + +/** + * Create a retry state tracker for UI updates. + */ +export function createRetryState( + config: RetryConfig = DEFAULT_RETRY_CONFIG, +): RetryState { + return { + attempt: 0, + maxAttempts: config.maxRetries + 1, // +1 for initial attempt + waiting: false, + waitTimeRemaining: 0, + }; +} diff --git a/src/services/llm/provider-manager.ts b/src/services/llm/provider-manager.ts index 64affef..7e058df 100644 --- a/src/services/llm/provider-manager.ts +++ b/src/services/llm/provider-manager.ts @@ -15,11 +15,14 @@ import type { ChatStreamChunk, ChatOptions, } from "@/types/llm"; +import { createOpenAIClient, formatModelName } from "./openai-client"; import { - createOpenAIClient, - formatModelName, - parseAPIError, -} from "./openai-client"; + parseError, + calculateBackoff, + sleep, + DEFAULT_RETRY_CONFIG, + type RetryConfig, +} from "./error-handling"; import { AI_PROVIDER_PRESETS } from "@/lib/ai-provider-presets"; const MODEL_CACHE_TTL = 1000 * 60 * 5; // 5 minutes @@ -237,12 +240,13 @@ class AIProviderManager { /** * Chat with a model (streaming). 
+ * Includes automatic retry logic for transient errors. */ async *chat( instanceId: string, modelId: string, messages: LLMMessage[], - options: Omit, + options: Omit & { retryConfig?: RetryConfig }, ): AsyncGenerator { const instance = await db.llmProviders.get(instanceId); if (!instance) { @@ -255,108 +259,179 @@ class AIProviderManager { return; } - try { - const client = this.getClient(instance); + const retryConfig = options.retryConfig ?? DEFAULT_RETRY_CONFIG; + const maxAttempts = retryConfig.maxRetries + 1; - // Format messages for OpenAI API - const formattedMessages = this.formatMessages(messages); + for (let attempt = 0; attempt < maxAttempts; attempt++) { + try { + // Yield all chunks from the stream attempt + const streamResult = yield* this.streamChat( + instance, + modelId, + messages, + options, + ); - // Build request params - // eslint-disable-next-line @typescript-eslint/no-explicit-any - const params: any = { - model: modelId, - messages: formattedMessages, - stream: true, - stream_options: { include_usage: true }, - temperature: options.temperature ?? 
0.7, - max_tokens: options.maxTokens, - }; + // If we got here successfully, update usage tracking and return + if (streamResult.success) { + await db.llmProviders.update(instanceId, { + lastUsed: Date.now(), + lastModelId: modelId, + }); + this.addRecentModel(instance.providerId, modelId); + return; + } + } catch (error) { + // Handle abort - don't retry + if (error instanceof DOMException && error.name === "AbortError") { + yield { type: "done" }; + return; + } - // Add tools if provided - if (options.tools && options.tools.length > 0) { - params.tools = options.tools; - if (options.tool_choice) { - params.tool_choice = options.tool_choice; + // Parse the error + const llmError = parseError(error); + + // If not retryable or last attempt, yield error and stop + if (!llmError.retryable || attempt >= maxAttempts - 1) { + yield { + type: "error", + error: llmError.message, + retry: { + attempt: attempt + 1, + maxAttempts, + delayMs: 0, + retryable: false, + }, + }; + return; + } + + // Calculate backoff delay + const delayMs = calculateBackoff( + attempt, + llmError.retryAfter, + retryConfig, + ); + + // Yield retry event so UI can show progress + yield { + type: "retry", + error: llmError.message, + retry: { + attempt: attempt + 1, + maxAttempts, + delayMs, + retryable: true, + }, + }; + + // Wait before retrying (respects abort signal) + try { + await sleep(delayMs, options.signal); + } catch { + // Aborted during wait + yield { type: "done" }; + return; } } + } + } - const stream = (await client.chat.completions.create(params, { - signal: options.signal, - })) as unknown as AsyncIterable; + /** + * Internal: Stream a single chat completion attempt. + * Returns a result indicating success/failure. 
+ */ + private async *streamChat( + instance: LLMProviderInstance, + modelId: string, + messages: LLMMessage[], + options: Omit, + ): AsyncGenerator { + const client = this.getClient(instance); - let usage: ChatStreamChunk["usage"] | undefined; - let finishReason: ChatStreamChunk["finish_reason"] = null; + // Format messages for OpenAI API + const formattedMessages = this.formatMessages(messages); - for await (const chunk of stream) { - const choice = chunk.choices[0]; - if (!choice) continue; + // Build request params + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const params: any = { + model: modelId, + messages: formattedMessages, + stream: true, + stream_options: { include_usage: true }, + temperature: options.temperature ?? 0.7, + max_tokens: options.maxTokens, + }; - const delta = choice.delta; + // Add tools if provided + if (options.tools && options.tools.length > 0) { + params.tools = options.tools; + if (options.tool_choice) { + params.tool_choice = options.tool_choice; + } + } - // Regular content - if (delta?.content) { - yield { type: "token", content: delta.content }; - } + const stream = (await client.chat.completions.create(params, { + signal: options.signal, + })) as unknown as AsyncIterable; - // Extended thinking / reasoning (Claude, DeepSeek, etc.) - // eslint-disable-next-line @typescript-eslint/no-explicit-any - const reasoning = (delta as any)?.reasoning_content; - if (reasoning) { - yield { type: "reasoning", content: reasoning }; - } + let usage: ChatStreamChunk["usage"] | undefined; + let finishReason: ChatStreamChunk["finish_reason"] = null; - // Tool calls (streamed incrementally) - if (delta?.tool_calls) { - for (const tc of delta.tool_calls) { - yield { - type: "tool_call", - tool_call: { - index: tc.index, - id: tc.id, - type: tc.type, - function: tc.function - ? 
{ - name: tc.function.name, - arguments: tc.function.arguments, - } - : undefined, - }, - }; - } - } + for await (const chunk of stream) { + const choice = chunk.choices[0]; + if (!choice) continue; - // Capture finish reason - if (choice.finish_reason) { - finishReason = - choice.finish_reason as ChatStreamChunk["finish_reason"]; - } + const delta = choice.delta; - // Capture usage from final chunk - if (chunk.usage) { - usage = { - promptTokens: chunk.usage.prompt_tokens, - completionTokens: chunk.usage.completion_tokens, + // Regular content + if (delta?.content) { + yield { type: "token", content: delta.content }; + } + + // Extended thinking / reasoning (Claude, DeepSeek, etc.) + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const reasoning = (delta as any)?.reasoning_content; + if (reasoning) { + yield { type: "reasoning", content: reasoning }; + } + + // Tool calls (streamed incrementally) + if (delta?.tool_calls) { + for (const tc of delta.tool_calls) { + yield { + type: "tool_call", + tool_call: { + index: tc.index, + id: tc.id, + type: tc.type, + function: tc.function + ? 
{ + name: tc.function.name, + arguments: tc.function.arguments, + } + : undefined, + }, }; } } - yield { type: "done", usage, finish_reason: finishReason }; - - // Update lastUsed and add to recent models - await db.llmProviders.update(instanceId, { - lastUsed: Date.now(), - lastModelId: modelId, - }); - this.addRecentModel(instance.providerId, modelId); - } catch (error) { - if (error instanceof DOMException && error.name === "AbortError") { - yield { type: "done" }; - return; + // Capture finish reason + if (choice.finish_reason) { + finishReason = choice.finish_reason as ChatStreamChunk["finish_reason"]; } - const message = parseAPIError(error); - yield { type: "error", error: message }; + // Capture usage from final chunk + if (chunk.usage) { + usage = { + promptTokens: chunk.usage.prompt_tokens, + completionTokens: chunk.usage.completion_tokens, + }; + } } + + yield { type: "done", usage, finish_reason: finishReason }; + return { success: true }; } /** @@ -410,7 +485,8 @@ class AIProviderManager { await client.models.list({ signal: AbortSignal.timeout(5000) }); return { success: true }; } catch (error) { - return { success: false, error: parseAPIError(error) }; + const llmError = parseError(error); + return { success: false, error: llmError.message }; } } } diff --git a/src/services/llm/session-manager.ts b/src/services/llm/session-manager.ts index 5935c1e..c2282d9 100644 --- a/src/services/llm/session-manager.ts +++ b/src/services/llm/session-manager.ts @@ -628,12 +628,53 @@ class ChatSessionManager { streamingMessage: { ...streaming }, lastActivity: Date.now(), }); + } else if (chunk.type === "retry" && chunk.retry) { + // Transient error - retrying automatically + this.updateSession(conversationId, { + ...session, + retryState: { + attempt: chunk.retry.attempt, + maxAttempts: chunk.retry.maxAttempts, + isRetrying: true, + retryDelayMs: chunk.retry.delayMs, + }, + lastError: chunk.error, + lastActivity: Date.now(), + }); + + // Emit error event for UI 
awareness (but we're handling it) + if (chunk.error) { + this.error$.next({ + conversationId, + error: `${chunk.error} (retry ${chunk.retry.attempt}/${chunk.retry.maxAttempts})`, + }); + } } else if (chunk.type === "done") { usage = chunk.usage; if (chunk.finish_reason) { finishReason = chunk.finish_reason; } + // Clear retry state on success + const currentSession = this.getSession(conversationId); + if (currentSession?.retryState) { + this.updateSession(conversationId, { + ...currentSession, + retryState: undefined, + }); + } } else if (chunk.type === "error") { + // Check if this is a non-retryable error with retry info + if (chunk.retry && !chunk.retry.retryable) { + this.updateSession(conversationId, { + ...session, + retryState: { + attempt: chunk.retry.attempt, + maxAttempts: chunk.retry.maxAttempts, + isRetrying: false, + retryDelayMs: 0, + }, + }); + } throw new Error(chunk.error || "Unknown error"); } } diff --git a/src/types/llm.ts b/src/types/llm.ts index 7a6d95f..93d6236 100644 --- a/src/types/llm.ts +++ b/src/types/llm.ts @@ -74,7 +74,7 @@ export interface AISettings { // ───────────────────────────────────────────────────────────── export interface ChatStreamChunk { - type: "token" | "reasoning" | "tool_call" | "done" | "error"; + type: "token" | "reasoning" | "tool_call" | "done" | "error" | "retry"; content?: string; /** Streaming tool call delta */ tool_call?: StreamingToolCall; @@ -85,6 +85,17 @@ export interface ChatStreamChunk { promptTokens: number; completionTokens: number; }; + /** Retry information for error recovery */ + retry?: { + /** Current attempt number (1-based) */ + attempt: number; + /** Maximum attempts allowed */ + maxAttempts: number; + /** Delay before next retry (ms) */ + delayMs: number; + /** Whether this error is retryable */ + retryable: boolean; + }; } /** @@ -154,6 +165,18 @@ export interface ChatSessionState { finishReason?: "stop" | "length" | "tool_calls" | "error" | null; lastError?: string; + // Retry state for 
transient errors + retryState?: { + /** Current retry attempt (1-based) */ + attempt: number; + /** Maximum attempts allowed */ + maxAttempts: number; + /** Whether currently waiting before retry */ + isRetrying: boolean; + /** Time remaining until next retry (ms) */ + retryDelayMs: number; + }; + // Reference counting - how many windows have this session open subscriberCount: number;