diff --git a/src/components/AIViewer.tsx b/src/components/AIViewer.tsx
index 6dc0ce6..b1a6923 100644
--- a/src/components/AIViewer.tsx
+++ b/src/components/AIViewer.tsx
@@ -20,6 +20,8 @@ import {
RefreshCw,
Play,
Sparkles,
+ AlertCircle,
+ RotateCw,
} from "lucide-react";
import { cn } from "@/lib/utils";
import { Button } from "@/components/ui/button";
@@ -256,8 +258,14 @@ function ChatPanel({
onConversationCreated: (id: string) => void;
}) {
// Session manager hooks
- const { messages, isLoading, streamingContent, error, canResume } =
- useChatSession(conversationId, { providerInstanceId, modelId });
+ const {
+ messages,
+ isLoading,
+ streamingContent,
+ error,
+ canResume,
+ retryState,
+ } = useChatSession(conversationId, { providerInstanceId, modelId });
const { sendMessage, createConversation, stopGeneration, resumeGeneration } =
useChatActions();
@@ -444,10 +452,31 @@ function ChatPanel({
>
)}
- {/* Error display */}
- {error && !isLoading && (
-
- {error}
+ {/* Retry indicator - shown during automatic retry */}
+ {retryState?.isRetrying && (
+
+
+
+ Retrying ({retryState.attempt}/{retryState.maxAttempts})...
+
+
+ )}
+
+ {/* Error display - shown for non-retryable errors */}
+ {error && !isLoading && !retryState?.isRetrying && (
+
+
+
+ {error}
+ {retryState &&
+ !retryState.isRetrying &&
+ retryState.attempt > 0 && (
+
+ Failed after {retryState.attempt} attempt
+ {retryState.attempt > 1 ? "s" : ""}
+
+ )}
+
)}
diff --git a/src/hooks/useChatSession.ts b/src/hooks/useChatSession.ts
index 6d25044..ee6a564 100644
--- a/src/hooks/useChatSession.ts
+++ b/src/hooks/useChatSession.ts
@@ -43,6 +43,9 @@ interface UseChatSessionResult {
// Resume state
canResume: boolean;
finishReason: ChatSessionState["finishReason"];
+
+ // Retry state
+ retryState: ChatSessionState["retryState"];
}
/**
@@ -118,6 +121,9 @@ export function useChatSession(
(session.finishReason === null && session.streamingContent)),
),
finishReason: session?.finishReason,
+
+ // Retry state
+ retryState: session?.retryState,
}),
[conversation, session],
);
diff --git a/src/services/llm/error-handling.ts b/src/services/llm/error-handling.ts
new file mode 100644
index 0000000..49db97e
--- /dev/null
+++ b/src/services/llm/error-handling.ts
@@ -0,0 +1,299 @@
+/**
+ * LLM Error Handling
+ *
+ * Centralized error handling with retry logic for LLM API calls.
+ * Supports exponential backoff, rate limit handling, and transient error recovery.
+ */
+
+// ─────────────────────────────────────────────────────────────
+// Error Types
+// ─────────────────────────────────────────────────────────────
+
+export interface LLMError {
+ /** Human-readable error message */
+ message: string;
+ /** HTTP status code if applicable */
+ status?: number;
+ /** Whether this error can be retried */
+ retryable: boolean;
+ /** Suggested wait time before retry (ms) */
+ retryAfter?: number;
+ /** Original error for debugging */
+ originalError?: unknown;
+}
+
+export type ErrorCategory =
+ | "auth" // 401, 403 - API key issues
+ | "billing" // 402 - Payment required
+ | "not_found" // 404 - Model not found
+ | "rate_limit" // 429 - Rate limited
+ | "server" // 5xx - Server errors
+ | "network" // Connection issues
+ | "timeout" // Request timeout
+ | "cancelled" // User cancelled
+ | "unknown"; // Catch-all
+
+// ─────────────────────────────────────────────────────────────
+// Error Detection
+// ─────────────────────────────────────────────────────────────
+
+/**
+ * Categorize an error for handling.
+ */
+export function categorizeError(error: unknown): ErrorCategory {
+ // Aborted requests
+ if (error instanceof DOMException && error.name === "AbortError") {
+ return "cancelled";
+ }
+
+ // Timeout
+ if (error instanceof DOMException && error.name === "TimeoutError") {
+ return "timeout";
+ }
+
+ // Check for HTTP status codes (OpenAI SDK error shape)
+ if (isAPIError(error)) {
+ const status = error.status;
+ if (status === 401 || status === 403) return "auth";
+ if (status === 402) return "billing";
+ if (status === 404) return "not_found";
+ if (status === 429) return "rate_limit";
+ if (status >= 500 && status < 600) return "server";
+ }
+
+ // Network errors
+ if (error instanceof TypeError && error.message.includes("fetch")) {
+ return "network";
+ }
+
+ if (error instanceof Error) {
+ const msg = error.message.toLowerCase();
+ if (msg.includes("network") || msg.includes("connection")) return "network";
+ if (msg.includes("timeout")) return "timeout";
+ if (msg.includes("rate") && msg.includes("limit")) return "rate_limit";
+ }
+
+ return "unknown";
+}
+
+/**
+ * Check if an error is from the OpenAI API (duck typing).
+ */
+function isAPIError(
+ error: unknown,
+): error is { status: number; message?: string; headers?: Headers } {
+ return (
+ error !== null &&
+ typeof error === "object" &&
+ "status" in error &&
+ typeof (error as { status: unknown }).status === "number"
+ );
+}
+
+/**
+ * Check if an error category is retryable.
+ */
+export function isRetryable(category: ErrorCategory): boolean {
+ return (
+ category === "rate_limit" ||
+ category === "server" ||
+ category === "network" ||
+ category === "timeout"
+ );
+}
+
+// ─────────────────────────────────────────────────────────────
+// Error Parsing
+// ─────────────────────────────────────────────────────────────
+
+/**
+ * Parse an error into a structured LLMError.
+ */
+export function parseError(error: unknown): LLMError {
+ const category = categorizeError(error);
+ const retryable = isRetryable(category);
+
+ // Extract retry-after from rate limit response
+ let retryAfter: number | undefined;
+ if (category === "rate_limit" && isAPIError(error)) {
+ retryAfter = extractRetryAfter(error);
+ }
+
+ // Default backoff for retryable errors without explicit retry-after
+ if (retryable && !retryAfter) {
+ retryAfter = getDefaultBackoff(category);
+ }
+
+ return {
+ message: getErrorMessage(error, category),
+ status: isAPIError(error) ? error.status : undefined,
+ retryable,
+ retryAfter,
+ originalError: error,
+ };
+}
+
+/**
+ * Extract retry-after header from API error.
+ */
+function extractRetryAfter(error: { headers?: Headers }): number | undefined {
+ if (!error.headers) return undefined;
+
+ const retryAfter = error.headers.get?.("retry-after");
+ if (!retryAfter) return undefined;
+
+ // Can be seconds or HTTP date
+ const seconds = parseInt(retryAfter, 10);
+ if (!isNaN(seconds)) {
+ return seconds * 1000;
+ }
+
+ // Try parsing as date
+ const date = Date.parse(retryAfter);
+ if (!isNaN(date)) {
+ return Math.max(0, date - Date.now());
+ }
+
+ return undefined;
+}
+
+/**
+ * Get default backoff time for a category.
+ */
+function getDefaultBackoff(category: ErrorCategory): number {
+ switch (category) {
+ case "rate_limit":
+ return 5000; // 5 seconds
+ case "server":
+ return 2000; // 2 seconds
+ case "network":
+ case "timeout":
+ return 1000; // 1 second
+ default:
+ return 1000;
+ }
+}
+
+/**
+ * Get user-friendly error message.
+ */
+function getErrorMessage(error: unknown, category: ErrorCategory): string {
+ switch (category) {
+ case "auth":
+ return "Invalid API key. Please check your credentials.";
+ case "billing":
+ return "Insufficient balance. Please top up your account.";
+ case "not_found":
+ return "Model not found. Please select a different model.";
+ case "rate_limit":
+ return "Rate limit exceeded. Retrying automatically...";
+ case "server":
+ return "Provider service is temporarily unavailable. Retrying...";
+ case "network":
+ return "Network error. Please check your connection.";
+ case "timeout":
+ return "Request timed out. Retrying...";
+ case "cancelled":
+ return "Request was cancelled.";
+ default:
+ if (error instanceof Error) {
+ return error.message;
+ }
+ return "An unknown error occurred.";
+ }
+}
+
+// ─────────────────────────────────────────────────────────────
+// Retry Logic
+// ─────────────────────────────────────────────────────────────
+
+export interface RetryConfig {
+ /** Maximum number of retry attempts */
+ maxRetries: number;
+ /** Base delay in milliseconds */
+ baseDelay: number;
+ /** Maximum delay in milliseconds */
+ maxDelay: number;
+ /** Jitter factor (0-1) to add randomness */
+ jitter: number;
+}
+
+export const DEFAULT_RETRY_CONFIG: RetryConfig = {
+ maxRetries: 3,
+ baseDelay: 1000,
+ maxDelay: 30000,
+ jitter: 0.2,
+};
+
+/**
+ * Calculate backoff delay with exponential increase and jitter.
+ */
+export function calculateBackoff(
+ attempt: number,
+ suggestedDelay?: number,
+ config: RetryConfig = DEFAULT_RETRY_CONFIG,
+): number {
+ // Use suggested delay if provided (e.g., from Retry-After header)
+ if (suggestedDelay && suggestedDelay > 0) {
+ return Math.min(suggestedDelay, config.maxDelay);
+ }
+
+ // Exponential backoff: baseDelay * 2^attempt
+ const exponentialDelay = config.baseDelay * Math.pow(2, attempt);
+
+ // Add jitter to prevent thundering herd
+ const jitterRange = exponentialDelay * config.jitter;
+ const jitter = Math.random() * jitterRange * 2 - jitterRange;
+
+ return Math.min(exponentialDelay + jitter, config.maxDelay);
+}
+
+/**
+ * Wait for the specified duration.
+ */
+export function sleep(ms: number, signal?: AbortSignal): Promise<void> {
+ return new Promise((resolve, reject) => {
+ if (signal?.aborted) {
+ reject(new DOMException("Aborted", "AbortError"));
+ return;
+ }
+
+ const timeout = setTimeout(resolve, ms);
+
+ signal?.addEventListener("abort", () => {
+ clearTimeout(timeout);
+ reject(new DOMException("Aborted", "AbortError"));
+ });
+ });
+}
+
+// ─────────────────────────────────────────────────────────────
+// Retry State for UI
+// ─────────────────────────────────────────────────────────────
+
+export interface RetryState {
+ /** Current retry attempt (0 = initial, 1+ = retry) */
+ attempt: number;
+ /** Maximum attempts allowed */
+ maxAttempts: number;
+ /** Whether currently waiting before retry */
+ waiting: boolean;
+ /** Time remaining until next retry (ms) */
+ waitTimeRemaining: number;
+ /** Last error that triggered retry */
+ lastError?: LLMError;
+}
+
+/**
+ * Create a retry state tracker for UI updates.
+ */
+export function createRetryState(
+ config: RetryConfig = DEFAULT_RETRY_CONFIG,
+): RetryState {
+ return {
+ attempt: 0,
+ maxAttempts: config.maxRetries + 1, // +1 for initial attempt
+ waiting: false,
+ waitTimeRemaining: 0,
+ };
+}
diff --git a/src/services/llm/provider-manager.ts b/src/services/llm/provider-manager.ts
index 64affef..7e058df 100644
--- a/src/services/llm/provider-manager.ts
+++ b/src/services/llm/provider-manager.ts
@@ -15,11 +15,14 @@ import type {
ChatStreamChunk,
ChatOptions,
} from "@/types/llm";
+import { createOpenAIClient, formatModelName } from "./openai-client";
import {
- createOpenAIClient,
- formatModelName,
- parseAPIError,
-} from "./openai-client";
+ parseError,
+ calculateBackoff,
+ sleep,
+ DEFAULT_RETRY_CONFIG,
+ type RetryConfig,
+} from "./error-handling";
import { AI_PROVIDER_PRESETS } from "@/lib/ai-provider-presets";
const MODEL_CACHE_TTL = 1000 * 60 * 5; // 5 minutes
@@ -237,12 +240,13 @@ class AIProviderManager {
/**
* Chat with a model (streaming).
+ * Includes automatic retry logic for transient errors.
*/
async *chat(
instanceId: string,
modelId: string,
messages: LLMMessage[],
- options: Omit,
+ options: Omit & { retryConfig?: RetryConfig },
): AsyncGenerator<ChatStreamChunk> {
const instance = await db.llmProviders.get(instanceId);
if (!instance) {
@@ -255,108 +259,179 @@ class AIProviderManager {
return;
}
- try {
- const client = this.getClient(instance);
+ const retryConfig = options.retryConfig ?? DEFAULT_RETRY_CONFIG;
+ const maxAttempts = retryConfig.maxRetries + 1;
- // Format messages for OpenAI API
- const formattedMessages = this.formatMessages(messages);
+ for (let attempt = 0; attempt < maxAttempts; attempt++) {
+ try {
+ // Yield all chunks from the stream attempt
+ const streamResult = yield* this.streamChat(
+ instance,
+ modelId,
+ messages,
+ options,
+ );
- // Build request params
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
- const params: any = {
- model: modelId,
- messages: formattedMessages,
- stream: true,
- stream_options: { include_usage: true },
- temperature: options.temperature ?? 0.7,
- max_tokens: options.maxTokens,
- };
+ // If we got here successfully, update usage tracking and return
+ if (streamResult.success) {
+ await db.llmProviders.update(instanceId, {
+ lastUsed: Date.now(),
+ lastModelId: modelId,
+ });
+ this.addRecentModel(instance.providerId, modelId);
+ return;
+ }
+ } catch (error) {
+ // Handle abort - don't retry
+ if (error instanceof DOMException && error.name === "AbortError") {
+ yield { type: "done" };
+ return;
+ }
- // Add tools if provided
- if (options.tools && options.tools.length > 0) {
- params.tools = options.tools;
- if (options.tool_choice) {
- params.tool_choice = options.tool_choice;
+ // Parse the error
+ const llmError = parseError(error);
+
+ // If not retryable or last attempt, yield error and stop
+ if (!llmError.retryable || attempt >= maxAttempts - 1) {
+ yield {
+ type: "error",
+ error: llmError.message,
+ retry: {
+ attempt: attempt + 1,
+ maxAttempts,
+ delayMs: 0,
+ retryable: false,
+ },
+ };
+ return;
+ }
+
+ // Calculate backoff delay
+ const delayMs = calculateBackoff(
+ attempt,
+ llmError.retryAfter,
+ retryConfig,
+ );
+
+ // Yield retry event so UI can show progress
+ yield {
+ type: "retry",
+ error: llmError.message,
+ retry: {
+ attempt: attempt + 1,
+ maxAttempts,
+ delayMs,
+ retryable: true,
+ },
+ };
+
+ // Wait before retrying (respects abort signal)
+ try {
+ await sleep(delayMs, options.signal);
+ } catch {
+ // Aborted during wait
+ yield { type: "done" };
+ return;
}
}
+ }
+ }
- const stream = (await client.chat.completions.create(params, {
- signal: options.signal,
- })) as unknown as AsyncIterable;
+ /**
+ * Internal: Stream a single chat completion attempt.
+ * Returns a result indicating success/failure.
+ */
+ private async *streamChat(
+ instance: LLMProviderInstance,
+ modelId: string,
+ messages: LLMMessage[],
+ options: Omit,
+  ): AsyncGenerator<ChatStreamChunk, { success: boolean }> {
+ const client = this.getClient(instance);
- let usage: ChatStreamChunk["usage"] | undefined;
- let finishReason: ChatStreamChunk["finish_reason"] = null;
+ // Format messages for OpenAI API
+ const formattedMessages = this.formatMessages(messages);
- for await (const chunk of stream) {
- const choice = chunk.choices[0];
- if (!choice) continue;
+ // Build request params
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
+ const params: any = {
+ model: modelId,
+ messages: formattedMessages,
+ stream: true,
+ stream_options: { include_usage: true },
+ temperature: options.temperature ?? 0.7,
+ max_tokens: options.maxTokens,
+ };
- const delta = choice.delta;
+ // Add tools if provided
+ if (options.tools && options.tools.length > 0) {
+ params.tools = options.tools;
+ if (options.tool_choice) {
+ params.tool_choice = options.tool_choice;
+ }
+ }
- // Regular content
- if (delta?.content) {
- yield { type: "token", content: delta.content };
- }
+ const stream = (await client.chat.completions.create(params, {
+ signal: options.signal,
+ })) as unknown as AsyncIterable;
- // Extended thinking / reasoning (Claude, DeepSeek, etc.)
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
- const reasoning = (delta as any)?.reasoning_content;
- if (reasoning) {
- yield { type: "reasoning", content: reasoning };
- }
+ let usage: ChatStreamChunk["usage"] | undefined;
+ let finishReason: ChatStreamChunk["finish_reason"] = null;
- // Tool calls (streamed incrementally)
- if (delta?.tool_calls) {
- for (const tc of delta.tool_calls) {
- yield {
- type: "tool_call",
- tool_call: {
- index: tc.index,
- id: tc.id,
- type: tc.type,
- function: tc.function
- ? {
- name: tc.function.name,
- arguments: tc.function.arguments,
- }
- : undefined,
- },
- };
- }
- }
+ for await (const chunk of stream) {
+ const choice = chunk.choices[0];
+ if (!choice) continue;
- // Capture finish reason
- if (choice.finish_reason) {
- finishReason =
- choice.finish_reason as ChatStreamChunk["finish_reason"];
- }
+ const delta = choice.delta;
- // Capture usage from final chunk
- if (chunk.usage) {
- usage = {
- promptTokens: chunk.usage.prompt_tokens,
- completionTokens: chunk.usage.completion_tokens,
+ // Regular content
+ if (delta?.content) {
+ yield { type: "token", content: delta.content };
+ }
+
+ // Extended thinking / reasoning (Claude, DeepSeek, etc.)
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
+ const reasoning = (delta as any)?.reasoning_content;
+ if (reasoning) {
+ yield { type: "reasoning", content: reasoning };
+ }
+
+ // Tool calls (streamed incrementally)
+ if (delta?.tool_calls) {
+ for (const tc of delta.tool_calls) {
+ yield {
+ type: "tool_call",
+ tool_call: {
+ index: tc.index,
+ id: tc.id,
+ type: tc.type,
+ function: tc.function
+ ? {
+ name: tc.function.name,
+ arguments: tc.function.arguments,
+ }
+ : undefined,
+ },
};
}
}
- yield { type: "done", usage, finish_reason: finishReason };
-
- // Update lastUsed and add to recent models
- await db.llmProviders.update(instanceId, {
- lastUsed: Date.now(),
- lastModelId: modelId,
- });
- this.addRecentModel(instance.providerId, modelId);
- } catch (error) {
- if (error instanceof DOMException && error.name === "AbortError") {
- yield { type: "done" };
- return;
+ // Capture finish reason
+ if (choice.finish_reason) {
+ finishReason = choice.finish_reason as ChatStreamChunk["finish_reason"];
}
- const message = parseAPIError(error);
- yield { type: "error", error: message };
+ // Capture usage from final chunk
+ if (chunk.usage) {
+ usage = {
+ promptTokens: chunk.usage.prompt_tokens,
+ completionTokens: chunk.usage.completion_tokens,
+ };
+ }
}
+
+ yield { type: "done", usage, finish_reason: finishReason };
+ return { success: true };
}
/**
@@ -410,7 +485,8 @@ class AIProviderManager {
await client.models.list({ signal: AbortSignal.timeout(5000) });
return { success: true };
} catch (error) {
- return { success: false, error: parseAPIError(error) };
+ const llmError = parseError(error);
+ return { success: false, error: llmError.message };
}
}
}
diff --git a/src/services/llm/session-manager.ts b/src/services/llm/session-manager.ts
index 5935c1e..c2282d9 100644
--- a/src/services/llm/session-manager.ts
+++ b/src/services/llm/session-manager.ts
@@ -628,12 +628,53 @@ class ChatSessionManager {
streamingMessage: { ...streaming },
lastActivity: Date.now(),
});
+ } else if (chunk.type === "retry" && chunk.retry) {
+ // Transient error - retrying automatically
+ this.updateSession(conversationId, {
+ ...session,
+ retryState: {
+ attempt: chunk.retry.attempt,
+ maxAttempts: chunk.retry.maxAttempts,
+ isRetrying: true,
+ retryDelayMs: chunk.retry.delayMs,
+ },
+ lastError: chunk.error,
+ lastActivity: Date.now(),
+ });
+
+ // Emit error event for UI awareness (but we're handling it)
+ if (chunk.error) {
+ this.error$.next({
+ conversationId,
+ error: `${chunk.error} (retry ${chunk.retry.attempt}/${chunk.retry.maxAttempts})`,
+ });
+ }
} else if (chunk.type === "done") {
usage = chunk.usage;
if (chunk.finish_reason) {
finishReason = chunk.finish_reason;
}
+ // Clear retry state on success
+ const currentSession = this.getSession(conversationId);
+ if (currentSession?.retryState) {
+ this.updateSession(conversationId, {
+ ...currentSession,
+ retryState: undefined,
+ });
+ }
} else if (chunk.type === "error") {
+ // Check if this is a non-retryable error with retry info
+ if (chunk.retry && !chunk.retry.retryable) {
+ this.updateSession(conversationId, {
+ ...session,
+ retryState: {
+ attempt: chunk.retry.attempt,
+ maxAttempts: chunk.retry.maxAttempts,
+ isRetrying: false,
+ retryDelayMs: 0,
+ },
+ });
+ }
throw new Error(chunk.error || "Unknown error");
}
}
diff --git a/src/types/llm.ts b/src/types/llm.ts
index 7a6d95f..93d6236 100644
--- a/src/types/llm.ts
+++ b/src/types/llm.ts
@@ -74,7 +74,7 @@ export interface AISettings {
// ─────────────────────────────────────────────────────────────
export interface ChatStreamChunk {
- type: "token" | "reasoning" | "tool_call" | "done" | "error";
+ type: "token" | "reasoning" | "tool_call" | "done" | "error" | "retry";
content?: string;
/** Streaming tool call delta */
tool_call?: StreamingToolCall;
@@ -85,6 +85,17 @@ export interface ChatStreamChunk {
promptTokens: number;
completionTokens: number;
};
+ /** Retry information for error recovery */
+ retry?: {
+ /** Current attempt number (1-based) */
+ attempt: number;
+ /** Maximum attempts allowed */
+ maxAttempts: number;
+ /** Delay before next retry (ms) */
+ delayMs: number;
+ /** Whether this error is retryable */
+ retryable: boolean;
+ };
}
/**
@@ -154,6 +165,18 @@ export interface ChatSessionState {
finishReason?: "stop" | "length" | "tool_calls" | "error" | null;
lastError?: string;
+ // Retry state for transient errors
+ retryState?: {
+ /** Current retry attempt (1-based) */
+ attempt: number;
+ /** Maximum attempts allowed */
+ maxAttempts: number;
+ /** Whether currently waiting before retry */
+ isRetrying: boolean;
+ /** Time remaining until next retry (ms) */
+ retryDelayMs: number;
+ };
+
// Reference counting - how many windows have this session open
subscriberCount: number;