feat: add production-ready error handling with retry logic

- Add error-handling.ts with centralized error parsing and categorization
- Implement exponential backoff with jitter for transient errors
- Handle rate limits (429) respecting Retry-After header
- Add retry state to ChatSessionState and ChatStreamChunk types
- Update provider-manager with automatic retry loop for:
  - Rate limit errors (429)
  - Server errors (5xx)
  - Network errors
  - Timeouts
- Add retry indicator UI in AIViewer showing retry progress
- Show detailed error messages with attempt count on failure

Retry configuration:
- Max 3 retries (4 total attempts)
- Exponential backoff: 1s, 2s, 4s (capped at 30s)
- 20% jitter to prevent thundering herd
- Respects abort signal during wait

https://claude.ai/code/session_01HqtD9R33oqfB14Gu1V5wHC
This commit is contained in:
Claude
2026-01-31 14:31:58 +00:00
parent 3f19138ce2
commit eca153efc9
6 changed files with 568 additions and 94 deletions

View File

@@ -20,6 +20,8 @@ import {
RefreshCw,
Play,
Sparkles,
AlertCircle,
RotateCw,
} from "lucide-react";
import { cn } from "@/lib/utils";
import { Button } from "@/components/ui/button";
@@ -256,8 +258,14 @@ function ChatPanel({
onConversationCreated: (id: string) => void;
}) {
// Session manager hooks
const { messages, isLoading, streamingContent, error, canResume } =
useChatSession(conversationId, { providerInstanceId, modelId });
const {
messages,
isLoading,
streamingContent,
error,
canResume,
retryState,
} = useChatSession(conversationId, { providerInstanceId, modelId });
const { sendMessage, createConversation, stopGeneration, resumeGeneration } =
useChatActions();
@@ -444,10 +452,31 @@ function ChatPanel({
</>
)}
{/* Error display */}
{error && !isLoading && (
<div className="text-sm text-destructive bg-destructive/10 rounded-lg px-3 py-2">
{error}
{/* Retry indicator - shown during automatic retry */}
{retryState?.isRetrying && (
<div className="flex items-center gap-2 text-sm text-muted-foreground bg-muted/50 rounded-lg px-3 py-2">
<RotateCw className="h-4 w-4 animate-spin" />
<span>
Retrying ({retryState.attempt}/{retryState.maxAttempts})...
</span>
</div>
)}
{/* Error display - shown for non-retryable errors */}
{error && !isLoading && !retryState?.isRetrying && (
<div className="flex items-start gap-2 text-sm bg-destructive/10 rounded-lg px-3 py-2">
<AlertCircle className="h-4 w-4 text-destructive flex-shrink-0 mt-0.5" />
<div className="flex-1">
<span className="text-destructive">{error}</span>
{retryState &&
!retryState.isRetrying &&
retryState.attempt > 0 && (
<span className="text-muted-foreground text-xs block mt-1">
Failed after {retryState.attempt} attempt
{retryState.attempt > 1 ? "s" : ""}
</span>
)}
</div>
</div>
)}

View File

@@ -43,6 +43,9 @@ interface UseChatSessionResult {
// Resume state
canResume: boolean;
finishReason: ChatSessionState["finishReason"];
// Retry state
retryState: ChatSessionState["retryState"];
}
/**
@@ -118,6 +121,9 @@ export function useChatSession(
(session.finishReason === null && session.streamingContent)),
),
finishReason: session?.finishReason,
// Retry state
retryState: session?.retryState,
}),
[conversation, session],
);

View File

@@ -0,0 +1,299 @@
/**
* LLM Error Handling
*
* Centralized error handling with retry logic for LLM API calls.
* Supports exponential backoff, rate limit handling, and transient error recovery.
*/
// ─────────────────────────────────────────────────────────────
// Error Types
// ─────────────────────────────────────────────────────────────
/**
 * Structured error produced by parseError(); carries everything the retry
 * loop and the UI need to decide whether/when to retry and what to show.
 */
export interface LLMError {
  /** Human-readable error message (user-facing) */
  message: string;
  /** HTTP status code if applicable */
  status?: number;
  /** Whether this error can be retried */
  retryable: boolean;
  /** Suggested wait time before retry (ms) */
  retryAfter?: number;
  /** Original error for debugging */
  originalError?: unknown;
}

/** Coarse error buckets used to pick message, retryability, and backoff. */
export type ErrorCategory =
  | "auth" // 401, 403 - API key issues
  | "billing" // 402 - Payment required
  | "not_found" // 404 - Model not found
  | "rate_limit" // 429 - Rate limited
  | "server" // 5xx - Server errors
  | "network" // Connection issues
  | "timeout" // Request timeout
  | "cancelled" // User cancelled
  | "unknown"; // Catch-all
// ─────────────────────────────────────────────────────────────
// Error Detection
// ─────────────────────────────────────────────────────────────
/**
* Categorize an error for handling.
*/
/**
 * Categorize an error for handling.
 *
 * Order matters: abort/timeout DOMExceptions are checked first so a user
 * cancellation is never misclassified by message-text sniffing below.
 */
export function categorizeError(error: unknown): ErrorCategory {
  // Aborted requests (user cancellation via AbortSignal)
  if (error instanceof DOMException && error.name === "AbortError") {
    return "cancelled";
  }
  // Timeout (e.g. AbortSignal.timeout())
  if (error instanceof DOMException && error.name === "TimeoutError") {
    return "timeout";
  }
  // Check for HTTP status codes (OpenAI SDK error shape)
  if (isAPIError(error)) {
    const status = error.status;
    if (status === 401 || status === 403) return "auth";
    if (status === 402) return "billing";
    if (status === 404) return "not_found";
    // 408 Request Timeout is a standard transient/retryable condition
    if (status === 408) return "timeout";
    if (status === 429) return "rate_limit";
    if (status >= 500 && status < 600) return "server";
  }
  // Network errors (fetch rejects with a TypeError on connection failure)
  if (error instanceof TypeError && error.message.includes("fetch")) {
    return "network";
  }
  // Fall back to message sniffing for providers that throw plain Errors
  if (error instanceof Error) {
    const msg = error.message.toLowerCase();
    if (msg.includes("network") || msg.includes("connection")) return "network";
    if (msg.includes("timeout")) return "timeout";
    if (msg.includes("rate") && msg.includes("limit")) return "rate_limit";
  }
  return "unknown";
}
/**
* Check if an error is from the OpenAI API (duck typing).
*/
/**
 * Duck-type check for OpenAI-SDK-shaped API errors: any non-null object
 * carrying a numeric `status` property qualifies.
 */
function isAPIError(
  error: unknown,
): error is { status: number; message?: string; headers?: Headers } {
  if (error === null || typeof error !== "object") {
    return false;
  }
  const candidate = error as { status?: unknown };
  return typeof candidate.status === "number";
}
/**
* Check if an error category is retryable.
*/
/**
 * Check if an error category is retryable.
 *
 * Only transient conditions (throttling, server hiccups, connectivity,
 * timeouts) qualify; auth/billing/not-found/cancelled/unknown never do.
 */
export function isRetryable(category: ErrorCategory): boolean {
  return ["rate_limit", "server", "network", "timeout"].includes(category);
}
// ─────────────────────────────────────────────────────────────
// Error Parsing
// ─────────────────────────────────────────────────────────────
/**
* Parse an error into a structured LLMError.
*/
/**
 * Parse an unknown error into a structured LLMError.
 *
 * Resolution order for `retryAfter`: an explicit Retry-After header on a
 * rate-limit response wins; otherwise retryable categories fall back to a
 * per-category default backoff.
 */
export function parseError(error: unknown): LLMError {
  const category = categorizeError(error);
  const retryable = isRetryable(category);

  // Prefer the server's explicit hint on rate-limit responses
  let retryAfter: number | undefined =
    category === "rate_limit" && isAPIError(error)
      ? extractRetryAfter(error)
      : undefined;

  // Fall back to a category-specific default for other retryable errors
  if (retryable && !retryAfter) {
    retryAfter = getDefaultBackoff(category);
  }

  return {
    message: getErrorMessage(error, category),
    status: isAPIError(error) ? error.status : undefined,
    retryable,
    retryAfter,
    originalError: error,
  };
}
/**
* Extract retry-after header from API error.
*/
/**
 * Extract the Retry-After header from an API error, in milliseconds.
 *
 * Per RFC 9110 the header value is either a delay in seconds or an HTTP
 * date. Returns undefined when the header is absent or unparseable. Both
 * branches clamp to >= 0 so a stale date or a bogus negative value never
 * produces a negative wait (previously only the date branch clamped).
 */
function extractRetryAfter(error: { headers?: Headers }): number | undefined {
  if (!error.headers) return undefined;
  const retryAfter = error.headers.get?.("retry-after");
  if (!retryAfter) return undefined;

  // delay-seconds form
  const seconds = parseInt(retryAfter, 10);
  if (!isNaN(seconds)) {
    return Math.max(0, seconds * 1000);
  }

  // HTTP-date form
  const date = Date.parse(retryAfter);
  if (!isNaN(date)) {
    return Math.max(0, date - Date.now());
  }
  return undefined;
}
/**
* Get default backoff time for a category.
*/
/**
 * Default backoff (ms) used when a retryable error carries no explicit
 * Retry-After hint. Rate limits back off longest; server errors a bit
 * less; network/timeout (and anything else) retry after one second.
 */
function getDefaultBackoff(category: ErrorCategory): number {
  if (category === "rate_limit") return 5000;
  if (category === "server") return 2000;
  return 1000;
}
/**
* Get user-friendly error message.
*/
/** Fixed, user-facing message per category; "unknown" is handled below. */
const CATEGORY_MESSAGES: Partial<Record<ErrorCategory, string>> = {
  auth: "Invalid API key. Please check your credentials.",
  billing: "Insufficient balance. Please top up your account.",
  not_found: "Model not found. Please select a different model.",
  rate_limit: "Rate limit exceeded. Retrying automatically...",
  server: "Provider service is temporarily unavailable. Retrying...",
  network: "Network error. Please check your connection.",
  timeout: "Request timed out. Retrying...",
  cancelled: "Request was cancelled.",
};

/**
 * Get a user-friendly error message for a categorized error. Uncategorized
 * errors surface their own message when available.
 */
function getErrorMessage(error: unknown, category: ErrorCategory): string {
  const fixed = CATEGORY_MESSAGES[category];
  if (fixed !== undefined) {
    return fixed;
  }
  return error instanceof Error ? error.message : "An unknown error occurred.";
}
// ─────────────────────────────────────────────────────────────
// Retry Logic
// ─────────────────────────────────────────────────────────────
/** Tuning knobs for the automatic retry loop. */
export interface RetryConfig {
  /** Maximum number of retry attempts */
  maxRetries: number;
  /** Base delay in milliseconds */
  baseDelay: number;
  /** Maximum delay in milliseconds */
  maxDelay: number;
  /** Jitter factor (0-1) to add randomness */
  jitter: number;
}

/** Defaults: 3 retries, 1s base, 30s ceiling, ±20% jitter. */
export const DEFAULT_RETRY_CONFIG: RetryConfig = {
  maxRetries: 3,
  baseDelay: 1000,
  maxDelay: 30000,
  jitter: 0.2,
};

/**
 * Calculate the wait before retry number `attempt` (0-based).
 *
 * A server-suggested delay (e.g. from a Retry-After header) takes
 * precedence, capped at maxDelay. Otherwise the delay doubles per attempt
 * from baseDelay, with symmetric random jitter of ±(jitter × delay) to
 * avoid thundering-herd retries, then is capped at maxDelay.
 */
export function calculateBackoff(
  attempt: number,
  suggestedDelay?: number,
  config: RetryConfig = DEFAULT_RETRY_CONFIG,
): number {
  if (suggestedDelay && suggestedDelay > 0) {
    return Math.min(suggestedDelay, config.maxDelay);
  }
  const base = config.baseDelay * 2 ** attempt;
  const spread = base * config.jitter;
  const offset = (Math.random() * 2 - 1) * spread;
  return Math.min(base + offset, config.maxDelay);
}
/**
* Wait for the specified duration.
*/
/**
 * Wait for the specified duration, rejecting with an AbortError if the
 * signal fires first.
 *
 * Fix: the original never removed its "abort" listener after the timer
 * resolved, leaking one handler per call on a long-lived signal (the retry
 * loop reuses the same request signal across attempts). The listener is now
 * registered with `{ once: true }` and detached on normal completion.
 *
 * @param ms duration to wait in milliseconds
 * @param signal optional signal that cancels the wait
 */
export function sleep(ms: number, signal?: AbortSignal): Promise<void> {
  return new Promise((resolve, reject) => {
    // Already aborted: reject immediately, schedule nothing
    if (signal?.aborted) {
      reject(new DOMException("Aborted", "AbortError"));
      return;
    }
    const onAbort = () => {
      clearTimeout(timeout);
      reject(new DOMException("Aborted", "AbortError"));
    };
    const timeout = setTimeout(() => {
      // Detach so long-lived signals don't accumulate dead handlers
      signal?.removeEventListener("abort", onAbort);
      resolve();
    }, ms);
    signal?.addEventListener("abort", onAbort, { once: true });
  });
}
// ─────────────────────────────────────────────────────────────
// Retry State for UI
// ─────────────────────────────────────────────────────────────
/** UI-facing snapshot of retry progress for a single request. */
export interface RetryState {
  /** Current retry attempt (0 = initial, 1+ = retry) */
  attempt: number;
  /** Maximum attempts allowed */
  maxAttempts: number;
  /** Whether currently waiting before retry */
  waiting: boolean;
  /** Time remaining until next retry (ms) */
  waitTimeRemaining: number;
  /** Last error that triggered retry */
  lastError?: LLMError;
}

/**
 * Build the initial retry state for a fresh request.
 * maxAttempts counts the initial try plus every allowed retry.
 */
export function createRetryState(
  config: RetryConfig = DEFAULT_RETRY_CONFIG,
): RetryState {
  const { maxRetries } = config;
  return {
    attempt: 0,
    maxAttempts: maxRetries + 1, // the initial attempt plus each retry
    waiting: false,
    waitTimeRemaining: 0,
  };
}

View File

@@ -15,11 +15,14 @@ import type {
ChatStreamChunk,
ChatOptions,
} from "@/types/llm";
import { createOpenAIClient, formatModelName } from "./openai-client";
import {
createOpenAIClient,
formatModelName,
parseAPIError,
} from "./openai-client";
parseError,
calculateBackoff,
sleep,
DEFAULT_RETRY_CONFIG,
type RetryConfig,
} from "./error-handling";
import { AI_PROVIDER_PRESETS } from "@/lib/ai-provider-presets";
const MODEL_CACHE_TTL = 1000 * 60 * 5; // 5 minutes
@@ -237,12 +240,13 @@ class AIProviderManager {
/**
* Chat with a model (streaming).
* Includes automatic retry logic for transient errors.
*/
async *chat(
instanceId: string,
modelId: string,
messages: LLMMessage[],
options: Omit<ChatOptions, "model">,
options: Omit<ChatOptions, "model"> & { retryConfig?: RetryConfig },
): AsyncGenerator<ChatStreamChunk> {
const instance = await db.llmProviders.get(instanceId);
if (!instance) {
@@ -255,108 +259,179 @@ class AIProviderManager {
return;
}
try {
const client = this.getClient(instance);
const retryConfig = options.retryConfig ?? DEFAULT_RETRY_CONFIG;
const maxAttempts = retryConfig.maxRetries + 1;
// Format messages for OpenAI API
const formattedMessages = this.formatMessages(messages);
for (let attempt = 0; attempt < maxAttempts; attempt++) {
try {
// Yield all chunks from the stream attempt
const streamResult = yield* this.streamChat(
instance,
modelId,
messages,
options,
);
// Build request params
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const params: any = {
model: modelId,
messages: formattedMessages,
stream: true,
stream_options: { include_usage: true },
temperature: options.temperature ?? 0.7,
max_tokens: options.maxTokens,
};
// If we got here successfully, update usage tracking and return
if (streamResult.success) {
await db.llmProviders.update(instanceId, {
lastUsed: Date.now(),
lastModelId: modelId,
});
this.addRecentModel(instance.providerId, modelId);
return;
}
} catch (error) {
// Handle abort - don't retry
if (error instanceof DOMException && error.name === "AbortError") {
yield { type: "done" };
return;
}
// Add tools if provided
if (options.tools && options.tools.length > 0) {
params.tools = options.tools;
if (options.tool_choice) {
params.tool_choice = options.tool_choice;
// Parse the error
const llmError = parseError(error);
// If not retryable or last attempt, yield error and stop
if (!llmError.retryable || attempt >= maxAttempts - 1) {
yield {
type: "error",
error: llmError.message,
retry: {
attempt: attempt + 1,
maxAttempts,
delayMs: 0,
retryable: false,
},
};
return;
}
// Calculate backoff delay
const delayMs = calculateBackoff(
attempt,
llmError.retryAfter,
retryConfig,
);
// Yield retry event so UI can show progress
yield {
type: "retry",
error: llmError.message,
retry: {
attempt: attempt + 1,
maxAttempts,
delayMs,
retryable: true,
},
};
// Wait before retrying (respects abort signal)
try {
await sleep(delayMs, options.signal);
} catch {
// Aborted during wait
yield { type: "done" };
return;
}
}
}
}
const stream = (await client.chat.completions.create(params, {
signal: options.signal,
})) as unknown as AsyncIterable<OpenAI.Chat.Completions.ChatCompletionChunk>;
/**
* Internal: Stream a single chat completion attempt.
* Returns a result indicating success/failure.
*/
private async *streamChat(
instance: LLMProviderInstance,
modelId: string,
messages: LLMMessage[],
options: Omit<ChatOptions, "model">,
): AsyncGenerator<ChatStreamChunk, { success: boolean }> {
const client = this.getClient(instance);
let usage: ChatStreamChunk["usage"] | undefined;
let finishReason: ChatStreamChunk["finish_reason"] = null;
// Format messages for OpenAI API
const formattedMessages = this.formatMessages(messages);
for await (const chunk of stream) {
const choice = chunk.choices[0];
if (!choice) continue;
// Build request params
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const params: any = {
model: modelId,
messages: formattedMessages,
stream: true,
stream_options: { include_usage: true },
temperature: options.temperature ?? 0.7,
max_tokens: options.maxTokens,
};
const delta = choice.delta;
// Add tools if provided
if (options.tools && options.tools.length > 0) {
params.tools = options.tools;
if (options.tool_choice) {
params.tool_choice = options.tool_choice;
}
}
// Regular content
if (delta?.content) {
yield { type: "token", content: delta.content };
}
const stream = (await client.chat.completions.create(params, {
signal: options.signal,
})) as unknown as AsyncIterable<OpenAI.Chat.Completions.ChatCompletionChunk>;
// Extended thinking / reasoning (Claude, DeepSeek, etc.)
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const reasoning = (delta as any)?.reasoning_content;
if (reasoning) {
yield { type: "reasoning", content: reasoning };
}
let usage: ChatStreamChunk["usage"] | undefined;
let finishReason: ChatStreamChunk["finish_reason"] = null;
// Tool calls (streamed incrementally)
if (delta?.tool_calls) {
for (const tc of delta.tool_calls) {
yield {
type: "tool_call",
tool_call: {
index: tc.index,
id: tc.id,
type: tc.type,
function: tc.function
? {
name: tc.function.name,
arguments: tc.function.arguments,
}
: undefined,
},
};
}
}
for await (const chunk of stream) {
const choice = chunk.choices[0];
if (!choice) continue;
// Capture finish reason
if (choice.finish_reason) {
finishReason =
choice.finish_reason as ChatStreamChunk["finish_reason"];
}
const delta = choice.delta;
// Capture usage from final chunk
if (chunk.usage) {
usage = {
promptTokens: chunk.usage.prompt_tokens,
completionTokens: chunk.usage.completion_tokens,
// Regular content
if (delta?.content) {
yield { type: "token", content: delta.content };
}
// Extended thinking / reasoning (Claude, DeepSeek, etc.)
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const reasoning = (delta as any)?.reasoning_content;
if (reasoning) {
yield { type: "reasoning", content: reasoning };
}
// Tool calls (streamed incrementally)
if (delta?.tool_calls) {
for (const tc of delta.tool_calls) {
yield {
type: "tool_call",
tool_call: {
index: tc.index,
id: tc.id,
type: tc.type,
function: tc.function
? {
name: tc.function.name,
arguments: tc.function.arguments,
}
: undefined,
},
};
}
}
yield { type: "done", usage, finish_reason: finishReason };
// Update lastUsed and add to recent models
await db.llmProviders.update(instanceId, {
lastUsed: Date.now(),
lastModelId: modelId,
});
this.addRecentModel(instance.providerId, modelId);
} catch (error) {
if (error instanceof DOMException && error.name === "AbortError") {
yield { type: "done" };
return;
// Capture finish reason
if (choice.finish_reason) {
finishReason = choice.finish_reason as ChatStreamChunk["finish_reason"];
}
const message = parseAPIError(error);
yield { type: "error", error: message };
// Capture usage from final chunk
if (chunk.usage) {
usage = {
promptTokens: chunk.usage.prompt_tokens,
completionTokens: chunk.usage.completion_tokens,
};
}
}
yield { type: "done", usage, finish_reason: finishReason };
return { success: true };
}
/**
@@ -410,7 +485,8 @@ class AIProviderManager {
await client.models.list({ signal: AbortSignal.timeout(5000) });
return { success: true };
} catch (error) {
return { success: false, error: parseAPIError(error) };
const llmError = parseError(error);
return { success: false, error: llmError.message };
}
}
}

View File

@@ -628,12 +628,53 @@ class ChatSessionManager {
streamingMessage: { ...streaming },
lastActivity: Date.now(),
});
} else if (chunk.type === "retry" && chunk.retry) {
// Transient error - retrying automatically
this.updateSession(conversationId, {
...session,
retryState: {
attempt: chunk.retry.attempt,
maxAttempts: chunk.retry.maxAttempts,
isRetrying: true,
retryDelayMs: chunk.retry.delayMs,
},
lastError: chunk.error,
lastActivity: Date.now(),
});
// Emit error event for UI awareness (but we're handling it)
if (chunk.error) {
this.error$.next({
conversationId,
error: `${chunk.error} (retry ${chunk.retry.attempt}/${chunk.retry.maxAttempts})`,
});
}
} else if (chunk.type === "done") {
usage = chunk.usage;
if (chunk.finish_reason) {
finishReason = chunk.finish_reason;
}
// Clear retry state on success
const currentSession = this.getSession(conversationId);
if (currentSession?.retryState) {
this.updateSession(conversationId, {
...currentSession,
retryState: undefined,
});
}
} else if (chunk.type === "error") {
// Check if this is a non-retryable error with retry info
if (chunk.retry && !chunk.retry.retryable) {
this.updateSession(conversationId, {
...session,
retryState: {
attempt: chunk.retry.attempt,
maxAttempts: chunk.retry.maxAttempts,
isRetrying: false,
retryDelayMs: 0,
},
});
}
throw new Error(chunk.error || "Unknown error");
}
}

View File

@@ -74,7 +74,7 @@ export interface AISettings {
// ─────────────────────────────────────────────────────────────
export interface ChatStreamChunk {
type: "token" | "reasoning" | "tool_call" | "done" | "error";
type: "token" | "reasoning" | "tool_call" | "done" | "error" | "retry";
content?: string;
/** Streaming tool call delta */
tool_call?: StreamingToolCall;
@@ -85,6 +85,17 @@ export interface ChatStreamChunk {
promptTokens: number;
completionTokens: number;
};
/** Retry information for error recovery */
retry?: {
/** Current attempt number (1-based) */
attempt: number;
/** Maximum attempts allowed */
maxAttempts: number;
/** Delay before next retry (ms) */
delayMs: number;
/** Whether this error is retryable */
retryable: boolean;
};
}
/**
@@ -154,6 +165,18 @@ export interface ChatSessionState {
finishReason?: "stop" | "length" | "tool_calls" | "error" | null;
lastError?: string;
// Retry state for transient errors
retryState?: {
/** Current retry attempt (1-based) */
attempt: number;
/** Maximum attempts allowed */
maxAttempts: number;
/** Whether currently waiting before retry */
isRetrying: boolean;
/** Time remaining until next retry (ms) */
retryDelayMs: number;
};
// Reference counting - how many windows have this session open
subscriberCount: number;