From bebb4ed83411eb9480b4dcd972ec141c8fdfd729 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 22 Dec 2025 15:59:00 +0000 Subject: [PATCH] docs: add comprehensive ReqViewer state machine analysis and improvement plan MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Analysis document: - Identified critical bug in applesauce-relay catchError handling - Documented 7 edge cases causing "LIVE with 0 relays" issue - Root cause: relay disconnections treated as EOSE messages - Detailed Nostr protocol semantics and applesauce behavior Implementation plan: - Hybrid approach: RelayStateManager + event metadata tracking - New state types: ReqRelayState, ReqOverallState - Enhanced hook: useReqTimelineEnhanced with per-relay tracking - 3-phase rollout: infrastructure → UI → testing - Comprehensive state machine with 8 query states, 8 relay states This provides the foundation for production-quality REQ status tracking that accurately handles disconnections, timeouts, and partial failures. --- docs/req-viewer-improvement-plan.md | 1047 +++++++++++++++++++++++++++ docs/req-viewer-state-analysis.md | 787 ++++++++++++++++++++ 2 files changed, 1834 insertions(+) create mode 100644 docs/req-viewer-improvement-plan.md create mode 100644 docs/req-viewer-state-analysis.md diff --git a/docs/req-viewer-improvement-plan.md b/docs/req-viewer-improvement-plan.md new file mode 100644 index 0000000..c844ec4 --- /dev/null +++ b/docs/req-viewer-improvement-plan.md @@ -0,0 +1,1047 @@ +# ReqViewer State Machine Improvement Plan + +**Date**: 2025-12-22 +**Goal**: Production-quality REQ status tracking with accurate relay state information + +## Overview + +This plan details the implementation of a robust state machine for ReqViewer that accurately tracks per-relay and overall query status, handles edge cases, and provides production-quality user feedback. + +**See**: `req-viewer-state-analysis.md` for detailed problem analysis. 
+ +## Solution Architecture + +### Hybrid Approach: Connection State + Event Tracking + +We'll combine two sources of truth: +1. **RelayStateManager**: Tracks WebSocket connection state per relay +2. **Event Metadata**: Tracks which relay sent which events (via `_relay` property) + +This hybrid approach avoids duplicate subscriptions while providing accurate status tracking. + +## Implementation Tasks + +### Phase 1: Core Infrastructure + +#### Task 1.1: Create Per-Relay State Tracking Types + +**File**: `src/types/req-state.ts` (NEW) + +```typescript +/** + * Connection state from RelayStateManager + */ +export type RelayConnectionState = + | 'pending' // Not yet attempted + | 'connecting' // Connection in progress + | 'connected' // WebSocket connected + | 'disconnected' // Disconnected (expected or unexpected) + | 'error'; // Connection error + +/** + * Subscription state specific to this REQ + */ +export type RelaySubscriptionState = + | 'waiting' // Connected but no events yet + | 'receiving' // Events being received + | 'eose' // EOSE received (real or timeout) + | 'error'; // Subscription error + +/** + * Per-relay state for a single REQ subscription + */ +export interface ReqRelayState { + url: string; + + // Connection state (from RelayStateManager) + connectionState: RelayConnectionState; + + // Subscription state (tracked by us) + subscriptionState: RelaySubscriptionState; + + // Event tracking + eventCount: number; + firstEventAt?: number; + lastEventAt?: number; + + // Timing + connectedAt?: number; + eoseAt?: number; + disconnectedAt?: number; + + // Error handling + errorMessage?: string; + errorType?: 'connection' | 'protocol' | 'timeout' | 'auth'; +} + +/** + * Overall query state derived from individual relay states + */ +export type ReqOverallStatus = + | 'discovering' // Selecting relays (NIP-65) + | 'connecting' // Waiting for first relay to connect + | 'loading' // Loading initial events + | 'live' // Streaming after EOSE, relays connected + | 
'partial' // Some relays ok, some failed + | 'closed' // All relays completed and closed + | 'failed' // All relays failed + | 'offline'; // All relays disconnected after being live + +/** + * Aggregated state for the entire query + */ +export interface ReqOverallState { + status: ReqOverallStatus; + + // Relay counts + totalRelays: number; + connectedCount: number; + receivingCount: number; + eoseCount: number; + errorCount: number; + disconnectedCount: number; + + // Timing + queryStartedAt: number; + firstEventAt?: number; + allEoseAt?: number; + + // Flags + hasReceivedEvents: boolean; + hasActiveRelays: boolean; + allRelaysFailed: boolean; +} +``` + +**Tests**: `src/types/req-state.test.ts` +- Type checking only, no runtime tests needed + +--- + +#### Task 1.2: Create State Derivation Logic + +**File**: `src/lib/req-state-machine.ts` (NEW) + +```typescript +import type { ReqRelayState, ReqOverallState, ReqOverallStatus } from '@/types/req-state'; + +/** + * Derive overall query status from individual relay states + */ +export function deriveOverallState( + relayStates: Map<string, ReqRelayState>, + overallEoseReceived: boolean, + isStreaming: boolean, + queryStartedAt: number, +): ReqOverallState { + const states = Array.from(relayStates.values()); + + // Count relay states + const totalRelays = states.length; + const connectedCount = states.filter(s => s.connectionState === 'connected').length; + const receivingCount = states.filter(s => s.subscriptionState === 'receiving').length; + const eoseCount = states.filter(s => s.subscriptionState === 'eose').length; + const errorCount = states.filter(s => s.connectionState === 'error').length; + const disconnectedCount = states.filter(s => s.connectionState === 'disconnected').length; + + // Calculate flags + const hasReceivedEvents = states.some(s => s.eventCount > 0); + const hasActiveRelays = connectedCount > 0; + const allRelaysFailed = totalRelays > 0 && errorCount === totalRelays; + const allDisconnected = totalRelays > 0 && + 
(disconnectedCount + errorCount) === totalRelays; + + // Timing + const firstEventAt = states + .map(s => s.firstEventAt) + .filter((t): t is number => t !== undefined) + .sort((a, b) => a - b)[0]; + + const allEoseAt = overallEoseReceived ? Date.now() : undefined; + + // Derive status + const status: ReqOverallStatus = (() => { + // No relays selected yet + if (totalRelays === 0) { + return 'discovering'; + } + + // All relays failed to connect + if (allRelaysFailed && !hasReceivedEvents) { + return 'failed'; + } + + // No relays connected, none have sent events + if (!hasActiveRelays && !hasReceivedEvents) { + return 'connecting'; + } + + // Had events, had connections, but all disconnected now + if (allDisconnected && hasReceivedEvents && overallEoseReceived) { + if (isStreaming) { + return 'offline'; // Was live, now offline + } else { + return 'closed'; // Completed and closed + } + } + + // EOSE not received yet, loading initial data + if (!overallEoseReceived) { + return 'loading'; + } + + // EOSE received, but not all relays healthy (must be checked before 'live') + if (overallEoseReceived && (errorCount > 0 || disconnectedCount > 0)) { + if (hasActiveRelays) { + return 'partial'; // Some working, some not + } else { + return 'offline'; // All disconnected after EOSE + } + } + + // EOSE received, streaming mode, no failed or disconnected relays + if (overallEoseReceived && isStreaming && hasActiveRelays) { + return 'live'; + } + + // EOSE received, not streaming, all done + if (overallEoseReceived && !isStreaming) { + return 'closed'; + } + + // Default fallback + return 'loading'; + })(); + + return { + status, + totalRelays, + connectedCount, + receivingCount, + eoseCount, + errorCount, + disconnectedCount, + hasReceivedEvents, + hasActiveRelays, + allRelaysFailed, + queryStartedAt, + firstEventAt, + allEoseAt, + }; +} + +/** + * Get user-friendly status text + */ +export function getStatusText(state: ReqOverallState): string { + switch (state.status) { + case 'discovering': + return 'DISCOVERING 
RELAYS'; + case 'connecting': + return 'CONNECTING'; + case 'loading': + return state.hasReceivedEvents ? 'LOADING' : 'WAITING'; + case 'live': + return 'LIVE'; + case 'partial': + return `PARTIAL (${state.connectedCount}/${state.totalRelays})`; + case 'offline': + return 'OFFLINE'; + case 'closed': + return 'CLOSED'; + case 'failed': + return 'FAILED'; + } +} + +/** + * Get status indicator color + */ +export function getStatusColor(status: ReqOverallStatus): string { + switch (status) { + case 'discovering': + case 'connecting': + case 'loading': + return 'text-yellow-500'; + case 'live': + case 'partial': + return 'text-green-500'; + case 'closed': + return 'text-muted-foreground'; + case 'offline': + case 'failed': + return 'text-red-500'; + } +} + +/** + * Should status indicator pulse/animate? + */ +export function shouldAnimate(status: ReqOverallStatus): boolean { + return ['discovering', 'connecting', 'loading', 'live'].includes(status); +} +``` + +**Tests**: `src/lib/req-state-machine.test.ts` + +```typescript +import { describe, it, expect } from 'vitest'; +import { deriveOverallState } from './req-state-machine'; +import type { ReqRelayState } from '@/types/req-state'; + +describe('deriveOverallState', () => { + const queryStartedAt = Date.now(); + + describe('discovering state', () => { + it('should return discovering when no relays', () => { + const state = deriveOverallState(new Map(), false, false, queryStartedAt); + expect(state.status).toBe('discovering'); + }); + }); + + describe('connecting state', () => { + it('should return connecting when relays pending', () => { + const relays = new Map([ + ['wss://relay1.com', { + url: 'wss://relay1.com', + connectionState: 'pending', + subscriptionState: 'waiting', + eventCount: 0, + }], + ]); + const state = deriveOverallState(relays, false, false, queryStartedAt); + expect(state.status).toBe('connecting'); + }); + }); + + describe('failed state', () => { + it('should return failed when all relays error 
with no events', () => { + const relays = new Map([ + ['wss://relay1.com', { + url: 'wss://relay1.com', + connectionState: 'error', + subscriptionState: 'error', + eventCount: 0, + }], + ['wss://relay2.com', { + url: 'wss://relay2.com', + connectionState: 'error', + subscriptionState: 'error', + eventCount: 0, + }], + ]); + const state = deriveOverallState(relays, false, false, queryStartedAt); + expect(state.status).toBe('failed'); + expect(state.allRelaysFailed).toBe(true); + }); + }); + + describe('loading state', () => { + it('should return loading when connected but no EOSE', () => { + const relays = new Map([ + ['wss://relay1.com', { + url: 'wss://relay1.com', + connectionState: 'connected', + subscriptionState: 'receiving', + eventCount: 5, + }], + ]); + const state = deriveOverallState(relays, false, false, queryStartedAt); + expect(state.status).toBe('loading'); + expect(state.hasReceivedEvents).toBe(true); + }); + }); + + describe('live state', () => { + it('should return live when EOSE + streaming + connected', () => { + const relays = new Map([ + ['wss://relay1.com', { + url: 'wss://relay1.com', + connectionState: 'connected', + subscriptionState: 'eose', + eventCount: 10, + }], + ]); + const state = deriveOverallState(relays, true, true, queryStartedAt); + expect(state.status).toBe('live'); + expect(state.hasActiveRelays).toBe(true); + }); + }); + + describe('offline state', () => { + it('should return offline when all disconnected after EOSE in streaming', () => { + const relays = new Map([ + ['wss://relay1.com', { + url: 'wss://relay1.com', + connectionState: 'disconnected', + subscriptionState: 'eose', + eventCount: 10, + }], + ['wss://relay2.com', { + url: 'wss://relay2.com', + connectionState: 'disconnected', + subscriptionState: 'eose', + eventCount: 5, + }], + ]); + const state = deriveOverallState(relays, true, true, queryStartedAt); + expect(state.status).toBe('offline'); + expect(state.hasActiveRelays).toBe(false); + 
expect(state.hasReceivedEvents).toBe(true); + }); + }); + + describe('partial state', () => { + it('should return partial when some relays ok, some failed', () => { + const relays = new Map([ + ['wss://relay1.com', { + url: 'wss://relay1.com', + connectionState: 'connected', + subscriptionState: 'eose', + eventCount: 10, + }], + ['wss://relay2.com', { + url: 'wss://relay2.com', + connectionState: 'error', + subscriptionState: 'error', + eventCount: 0, + }], + ]); + const state = deriveOverallState(relays, true, true, queryStartedAt); + expect(state.status).toBe('partial'); + expect(state.connectedCount).toBe(1); + expect(state.errorCount).toBe(1); + }); + }); + + describe('closed state', () => { + it('should return closed when EOSE + not streaming', () => { + const relays = new Map([ + ['wss://relay1.com', { + url: 'wss://relay1.com', + connectionState: 'disconnected', + subscriptionState: 'eose', + eventCount: 10, + }], + ]); + const state = deriveOverallState(relays, true, false, queryStartedAt); + expect(state.status).toBe('closed'); + }); + }); +}); +``` + +--- + +#### Task 1.3: Create Enhanced Timeline Hook + +**File**: `src/hooks/useReqTimelineEnhanced.ts` (NEW) + +```typescript +import { useState, useEffect, useMemo, useRef } from "react"; +import pool from "@/services/relay-pool"; +import type { NostrEvent, Filter } from "nostr-tools"; +import { useEventStore } from "applesauce-react/hooks"; +import { isNostrEvent } from "@/lib/type-guards"; +import { useStableValue, useStableArray } from "./useStable"; +import { useRelayState } from "./useRelayState"; +import type { ReqRelayState, ReqOverallState } from "@/types/req-state"; +import { deriveOverallState } from "@/lib/req-state-machine"; + +interface UseReqTimelineEnhancedOptions { + limit?: number; + stream?: boolean; +} + +interface UseReqTimelineEnhancedReturn { + events: NostrEvent[]; + loading: boolean; + error: Error | null; + eoseReceived: boolean; + + // Enhanced state tracking + relayStates: Map; + 
overallState: ReqOverallState; +} + +/** + * Enhanced REQ timeline hook with per-relay state tracking + * + * Combines: + * - Group subscription for events (with deduplication) + * - RelayStateManager for connection state + * - Event metadata for relay-specific tracking + * + * @param id - Unique identifier for this timeline + * @param filters - Nostr filter(s) + * @param relays - Array of relay URLs + * @param options - Stream mode, limit, etc. + */ +export function useReqTimelineEnhanced( + id: string, + filters: Filter | Filter[], + relays: string[], + options: UseReqTimelineEnhancedOptions = { limit: 50 } +): UseReqTimelineEnhancedReturn { + const eventStore = useEventStore(); + const { limit, stream = false } = options; + + // Existing state from useReqTimeline + const [loading, setLoading] = useState(false); + const [error, setError] = useState<Error | null>(null); + const [eoseReceived, setEoseReceived] = useState(false); + const [eventsMap, setEventsMap] = useState<Map<string, NostrEvent>>(new Map()); + + // New: Per-relay state tracking + const [relayStates, setRelayStates] = useState<Map<string, ReqRelayState>>(new Map()); + const queryStartedAt = useRef(Date.now()); + + // Get global relay connection states + const { relays: globalRelayStates } = useRelayState(); + + // Sort events by created_at + const events = useMemo(() => { + return Array.from(eventsMap.values()).sort( + (a, b) => b.created_at - a.created_at + ); + }, [eventsMap]); + + // Stabilize inputs + const stableFilters = useStableValue(filters); + const stableRelays = useStableArray(relays); + + // Initialize relay states when relays change + useEffect(() => { + queryStartedAt.current = Date.now(); + + const initialStates = new Map<string, ReqRelayState>(); + for (const url of relays) { + initialStates.set(url, { + url, + connectionState: 'pending', + subscriptionState: 'waiting', + eventCount: 0, + }); + } + setRelayStates(initialStates); + }, [stableRelays]); + + // Sync connection states from RelayStateManager + useEffect(() => { + setRelayStates(prev => { + const next = new 
Map(prev); + let changed = false; + + for (const [url, state] of prev) { + const globalState = globalRelayStates[url]; + if (globalState && globalState.connectionState !== state.connectionState) { + next.set(url, { + ...state, + connectionState: globalState.connectionState as any, + connectedAt: globalState.lastConnected, + disconnectedAt: globalState.lastDisconnected, + }); + changed = true; + } + } + + return changed ? next : prev; + }); + }, [globalRelayStates]); + + // Subscribe to events + useEffect(() => { + if (relays.length === 0) { + setLoading(false); + return; + } + + setLoading(true); + setError(null); + setEoseReceived(false); + setEventsMap(new Map()); + + // Normalize filters + const filterArray = Array.isArray(filters) ? filters : [filters]; + const filtersWithLimit = filterArray.map(f => ({ + ...f, + limit: limit || f.limit, + })); + + const observable = pool.subscription(relays, filtersWithLimit, { + retries: 5, + reconnect: 5, + resubscribe: true, + eventStore, + }); + + const subscription = observable.subscribe( + (response) => { + if (typeof response === "string") { + // EOSE received + setEoseReceived(true); + if (!stream) { + setLoading(false); + } + + // Mark all connected relays as having received EOSE + // Note: We can't tell which relay sent EOSE due to applesauce bug + // So we mark all connected ones + setRelayStates(prev => { + const next = new Map(prev); + for (const [url, state] of prev) { + if (state.connectionState === 'connected') { + next.set(url, { + ...state, + subscriptionState: 'eose', + eoseAt: Date.now(), + }); + } + } + return next; + }); + } else if (isNostrEvent(response)) { + // Event received + const event = response as NostrEvent & { _relay?: string }; + const relayUrl = event._relay; + + // Store event + eventStore.add(event); + setEventsMap(prev => { + const next = new Map(prev); + next.set(event.id, event); + return next; + }); + + // Update relay state + if (relayUrl) { + setRelayStates(prev => { + const state = 
prev.get(relayUrl); + if (!state) return prev; + + const now = Date.now(); + const next = new Map(prev); + next.set(relayUrl, { + ...state, + subscriptionState: 'receiving', + eventCount: state.eventCount + 1, + firstEventAt: state.firstEventAt ?? now, + lastEventAt: now, + }); + return next; + }); + } + } + }, + (err: Error) => { + console.error("REQ: Error", err); + setError(err); + setLoading(false); + }, + () => { + if (!stream) { + setLoading(false); + } + } + ); + + return () => { + subscription.unsubscribe(); + }; + }, [id, stableFilters, stableRelays, limit, stream, eventStore]); + + // Derive overall state + const overallState = useMemo(() => { + return deriveOverallState( + relayStates, + eoseReceived, + stream, + queryStartedAt.current + ); + }, [relayStates, eoseReceived, stream]); + + return { + events, + loading, + error, + eoseReceived, + relayStates, + overallState, + }; +} +``` + +**Tests**: `src/hooks/useReqTimelineEnhanced.test.ts` +- Mock pool.subscription +- Test state transitions +- Test relay state tracking +- Test overall state derivation + +--- + +### Phase 2: UI Integration + +#### Task 2.1: Update ReqViewer Status Indicator + +**File**: `src/components/ReqViewer.tsx` + +**Changes**: +1. Import enhanced hook and state machine helpers +2. Replace `useReqTimeline` with `useReqTimelineEnhanced` +3. Update status indicator (lines 916-957) to use `overallState.status` +4. 
Update connection count to show connected vs total + +```typescript +// Before +const { events, loading, error, eoseReceived } = useReqTimeline( + `req-${JSON.stringify(filter)}-${closeOnEose}`, + resolvedFilter, + finalRelays, + { limit: resolvedFilter.limit || 50, stream } +); + +// After +const { events, loading, error, eoseReceived, relayStates, overallState } = + useReqTimelineEnhanced( + `req-${JSON.stringify(filter)}-${closeOnEose}`, + resolvedFilter, + finalRelays, + { limit: resolvedFilter.limit || 50, stream } + ); + +// Status indicator + + + {getStatusText(overallState)} + + +// Connection count + + {overallState.connectedCount}/{overallState.totalRelays} + +``` + +--- + +#### Task 2.2: Enhance Relay Dropdown with Per-Relay Status + +**File**: `src/components/ReqViewer.tsx` + +**Changes**: Update relay dropdown (lines 998-1050) to show per-relay subscription state + +```typescript + + {/* Connection Status */} +
+
+ Relay Status +
+ {Array.from(relayStates.values()).map((relayState) => { + const globalState = relayStates.get(relayState.url); + const connIcon = getConnectionIcon(globalState); + + return ( + + + + {/* Event count */} +
+ {relayState.eventCount > 0 && ( + + +
+ + {relayState.eventCount} +
+
+ + {relayState.eventCount} events received + +
+ )} + + {/* Subscription state badge */} + {relayState.subscriptionState === 'receiving' && ( + RECEIVING + )} + {relayState.subscriptionState === 'eose' && ( + EOSE + )} + {relayState.subscriptionState === 'error' && ( + ERROR + )} + + {/* Connection icon */} + + +
{connIcon.icon}
+
+ +

{connIcon.label}

+
+
+
+
+ ); + })} +
+ + {/* Relay Selection (NIP-65) */} + {/* ... existing code ... */} +
+``` + +--- + +#### Task 2.3: Add Empty/Error States + +**File**: `src/components/ReqViewer.tsx` + +**Changes**: Add specific UI for failed/offline states + +```typescript +{/* All Relays Failed */} +{overallState.status === 'failed' && ( +
+
+ +

All Relays Failed

+

+ Could not connect to any of the {overallState.totalRelays} relays. + Check your network connection or try different relays. +

+
+
+)} + +{/* All Relays Offline (after being live) */} +{overallState.status === 'offline' && overallState.hasReceivedEvents && ( +
+ + ⚠️ All relays disconnected. Showing cached results. + +
+)} + +{/* Partial Connection Warning */} +{overallState.status === 'partial' && ( +
+ + ⚠️ Only {overallState.connectedCount}/{overallState.totalRelays} relays connected + +
+)} +``` + +--- + +### Phase 3: Testing & Polish + +#### Task 3.1: Add Unit Tests + +**Files**: +- `src/lib/req-state-machine.test.ts` (already outlined above) +- `src/hooks/useReqTimelineEnhanced.test.ts` + +**Test Coverage**: +- All state transitions +- Edge cases from analysis document +- Event tracking +- Connection state synchronization + +--- + +#### Task 3.2: Add Integration Tests + +**File**: `src/components/ReqViewer.test.tsx` (NEW) + +**Scenarios**: +1. All relays offline → shows "FAILED" +2. Mixed success/failure → shows "PARTIAL" +3. Streaming with disconnections → shows "OFFLINE" +4. Single relay timeout → appropriate status + +--- + +#### Task 3.3: Manual Testing Checklist + +**File**: `docs/req-viewer-test-scenarios.md` (NEW) + +Create manual test scenarios: +- [ ] Query with 30 relays, all offline +- [ ] Query with 10 relays, 5 succeed, 5 fail +- [ ] Query with 1 relay that times out (>10s) +- [ ] Streaming query, disconnect relays one by one +- [ ] Streaming query, all relays disconnect +- [ ] Non-streaming query, normal completion +- [ ] Query with AUTH-required relay +- [ ] Query with slow relay (8-12s response) +- [ ] Query with mix of fast/slow/failed relays + +--- + +### Phase 4: Future Enhancements + +#### Task 4.1: Relay Performance Metrics + +Track and display: +- Average response time per relay +- Success/failure rate +- Event count distribution +- EOSE latency + +#### Task 4.2: Smart Relay Selection + +Integrate with RelayLiveness: +- Skip relays in backoff state +- Prefer historically fast relays +- Warn about consistently failing relays + +#### Task 4.3: Query Optimization Suggestions + +Analyze query and suggest: +- "Query too broad, consider adding time range" +- "Consider using NIP-65 outbox relays" +- "Relay X frequently fails, consider removing" + +--- + +## Implementation Schedule + +### Week 1: Core Infrastructure +- Day 1-2: Tasks 1.1, 1.2 (types + state machine) +- Day 3-4: Task 1.3 (enhanced hook) +- Day 5: Unit tests (Task 
3.1) + +### Week 2: UI Integration +- Day 1-2: Task 2.1 (status indicator) +- Day 3: Task 2.2 (relay dropdown) +- Day 4: Task 2.3 (empty states) +- Day 5: Integration tests (Task 3.2) + +### Week 3: Testing & Polish +- Day 1-2: Manual testing (Task 3.3) +- Day 3-4: Bug fixes and refinements +- Day 5: Documentation and code review + +--- + +## Success Criteria + +### Must Have (Phase 1-2) +- [ ] "LIVE" only shows when relays are actually connected +- [ ] Distinguish between CLOSED, OFFLINE, and FAILED states +- [ ] Show accurate connected relay count +- [ ] Per-relay status in dropdown +- [ ] Handle "all relays disconnected" case correctly + +### Should Have (Phase 3) +- [ ] Unit tests covering all state transitions +- [ ] Integration tests for key scenarios +- [ ] Manual test scenarios documented and passing + +### Nice to Have (Phase 4) +- [ ] Relay performance metrics +- [ ] Smart relay selection based on history +- [ ] Query optimization suggestions + +--- + +## Risks & Mitigation + +### Risk 1: Can't distinguish real EOSE from timeout/error +**Impact**: Medium +**Mitigation**: Track connection state + events received to infer state + +### Risk 2: Event metadata might not have `_relay` property +**Impact**: High +**Mitigation**: Verify `markFromRelay()` operator is working, fallback to all-connected logic + +### Risk 3: State synchronization lag between hooks +**Impact**: Low +**Mitigation**: Use stable references, debounce updates if needed + +### Risk 4: Performance impact of per-relay tracking +**Impact**: Low +**Mitigation**: Use Map for O(1) lookups, memoize derived state + +--- + +## Rollout Plan + +### Phase 1: Soft Launch +1. Merge behind feature flag +2. Test internally with various queries +3. Gather feedback from team + +### Phase 2: Beta +1. Enable for subset of users +2. Monitor for issues +3. Collect user feedback + +### Phase 3: General Availability +1. Enable for all users +2. Document new status indicators +3. 
Create help articles + +--- + +## Documentation Updates + +### User-Facing +- Update help docs with new status indicators +- Explain what each status means +- Add troubleshooting guide for failed queries + +### Developer-Facing +- Document ReqRelayState and ReqOverallState types +- Document state machine transitions +- Add ADR (Architecture Decision Record) + +--- + +## Related Work + +### Upstream Issues +- Submit PR to applesauce-relay for catchError bug +- Propose per-relay EOSE tracking API enhancement + +### Technical Debt +- Migrate other timeline hooks to enhanced version +- Consolidate timeline state management +- Improve relay health tracking + +--- + +## Monitoring & Metrics + +### Success Metrics +- Reduction in user-reported "LIVE with 0 relays" issues +- Improved query success rate (user perception) +- Reduced confusion about query status + +### Technical Metrics +- State machine transition frequency +- Per-relay success/failure rates +- Average query completion time +- EOSE latency distribution + +--- + +## References + +- Analysis: `docs/req-viewer-state-analysis.md` +- NIP-01: https://github.com/nostr-protocol/nips/blob/master/01.md +- Applesauce-relay: node_modules/applesauce-relay/dist/ +- RelayStateManager: `src/services/relay-state-manager.ts` diff --git a/docs/req-viewer-state-analysis.md b/docs/req-viewer-state-analysis.md new file mode 100644 index 0000000..fc7e5da --- /dev/null +++ b/docs/req-viewer-state-analysis.md @@ -0,0 +1,787 @@ +# ReqViewer State Machine Analysis + +**Date**: 2025-12-22 +**Issue**: Disconnected relays are incorrectly shown as "LIVE" and counted as having sent EOSE + +## Executive Summary + +The ReqViewer state machine has a critical bug where relay disconnections are indistinguishable from EOSE messages, leading to incorrect status indicators. A query using 30 relays where all disconnect will show "LIVE" status with 0/30 relays connected. 
+ +## Architecture Overview + +### Current Flow + +``` +User Query → useReqTimeline → pool.subscription → RelayGroup → Individual Relays + ↓ ↓ + setEoseReceived(true) ←── "EOSE" string ←── catchError → DISCONNECTION + ↓ + Shows "LIVE" indicator +``` + +### Key Components + +1. **ReqViewer** (`src/components/ReqViewer.tsx`): + - UI component that displays query results and status + - Lines 918-957: Status indicator logic based on `loading`, `eoseReceived`, `stream` + - Lines 735-737: Connected relay count based on `connectionState === "connected"` + +2. **useReqTimeline** (`src/hooks/useReqTimeline.ts`): + - Hook that manages REQ subscription + - Line 88: Sets `eoseReceived = true` when response is string "EOSE" + - No awareness of relay disconnection state + +3. **RelayPool** (applesauce-relay): + - `pool.subscription()` delegates to RelayGroup + - Uses retry/reconnect logic but doesn't expose per-relay EOSE state + +4. **RelayGroup** (applesauce-relay/dist/group.js): + - **CRITICAL BUG HERE**: Line with `catchError(() => of("EOSE"))` + - Treats ANY error (including disconnection) as EOSE + - Aggregates EOSE from all relays before emitting overall EOSE + +5. **Relay** (applesauce-relay/dist/relay.js): + - Individual relay connection + - Has 10-second EOSE timeout that emits fake EOSE if none received + - Emits observables: `connected$`, `challenge$`, `authenticated$`, `notice$` + +## Critical Bug: Error Handling in RelayGroup + +### The Problem + +In `node_modules/applesauce-relay/dist/group.js`: + +```javascript +const observable = project(relay).pipe( + // Catch connection errors and return EOSE + catchError(() => of("EOSE")), // ← BUG: Disconnections become EOSE! 
+ map((value) => [relay, value]) +); +``` + +**Why this is problematic**: +- A relay that never connected emits "EOSE" +- A relay that disconnects mid-query emits "EOSE" +- A relay with a WebSocket error emits "EOSE" +- These fake EOSE messages are indistinguishable from real ones + +### EOSE Aggregation Logic + +```javascript +const eose = this.relays$.pipe( + switchMap((relays) => + main.pipe( + filter(([_, value]) => value === "EOSE"), + scan((received, [relay]) => [...received, relay], []), + // Wait until ALL relays have "sent" EOSE + takeWhile((received) => relays.some((r) => !received.includes(r))), + ignoreElements(), + endWith("EOSE") // ← Emits when all relays done (or errored) + ) + ) +); +``` + +**Result**: The overall EOSE is emitted when: +- ✅ All relays sent real EOSE and are streaming +- ✅ All relays sent real EOSE and closed connection +- ❌ All relays disconnected (caught and turned into fake EOSE) +- ❌ Mix of real EOSE and disconnections (can't tell the difference) + +## Edge Cases & Failure Scenarios + +### Scenario 1: All Relays Disconnect Immediately +**Setup**: Query with 10 relays, all are offline or reject connection +**Current Behavior**: +- Each relay: `catchError` → emits "EOSE" +- useReqTimeline: Sets `eoseReceived = true` +- ReqViewer: Shows "LIVE" indicator (green, pulsing) +- Connection count: 0/10 +- User sees: "LIVE" with 0 connected relays + +**Expected Behavior**: Show "ERROR" or "NO RELAYS" status + +### Scenario 2: Slow Relays with Timeout +**Setup**: Query with relay that takes 15 seconds to respond +**Current Behavior**: +- After 10s: EOSE timeout fires → emits fake "EOSE" +- Relay still connected, might send more events later +- User sees: "LIVE" but relay is counted as "done" + +**Expected Behavior**: Continue waiting or show "PARTIAL" status + +### Scenario 3: Mixed Success/Failure +**Setup**: 30 relays, 10 succeed with EOSE, 15 disconnect, 5 timeout +**Current Behavior**: +- All 30 eventually emit "EOSE" (real or fake) +- 
Overall EOSE emitted +- Shows "LIVE" with 10/30 connected +- User can't tell which relays actually completed vs failed + +**Expected Behavior**: Show per-relay status and overall "PARTIAL" indicator + +### Scenario 4: Mid-Query Disconnection +**Setup**: Relay sends 50 events, then disconnects before EOSE +**Current Behavior**: +- Disconnection → `catchError` → fake "EOSE" +- Events are shown, looks like query completed successfully +- No indication that query was interrupted + +**Expected Behavior**: Show warning that relay disconnected mid-query + +### Scenario 5: Streaming Mode with Gradual Disconnections +**Setup**: Query in streaming mode, relays disconnect one by one +**Current Behavior**: +- Each disconnection → fake "EOSE" +- Eventually all relays have "EOSE" +- Shows "LIVE" with 0/30 connected (THE REPORTED BUG!) + +**Expected Behavior**: Show "OFFLINE" or "NO ACTIVE RELAYS" when all disconnect + +### Scenario 6: Single Relay Query +**Setup**: Query with explicit relay that doesn't respond +**Current Behavior**: +- After 10s timeout → fake "EOSE" +- Shows "CLOSED" (not streaming) +- No indication relay never responded + +**Expected Behavior**: Show "TIMEOUT" or "NO RESPONSE" status + +### Scenario 7: AUTH Required But Not Provided +**Setup**: Relay requires authentication, no account active +**Current Behavior**: +- Relay returns "auth-required" CLOSED message +- Caught and turned into "EOSE" +- Looks like query completed with no results + +**Expected Behavior**: Show "AUTH REQUIRED" status + +## State Machine Requirements + +### Required States + +**Query-Level States**: +- `DISCOVERING`: Selecting relays (NIP-65 outbox discovery) +- `CONNECTING`: Waiting for first relay to connect +- `LOADING`: At least one relay connected, waiting for initial EOSE +- `LIVE`: At least one relay streaming after EOSE +- `PARTIAL`: Some relays completed, some failed/disconnected +- `CLOSED`: All relays sent EOSE and closed (non-streaming) +- `FAILED`: All relays failed to 
connect or errored +- `TIMEOUT`: No relays responded within timeout +- `AUTH_REQUIRED`: Some/all relays require authentication + +**Per-Relay States** (tracked separately): +- `PENDING`: Relay in list but not yet connected +- `CONNECTING`: Connection attempt in progress +- `CONNECTED`: WebSocket open, REQ sent +- `RECEIVING`: Events being received +- `EOSE_RECEIVED`: EOSE message received (still connected) +- `CLOSED`: Clean closure after EOSE +- `DISCONNECTED`: Unexpected disconnection +- `ERROR`: Connection error or protocol error +- `TIMEOUT`: No response within timeout +- `AUTH_REQUIRED`: Relay requires authentication + +### State Transition Rules + +**Query Level**: +``` +DISCOVERING → CONNECTING (when relays selected) +CONNECTING → LOADING (when first relay connects) +CONNECTING → FAILED (when all relay connections fail, timeout) + +LOADING → LIVE (when EOSE received, stream=true, >0 relays connected) +LOADING → PARTIAL (when some EOSE, some failed, stream=true) +LOADING → CLOSED (when all EOSE received, stream=false) +LOADING → FAILED (when all relays fail before EOSE) + +LIVE → PARTIAL (when some relays disconnect) +LIVE → FAILED (when all relays disconnect) + +PARTIAL → LIVE (when previously failed relays reconnect) +PARTIAL → FAILED (when remaining relays disconnect) +``` + +**Per-Relay** (tracked in RelayStateManager): +``` +PENDING → CONNECTING (when connection initiated) +CONNECTING → CONNECTED (when WebSocket open, REQ sent) +CONNECTING → ERROR (when connection fails) +CONNECTING → TIMEOUT (when connection takes too long) + +CONNECTED → RECEIVING (when first event received) +CONNECTED → EOSE_RECEIVED (when EOSE received, no prior events) +CONNECTED → ERROR (when connection lost) + +RECEIVING → EOSE_RECEIVED (when EOSE received) +RECEIVING → DISCONNECTED (when connection lost before EOSE) +RECEIVING → ERROR (when protocol error) + +EOSE_RECEIVED → CLOSED (when relay closes connection after EOSE) +EOSE_RECEIVED → DISCONNECTED (when relay keeps 
connection open in streaming) +``` + +## Data Requirements + +### Information We Need But Don't Have + +1. **Per-Relay EOSE Status**: + - Which relays sent real EOSE? + - Which relays disconnected without EOSE? + - Which relays timed out? + - Which relays are still streaming? + +2. **Per-Relay Event Counts**: + - How many events did each relay send? + - Useful for showing progress and diagnosing issues + +3. **Error Details**: + - Why did relay fail? (connection refused, timeout, protocol error, auth required) + - Currently lost in `catchError(() => of("EOSE"))` + +4. **Timing Information**: + - When did relay connect? + - When did first event arrive? + - When did EOSE arrive? + - How long did query take per relay? + +5. **Relay Health Context**: + - Is relay in RelayLiveness backoff state? + - Has relay been failing consistently? + - Should we even attempt connection? + +### Information We Have But Don't Use + +From **RelayStateManager** (`src/services/relay-state-manager.ts`): +- ✅ `connectionState`: "connected" | "connecting" | "disconnected" | "error" +- ✅ `lastConnected`, `lastDisconnected`: Timestamps +- ✅ `errors[]`: Array of error messages with types +- ✅ `stats.connectionsCount`: How many times relay connected + +From **RelayLiveness** (`src/services/relay-liveness.ts`): +- ✅ Failure counts per relay +- ✅ Backoff states +- ✅ Last success/failure times +- ✅ Should prevent connection attempts to dead relays + +**Problem**: useReqTimeline doesn't integrate with either of these! + +## Nostr Protocol Semantics + +### REQ Lifecycle (NIP-01) + +1. Client sends: `["REQ", <subscription_id>, <filters1>, <filters2>, ...]` +2. Relay responds with zero or more: `["EVENT", <subscription_id>, <event>]` +3. Relay sends: `["EOSE", <subscription_id>]` when initial query complete +4. Client can keep subscription open for streaming +5. Client closes: `["CLOSE", <subscription_id>]` +6.
Relay can close: `["CLOSED", <subscription_id>, <message>]` + +### EOSE Semantics + +**What EOSE means**: +- ✅ "I have sent all stored events matching your filter" +- ✅ "Initial query phase is complete" +- ✅ Connection is still open (unless relay closes immediately after) + +**What EOSE does NOT mean**: +- ❌ "No more events will be sent" (streaming continues) +- ❌ "Connection is closing" +- ❌ "Query was successful" (could have returned 0 events) + +### CLOSED Semantics + +**Why relays send CLOSED**: +- `auth-required`: AUTH event required before query +- `rate-limited`: Too many requests +- `error`: Generic error (parsing, internal, etc.) +- `invalid`: Filter validation failed + +**Client should**: +- Distinguish CLOSED from EOSE +- Handle auth-required by prompting user +- Handle rate-limiting with backoff +- Show errors to user + +## Applesauce Behavior Analysis + +### Retry/Reconnect Logic + +**relay.subscription()** options: +- `retries` (deprecated): Number of retry attempts +- `reconnect` (default: true, 10 retries): Retry on connection failures +- `resubscribe` (default: false): Resubscribe if relay sends CLOSED + +**Current usage in useReqTimeline.ts**: +```typescript +pool.subscription(relays, filtersWithLimit, { + retries: 5, + reconnect: 5, + resubscribe: true, + eventStore, +}); +``` + +**Behavior**: +- Retries connection failures up to 5 times +- Resubscribes if relay sends CLOSED (like auth-required) +- Uses exponential backoff (see `Relay.createReconnectTimer`) + +**Issue**: All this retry logic happens inside applesauce, invisible to useReqTimeline. We can't show "RETRYING" status or retry count to user. + +### Group Subscription Behavior + +**relay.subscription()** in RelayGroup: +```javascript +subscription(filters, opts) { + return this.internalSubscription( + (relay) => relay.subscription(filters, opts), + opts?.eventStore == null ? identity : filterDuplicateEvents(opts?.eventStore) + ); +} +``` + +**Key behaviors**: +1. Creates observable for each relay +2.
Merges all observables +3. Deduplicates events via EventStore +4. Catches errors and converts to "EOSE" (THE BUG) +5. Emits overall "EOSE" when all relays done + +**Missing**: +- No per-relay state tracking +- No way to query "which relays have sent EOSE?" +- No way to query "which relays are still connected?" +- Error information is lost + +## Technical Constraints + +### What We Can't Change + +1. **Applesauce-relay library behavior**: + - We can't modify the `catchError(() => of("EOSE"))` in RelayGroup + - This is in node_modules, upstream library + - Would need to fork or submit PR + +2. **Observable-based API**: + - pool.subscription returns `Observable` + - Response is either `NostrEvent` or string `"EOSE"` + - Can't change this interface without forking + +3. **Relay connection pooling**: + - RelayPool manages all relay connections globally + - Multiple components can share same relay connection + - Can't have per-query relay isolation + +### What We Can Work With + +1. **RelayStateManager**: + - Already tracks per-relay connection state + - Updates in real-time via observables + - Available via `useRelayState()` hook + - CAN BE ENHANCED to track per-query state + +2. **EventStore**: + - Already receives all events + - Could be instrumented to track per-relay events + - Has access to relay URL via event metadata + +3. **Custom observables**: + - We can tap into the subscription observable + - Track events and EOSE per relay ourselves + - Build parallel state tracking + +4. **Relay URL in events**: + - Events marked with relay URL via `markFromRelay()` operator + - Can track which relay sent which events + +## Proposed Solutions + +### Solution 1: Per-Relay Subscription Tracking (Recommended) + +**Approach**: Track individual relay subscriptions in parallel with the group subscription. 
+ +**Implementation**: +```typescript +interface RelaySubscriptionState { + url: string; + status: 'pending' | 'connecting' | 'receiving' | 'eose' | 'closed' | 'error'; + eventCount: number; + firstEventAt?: number; + eoseAt?: number; + error?: Error; +} + +function useReqTimelineEnhanced(id, filters, relays, options) { + const [relayStates, setRelayStates] = useState<Map<string, RelaySubscriptionState>>(); + + // Subscribe to individual relays + useEffect(() => { + const subs = relays.map(url => { + const relay = pool.relay(url); + return relay.req(filters).subscribe({ + next: (response) => { + if (response === 'EOSE') { + setRelayStates(prev => prev.set(url, { ...prev.get(url), status: 'eose', eoseAt: Date.now() })); + } else { + setRelayStates(prev => prev.set(url, { + ...prev.get(url), + status: 'receiving', + eventCount: (prev.get(url)?.eventCount ?? 0) + 1 + })); + } + }, + error: (err) => { + setRelayStates(prev => prev.set(url, { ...prev.get(url), status: 'error', error: err })); + } + }); + }); + + return () => subs.forEach(sub => sub.unsubscribe()); + }, [relays, filters]); + + // Derive overall state from individual relay states + const overallState = useMemo(() => { + const states = Array.from(relayStates.values()); + const connected = states.filter(s => ['receiving', 'eose'].includes(s.status)); + const eose = states.filter(s => s.status === 'eose'); + const errors = states.filter(s => s.status === 'error'); + + if (connected.length === 0 && errors.length === states.length) return 'FAILED'; + if (eose.length === states.length) return 'CLOSED'; + if (eose.length > 0 && connected.length > 0) return 'LIVE'; + if (connected.length > 0) return 'LOADING'; + return 'CONNECTING'; + }, [relayStates]); + + return { events, relayStates, overallState }; +} +``` + +**Pros**: +- ✅ Accurate per-relay tracking +- ✅ Can distinguish real EOSE from errors +- ✅ Works around applesauce bug without forking +- ✅ Provides rich debugging information + +**Cons**: +- ❌ Duplicate subscriptions (one per relay + one
group) +- ❌ More memory usage +- ❌ Potential for state synchronization issues + +### Solution 2: Enhanced Group Observable Wrapper + +**Approach**: Wrap the group subscription and parse relay URL from event metadata. + +**Implementation**: +```typescript +function useReqTimelineWithTracking(id, filters, relays, options) { + const [relayEose, setRelayEose] = useState<Set<string>>(new Set()); + const { relays: relayStates } = useRelayState(); + + useEffect(() => { + const observable = pool.subscription(relays, filters, options).pipe( + tap(response => { + if (typeof response === 'string' && response === 'EOSE') { + // This is the aggregated EOSE, check which relays are still connected + const stillConnected = relays.filter(url => + relayStates[url]?.connectionState === 'connected' + ); + // If no relays connected, treat as failure not EOSE + if (stillConnected.length === 0) { + setError(new Error('All relays disconnected')); + return; + } + } else if (isNostrEvent(response)) { + // Track which relay sent this event + const relayUrl = (response as any)._relay; // Added by markFromRelay() + if (relayUrl && !relayEose.has(relayUrl)) { + // Mark relay as active/receiving + } + } + }) + ); + + return observable.subscribe(/* ... */); + }, [relays, filters]); +} +``` + +**Pros**: +- ✅ Single subscription (no duplication) +- ✅ Uses existing infrastructure +- ✅ Leverages RelayStateManager + +**Cons**: +- ❌ Can't distinguish real EOSE from fake (happens in applesauce) +- ❌ Relies on relay URL being added to events +- ❌ Still shows "EOSE" when all relays disconnect + +### Solution 3: Fork Applesauce-Relay (Not Recommended) + +**Approach**: Fork applesauce-relay and fix the catchError bug.
+ +**Changes needed**: +```typescript +// In group.js, change: +catchError(() => of("EOSE")) + +// To: +catchError((err) => of({ type: 'ERROR', relay, error: err })) + +// And update EOSE aggregation to only count real EOSE +``` + +**Pros**: +- ✅ Fixes root cause +- ✅ Proper error handling +- ✅ Could be upstreamed + +**Cons**: +- ❌ Maintenance burden of fork +- ❌ Need to track upstream changes +- ❌ Breaks applesauce API contract + +### Solution 4: Hybrid Approach (RECOMMENDED) + +**Combine** Solution 1 + Solution 2: +1. Use RelayStateManager to track connection state +2. Subscribe to group observable for events (deduplication) +3. Build per-relay state machine based on: + - Connection state from RelayStateManager + - Events received (tracked by relay URL in metadata) + - Overall EOSE from group subscription +4. Derive accurate overall state + +**Implementation** in new file `src/hooks/useReqTimelineEnhanced.ts`: +```typescript +interface ReqRelayState { + url: string; + connectionState: 'pending' | 'connecting' | 'connected' | 'disconnected' | 'error'; + subscriptionState: 'waiting' | 'receiving' | 'eose' | 'timeout' | 'error'; + eventCount: number; + firstEventAt?: number; + lastEventAt?: number; + errorMessage?: string; +} + +interface ReqOverallState { + status: 'discovering' | 'connecting' | 'loading' | 'live' | 'partial' | 'closed' | 'failed'; + connectedCount: number; + eoseCount: number; + errorCount: number; + totalRelays: number; +} + +export function useReqTimelineEnhanced( + id: string, + filters: Filter | Filter[], + relays: string[], + options: UseReqTimelineOptions = {} +) { + // State + const [relayStates, setRelayStates] = useState<Map<string, ReqRelayState>>(new Map()); + const [overallEose, setOverallEose] = useState(false); + + // Get relay connection states + const { relays: globalRelayStates } = useRelayState(); + + // Subscribe to events + const observable = pool.subscription(relays, filters, options); + + useEffect(() => { + // Initialize relay states +
setRelayStates(new Map(relays.map(url => [ + url, + { + url, + connectionState: 'pending', + subscriptionState: 'waiting', + eventCount: 0, + } + ]))); + + const sub = observable.subscribe({ + next: (response) => { + if (response === 'EOSE') { + setOverallEose(true); + } else { + const event = response as NostrEvent; + const relayUrl = (event as any)._relay; + + setRelayStates(prev => { + const state = prev.get(relayUrl); + if (!state) return prev; + + const next = new Map(prev); + next.set(relayUrl, { + ...state, + subscriptionState: 'receiving', + eventCount: state.eventCount + 1, + firstEventAt: state.firstEventAt ?? Date.now(), + lastEventAt: Date.now(), + }); + return next; + }); + } + }, + error: (err) => { + // Overall subscription error + }, + }); + + return () => sub.unsubscribe(); + }, [relays, filters]); + + // Sync connection state from RelayStateManager + useEffect(() => { + setRelayStates(prev => { + const next = new Map(prev); + for (const [url, state] of prev) { + const globalState = globalRelayStates[url]; + if (globalState) { + next.set(url, { + ...state, + connectionState: globalState.connectionState as any, + }); + } + } + return next; + }); + }, [globalRelayStates]); + + // Derive overall state + const overallState: ReqOverallState = useMemo(() => { + const states = Array.from(relayStates.values()); + const connected = states.filter(s => s.connectionState === 'connected'); + const receivedData = states.filter(s => s.eventCount > 0); + const errors = states.filter(s => s.connectionState === 'error'); + + const status = (() => { + if (relays.length === 0) return 'discovering'; + if (connected.length === 0 && errors.length === states.length) return 'failed'; + if (connected.length === 0 && receivedData.length === 0) return 'connecting'; + if (!overallEose) return 'loading'; + if (connected.length === 0 && overallEose) return 'closed'; + if (connected.length > 0 && overallEose && options.stream) return 'live'; + if (connected.length < relays.length 
&& overallEose) return 'partial'; + return 'closed'; + })(); + + return { + status, + connectedCount: connected.length, + eoseCount: states.filter(s => s.subscriptionState === 'eose').length, + errorCount: errors.length, + totalRelays: relays.length, + }; + }, [relayStates, overallEose, relays.length, options.stream]); + + return { + events, + relayStates, + overallState, + loading: !overallEose, + eoseReceived: overallEose, + }; +} +``` + +**Pros**: +- ✅ No duplicate subscriptions +- ✅ Accurate connection tracking +- ✅ Rich per-relay information +- ✅ Works with existing infrastructure +- ✅ Can show "LIVE" only when relays actually connected + +**Cons**: +- ❌ Can't distinguish real EOSE from timeout/error (upstream issue) +- ❌ More complex state management +- ❌ Depends on event metadata having relay URL + +## Recommendation + +**Implement Solution 4 (Hybrid Approach)** as the most pragmatic path forward: + +1. Create `useReqTimelineEnhanced` hook with per-relay state tracking +2. Update ReqViewer to use enhanced hook +3. Improve status indicator logic to use overall state +4. Add per-relay status display in relay dropdown +5. Show accurate indicators for edge cases + +**Future work**: +- Submit PR to applesauce-relay to fix catchError bug +- Add per-relay EOSE tracking to applesauce (upstream enhancement) +- Implement relay health scoring to avoid dead relays + +## Implementation Priority + +### Phase 1: Critical Fixes (Immediate) +1. Implement `useReqTimelineEnhanced` hook +2. Update ReqViewer status indicator logic +3. Add per-relay state display +4. Handle "all relays disconnected" case + +### Phase 2: Enhanced UX (Next) +5. Add per-relay event counts +6. Show relay timing information +7. Add retry/reconnection indicators +8. Integrate with RelayLiveness for smarter relay selection + +### Phase 3: Advanced Features (Future) +9. Partial EOSE indicator (some relays done, some still loading) +10. Relay performance metrics +11. 
Automatic relay ranking and selection +12. Query optimization suggestions + +## Testing Strategy + +### Unit Tests +- State machine transitions +- Edge case handling +- EOSE aggregation logic + +### Integration Tests +- Real relay connections +- Timeout scenarios +- Mixed success/failure scenarios + +### Manual Testing Scenarios +1. Query with all offline relays +2. Query with mixed offline/online +3. Query with slow relay (>10s response) +4. Mid-query disconnections +5. Streaming mode with gradual disconnections +6. Single relay queries +7. AUTH-required relays +8. Rate-limited relays + +## Metrics to Track + +### User-Visible +- Time to first event +- Time to EOSE per relay +- Events per relay +- Success/failure ratio + +### Debug/Observability +- Relay response times +- Failure reasons +- Retry attempts +- Reconnection events + +## Related Issues + +- RelayLiveness not being checked before connection attempts +- No visual feedback during relay discovery phase +- No indication of AUTH requirements +- No rate limiting awareness + +## References + +- NIP-01: https://github.com/nostr-protocol/nips/blob/master/01.md +- Applesauce-relay docs: (internal node_modules) +- RelayStateManager: `src/services/relay-state-manager.ts` +- useReqTimeline: `src/hooks/useReqTimeline.ts` +- ReqViewer: `src/components/ReqViewer.tsx`