From 501f2746fb27b04bd644a2fba50480bf90e059c9 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 24 Dec 2025 13:23:21 +0000 Subject: [PATCH] docs: add outbox relay selection improvement plans Research and analysis of outbox implementations from nosotros, noStrudel, and jumble to create a comprehensive improvement plan for Grimoire. Priority improvements (docs/outbox-improvements-plan.md): 1. Relay Performance Scoring - track response time, connection time, stability 2. Adaptive Timeouts - use historical data for per-relay timeouts 3. Per-Relay Filter Optimization - send only relevant authors to each relay 4. Custom Scoring Function - combine coverage + performance in selection Future work saved in docs/outbox-future-work.md: 5. Progressive Relay Selection 6. NIP-66 Relay Discovery --- docs/outbox-future-work.md | 263 +++++++++++++++++ docs/outbox-improvements-plan.md | 481 +++++++++++++++++++++++++++++++ 2 files changed, 744 insertions(+) create mode 100644 docs/outbox-future-work.md create mode 100644 docs/outbox-improvements-plan.md diff --git a/docs/outbox-future-work.md b/docs/outbox-future-work.md new file mode 100644 index 0000000..33a4763 --- /dev/null +++ b/docs/outbox-future-work.md @@ -0,0 +1,263 @@ +# Outbox Relay Selection: Future Work + +These improvements are lower priority and saved for future implementation after the core scoring and optimization work is complete. + +--- + +## 5. Progressive Relay Selection + +### Problem +Currently, relay selection waits for all relay list fetches before returning results. Users wait for the full timeout even when cached data is available. + +### Proposed Solution + +Return results in phases: +1. **Phase 1 (0-10ms)**: Return relays from memory cache immediately +2. **Phase 2 (10-100ms)**: Add relays from Dexie cache +3. 
**Phase 3 (100-1000ms)**: Add relays from network fetches + +```typescript +export async function selectRelaysIncremental( + eventStore: IEventStore, + filter: NostrFilter, + options?: RelaySelectionOptions, + onUpdate?: (partial: RelaySelectionResult) => void +): Promise<RelaySelectionResult> { + const authors = filter.authors || []; + + // Phase 1: Return cached relays immediately + const cachedPointers = await Promise.all( + authors.map(async (pubkey) => { + const cached = await relayListCache.getOutboxRelays(pubkey); + return cached ? { pubkey, relays: cached } : null; + }) + ); + + const initialRelays = cachedPointers + .filter((p): p is NonNullable<typeof p> => p !== null) + .flatMap(p => p.relays); + + if (initialRelays.length > 0 && onUpdate) { + onUpdate({ + relays: initialRelays, + reasoning: [], + isOptimized: true, + }); + } + + // Phase 2: Fetch missing relay lists + const uncachedAuthors = authors.filter((_, i) => !cachedPointers[i]); + + if (uncachedAuthors.length > 0) { + const subscription = eventStore + .query({ kinds: [10002], authors: uncachedAuthors }) + .subscribe((event) => { + relayListCache.set(event); + if (onUpdate) { + selectRelaysForFilter(eventStore, filter, options).then(onUpdate); + } + }); + + await new Promise(resolve => + setTimeout(resolve, options?.timeout || 1000) + ); + subscription.unsubscribe(); + } + + // Phase 3: Final selection + return selectRelaysForFilter(eventStore, filter, options); +} +``` + +### Hook Integration + +```typescript +export function useOutboxRelaysIncremental( + filter: NostrFilter, + options?: RelaySelectionOptions +) { + const [result, setResult] = useState<RelaySelectionResult>({ + relays: options?.fallbackRelays || [], + reasoning: [], + isOptimized: false, + }); + + useEffect(() => { + selectRelaysIncremental(eventStore, filter, options, setResult); + }, [filter, options]); + + return result; +} +``` + +### Expected Impact +- Show initial results within 10-50ms (cached relays) +- Progressive enhancement as more relay lists arrive +- Better perceived 
performance + +### Effort: Medium +### Priority: Lower (current streaming approach already shows results as they arrive) + +--- + +## 6. NIP-66 Relay Discovery + +### Problem +Grimoire uses a fixed set of fallback/aggregator relays. New relays are never discovered automatically. + +### NIP-66 Overview +NIP-66 defines relay discovery via monitor relays that publish relay metadata: +- Kind 30166: Relay metadata (NIPs supported, network, country) +- Monitor relays: `wss://relay.nostr.watch`, `wss://monitorlizard.nostr1.com` + +### Proposed Implementation + +```typescript +// src/services/relay-discovery.ts + +class RelayDiscoveryService { + private discoveryRelays = [ + "wss://relay.nostr.watch/", + "wss://monitorlizard.nostr1.com/", + ]; + + private relayCache = new Map<string, RelayMetadata>(); + private cacheExpiry = 60 * 60 * 1000; // 1 hour + + /** + * Discover relays by supported NIPs + */ + async getRelaysByNIPs(nips: number[]): Promise<string[]> { + await this.ensureCacheLoaded(); + + return Array.from(this.relayCache.entries()) + .filter(([_, meta]) => + nips.every(nip => meta.supportedNips.includes(nip)) + ) + .map(([url]) => url); + } + + /** + * Discover relays by country + */ + async getRelaysByCountry(countryCode: string): Promise<string[]> { + await this.ensureCacheLoaded(); + + return Array.from(this.relayCache.entries()) + .filter(([_, meta]) => meta.countryCode === countryCode) + .map(([url]) => url); + } + + /** + * Get online relays (recently seen active) + */ + async getOnlineRelays(): Promise<string[]> { + await this.ensureCacheLoaded(); + + const now = Date.now(); + const recentThreshold = 5 * 60 * 1000; // 5 minutes + + return Array.from(this.relayCache.entries()) + .filter(([_, meta]) => now - meta.lastSeen < recentThreshold) + .map(([url]) => url); + } + + /** + * Fetch relay metadata from monitor relays + */ + private async fetchRelayMetadata(): Promise<void> { + const filter = { kinds: [30166], limit: 500 }; + + for (const monitorRelay of this.discoveryRelays) { + try { + const events = await 
pool.querySync([monitorRelay], filter); + + for (const event of events) { + const url = getTagValue(event, "d"); + if (!url) continue; + + const metadata: RelayMetadata = { + url: normalizeRelayURL(url), + supportedNips: parseNipTags(event), + network: getTagValue(event, "n") || "clearnet", + countryCode: getTagValue(event, "l"), + lastSeen: event.created_at * 1000, + }; + + this.relayCache.set(metadata.url, metadata); + } + } catch (error) { + console.warn(`[RelayDiscovery] Failed to fetch from ${monitorRelay}:`, error); + } + } + } +} + +interface RelayMetadata { + url: string; + supportedNips: number[]; + network: "clearnet" | "tor" | "i2p"; + countryCode?: string; + lastSeen: number; +} +``` + +### Use Cases + +1. **Dynamic fallbacks**: Instead of hardcoded aggregators, discover relays that support NIP-50 (search) +2. **Geographic optimization**: Prefer relays in user's region for lower latency +3. **Feature detection**: Find relays supporting specific NIPs for advanced queries + +### Integration with Relay Selection + +```typescript +// In relay-selection.ts +async function selectRelaysForFilter(...) 
{ + // If all users have no relay lists, try NIP-66 discovery + if (fallbackCount === allPointers.length) { + const discoveredRelays = await relayDiscovery.getOnlineRelays(); + if (discoveredRelays.length > 0) { + return { + relays: discoveredRelays.slice(0, 10), + reasoning: discoveredRelays.slice(0, 10).map(relay => ({ + relay, + writers: [], + readers: [], + isFallback: true, + isDiscovered: true, // New field + })), + isOptimized: false, + }; + } + } +} +``` + +### Expected Impact +- Better fallback relay selection +- Automatic discovery of new relays +- Geographic optimization potential + +### Effort: High +### Priority: Low (current fallback aggregators work well) + +--- + +## When to Implement + +### Progressive Relay Selection (#5) +Implement when: +- Users report slow initial load times +- Cache hit rates are low +- There's demand for faster perceived performance + +### NIP-66 Relay Discovery (#6) +Implement when: +- Fallback aggregators become unreliable +- Users want geographic relay preferences +- There's a need for automatic relay discovery + +--- + +*Created: 2025-12-24* +*Status: Backlog* diff --git a/docs/outbox-improvements-plan.md b/docs/outbox-improvements-plan.md new file mode 100644 index 0000000..db017f2 --- /dev/null +++ b/docs/outbox-improvements-plan.md @@ -0,0 +1,481 @@ +# Outbox Relay Selection Improvements Plan + +This document outlines the implementation plan for improving Grimoire's NIP-65 outbox relay selection to maximize reliability and performance. + +**Priority**: Reliability (getting events from the right relays) > Performance (speed/efficiency) + +--- + +## Overview + +We're implementing 4 improvements that work together: + +1. **Relay Performance Scoring** - Track response time, connection time, stability +2. **Adaptive Timeouts** - Use historical performance to set per-relay timeouts +3. **Per-Relay Filter Optimization** - Send only relevant authors to each relay +4. 
**Custom Scoring Function** - Combine scoring + coverage in relay selection + +--- + +## 1. Relay Performance Scoring + +### Goal +Track relay performance metrics over time to prefer fast, reliable relays. + +### Metrics to Track (inspired by noStrudel) + +```typescript +interface RelayPerformanceMetrics { + url: string; + + // Response time (how fast relay answers queries) + responseTimeMs: number; // Exponential moving average + responseTimeCount: number; // Number of samples + + // Connection time (how fast WebSocket connects) + connectTimeMs: number; // Exponential moving average + connectTimeCount: number; // Number of samples + + // Stability (how long before relay disconnects) + avgSessionDurationMs: number; // Average time connected before disconnect + sessionCount: number; // Number of sessions + + // Success rate + successfulQueries: number; + failedQueries: number; + + // Timestamps + lastUpdated: number; + lastSuccess: number; + lastFailure: number; +} +``` + +### Scoring Algorithm + +```typescript +function calculateRelayScore(metrics: RelayPerformanceMetrics): number { + // Response time score: 0-10 points + // 1 point per 100ms under 1000ms, max 10 + const responseScore = Math.max(0, Math.min(10, + (1000 - metrics.responseTimeMs) / 100 + )); + + // Connection time score: 0-10 points + // Same formula as response time + const connectScore = Math.max(0, Math.min(10, + (1000 - metrics.connectTimeMs) / 100 + )); + + // Stability score: 0-10 points + // Based on average session duration + // 1 point per 30s of stability, max 10 (5 min) + const stabilityScore = Math.max(0, Math.min(10, + metrics.avgSessionDurationMs / 30000 + )); + + // Success rate score: 0-10 points + const totalQueries = metrics.successfulQueries + metrics.failedQueries; + const successRate = totalQueries > 0 + ? 
metrics.successfulQueries / totalQueries + : 0.5; // Default to 50% for unknown relays + const successScore = successRate * 10; + + // Combined score (weighted) + // Response time is most important for UX + return ( + responseScore * 0.4 + + connectScore * 0.2 + + stabilityScore * 0.2 + + successScore * 0.2 + ); +} +``` + +### Files to Create/Modify + +**New file: `src/services/relay-scoreboard.ts`** +```typescript +import db from "./db"; +import pool from "./relay-pool"; + +class RelayScoreboard { + private metrics = new Map<string, RelayPerformanceMetrics>(); + private saveInterval: NodeJS.Timeout | null = null; + + constructor() { + this.load(); + this.connectToPool(); + this.startAutoSave(); + } + + // Record a successful query response + recordResponse(url: string, responseTimeMs: number): void; + + // Record connection establishment + recordConnect(url: string, connectTimeMs: number): void; + + // Record session end (for stability tracking) + recordSessionEnd(url: string, durationMs: number): void; + + // Record query result + recordQueryResult(url: string, success: boolean): void; + + // Get score for a relay (0-10) + getScore(url: string): number; + + // Get all metrics for debugging + getMetrics(url: string): RelayPerformanceMetrics | undefined; + + // Persist to Dexie + async save(): Promise<void>; + + // Load from Dexie + async load(): Promise<void>; + + // Hook into relay pool events + private connectToPool(): void; +} + +export const relayScoreboard = new RelayScoreboard(); +export default relayScoreboard; +``` + +**Modify: `src/services/db.ts`** +- Add new table: `relayPerformance` +- Add DB version migration + +```typescript +export interface RelayPerformanceEntry { + url: string; + responseTimeMs: number; + responseTimeCount: number; + connectTimeMs: number; + connectTimeCount: number; + avgSessionDurationMs: number; + sessionCount: number; + successfulQueries: number; + failedQueries: number; + lastUpdated: number; + lastSuccess: number; + lastFailure: number; +} + +// Add to DB schema 
version 15: +this.version(15).stores({ + // ... existing tables ... + relayPerformance: "&url", +}); +``` + +### Integration Points + +1. **Pool connection events**: Track connect time when WebSocket opens +2. **Subscription EOSE**: Track response time from REQ to EOSE +3. **Relay disconnect**: Track session duration +4. **Query errors**: Track success/failure rate + +--- + +## 2. Adaptive Timeouts + +### Goal +Use historical performance data to set smart per-relay timeouts for relay list fetches. + +### Algorithm + +```typescript +function getAdaptiveTimeout(url: string): number { + const metrics = relayScoreboard.getMetrics(url); + + if (!metrics || metrics.responseTimeCount < 3) { + // Not enough data - use default + return 1000; + } + + // Base timeout: 2x average response time + let timeout = metrics.responseTimeMs * 2; + + // Adjust based on success rate + const totalQueries = metrics.successfulQueries + metrics.failedQueries; + if (totalQueries > 5) { + const successRate = metrics.successfulQueries / totalQueries; + if (successRate < 0.5) { + // Unreliable relay - shorter timeout + timeout = Math.min(timeout, 500); + } + } + + // Clamp to reasonable bounds + return Math.max(300, Math.min(2000, timeout)); +} +``` + +### Files to Modify + +**Modify: `src/services/relay-selection.ts`** + +```typescript +import relayScoreboard from "./relay-scoreboard"; + +// Replace fixed timeout with adaptive +async function fetchRelayList( + pubkey: string, + defaultTimeoutMs: number, +): Promise { + // Get cached relay list to find which relays to query + const cachedRelays = await relayListCache.getOutboxRelays(pubkey); + + // Use adaptive timeout based on known relays + // If we know which relay we'll query, use its specific timeout + // Otherwise use the default + const timeout = cachedRelays && cachedRelays.length > 0 + ? Math.max(...cachedRelays.map(r => getAdaptiveTimeout(r))) + : defaultTimeoutMs; + + // ... 
rest of fetch logic with adaptive timeout +} +``` + +--- + +## 3. Per-Relay Filter Optimization + +### Goal +Send only the relevant subset of authors to each relay, reducing bandwidth and improving relay processing. + +### Current Behavior + +```typescript +// Current: Same filter to all relays +const relays = selectOptimalRelays(pointers, options); +// All relays get: { authors: [A, B, C, D, E], kinds: [1] } +``` + +### Proposed Behavior + +```typescript +// New: Per-relay filters +interface RelayFilterMap { + relay: string; + filter: NostrFilter; + authors: string[]; // Authors this relay covers +} + +function createPerRelayFilters( + selectedPointers: ProfilePointer[], + baseFilter: NostrFilter +): RelayFilterMap[] { + const relayToAuthors = new Map<string, Set<string>>(); + + // Group authors by relay + for (const pointer of selectedPointers) { + for (const relay of pointer.relays || []) { + if (!relayToAuthors.has(relay)) { + relayToAuthors.set(relay, new Set()); + } + relayToAuthors.get(relay)!.add(pointer.pubkey); + } + } + + // Create per-relay filters + return Array.from(relayToAuthors.entries()).map(([relay, authors]) => ({ + relay, + authors: Array.from(authors), + filter: { + ...baseFilter, + authors: Array.from(authors), + }, + })); +} +``` + +### Return Type Change + +```typescript +// Current +interface RelaySelectionResult { + relays: string[]; + reasoning: RelaySelectionReasoning[]; + isOptimized: boolean; +} + +// New: Add per-relay filter maps +interface RelaySelectionResult { + relays: string[]; + reasoning: RelaySelectionReasoning[]; + isOptimized: boolean; + perRelayFilters?: RelayFilterMap[]; // Optional for backward compat +} +``` + +### Consumer Changes + +Consumers that use per-relay filters can subscribe more efficiently: + +```typescript +// In useReqTimeline or similar: +if (selectionResult.perRelayFilters) { + // Subscribe to each relay with its specific filter + for (const { relay, filter } of selectionResult.perRelayFilters) { + pool.subscribe([relay], 
filter, handlers); + } +} else { + // Fallback: same filter to all relays + pool.subscribe(selectionResult.relays, filter, handlers); +} +``` + +--- + +## 4. Custom Scoring Function + +### Goal +Combine coverage optimization (applesauce's greedy algorithm) with performance scoring. + +### Implementation + +**Modify: `src/services/relay-selection.ts`** + +```typescript +import relayScoreboard from "./relay-scoreboard"; +import liveness from "./relay-liveness"; + +// Custom scoring function for selectOptimalRelays +function scoreRelay( + relay: string, + coverage: number, // How many uncovered users this relay covers + popularity: number, // How many total users use this relay +): number { + // Base score: coverage efficiency + const coverageScore = coverage / Math.max(1, popularity); + + // Performance score from scoreboard (0-10, normalized to 0-1) + const perfScore = relayScoreboard.getScore(relay) / 10; + + // Health multiplier from liveness + const isHealthy = liveness.isHealthy(relay); + const healthMultiplier = isHealthy ? 1.0 : 0.3; // Penalize unhealthy relays + + // Combined score + // Coverage is weighted higher (we need the events) + // Performance helps break ties and prefer faster relays + return ( + coverageScore * 0.6 + + perfScore * 0.4 + ) * healthMultiplier; +} + +// Usage in selectRelaysForFilter: +const selectedAuthors = selectOptimalRelays(processedAuthorPointers, { + maxConnections: authorRelayBudget, + maxRelaysPerUser, + score: scoreRelay, // Custom scoring function +}); +``` + +### Benefits + +1. **Reliability**: Still prioritizes coverage (getting all authors) +2. **Performance**: Prefers faster relays when coverage is equal +3. **Health-aware**: Deprioritizes (but doesn't exclude) unhealthy relays + +--- + +## Implementation Order + +### Step 1: Relay Performance Scoring +1. Create `RelayPerformanceEntry` interface in `db.ts` +2. Add DB version 15 with `relayPerformance` table +3. Create `relay-scoreboard.ts` service +4. 
Hook into pool events to collect metrics +5. Add tests for scoring algorithm + +### Step 2: Custom Scoring Function +1. Import scoreboard in `relay-selection.ts` +2. Create `scoreRelay` function +3. Pass to `selectOptimalRelays` calls +4. Add tests for custom scoring + +### Step 3: Adaptive Timeouts +1. Create `getAdaptiveTimeout` function in `relay-scoreboard.ts` +2. Modify `fetchRelayList` to use adaptive timeouts +3. Add tests for timeout calculation + +### Step 4: Per-Relay Filter Optimization +1. Create `RelayFilterMap` type +2. Add `createPerRelayFilters` function +3. Add `perRelayFilters` to `RelaySelectionResult` +4. Update `useOutboxRelays` hook to expose per-relay filters +5. Optionally update consumers to use per-relay subscriptions + +--- + +## Testing Strategy + +### Unit Tests + +```typescript +// relay-scoreboard.test.ts +describe("RelayScoreboard", () => { + describe("calculateRelayScore", () => { + it("scores fast relays higher", () => { + const fast = makeMetrics({ responseTimeMs: 100 }); + const slow = makeMetrics({ responseTimeMs: 900 }); + expect(calculateRelayScore(fast)).toBeGreaterThan(calculateRelayScore(slow)); + }); + + it("scores reliable relays higher", () => { + const reliable = makeMetrics({ successfulQueries: 95, failedQueries: 5 }); + const flaky = makeMetrics({ successfulQueries: 50, failedQueries: 50 }); + expect(calculateRelayScore(reliable)).toBeGreaterThan(calculateRelayScore(flaky)); + }); + }); + + describe("getAdaptiveTimeout", () => { + it("uses 2x average response time", () => { + scoreboard.recordResponse("wss://fast.relay/", 200); + scoreboard.recordResponse("wss://fast.relay/", 200); + scoreboard.recordResponse("wss://fast.relay/", 200); + expect(getAdaptiveTimeout("wss://fast.relay/")).toBe(400); + }); + + it("caps timeout for unreliable relays", () => { + // Record many failures + for (let i = 0; i < 10; i++) { + scoreboard.recordQueryResult("wss://flaky.relay/", false); + } + 
expect(getAdaptiveTimeout("wss://flaky.relay/")).toBeLessThanOrEqual(500); + }); + }); +}); +``` + +### Integration Tests + +- Test that scoring persists across page reloads +- Test that pool events are properly captured +- Test relay selection with real filter scenarios + +--- + +## Rollout Plan + +1. **Phase 1**: Ship scoring + custom function behind feature flag +2. **Phase 2**: Enable by default, monitor metrics +3. **Phase 3**: Add adaptive timeouts +4. **Phase 4**: Add per-relay filters (optional consumer adoption) + +--- + +## Success Metrics + +- **Response time**: Measure time from REQ to first event +- **Coverage**: Measure % of expected events received +- **Connection count**: Measure average relays connected per query +- **Cache hit rate**: Track scoreboard lookups vs. new relays + +--- + +*Created: 2025-12-24* +*Status: Planning*