Compare commits

...

3 Commits

Author SHA1 Message Date
Naiyuan Qing
931d53fbbd test(analytics): add exception-dedupe safety matrix
This file was written alongside the dedupe implementation but missed the
original commit, so the $exception fail-open / cap / fingerprint matrix
never landed on the branch. No implementation change — the tests pass as
written against the existing exception-dedupe.ts.

Covers: first-3-then-drop, fingerprint independence, colno discrimination,
hash-only storage (no PII), degraded/missing frames, undefined / throwing
/ corrupt-JSON sessionStorage fail-open, setItem-failure under-counts, and
the distinct-fingerprint cap (51st new fingerprint kept).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
Co-authored-by: multica-agent <github@multica.ai>
2026-06-17 14:03:00 +08:00
Naiyuan Qing
1d7f8ed75c feat(diagnostics): global 60s cooldown for client_unresponsive
A single sustained freeze is delivered as several long-task entries, so
emitting per entry made client_unresponsive volume grow without bound
with the freeze length (MUL-3331). Cap it with a module-level (page-
lifetime) global cooldown: at most one event per 60s window. No route
bucketing — a global window is the most direct cap on volume.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
Co-authored-by: multica-agent <github@multica.ai>
2026-06-17 13:53:03 +08:00
Naiyuan Qing
d0f067c8b8 feat(analytics): session-level $exception dedupe in before_send
A runaway client error (a render loop, a polling fetch that keeps
throwing) emits 100+ identical $exception events per session, which
showed up as a top PostHog cost/noise source after exception
autocapture landed (MUL-3331 / MUL-3330).

Add a per-tab-session fuse in before_send, after redaction: fingerprint
the already-redacted exception (type + redacted value + one deterministic
stack frame incl. colno), keep the first 3 per (session, fingerprint),
drop the rest. State lives in sessionStorage as a hash->count blob, so no
PII is persisted. Every storage failure fails open (keep the event);
before_send never throws.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
Co-authored-by: multica-agent <github@multica.ai>
2026-06-17 13:52:51 +08:00
6 changed files with 557 additions and 0 deletions

View File

@@ -0,0 +1,234 @@
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
import { shouldDropException } from "./exception-dedupe";
// sessionStorage key the tests seed and inspect. It mirrors the STORAGE_KEY
// constant of the module under test ("mc_exc_fp"); keep the two in sync.
const STORAGE_KEY = "mc_exc_fp";
/**
 * Map-backed substitute for `sessionStorage`, used to stub the global.
 *
 * The backing `data` map is exposed so tests can seed or inspect the stored
 * blob directly. `throwOnGet` / `throwOnSet` make the matching accessor throw
 * (simulating blocked storage or an exhausted quota) so the fail-open paths
 * can be exercised.
 */
function makeStorage(opts: { throwOnGet?: boolean; throwOnSet?: boolean } = {}) {
  const data = new Map<string, string>();

  const read = (k: string): string | null => {
    if (opts.throwOnGet) throw new Error("getItem blocked");
    const stored = data.get(k);
    return stored === undefined ? null : stored;
  };

  const write = (k: string, v: string): void => {
    if (opts.throwOnSet) throw new Error("quota exceeded");
    data.set(k, v);
  };

  return {
    data,
    getItem: read,
    setItem: write,
    removeItem: (k: string): void => {
      data.delete(k);
    },
    clear: (): void => {
      data.clear();
    },
    key: (i: number): string | null => [...data.keys()][i] ?? null,
    get length(): number {
      return data.size;
    },
  };
}
/**
 * Fixture factory for `$exception` properties in their post-redaction shape
 * (dedupe runs after redactExceptionProperties has scrubbed value/message).
 *
 * - `type` / `value` default to "TypeError" / "boom".
 * - `frames` left undefined → a single default frame is used.
 * - `frames: null` → the entry carries no `stacktrace` at all.
 */
function exc(o: {
  type?: string;
  value?: string;
  frames?: Array<Record<string, unknown>> | null;
} = {}): Record<string, unknown> {
  const stack: Record<string, unknown> =
    o.frames === null
      ? {}
      : {
          stacktrace: {
            type: "raw",
            frames: o.frames ?? [
              { filename: "app.tsx", function: "render", lineno: 10, colno: 5 },
            ],
          },
        };
  return {
    $exception_list: [
      { type: o.type ?? "TypeError", value: o.value ?? "boom", ...stack },
    ],
  };
}
// Undo every vi.stubGlobal("sessionStorage", …) made by a test so one test's
// storage stub does not carry over into the next.
afterEach(() => {
vi.unstubAllGlobals();
});
// Core fuse behaviour: every fingerprint gets its own budget of three kept
// events per session; anything past that budget is dropped.
describe("shouldDropException — per-fingerprint limit", () => {
  beforeEach(() => {
    vi.stubGlobal("sessionStorage", makeStorage());
  });

  it("keeps the first 3 of a fingerprint and drops from the 4th", () => {
    expect(shouldDropException(exc())).toBe(false);
    expect(shouldDropException(exc())).toBe(false);
    expect(shouldDropException(exc())).toBe(false);
    expect(shouldDropException(exc())).toBe(true);
    expect(shouldDropException(exc())).toBe(true);
  });

  it("treats different fingerprints independently — one does not drop the other", () => {
    const a = () => exc({ type: "TypeError", value: "a" });
    const b = () => exc({ type: "RangeError", value: "b" });
    // Exhaust fingerprint A.
    shouldDropException(a());
    shouldDropException(a());
    shouldDropException(a());
    expect(shouldDropException(a())).toBe(true); // A fused
    // B is untouched and still has its full budget.
    expect(shouldDropException(b())).toBe(false);
    expect(shouldDropException(b())).toBe(false);
    expect(shouldDropException(b())).toBe(false);
    expect(shouldDropException(b())).toBe(true);
  });

  it("discriminates on colno (minified bundles collapse statements onto one line)", () => {
    const at = (colno: number) =>
      exc({ frames: [{ filename: "b.js", function: "x", lineno: 1, colno }] });
    // Same file/line/function, different column → distinct fingerprints, so
    // each keeps its own first-3 budget.
    shouldDropException(at(10));
    shouldDropException(at(10));
    shouldDropException(at(10));
    expect(shouldDropException(at(10))).toBe(true);
    expect(shouldDropException(at(20))).toBe(false);
  });

  it("stores only a hash + counter — no raw value reaches storage", () => {
    const storage = makeStorage();
    vi.stubGlobal("sessionStorage", storage);
    shouldDropException(exc({ value: "secret-marker-12345" }));
    const blob = storage.data.get(STORAGE_KEY) ?? "";
    // Without these two checks the "not.toContain" assertions below would
    // pass vacuously if nothing had been written (an empty string contains no
    // secret). Require a real blob holding exactly one counter set to 1.
    expect(blob).not.toBe("");
    expect(Object.values(JSON.parse(blob))).toEqual([1]);
    expect(blob).not.toContain("secret-marker-12345");
    expect(blob).not.toContain("app.tsx");
  });
});
// Partial, missing or malformed exception payloads must never throw, and must
// either dedupe on whatever signal is left or be kept.
describe("shouldDropException — degraded frames", () => {
  beforeEach(() => {
    vi.stubGlobal("sessionStorage", makeStorage());
  });

  it("tolerates missing lineno/colno/function and still dedupes", () => {
    const partial = () => exc({ frames: [{ filename: "only-file.js" }] });
    // Occurrence 1 doubles as the "does not throw" probe.
    expect(() => shouldDropException(partial())).not.toThrow();
    // Occurrences 2 and 3 use up the rest of the budget.
    for (let n = 0; n < 2; n++) {
      shouldDropException(partial());
    }
    expect(shouldDropException(partial())).toBe(true);
  });

  it("tolerates no stacktrace at all (fingerprints on type + value)", () => {
    const noframes = () => exc({ frames: null });
    for (let n = 0; n < 3; n++) {
      shouldDropException(noframes());
    }
    expect(shouldDropException(noframes())).toBe(true);
  });

  it("keeps events with no usable signal (empty type/value/frames)", () => {
    const empty = { $exception_list: [{ type: "", value: "" }] };
    // No fingerprint can be built, so the fuse never engages — not even on
    // the 4th occurrence.
    for (let n = 0; n < 4; n++) {
      expect(shouldDropException(empty)).toBe(false);
    }
  });

  it("is safe on undefined / malformed properties", () => {
    const malformed = { $exception_list: "nope" as unknown as [] };
    expect(shouldDropException(undefined)).toBe(false);
    expect(shouldDropException(malformed)).toBe(false);
  });
});
// Every storage failure mode must fail OPEN: the event is kept and nothing
// throws out of shouldDropException.
describe("shouldDropException — storage fail-open", () => {
  it("fails open when sessionStorage is undefined (SSR)", () => {
    vi.stubGlobal("sessionStorage", undefined);
    expect(shouldDropException(exc())).toBe(false);
    expect(shouldDropException(exc())).toBe(false);
    expect(shouldDropException(exc())).toBe(false);
    expect(shouldDropException(exc())).toBe(false);
  });

  it("fails open when accessing sessionStorage throws (sandboxed iframe)", () => {
    // Remember how the global looked before we replace it, so the finally
    // block can put back exactly that — not a bare `undefined`, which would
    // leave the environment's real sessionStorage missing for later tests.
    const original = Object.getOwnPropertyDescriptor(globalThis, "sessionStorage");
    Object.defineProperty(globalThis, "sessionStorage", {
      configurable: true,
      get() {
        throw new Error("blocked by sandbox");
      },
    });
    try {
      expect(() => shouldDropException(exc())).not.toThrow();
      expect(shouldDropException(exc())).toBe(false);
    } finally {
      if (original) {
        Object.defineProperty(globalThis, "sessionStorage", original);
      } else {
        // There was no own property before: drop the throwing getter so any
        // inherited definition (or plain absence) is visible again.
        delete (globalThis as { sessionStorage?: unknown }).sessionStorage;
      }
    }
  });

  it("fails open when getItem throws", () => {
    vi.stubGlobal("sessionStorage", makeStorage({ throwOnGet: true }));
    expect(() => shouldDropException(exc())).not.toThrow();
    expect(shouldDropException(exc())).toBe(false);
  });

  it("fails open on a corrupted JSON blob and re-seeds clean state", () => {
    const storage = makeStorage();
    storage.data.set(STORAGE_KEY, "{not valid json");
    vi.stubGlobal("sessionStorage", storage);
    expect(shouldDropException(exc())).toBe(false);
    // Blob is now valid JSON again with this fingerprint counted once.
    const reseeded = JSON.parse(storage.data.get(STORAGE_KEY)!);
    expect(typeof reseeded).toBe("object");
    expect(Object.values(reseeded)).toEqual([1]);
  });

  it("setItem failure under-counts (fewer drops), never over-drops", () => {
    vi.stubGlobal("sessionStorage", makeStorage({ throwOnSet: true }));
    // Persisting the increment always fails, so the counter never advances and
    // no event is ever dropped — the required "less drop" direction.
    for (let i = 0; i < 5; i++) {
      expect(shouldDropException(exc())).toBe(false);
    }
  });
});
// Behaviour around the cap of 50 distinct tracked fingerprints per session.
describe("shouldDropException — distinct-fingerprint cap", () => {
  // Serialised blob holding `n` placeholder fingerprints, each seen once.
  const seededBlob = (n: number): string =>
    JSON.stringify(
      Object.fromEntries(
        Array.from({ length: n }, (_, i): [string, number] => [`seed-${i}`, 1]),
      ),
    );

  it("keeps (does not track) a new fingerprint once the cap is reached", () => {
    const storage = makeStorage();
    // 50 distinct fingerprints are already tracked.
    storage.data.set(STORAGE_KEY, seededBlob(50));
    vi.stubGlobal("sessionStorage", storage);

    // The 51st, brand-new fingerprint is kept and NOT added to the blob.
    const dropped = shouldDropException(exc({ value: "fingerprint-51" }));
    expect(dropped).toBe(false);

    const after = JSON.parse(storage.data.get(STORAGE_KEY)!);
    expect(Object.keys(after)).toHaveLength(50);
  });

  it("still fuses a fingerprint that is already tracked at the cap", () => {
    const storage = makeStorage();
    vi.stubGlobal("sessionStorage", storage);
    // 49 placeholders; the real fingerprint below becomes the 50th.
    storage.data.set(STORAGE_KEY, seededBlob(49));

    const target = () => exc({ value: "tracked-at-cap" });
    // Three kept occurrences use up the budget (counts 1, 2, 3).
    for (let seen = 0; seen < 3; seen++) {
      shouldDropException(target());
    }
    expect(shouldDropException(target())).toBe(true); // fused despite cap
  });
});

View File

@@ -0,0 +1,193 @@
// Session-scoped dedupe / throttle for `$exception` events.
//
// Runs in posthog-js `before_send` AFTER `redactExceptionProperties`, so the
// fingerprint is built purely from already-redacted fields — no raw message,
// value, or PII is ever written to storage (only a hash + a small counter).
//
// The fuse: keep the first EXCEPTION_SAMPLE_LIMIT of each (tab-session,
// fingerprint) pair and drop the rest. One runaway error — a render loop, a
// polling fetch that keeps throwing — otherwise emits 100+ identical
// `$exception` events per session (MUL-3331 / MUL-3330). Different fingerprints
// never affect each other.
//
// Safety invariant (load-bearing): `before_send` must never throw — a throw
// there breaks ALL event delivery — and every storage failure must fail OPEN.
// When in doubt we KEEP the event: emitting a duplicate is cheap, silently
// dropping a real first-occurrence error is not. setItem failures therefore
// only ever under-count (fewer drops), never over-drop.
//
// Scope is the browser tab session (`sessionStorage`): cleared when the tab
// closes, isolated per tab. This is intentionally NOT the posthog 30-min
// session — see the dedupe discussion on MUL-3331.
// sessionStorage key under which the JSON-serialised fingerprint → count map
// is stored.
const STORAGE_KEY = "mc_exc_fp";
// Keep the first N of each fingerprint per session, drop from N+1.
const EXCEPTION_SAMPLE_LIMIT = 3;
// Cap distinct fingerprints tracked per session so a session that throws many
// *different* errors can't grow the blob without bound. Past the cap, new
// fingerprints are not tracked and fail open (kept).
const MAX_FINGERPRINTS = 50;
// Shape of the persisted blob: fingerprint hash → number of events already
// kept for that fingerprint in this tab session.
type FingerprintCounts = Record<string, number>;
/**
 * Decide whether this already-redacted `$exception` event should be dropped as
 * a session-level duplicate.
 *
 * @param properties - The event's properties after redaction; may be
 *   `undefined` or malformed.
 * @returns `true` to drop the event, `false` to keep it.
 *
 * Never throws. Any missing fingerprint signal, unavailable/corrupt storage, or
 * unexpected error results in `false` (keep) — the fail-open direction.
 */
export function shouldDropException(
  properties: Record<string, unknown> | undefined,
): boolean {
  // A single guard covers the WHOLE function — fingerprinting and storage
  // lookup included — so the "never throws" contract does not depend on every
  // helper staying throw-free (e.g. an exotic getter on `properties`). Any
  // throw degrades to keep.
  try {
    const fingerprint = buildFingerprint(properties);
    // Nothing stable to dedupe on → keep.
    if (fingerprint === null) return false;

    const storage = getSessionStorage();
    if (!storage) return false;

    const counts = readCounts(storage);
    // The blob is untrusted JSON: treat any non-numeric entry as "not seen".
    const stored = counts[fingerprint];
    const current = typeof stored === "number" ? stored : 0;

    // Already at the limit for this fingerprint → fuse blows, drop.
    if (current >= EXCEPTION_SAMPLE_LIMIT) return true;

    // A brand-new fingerprint once the cap is reached: don't track it (would
    // grow the blob), and keep the event.
    if (current === 0 && Object.keys(counts).length >= MAX_FINGERPRINTS) {
      return false;
    }

    counts[fingerprint] = current + 1;
    try {
      storage.setItem(STORAGE_KEY, JSON.stringify(counts));
    } catch {
      // Persisting the increment failed (quota / disabled). We still keep this
      // event (return false below). The unpersisted increment only means the
      // next identical error is also kept — under-counting toward the limit,
      // i.e. fewer drops, never more. This is the required failure direction.
    }
    return false;
  } catch {
    return false;
  }
}
/**
 * Load the fingerprint → count map from storage.
 *
 * Anything that is not a JSON object — a missing or empty entry, unparseable
 * JSON, an array, a primitive — is treated as an empty map, so the current
 * event is kept and the blob is re-seeded on the next write. A throw from
 * `getItem` itself is NOT caught here; it propagates to the caller.
 */
function readCounts(storage: Storage): FingerprintCounts {
  const serialized = storage.getItem(STORAGE_KEY);
  if (!serialized) return {};

  let decoded: unknown;
  try {
    decoded = JSON.parse(serialized);
  } catch {
    // Corrupt JSON blob → start fresh.
    return {};
  }

  const isPlainObject =
    decoded !== null && typeof decoded === "object" && !Array.isArray(decoded);
  return isPlainObject ? (decoded as FingerprintCounts) : {};
}
/**
 * Build a stable fingerprint from the redacted exception properties. Uses the
 * exception type, the redacted message/value, and a single deterministic stack
 * frame. Returns `null` when there's nothing stable to key on (keep the event).
 *
 * Type and value are read from the first `$exception_list` entry, falling back
 * to the top-level `$exception_type` / `$exception_message` properties.
 *
 * Every frame field (`function` / `lineno` / `colno`) is treated as optional
 * and degrades to empty — minified or partial stacks must not throw or collapse
 * every error into one bucket via an undefined access.
 *
 * @param properties - Redacted event properties; may be `undefined`.
 * @returns The hashed fingerprint, or `null` when type, value and frame are
 *   all absent.
 */
function buildFingerprint(properties: Record<string, unknown> | undefined): string | null {
  if (!properties || typeof properties !== "object") return null;
  const list = properties.$exception_list;
  const entry =
    Array.isArray(list) && list.length > 0 && list[0] && typeof list[0] === "object"
      ? (list[0] as Record<string, unknown>)
      : undefined;
  const type = readString(entry?.type) ?? readString(properties.$exception_type) ?? "";
  const value =
    readString(entry?.value) ?? readString(properties.$exception_message) ?? "";
  const frame = topFrame(entry);
  // No signal at all → don't dedupe.
  if (type === "" && value === "" && !frame) return null;
  const parts = [type, value];
  if (frame) {
    // colno is kept (load-bearing): minified bundles collapse many statements
    // onto one line, so line alone under-discriminates distinct errors.
    parts.push(frame.filename, frame.fn, frame.lineno, frame.colno);
  }
  // Encode the fields unambiguously before hashing. Plain concatenation lets
  // different tuples produce the same string — e.g. lineno "1" + colno "23"
  // vs lineno "12" + colno "3" — which would make two distinct errors share
  // one budget and silently drop the second. JSON array encoding quotes and
  // separates every field, so field boundaries are always preserved.
  return hash(JSON.stringify(parts));
}
/** One stack frame reduced to strings; any field missing on the source frame
 * is the empty string. */
interface TopFrame {
  filename: string;
  fn: string;
  lineno: string;
  colno: string;
}
/**
 * Pick the single stack frame used for fingerprinting: always the LAST element
 * of `stacktrace.frames`. No engine or ordering detection is attempted — a
 * fixed end is enough, because the same error produces the same frames array
 * and hence the same pick; whether that end is the semantic "top" does not
 * matter here.
 *
 * Returns `null` when there is no entry, no stacktrace object, no non-empty
 * frames array, or the chosen element is not an object.
 */
function topFrame(entry: Record<string, unknown> | undefined): TopFrame | null {
  const trace = entry?.stacktrace;
  if (trace === null || typeof trace !== "object") return null;

  const candidates: unknown = (trace as Record<string, unknown>).frames;
  if (!Array.isArray(candidates) || candidates.length === 0) return null;

  const chosen: unknown = candidates[candidates.length - 1];
  if (chosen === null || typeof chosen !== "object") return null;

  const { filename, function: fn, lineno, colno } = chosen as Record<string, unknown>;
  return {
    filename: readString(filename) ?? "",
    fn: readString(fn) ?? "",
    lineno: readNumberAsString(lineno) ?? "",
    colno: readNumberAsString(colno) ?? "",
  };
}
/** Return `v` when it is a non-empty string; otherwise `undefined`. */
function readString(v: unknown): string | undefined {
  if (typeof v !== "string") return undefined;
  return v === "" ? undefined : v;
}
/** Return the decimal string form of `v` when it is a finite number;
 * otherwise (non-number, NaN, ±Infinity) `undefined`. */
function readNumberAsString(v: unknown): string | undefined {
  if (typeof v !== "number" || !Number.isFinite(v)) return undefined;
  return `${v}`;
}
/**
 * djb2 (XOR variant): a tiny, stable string hash over UTF-16 code units,
 * returned as an unsigned 32-bit value in base 36. It only serves to bound
 * the storage-key length; collision risk across a single tab session's
 * exceptions is negligible.
 */
function hash(input: string): string {
  let acc = 5381;
  let pos = 0;
  while (pos < input.length) {
    // acc * 33 with 32-bit wrap-around — the same value as (acc << 5) + acc
    // once the XOR truncates the sum to an int32.
    acc = Math.imul(acc, 33) ^ input.charCodeAt(pos);
    pos += 1;
  }
  return (acc >>> 0).toString(36);
}
/**
 * Safe accessor for the global `sessionStorage`.
 *
 * Yields `null` both when the global does not exist (SSR) and when merely
 * touching it throws (sandboxed iframe, storage disabled).
 */
function getSessionStorage(): Storage | null {
  try {
    return typeof sessionStorage === "undefined" ? null : sessionStorage;
  } catch {
    return null;
  }
}

View File

@@ -216,3 +216,75 @@ describe("captureException", () => {
expect(posthog.captureException).toHaveBeenCalledWith(err, expect.any(Object));
});
});
// End-to-end check of the `before_send` hook that initAnalytics hands to
// posthog.init: redaction runs first, then the session dedupe fuse.
describe("before_send $exception pipeline", () => {
  type BeforeSend = (
    e: { event: string; properties: Record<string, unknown> } | null,
  ) => unknown;

  // Dedupe needs a working sessionStorage; this Map-backed stand-in is
  // installed fresh before every test.
  function makeMemoryStorage() {
    const data = new Map<string, string>();
    return {
      getItem(k: string) {
        return data.has(k) ? data.get(k)! : null;
      },
      setItem(k: string, v: string) {
        data.set(k, v);
      },
      removeItem(k: string) {
        data.delete(k);
      },
      clear() {
        data.clear();
      },
      key(i: number) {
        return Array.from(data.keys())[i] ?? null;
      },
      get length() {
        return data.size;
      },
    };
  }

  // before_send is registered inside posthog.init's config; recover it from
  // the mock's first call so the tests can drive it directly.
  function getBeforeSend(posthog: { init: ReturnType<typeof vi.fn> }): BeforeSend {
    const firstCall = posthog.init.mock.calls[0];
    const config = firstCall?.[1] as { before_send: BeforeSend };
    return config.before_send;
  }

  // A fresh `$exception` event whose message contains an email address.
  function excEvent() {
    const frame = { filename: "a.tsx", function: "f", lineno: 1, colno: 2 };
    const entry = {
      type: "TypeError",
      value: "Bad email bob@corp.com",
      stacktrace: { frames: [frame] },
    };
    return { event: "$exception", properties: { $exception_list: [entry] } };
  }

  // Load the module, initialise analytics, and return the registered hook.
  async function initAndGetBeforeSend(): Promise<BeforeSend> {
    const { analytics, posthog } = await loadModule();
    analytics.initAnalytics({ key: "k", host: "" });
    return getBeforeSend(posthog);
  }

  beforeEach(() => {
    vi.stubGlobal("sessionStorage", makeMemoryStorage());
  });

  it("redacts the message, then drops repeats past the per-fingerprint limit", async () => {
    const beforeSend = await initAndGetBeforeSend();

    const first = beforeSend(excEvent()) as {
      properties: { $exception_list: Array<{ value: string }> };
    };
    // Redaction still runs before the fuse.
    expect(first.properties.$exception_list[0]!.value).toBe("Bad email [redacted]");

    // Occurrences 2 and 3 are still inside the budget.
    for (let n = 0; n < 2; n++) {
      expect(beforeSend(excEvent())).not.toBeNull();
    }
    // 4th identical exception is dropped.
    expect(beforeSend(excEvent())).toBeNull();
  });

  it("passes non-$exception events through untouched", async () => {
    const beforeSend = await initAndGetBeforeSend();
    const evt = { event: "$pageview", properties: { $current_url: "/acme/issues" } };
    expect(beforeSend(evt)).toBe(evt);
  });
});

View File

@@ -14,6 +14,7 @@
import posthog from "posthog-js";
import { redactExceptionProperties } from "./redact-exception";
import { shouldDropException } from "./exception-dedupe";
export const EVENT_SCHEMA_VERSION = 2;
@@ -156,10 +157,17 @@ export function initAnalytics(config: AnalyticsConfig | null | undefined): boole
// typed value, a URL with a token), so `before_send` scrubs the message
// and `$exception_list[].value` before the event leaves the client. Stack
// frames (code locations) are kept. See redact-exception.ts.
//
// After scrubbing, a session-level fuse drops repeats of the same error so
// a render loop or a polling fetch that keeps throwing can't emit 100+
// identical `$exception` events per session (MUL-3331). The fingerprint is
// built only from the already-redacted fields, so no PII reaches storage.
// Order matters: redact first, then fingerprint the redacted shape.
capture_exceptions: true,
before_send: (event) => {
if (event && event.event === "$exception") {
redactExceptionProperties(event.properties);
if (shouldDropException(event.properties)) return null;
}
return event;
},

View File

@@ -45,6 +45,7 @@ beforeEach(() => {
// Per-test cleanup: restore stubbed globals, clear mock call history, and
// switch back to real timers in case a test enabled fake ones.
afterEach(() => {
vi.unstubAllGlobals();
vi.clearAllMocks();
vi.useRealTimers();
});
describe("installFreezeWatchdog", () => {
@@ -96,4 +97,38 @@ describe("installFreezeWatchdog", () => {
expect(() => installFreezeWatchdog()).not.toThrow();
});
it("emits at most one client_unresponsive per 60s cooldown window", async () => {
  // Pin the clock so all long-task entries share the same timestamp.
  vi.useFakeTimers();
  vi.setSystemTime(new Date("2026-01-01T00:00:00Z"));
  const { installFreezeWatchdog, captureEvent } = await load();
  installFreezeWatchdog();

  // A sustained freeze arrives as several long-task entries back to back.
  const burstDurationsMs = [2500, 2500, 3000];
  for (const durationMs of burstDurationsMs) {
    fireLongTask(durationMs);
  }

  expect(captureEvent).toHaveBeenCalledTimes(1);
});
it("emits again only after the cooldown window elapses", async () => {
  vi.useFakeTimers();
  vi.setSystemTime(new Date("2026-01-01T00:00:00Z"));
  const { installFreezeWatchdog, captureEvent } = await load();
  installFreezeWatchdog();

  // Fire one qualifying long task and report how many events exist so far.
  const freezeAndExpectTotal = (total: number): void => {
    fireLongTask(2500);
    expect(captureEvent).toHaveBeenCalledTimes(total);
  };

  // First freeze emits.
  freezeAndExpectTotal(1);

  // Still inside the window → suppressed.
  vi.advanceTimersByTime(59_999);
  freezeAndExpectTotal(1);

  // Window elapsed → emits again.
  vi.advanceTimersByTime(1);
  freezeAndExpectTotal(2);
});
});

View File

@@ -24,6 +24,16 @@ import { captureEvent } from "../analytics";
// felt a real stall" without flooding on routine heavy renders.
const FREEZE_THRESHOLD_MS = 2000;
// A single sustained freeze is delivered by the browser as several separate
// long-task entries, so emitting per entry makes client_unresponsive volume
// grow without bound with the freeze length (MUL-3331). A global cooldown caps
// it to at most one event per window. Module-level (page-lifetime) state is the
// right scope here — it matches the `installed` singleton and resets on a full
// reload, which is rare and itself a distinct signal. No route bucketing: a
// global window is the most direct cap on volume.
const COOLDOWN_MS = 60_000;
let lastEmitMs = 0;
let installed = false;
/**
@@ -41,6 +51,11 @@ export function installFreezeWatchdog(): void {
const observer = new PerformanceObserver((list) => {
for (const entry of list.getEntries()) {
if (entry.duration < FREEZE_THRESHOLD_MS) continue;
// Cooldown is checked only against qualifying freezes, so sub-threshold
// long tasks neither emit nor reset the window.
const now = Date.now();
if (now - lastEmitMs < COOLDOWN_MS) continue;
lastEmitMs = now;
captureEvent("client_unresponsive", {
source: "longtask",
duration_ms: Math.round(entry.duration),