fix(editor): de-ambiguate escaped-label regexes to kill ReDoS (MUL-4016)

The mention/slash/file-card label regexes used `(?:\\.|[^\]])` where both
alternatives can consume a backslash. On an unterminated match, each `\x`
run is enumerated 2^n ways — pasting a Java stacktrace (`\~\[...\]`) or a
crafted ~50-char string freezes the main thread for seconds (GitHub #4881).

Exclude backslash from the char class (`[^\]\\]`) so a backslash can only be
consumed by `\\.`. The alternatives become disjoint and matching is linear;
legal escaped-bracket labels like `David\[TF\]` still parse unchanged.

Fixed in all four sites that shared the pattern:
- mention-extension.ts tokenize()
- slash-command-extension.ts start() + tokenize()
- file-card.tsx FILE_CARD_MARKDOWN_RE
- packages/ui/markdown/file-cards.ts NEW_FILE_CARD_RE (runs on every
  read-only comment/description render, not just the editor)

Adds adversarial regression tests (repeated `\a` + missing closing bracket)
that fail in ~10-40s against the old regexes and pass in <1ms after the fix.
Builds on #4889's marker-first mention start().

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
Co-authored-by: multica-agent <github@multica.ai>
This commit is contained in:
Naiyuan Qing
2026-07-03 17:40:05 +08:00
parent a7d644b55f
commit 63893cc41f
7 changed files with 88 additions and 9 deletions

View File

@@ -47,9 +47,14 @@ export function isAllowedFileCardHref(href: string): boolean {
)
}
/** New syntax: !file[name](url) — unambiguous, no hostname matching needed. */
/**
* New syntax: !file[name](url) — unambiguous, no hostname matching needed.
* Backslash is excluded from the label char class so "\x" runs can only be
* consumed by \\. — overlapping alternatives backtrack in 2^n ways (ReDoS,
* GitHub #4881). This runs on every comment/description render.
*/
const NEW_FILE_CARD_RE = new RegExp(
`^!file\\[((?:\\\\.|[^\\]])*)\\]\\((${FILE_CARD_URL_PATTERN.source})\\)$`,
`^!file\\[((?:\\\\.|[^\\]\\\\])*)\\]\\((${FILE_CARD_URL_PATTERN.source})\\)$`,
)
/** Legacy syntax: [name](cdnUrl) on its own line — matched by CDN hostname. */

View File

@@ -61,6 +61,20 @@ describe("file-card tokenizer", () => {
expect(token).toBeDefined();
expect(token!.attributes.filename).toBe("readme.md");
});
it("rejects an unterminated file card with escape-pair runs in linear time", () => {
// Each "\a" pair is ambiguous under (?:\\.|[^\]]) — the pre-fix regex
// enumerates 2^28 backtrack paths (~10s) before failing. The disjoint
// char class must fail fast instead.
const src = `!file[${"\\a".repeat(28)}](/uploads/x`;
const t0 = performance.now();
const token = tokenize(src);
const elapsed = performance.now() - t0;
expect(token).toBeUndefined();
expect(elapsed).toBeLessThan(100);
});
});
// ---------------------------------------------------------------------------
@@ -81,4 +95,17 @@ describe("preprocessFileCards", () => {
expect(result).toContain('data-type="fileCard"');
expect(result).toContain('data-filename="readme.md"');
});
it("rejects an unterminated file card with escape-pair runs in linear time", () => {
// This path runs on every read-only comment/description render, so a
// backtracking label regex here is reachable without opening the editor.
const input = `!file[${"\\a".repeat(28)}](/uploads/x`;
const t0 = performance.now();
const result = preprocessFileCards(input, "cdn.example.com");
const elapsed = performance.now() - t0;
expect(result).toBe(input);
expect(elapsed).toBeLessThan(100);
});
});

View File

@@ -21,8 +21,11 @@ import { FILE_CARD_URL_PATTERN } from "@multica/ui/markdown";
import { escapeMarkdownLabel } from "../utils/escape-markdown-label";
import { Attachment } from "../attachment";
// Backslash is excluded from the label char class so "\x" runs can only be
// consumed by \\. — overlapping alternatives backtrack in 2^n ways (ReDoS,
// GitHub #4881).
const FILE_CARD_MARKDOWN_RE = new RegExp(
`^!file\\[((?:\\\\.|[^\\]])*)\\]\\((${FILE_CARD_URL_PATTERN.source})\\)`,
`^!file\\[((?:\\\\.|[^\\]\\\\])*)\\]\\((${FILE_CARD_URL_PATTERN.source})\\)`,
);

View File

@@ -117,4 +117,18 @@ describe("mention tokenizer", () => {
expect(start).toBe(-1);
expect(elapsed).toBeLessThan(50);
});
it("rejects an unterminated mention with escape-pair runs in linear time", () => {
// Each "\a" pair is ambiguous under (?:\\.|[^\]]) — the pre-fix regex
// enumerates 2^28 backtrack paths (~10s) before failing. The disjoint
// char class must fail fast instead.
const src = `[@${"\\a".repeat(28)}](mention://member/abc`;
const t0 = performance.now();
const token = tokenizeFn(src);
const elapsed = performance.now() - t0;
expect(token).toBeUndefined();
expect(elapsed).toBeLessThan(100);
});
});

View File

@@ -81,11 +81,13 @@ export const BaseMentionExtension = Mention.extend({
return findMentionStart(src);
},
tokenize(src: string) {
// Label accepts escaped chars (\\[ \\]) or any non-] character.
// This prevents the label from crossing a ]( Markdown link boundary
// while still supporting bracket-containing names like "David\[TF\]".
// Label accepts escaped chars (\\[ \\]) or any non-] non-backslash
// character. Excluding backslash from the char class keeps the two
// alternatives disjoint — otherwise "\x" runs backtrack in 2^n ways
// (ReDoS, GitHub #4881) — while still supporting bracket-containing
// names like "David\[TF\]".
const match = src.match(
/^\[@?((?:\\.|[^\]])+)\]\(mention:\/\/(\w+)\/([^)]+)\)/,
/^\[@?((?:\\.|[^\]\\])+)\]\(mention:\/\/(\w+)\/([^)]+)\)/,
);
if (!match) return undefined;
// Unescape backslash-escaped brackets that renderMarkdown may produce.

View File

@@ -86,4 +86,29 @@ describe("slash command tokenizer", () => {
it("does not match slash action links", () => {
expect(tokenize("[/deploy](slash://action/deploy)")).toBeUndefined();
});
it("rejects an unterminated slash link with escape-pair runs in linear time", () => {
// Each "\a" pair is ambiguous under (?:\\.|[^\]]) — the pre-fix regex
// enumerates 2^28 backtrack paths (~10s) before failing. The disjoint
// char class must fail fast instead.
const src = `[/${"\\a".repeat(28)}](slash://skill/abc`;
const t0 = performance.now();
const token = tokenizeFn(src);
const elapsed = performance.now() - t0;
expect(token).toBeUndefined();
expect(elapsed).toBeLessThan(100);
});
it("returns -1 fast from start() when escape-pair runs precede no slash link", () => {
const src = `[/${"\\a".repeat(28)}] plain text, no slash link`;
const t0 = performance.now();
const start = startFn(src);
const elapsed = performance.now() - t0;
expect(start).toBe(-1);
expect(elapsed).toBeLessThan(100);
});
});

View File

@@ -35,11 +35,14 @@ export const SlashCommandExtension = Mention.extend({
name: "slashCommand",
level: "inline" as const,
start(src: string) {
return src.search(/\[\/(?:\\.|[^\]])+\]\(slash:\/\/skill\//);
// Backslash is excluded from the char class so "\x" runs can only be
// consumed by \\. — overlapping alternatives backtrack in 2^n ways
// (ReDoS, GitHub #4881).
return src.search(/\[\/(?:\\.|[^\]\\])+\]\(slash:\/\/skill\//);
},
tokenize(src: string) {
const match = src.match(
/^\[\/((?:\\.|[^\]])+)\]\(slash:\/\/skill\/([^)]+)\)/,
/^\[\/((?:\\.|[^\]\\])+)\]\(slash:\/\/skill\/([^)]+)\)/,
);
if (!match) return undefined;
const rawLabel = match[1]?.replace(/\\\[/g, "[").replace(/\\\]/g, "]");