Compare commits

...

2 Commits

Author SHA1 Message Date
J
8a7e3748ec docs(markdown): drop inaccurate .io example from bare-filename comment
io is not in the FILE_EXTENSIONS list, so .io domains are never suppressed.
Listing it as an example was misleading.

Co-authored-by: multica-agent <github@multica.ai>
2026-06-17 17:04:58 +08:00
J
86a93888dc fix(markdown): don't auto-link bare filenames as external URLs
Agent comments that mention a project file like `plan.md` were turned into
clickable links to https://plan.md (dead external site). linkify-it fuzzy
detection matches `plan.md` as a domain because its extension is also a valid
TLD (md = Moldova; likewise sh, rs, py).

Suppress schemeless (fuzzy) linkify matches whose token is a bare filename
(single segment ending in a known source/config extension). Explicit schemes
(`https://plan.md`) and real domains (`example.com`) are unaffected. The file
extension list is now shared between the file-path and bare-filename detectors
so they can't drift.

Fixes #4222

Co-authored-by: multica-agent <github@multica.ai>
2026-06-17 16:41:40 +08:00
2 changed files with 78 additions and 13 deletions

View File

@@ -10,10 +10,24 @@ import LinkifyIt from 'linkify-it'
// Initialize linkify-it with default settings (fuzzy URLs, emails enabled)
const linkify = new LinkifyIt()
// Common source/config file extensions. Shared between the file-path detector
// and the bare-filename guard below so the two never drift.
const FILE_EXTENSIONS =
'ts|tsx|js|jsx|mjs|cjs|md|json|yaml|yml|py|go|rs|css|scss|less|html|htm|txt|log|sh|bash|zsh|swift|kt|java|c|cpp|h|hpp|rb|php|xml|toml|ini|cfg|conf|env|sql|graphql|vue|svelte|astro|prisma|dockerfile|makefile|gitignore'
// File path regex - detects /path, ~/path, ./path with common extensions
// Matches paths that start with /, ~/, or ./ followed by path chars and a file extension
const FILE_PATH_REGEX =
/(?:^|[\s([{<])((\/|~\/|\.\/)[\w\-./@]+\.(?:ts|tsx|js|jsx|mjs|cjs|md|json|yaml|yml|py|go|rs|css|scss|less|html|htm|txt|log|sh|bash|zsh|swift|kt|java|c|cpp|h|hpp|rb|php|xml|toml|ini|cfg|conf|env|sql|graphql|vue|svelte|astro|prisma|dockerfile|makefile|gitignore))(?=[\s)\]}.,;:!?>]|$)/gi
const FILE_PATH_REGEX = (() => {
  // Start of input, or a whitespace / opening-bracket character before the path.
  const leadingBoundary = String.raw`(?:^|[\s([{<])`
  // Group 1: the whole path; group 2: its "/", "~/" or "./" prefix.
  const pathWithExtension = String.raw`((\/|~\/|\.\/)[\w\-./@]+\.(?:${FILE_EXTENSIONS}))`
  // Lookahead: the path must be followed by whitespace, closing punctuation, or end of input.
  const trailingBoundary = String.raw`(?=[\s)\]}.,;:!?>]|$)`
  return new RegExp(leadingBoundary + pathWithExtension + trailingBoundary, 'gi')
})()
// Recognises a bare filename token such as "plan.md" or "vite.config.ts":
// the entire string must be word characters, dots and hyphens ending in one of
// the shared file extensions, so anything containing a slash, a scheme or a
// port cannot match. Several of those extensions (md, sh, rs, py, …) are also
// valid TLDs, which is why linkify-it fuzzy-matches such tokens as domains;
// this pattern is what lets us keep them from being auto-linked to dead
// external sites like https://plan.md.
const BARE_FILENAME_PATTERN = String.raw`^[\w.-]+\.(?:${FILE_EXTENSIONS})$`
const BARE_FILENAME_REGEX = new RegExp(BARE_FILENAME_PATTERN, 'i')
// CJK full-width punctuation that should terminate a URL.
// linkify-it only treats ASCII punctuation as URL boundaries, so in Chinese /
@@ -223,19 +237,27 @@ function collectLinkifyMatches(text: string, offset: number, out: DetectedLink[]
const truncate = cjkIdx > 0
const matchText = truncate ? match.text.slice(0, cjkIdx) : match.text
// linkify-it may prepend a scheme (e.g. "http://" or "mailto:") to url
// while leaving text as the raw substring. Preserve that prefix.
const schemePrefix = match.url.slice(0, match.url.length - match.text.length)
const matchUrl = truncate ? schemePrefix + matchText : match.url
const matchEnd = truncate ? match.index + cjkIdx : match.lastIndex
out.push({
type: match.schema === 'mailto:' ? 'email' : 'url',
text: matchText,
url: matchUrl,
start: match.index + offset,
end: matchEnd + offset
})
// Bare filenames such as "plan.md" or "README.md" are fuzzy-matched as
// domains because their extension is also a valid TLD. They are file
// references, not URLs — leave them as plain text rather than link to a
// dead external site. Only schemeless (fuzzy) matches are suppressed; an
// explicit "https://plan.md" the author typed is still honored.
if (!(match.schema === '' && BARE_FILENAME_REGEX.test(matchText))) {
// linkify-it may prepend a scheme (e.g. "http://" or "mailto:") to url
// while leaving text as the raw substring. Preserve that prefix.
const schemePrefix = match.url.slice(0, match.url.length - match.text.length)
const matchUrl = truncate ? schemePrefix + matchText : match.url
out.push({
type: match.schema === 'mailto:' ? 'email' : 'url',
text: matchText,
url: matchUrl,
start: match.index + offset,
end: matchEnd + offset
})
}
if (truncate) {
// Rescan the tail after the CJK punct — linkify-it had greedily swallowed

View File

@@ -93,3 +93,46 @@ describe("preprocessLinks — CJK punctuation boundary", () => {
);
});
});
// Regression suite for #4222: an agent mentions a project file like `plan.md`
// in a comment. linkify-it fuzzy-matches it as the domain `plan.md` (md is
// Moldova's ccTLD) and turns it into a clickable https://plan.md link that goes
// nowhere. Bare filename tokens must stay plain text — only an explicit scheme
// makes them a link.
describe("preprocessLinks — bare filenames are not auto-linked as URLs", () => {
  it("leaves a bare .md filename in CJK prose as plain text", () => {
    const out = preprocessLinks("决策已锁定plan.md 已更新");
    expect(out).toBe("决策已锁定plan.md 已更新");
  });

  it("leaves README.md as plain text", () => {
    expect(preprocessLinks("see README.md for details")).toBe("see README.md for details");
  });

  it("leaves other extensions that collide with TLDs (sh, rs, py) as plain text", () => {
    expect(preprocessLinks("run build.sh then main.rs and app.py")).toBe(
      "run build.sh then main.rs and app.py",
    );
  });

  it("honors an explicit scheme on a filename-shaped host", () => {
    expect(preprocessLinks("open https://plan.md now")).toBe(
      "open [https://plan.md](https://plan.md) now",
    );
  });

  it("still linkifies real fuzzy domains whose TLD is not a file extension", () => {
    expect(preprocessLinks("官网 NBA.com")).toBe("官网 [NBA.com](http://NBA.com)");
  });

  it("suppresses the bare filename but still linkifies a real domain after it", () => {
    // The two tokens are separated by a full-width comma. Without a separator
    // the input would be the single host `plan.mdexample.com`, which ends in
    // `.com` and is therefore linked as a whole. With it, the match is cut at
    // the CJK punctuation, the `plan.md` head is suppressed, and the tail is
    // rescanned so `example.com` is still linked.
    expect(preprocessLinks("plan.md，example.com")).toBe(
      "plan.md，[example.com](http://example.com)",
    );
  });

  it("still detects explicit ./ file paths (FILE_PATH_REGEX regression)", () => {
    expect(preprocessLinks("see ./src/main.go here")).toBe(
      "see [./src/main.go](./src/main.go) here",
    );
  });
});