Fix slash mystery (#4263)

This commit is contained in:
pablonyx 2025-03-12 10:03:21 -07:00 committed by GitHub
parent 997f40500d
commit a9e5ae2f11
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -62,19 +62,13 @@ export function extractCodeText(
// We must preprocess LaTeX in the LLM output to avoid improper formatting
export const preprocessLaTeX = (content: string) => {
// 1) Escape dollar signs used outside of LaTeX context
const escapedCurrencyContent = content.replace(
/\$(\d+(?:\.\d*)?)/g,
(_, p1) => `\\$${p1}`
);
// 2) Replace block-level LaTeX delimiters \[ \] with $$ $$
const blockProcessedContent = escapedCurrencyContent.replace(
// 1) Replace block-level LaTeX delimiters \[ \] with $$ $$
const blockProcessedContent = content.replace(
/\\\[([\s\S]*?)\\\]/g,
(_, equation) => `$$${equation}$$`
);
// 3) Replace inline LaTeX delimiters \( \) with $ $
// 2) Replace inline LaTeX delimiters \( \) with $ $
const inlineProcessedContent = blockProcessedContent.replace(
/\\\(([\s\S]*?)\\\)/g,
(_, equation) => `$${equation}$`
@ -82,223 +76,3 @@ export const preprocessLaTeX = (content: string) => {
return inlineProcessedContent;
};
/**
 * One contiguous piece of a markdown string, as produced by
 * `parseMarkdownToSegments`.
 */
interface MarkdownSegment {
  /** The markdown construct this piece was recognised as. */
  type: "text" | "link" | "code" | "bold" | "italic" | "codeblock";
  /** The visible/plain text, with the markdown syntax stripped. */
  text: string;
  /** The raw markdown, including its syntax characters. */
  raw: string;
  /** Length of the visible text (always equal to `text.length`). */
  length: number;
}

/**
 * Splits a markdown string into a flat list of segments: fenced code blocks,
 * inline code, links, bold, italic, and plain text.
 *
 * Concatenating every segment's `raw` reproduces the input exactly, and
 * concatenating every `text` yields the plain text with the recognised
 * markdown syntax removed. Constructs are not nested: the content of a
 * matched segment is never parsed again.
 *
 * @param markdown - The markdown source to split.
 * @returns The segments in source order; an empty array for empty input.
 */
export function parseMarkdownToSegments(markdown: string): MarkdownSegment[] {
  if (!markdown) {
    return [];
  }

  // Tried in order at the current position. Code blocks come first so that a
  // ``` fence is never mistaken for inline code. `textGroup` is the capture
  // group holding the visible text of the construct.
  const rules: ReadonlyArray<{
    type: MarkdownSegment["type"];
    pattern: RegExp;
    textGroup: number;
  }> = [
    { type: "codeblock", pattern: /^```(\w*)\n([\s\S]*?)```/, textGroup: 2 },
    { type: "code", pattern: /^`([^`]+)`/, textGroup: 1 },
    { type: "link", pattern: /^\[([^\]]+)\]\(([^)]+)\)/, textGroup: 1 },
    { type: "bold", pattern: /^(\*\*|__)([^*_\n]*?)\1/, textGroup: 2 },
    { type: "italic", pattern: /^(\*|_)([^*_\n]+?)\1(?!\*|_)/, textGroup: 2 },
  ];
  // Every rule above starts with one of these characters.
  const specialChar = /[`\[*_]/;

  const segments: MarkdownSegment[] = [];
  let cursor = 0;

  while (cursor < markdown.length) {
    const rest = markdown.slice(cursor);

    let consumed = 0;
    for (const rule of rules) {
      const match = rule.pattern.exec(rest);
      if (match === null) {
        continue;
      }
      const text = match[rule.textGroup] ?? "";
      segments.push({
        type: rule.type,
        text,
        raw: match[0],
        length: text.length,
      });
      consumed = match[0].length;
      break;
    }
    if (consumed > 0) {
      cursor += consumed;
      continue;
    }

    // Nothing matched here, so this is plain text. Start looking for the next
    // special character one position further on: if the current character is
    // itself a special character that opened no construct (a lone `*`, `_`,
    // `[` or backtick), it has to be consumed as literal text. Otherwise the
    // cursor would never advance and the rest of the input would be lost.
    const offset = rest.slice(1).search(specialChar);
    const textLength = offset === -1 ? rest.length : offset + 1;
    const text = rest.slice(0, textLength);

    const previous = segments[segments.length - 1];
    if (previous !== undefined && previous.type === "text") {
      // Keep runs of plain text in a single segment.
      previous.text += text;
      previous.raw += text;
      previous.length += text.length;
    } else {
      segments.push({ type: "text", text, raw: text, length: text.length });
    }
    cursor += textLength;
  }

  return segments;
}
/**
 * Maps a plain-text selection back onto the markdown it came from.
 *
 * `content` is split into segments, the selection is located in the
 * concatenated visible text of those segments, and every segment the
 * selection overlaps contributes its selected portion, re-wrapped in the
 * markdown syntax for its type (`**bold**`, `*italic*`, `` `code` ``,
 * `[text](url)`, or a fenced code block).
 *
 * Only the first occurrence of `selectedText` in the visible text is used.
 *
 * @param content - The markdown source.
 * @param selectedText - The plain text the user selected.
 * @returns Markdown for the selection, or `selectedText` unchanged when it
 *   does not occur in the visible text of `content`.
 */
export function getMarkdownForSelection(
  content: string,
  selectedText: string
): string {
  const segments = parseMarkdownToSegments(content);

  // The visible text is the concatenation of every segment's plain text.
  const plainText = segments.map((segment) => segment.text).join("");

  const selectionStart = plainText.indexOf(selectedText);
  if (selectionStart === -1) {
    return selectedText;
  }
  const selectionEnd = selectionStart + selectedText.length;

  let result = "";
  let segmentStart = 0; // Offset of the current segment in the visible text.
  for (const segment of segments) {
    const segmentEnd = segmentStart + segment.length;

    if (segmentEnd > selectionStart && segmentStart < selectionEnd) {
      // Clamp the selection to this segment, in segment-local offsets.
      const from = Math.max(0, selectionStart - segmentStart);
      const to = Math.min(segment.length, selectionEnd - segmentStart);
      const portion = segment.text.slice(from, to);

      switch (segment.type) {
        case "bold":
          result += `**${portion}**`;
          break;
        case "italic":
          result += `*${portion}*`;
          break;
        case "code":
          result += `\`${portion}\``;
          break;
        case "link": {
          // Keep the link target from the raw markdown.
          const urlMatch = segment.raw.match(/\]\((.*?)\)/);
          const url = urlMatch ? urlMatch[1] : "";
          result += `[${portion}](${url})`;
          break;
        }
        case "codeblock": {
          // Keep the fence's language tag, and do not add a second newline
          // when the selected code already ends with one.
          const language = /^```(\w*)\n/.exec(segment.raw)?.[1] ?? "";
          const separator = /\n$/.test(portion) ? "" : "\n";
          result += `\`\`\`${language}\n${portion}${separator}\`\`\``;
          break;
        }
        default:
          // Plain text is copied through as-is.
          result += portion;
      }
    }

    segmentStart = segmentEnd;
  }

  return result;
}