diff --git a/web/src/app/chat/message/codeUtils.ts b/web/src/app/chat/message/codeUtils.ts index 9d329c05f..3c2dd12fd 100644 --- a/web/src/app/chat/message/codeUtils.ts +++ b/web/src/app/chat/message/codeUtils.ts @@ -62,19 +62,13 @@ export function extractCodeText( // We must preprocess LaTeX in the LLM output to avoid improper formatting export const preprocessLaTeX = (content: string) => { - // 1) Escape dollar signs used outside of LaTeX context - const escapedCurrencyContent = content.replace( - /\$(\d+(?:\.\d*)?)/g, - (_, p1) => `\\$${p1}` - ); - - // 2) Replace block-level LaTeX delimiters \[ \] with $$ $$ - const blockProcessedContent = escapedCurrencyContent.replace( + // 1) Replace block-level LaTeX delimiters \[ \] with $$ $$ + const blockProcessedContent = content.replace( /\\\[([\s\S]*?)\\\]/g, (_, equation) => `$$${equation}$$` ); - // 3) Replace inline LaTeX delimiters \( \) with $ $ + // 2) Replace inline LaTeX delimiters \( \) with $ $ const inlineProcessedContent = blockProcessedContent.replace( /\\\(([\s\S]*?)\\\)/g, (_, equation) => `$${equation}$` @@ -82,223 +76,3 @@ export const preprocessLaTeX = (content: string) => { return inlineProcessedContent; }; - -interface MarkdownSegment { - type: "text" | "link" | "code" | "bold" | "italic" | "codeblock"; - text: string; // The visible/plain text - raw: string; // The raw markdown including syntax - length: number; // Length of the visible text -} - -export function parseMarkdownToSegments(markdown: string): MarkdownSegment[] { - if (!markdown) { - return []; - } - - const segments: MarkdownSegment[] = []; - let currentIndex = 0; - const maxIterations = markdown.length * 2; // Prevent infinite loops - let iterations = 0; - - while (currentIndex < markdown.length && iterations < maxIterations) { - iterations++; - let matched = false; - - // Check for code blocks first (they take precedence) - const codeBlockMatch = markdown - .slice(currentIndex) - .match(/^```(\w*)\n([\s\S]*?)```/); - if (codeBlockMatch && codeBlockMatch[0]) { - const [fullMatch, , code] = codeBlockMatch; - segments.push({ - type: "codeblock", - text: code || "", - raw: fullMatch, - length: (code || "").length, - }); - currentIndex += fullMatch.length; - matched = true; - continue; - } - - // Check for inline code - const inlineCodeMatch = markdown.slice(currentIndex).match(/^`([^`]+)`/); - if (inlineCodeMatch && inlineCodeMatch[0]) { - const [fullMatch, code] = inlineCodeMatch; - segments.push({ - type: "code", - text: code || "", - raw: fullMatch, - length: (code || "").length, - }); - currentIndex += fullMatch.length; - matched = true; - continue; - } - - // Check for links - const linkMatch = markdown - .slice(currentIndex) - .match(/^\[([^\]]+)\]\(([^)]+)\)/); - if (linkMatch && linkMatch[0]) { - const [fullMatch, text] = linkMatch; - segments.push({ - type: "link", - text: text || "", - raw: fullMatch, - length: (text || "").length, - }); - currentIndex += fullMatch.length; - matched = true; - continue; - } - - // Check for bold - const boldMatch = markdown - .slice(currentIndex) - .match(/^(\*\*|__)([^*_\n]*?)\1/); - if (boldMatch && boldMatch[0]) { - const [fullMatch, , text] = boldMatch; - segments.push({ - type: "bold", - text: text || "", - raw: fullMatch, - length: (text || "").length, - }); - currentIndex += fullMatch.length; - matched = true; - continue; - } - - // Check for italic - const italicMatch = markdown - .slice(currentIndex) - .match(/^(\*|_)([^*_\n]+?)\1(?!\*|_)/); - if (italicMatch && italicMatch[0]) { - const [fullMatch, , text] = italicMatch; - segments.push({ - type: "italic", - text: text || "", - raw: fullMatch, - length: (text || "").length, - }); - currentIndex += fullMatch.length; - matched = true; - continue; - } - - // If no matches were found, handle regular text - if (!matched) { - let nextSpecialChar = markdown.slice(currentIndex).search(/[`\[*_]/); - if (nextSpecialChar === -1) { - // No more special characters, add the rest as text - const text = markdown.slice(currentIndex); - if (text) { - segments.push({ - type: "text", - text: text, - raw: text, - length: text.length, - }); - } - break; - } else { - // Add the text up to the next special character - const text = markdown.slice( - currentIndex, - currentIndex + nextSpecialChar - ); - if (text) { - segments.push({ - type: "text", - text: text, - raw: text, - length: text.length, - }); - } - currentIndex += nextSpecialChar; - } - } - } - - return segments; -} - -export function getMarkdownForSelection( - content: string, - selectedText: string -): string { - const segments = parseMarkdownToSegments(content); - - // Build plain text and create mapping to markdown segments - let plainText = ""; - const markdownPieces: string[] = []; - let currentPlainIndex = 0; - - segments.forEach((segment) => { - plainText += segment.text; - markdownPieces.push(segment.raw); - currentPlainIndex += segment.length; - }); - - // Find the selection in the plain text - const startIndex = plainText.indexOf(selectedText); - if (startIndex === -1) { - return selectedText; - } - - const endIndex = startIndex + selectedText.length; - - // Find which segments the selection spans - let currentIndex = 0; - let result = ""; - let selectionStart = startIndex; - let selectionEnd = endIndex; - - segments.forEach((segment) => { - const segmentStart = currentIndex; - const segmentEnd = segmentStart + segment.length; - - // Check if this segment overlaps with the selection - if (segmentEnd > selectionStart && segmentStart < selectionEnd) { - // Calculate how much of this segment to include - const overlapStart = Math.max(0, selectionStart - segmentStart); - const overlapEnd = Math.min(segment.length, selectionEnd - segmentStart); - - if (segment.type === "text") { - const textPortion = segment.text.slice(overlapStart, overlapEnd); - result += textPortion; - } else { - // For markdown elements, wrap just the selected portion with the appropriate markdown - const selectedPortion = segment.text.slice(overlapStart, overlapEnd); - - switch (segment.type) { - case "bold": - result += `**${selectedPortion}**`; - break; - case "italic": - result += `*${selectedPortion}*`; - break; - case "code": - result += `\`${selectedPortion}\``; - break; - case "link": - // For links, we need to preserve the URL if it exists in the raw markdown - const urlMatch = segment.raw.match(/\]\((.*?)\)/); - const url = urlMatch ? urlMatch[1] : ""; - result += `[${selectedPortion}](${url})`; - break; - case "codeblock": - result += `\`\`\`\n${selectedPortion}\n\`\`\``; - break; - default: - result += selectedPortion; - } - } - } - - currentIndex += segment.length; - }); - - return result; -}