mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-03-17 13:22:42 +01:00
Fix slash mystery (#4263)
This commit is contained in:
parent
997f40500d
commit
a9e5ae2f11
@ -62,19 +62,13 @@ export function extractCodeText(
|
||||
|
||||
// We must preprocess LaTeX in the LLM output to avoid improper formatting
|
||||
export const preprocessLaTeX = (content: string) => {
|
||||
// 1) Escape dollar signs used outside of LaTeX context
|
||||
const escapedCurrencyContent = content.replace(
|
||||
/\$(\d+(?:\.\d*)?)/g,
|
||||
(_, p1) => `\\$${p1}`
|
||||
);
|
||||
|
||||
// 2) Replace block-level LaTeX delimiters \[ \] with $$ $$
|
||||
const blockProcessedContent = escapedCurrencyContent.replace(
|
||||
// 1) Replace block-level LaTeX delimiters \[ \] with $$ $$
|
||||
const blockProcessedContent = content.replace(
|
||||
/\\\[([\s\S]*?)\\\]/g,
|
||||
(_, equation) => `$$${equation}$$`
|
||||
);
|
||||
|
||||
// 3) Replace inline LaTeX delimiters \( \) with $ $
|
||||
// 2) Replace inline LaTeX delimiters \( \) with $ $
|
||||
const inlineProcessedContent = blockProcessedContent.replace(
|
||||
/\\\(([\s\S]*?)\\\)/g,
|
||||
(_, equation) => `$${equation}$`
|
||||
@ -82,223 +76,3 @@ export const preprocessLaTeX = (content: string) => {
|
||||
|
||||
return inlineProcessedContent;
|
||||
};
|
||||
|
||||
/** One contiguous piece of a markdown string, as produced by `parseMarkdownToSegments`. */
interface MarkdownSegment {
  type: "text" | "link" | "code" | "bold" | "italic" | "codeblock";
  text: string; // The visible/plain text
  raw: string; // The raw markdown including syntax
  length: number; // Length of the visible text
}

/** A markdown construct recognised at the start of the remaining input. */
interface MarkdownRule {
  type: Exclude<MarkdownSegment["type"], "text">;
  pattern: RegExp; // Anchored with ^ so it only matches at the cursor
  textGroup: number; // Capture group holding the visible text
}

// Order matters: the first rule that matches wins, so code blocks take
// precedence over inline code, and bold is tried before italic.
const MARKDOWN_RULES: readonly MarkdownRule[] = [
  { type: "codeblock", pattern: /^```(\w*)\n([\s\S]*?)```/, textGroup: 2 },
  { type: "code", pattern: /^`([^`]+)`/, textGroup: 1 },
  { type: "link", pattern: /^\[([^\]]+)\]\(([^)]+)\)/, textGroup: 1 },
  { type: "bold", pattern: /^(\*\*|__)([^*_\n]*?)\1/, textGroup: 2 },
  { type: "italic", pattern: /^(\*|_)([^*_\n]+?)\1(?!\*|_)/, textGroup: 2 },
];

// Characters that may begin one of the constructs in MARKDOWN_RULES.
const MARKDOWN_SPECIAL_CHARS = /[`\[*_]/;

/**
 * Splits a markdown string into an ordered list of segments (plain text,
 * links, inline code, bold, italic and fenced code blocks).
 *
 * Concatenating the `raw` fields of the result reproduces the input. A special
 * character that does not start any recognised construct (for example the
 * asterisk in "2 * 3") is kept as literal text, merged into the neighbouring
 * text segment.
 *
 * @param markdown - The markdown source to parse.
 * @returns The segments in source order; an empty array for empty input.
 */
export function parseMarkdownToSegments(markdown: string): MarkdownSegment[] {
  if (!markdown) {
    return [];
  }

  const segments: MarkdownSegment[] = [];

  // Appends plain text, extending the previous segment when it is also text so
  // that literal special characters do not fragment the output.
  const pushText = (text: string): void => {
    if (!text) {
      return;
    }
    const last = segments[segments.length - 1];
    if (last && last.type === "text") {
      last.text += text;
      last.raw += text;
      last.length += text.length;
    } else {
      segments.push({ type: "text", text, raw: text, length: text.length });
    }
  };

  let currentIndex = 0;

  // Every iteration consumes at least one character, so the loop terminates
  // without needing an iteration cap.
  while (currentIndex < markdown.length) {
    const rest = markdown.slice(currentIndex);
    let consumed = 0;

    for (const rule of MARKDOWN_RULES) {
      const match = rule.pattern.exec(rest);
      if (match) {
        const text = match[rule.textGroup] ?? "";
        segments.push({
          type: rule.type,
          text,
          raw: match[0],
          length: text.length,
        });
        consumed = match[0].length;
        break;
      }
    }

    if (consumed === 0) {
      // No construct starts here: take plain text up to the next special char.
      let textEnd = rest.search(MARKDOWN_SPECIAL_CHARS);
      if (textEnd === 0) {
        // The cursor is on a special character that matched no rule. Treat it
        // as a literal and continue up to the following special character.
        const next = rest.slice(1).search(MARKDOWN_SPECIAL_CHARS);
        textEnd = next === -1 ? rest.length : next + 1;
      } else if (textEnd === -1) {
        textEnd = rest.length;
      }
      pushText(rest.slice(0, textEnd));
      consumed = textEnd;
    }

    currentIndex += consumed;
  }

  return segments;
}
|
||||
|
||||
/**
 * Maps a plain-text selection back to markdown.
 *
 * The content is parsed into segments and their visible text is concatenated.
 * The first occurrence of `selectedText` in that plain text is located, and
 * every segment overlapping it contributes its selected portion, re-wrapped in
 * the markdown syntax for its type (bold is always emitted as `**`, italic as
 * `*`, and code blocks without a language tag).
 *
 * @param content - The markdown source the selection was made in.
 * @param selectedText - The visible text that was selected.
 * @returns Markdown for the selection, or `selectedText` unchanged when it
 *   cannot be found in the visible text of `content`.
 */
export function getMarkdownForSelection(
  content: string,
  selectedText: string
): string {
  const segments = parseMarkdownToSegments(content);

  // Visible text of the whole document, in segment order.
  const plainText = segments.map((segment) => segment.text).join("");

  const selectionStart = plainText.indexOf(selectedText);
  if (selectionStart === -1) {
    return selectedText;
  }
  const selectionEnd = selectionStart + selectedText.length;

  let result = "";
  let segmentStart = 0; // Offset of the current segment within plainText

  for (const segment of segments) {
    const segmentEnd = segmentStart + segment.length;

    // Only segments that overlap the selection contribute to the result.
    if (segmentEnd > selectionStart && segmentStart < selectionEnd) {
      // Clamp the selection to this segment's own coordinates.
      const selectedPortion = segment.text.slice(
        Math.max(0, selectionStart - segmentStart),
        Math.min(segment.length, selectionEnd - segmentStart)
      );

      switch (segment.type) {
        case "bold":
          result += `**${selectedPortion}**`;
          break;
        case "italic":
          result += `*${selectedPortion}*`;
          break;
        case "code":
          result += `\`${selectedPortion}\``;
          break;
        case "link": {
          // Preserve the URL from the raw markdown for the partial link text.
          const urlMatch = segment.raw.match(/\]\((.*?)\)/);
          const url = urlMatch ? urlMatch[1] : "";
          result += `[${selectedPortion}](${url})`;
          break;
        }
        case "codeblock":
          result += `\`\`\`\n${selectedPortion}\n\`\`\``;
          break;
        default:
          // Plain text needs no wrapping.
          result += selectedPortion;
      }
    }

    segmentStart = segmentEnd;
  }

  return result;
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user