fix slashes (#4259)

This commit is contained in:
pablonyx 2025-03-31 11:08:17 -07:00 committed by GitHub
parent feae7d0cc4
commit 04911db715
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 202 additions and 12 deletions

View File

@ -450,7 +450,6 @@ export const AIMessage = ({
)}
</>
) : null}
{toolCall &&
!TOOLS_WITH_CUSTOM_HANDLING.includes(
toolCall.tool_name
@ -467,12 +466,10 @@ export const AIMessage = ({
isRunning={!toolCall.tool_result || !content}
/>
)}
{toolCall &&
(!files || files.length == 0) &&
toolCall.tool_name === IMAGE_GENERATION_TOOL_NAME &&
!toolCall.tool_result && <GeneratingImageDisplay />}
{toolCall &&
toolCall.tool_name === INTERNET_SEARCH_TOOL_NAME && (
<ToolRunDisplay
@ -487,7 +484,6 @@ export const AIMessage = ({
isRunning={!toolCall.tool_result}
/>
)}
{docs && docs.length > 0 && (
<div
className={`mobile:hidden ${
@ -522,7 +518,6 @@ export const AIMessage = ({
</div>
</div>
)}
{content || files ? (
<>
<FileDisplay files={files || []} />
@ -974,7 +969,7 @@ export const HumanMessage = ({
</div>
) : typeof content === "string" ? (
<>
<div className="ml-auto flex items-center mr-1 h-fit my-auto">
<div className="ml-auto flex items-center mr-1 mt-2 h-fit mb-auto">
{onEdit &&
isHovered &&
!isEditing &&

View File

@ -0,0 +1,133 @@
import { preprocessLaTeX } from "./codeUtils";
// Behavioural spec for preprocessLaTeX: currency escaping, code protection,
// LaTeX delimiter conversion, and a few shell-flavoured edge cases.
describe("preprocessLaTeX", () => {
  describe("currency formatting", () => {
    it("should properly escape dollar signs in text with amounts", () => {
      const text =
        "Maria wants to buy a new laptop that costs $1,200. She has saved $800 so far. If she saves an additional $100 each month, how many months will it take her to have enough money to buy the laptop?";

      const result = preprocessLaTeX(text);

      // Each currency amount must come back with a backslash before the "$".
      expect(result).toContain("costs \\$1,200");
      expect(result).toContain("saved \\$800");
      expect(result).toContain("additional \\$100");
      expect(result).not.toContain("costs $1,200");
    });

    it("should handle dollar signs with backslashes already present", () => {
      const text =
        "Maria wants to buy a new laptop that costs \\$1,200. She has saved \\$800 so far.";

      const result = preprocessLaTeX(text);

      // The escaped amounts from the input must still appear in the output.
      expect(result).toContain("\\$1,200");
      expect(result).toContain("\\$800");
    });
  });

  describe("code block handling", () => {
    it("should not process dollar signs in code blocks", () => {
      const text = "```plaintext\nThe total cost is $50.\n```";

      const result = preprocessLaTeX(text);

      // Content of a fenced block is returned verbatim.
      expect(result).toContain("The total cost is $50.");
      expect(result).not.toContain("The total cost is \\$50.");
    });

    it("should not process dollar signs in inline code", () => {
      const text =
        'Use the `printf "$%.2f" $amount` command to format currency.';

      const result = preprocessLaTeX(text);

      // The backtick-delimited span is returned verbatim.
      expect(result).toContain('`printf "$%.2f" $amount`');
      expect(result).not.toContain('`printf "\\$%.2f" \\$amount`');
    });

    it("should handle mixed content with code blocks and currency", () => {
      const text =
        "The cost is $100.\n\n```javascript\nconst price = '$50';\n```\n\nThe remaining balance is $50.";

      const result = preprocessLaTeX(text);

      // Prose on either side of the fence gets its amounts escaped...
      expect(result).toContain("The cost is \\$100");
      expect(result).toContain("The remaining balance is \\$50");
      // ...while the fenced code keeps its raw "$".
      expect(result).toContain("const price = '$50';");
      expect(result).not.toContain("const price = '\\$50';");
    });
  });

  describe("LaTeX handling", () => {
    it("should preserve proper LaTeX delimiters", () => {
      const text =
        "The formula $x^2 + y^2 = z^2$ represents the Pythagorean theorem.";

      const result = preprocessLaTeX(text);

      // A "$ ... $" pair around a formula is left as-is.
      expect(result).toContain("$x^2 + y^2 = z^2$");
    });

    it("should convert LaTeX block delimiters", () => {
      const text = "Consider the equation: \\[E = mc^2\\]";

      const result = preprocessLaTeX(text);

      // "\[ ... \]" becomes "$$ ... $$".
      expect(result).toContain("$$E = mc^2$$");
      expect(result).not.toContain("\\[E = mc^2\\]");
    });

    it("should convert LaTeX inline delimiters", () => {
      const text =
        "The speed of light \\(c\\) is approximately 299,792,458 m/s.";

      const result = preprocessLaTeX(text);

      // "\( ... \)" becomes "$ ... $".
      expect(result).toContain("$c$");
      expect(result).not.toContain("\\(c\\)");
    });
  });

  describe("special cases", () => {
    it("should handle shell variables in text", () => {
      const text =
        "In bash, you can access arguments with $1, $2, and use echo $HOME to print the home directory.";

      const result = preprocessLaTeX(text);

      // Pins current behaviour: positional parameters look like amounts and
      // are escaped.
      expect(result).toContain("\\$1");
      expect(result).toContain("\\$2");
      // A "$" followed by letters is not treated as currency.
      expect(result).toContain("$HOME");
    });

    it("should handle shell commands with dollar signs", () => {
      const text = "Use awk '{print $2}' to print the second column.";

      const result = preprocessLaTeX(text);

      // The field reference inside the awk program keeps its raw "$".
      expect(result).toContain("{print $2}");
      expect(result).not.toContain("{print \\$2}");
    });

    it("should handle Einstein's equation with mixed LaTeX and code blocks", () => {
      const text =
        "Sure! The equation for Einstein's mass-energy equivalence, \\(E = mc^2\\), can be written in LaTeX as follows: ```latex\nE = mc^2\n``` When rendered, it looks like this: \\[ E = mc^2 \\]";

      const result = preprocessLaTeX(text);

      // Inline delimiters in the prose are converted.
      expect(result).toContain("equivalence, $E = mc^2$,");
      expect(result).not.toContain("equivalence, \\(E = mc^2\\),");
      // Block delimiters in the prose are converted.
      expect(result).toContain("it looks like this: $$ E = mc^2 $$");
      expect(result).not.toContain("it looks like this: \\[ E = mc^2 \\]");
      // The fenced latex snippet is returned verbatim.
      expect(result).toContain("```latex\nE = mc^2\n```");
    });
  });
});

View File

@ -59,20 +59,82 @@ export function extractCodeText(
return codeText || "";
}
/**
 * Preprocesses LaTeX in LLM output to avoid improper formatting.
 *
 * Steps, in order:
 *  1. Fenced code blocks (``` … ```) and inline code spans (` … `) are swapped
 *     for placeholders so that nothing inside them is rewritten.
 *  2. `$<digits>` inside brace-delimited snippets (e.g. `awk {print $2}`) or
 *     following print/echo/awk/sed/grep on the same line is shielded.
 *  3. Every remaining `$<number>` is escaped to `\$<number>`, unless it is
 *     already preceded by a backslash.
 *  4. `\[ … \]` is rewritten to `$$ … $$` and `\( … \)` to `$ … $`.
 *  5. Shielded dollar signs and stashed code are restored verbatim.
 *
 * @param content Raw markdown text.
 * @returns The text with currency escaped and LaTeX delimiters normalized;
 *          code blocks and inline code are returned unchanged.
 */
export const preprocessLaTeX = (content: string) => {
  const DOLLAR_PLACEHOLDER = "___DOLLAR_PLACEHOLDER___";

  // Swaps every "$" in a matched snippet for the temporary placeholder.
  const hideDollars = (snippet: string) =>
    snippet.replace(/\$/g, DOLLAR_PLACEHOLDER);

  // Builds a replacer that maps a numbered placeholder back to its stashed
  // text. A placeholder-looking token with no stashed entry is left as-is
  // rather than being replaced by the string "undefined".
  const restoreFrom =
    (stash: string[]) => (placeholder: string, index: string) => {
      const original = stash[parseInt(index, 10)];
      return original === undefined ? placeholder : original;
    };

  // Stash fenced code blocks. No separate "whole content is a code block"
  // shortcut is needed: a lone block round-trips unchanged through the
  // placeholder, and prose between two blocks is still processed.
  const codeBlocks: string[] = [];
  const withCodeBlocksReplaced = content.replace(/```[\s\S]*?```/g, (match) => {
    const placeholder = `___CODE_BLOCK_${codeBlocks.length}___`;
    codeBlocks.push(match);
    return placeholder;
  });

  // Stash inline code as well, so that both "$" and LaTeX-style delimiters
  // inside backticks survive untouched.
  const inlineCode: string[] = [];
  const withInlineCodeReplaced = withCodeBlocksReplaced.replace(
    /`[^`]+`/g,
    (match) => {
      const placeholder = `___INLINE_CODE_${inlineCode.length}___`;
      inlineCode.push(match);
      return placeholder;
    }
  );

  // Protect code-like expressions where $ is used for variables
  const codeProtected = withInlineCodeReplaced.replace(
    /\b(\w+(?:\s*-\w+)*\s*(?:'[^']*')?)\s*\{[^}]*?\$\d+[^}]*?\}/g,
    hideDollars
  );

  // Also protect common shell variable patterns like $1, $2, etc.
  const shellProtected = codeProtected.replace(
    /\b(?:print|echo|awk|sed|grep)\s+.*?\$\d+/g,
    hideDollars
  );

  // Escape currency mentions. The optional leading backslash is captured so
  // an amount that is already escaped is not escaped a second time.
  const currencyEscaped = shellProtected.replace(
    /(\\)?\$(\d+(?:\.\d*)?)/g,
    (match, backslash) => (backslash ? match : `\\${match}`)
  );

  // Replace block-level LaTeX delimiters \[ \] with $$ $$
  const blockProcessed = currencyEscaped.replace(
    /\\\[([\s\S]*?)\\\]/g,
    (_, equation) => `$$${equation}$$`
  );

  // Replace inline LaTeX delimiters \( \) with $ $
  const inlineProcessed = blockProcessed.replace(
    /\\\(([\s\S]*?)\\\)/g,
    (_, equation) => `$${equation}$`
  );

  // Restore original dollar signs in code contexts
  const restoredDollars = inlineProcessed.replace(
    /___DOLLAR_PLACEHOLDER___/g,
    () => "$"
  );

  // Restore inline code first: a stashed inline span may itself contain a
  // fenced-block placeholder, which the final pass then resolves.
  const restoredInlineCode = restoredDollars.replace(
    /___INLINE_CODE_(\d+)___/g,
    restoreFrom(inlineCode)
  );

  // Restore code blocks
  return restoredInlineCode.replace(
    /___CODE_BLOCK_(\d+)___/g,
    restoreFrom(codeBlocks)
  );
};