diff --git a/.gitignore b/.gitignore index 24739991f..d0dc58e3b 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,4 @@ /web/test-results/ backend/onyx/agent_search/main/test_data.json backend/tests/regression/answer_quality/test_data.json +jira_test_env/ \ No newline at end of file diff --git a/backend/onyx/connectors/web/connector.py b/backend/onyx/connectors/web/connector.py index 1209915a1..5d1f8baea 100644 --- a/backend/onyx/connectors/web/connector.py +++ b/backend/onyx/connectors/web/connector.py @@ -47,6 +47,40 @@ IFRAME_TEXT_LENGTH_THRESHOLD = 700 # Message indicating JavaScript is disabled, which often appears when scraping fails JAVASCRIPT_DISABLED_MESSAGE = "You have JavaScript disabled in your browser" +# Define common headers that mimic a real browser +DEFAULT_USER_AGENT = ( + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 " + "(KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36" +) +DEFAULT_HEADERS = { + "User-Agent": DEFAULT_USER_AGENT, + "Accept": ( + "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp," + "image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7" + ), + "Accept-Language": "en-US,en;q=0.9", + "Accept-Encoding": "gzip, deflate, br", + "Connection": "keep-alive", + "Upgrade-Insecure-Requests": "1", + "Sec-Fetch-Dest": "document", + "Sec-Fetch-Mode": "navigate", + "Sec-Fetch-Site": "none", + "Sec-Fetch-User": "?1", + "Sec-CH-UA": '"Google Chrome";v="123", "Not:A-Brand";v="8"', + "Sec-CH-UA-Mobile": "?0", + "Sec-CH-UA-Platform": '"macOS"', +} + +# Common PDF MIME types +PDF_MIME_TYPES = [ + "application/pdf", + "application/x-pdf", + "application/acrobat", + "application/vnd.pdf", + "text/pdf", + "text/x-pdf", +] + class WEB_CONNECTOR_VALID_SETTINGS(str, Enum): # Given a base site, index everything under that path @@ -95,7 +129,7 @@ def protected_url_check(url: str) -> None: def check_internet_connection(url: str) -> None: try: - response = requests.get(url, timeout=3) + response = requests.get(url, timeout=3, headers=DEFAULT_HEADERS) response.raise_for_status() except requests.exceptions.HTTPError as e: # Extract status code from the response, defaulting to -1 if response is None @@ -155,12 +189,40 @@ def get_internal_links( return internal_links +def is_pdf_content(response: requests.Response) -> bool: + """Check if the response contains PDF content based on content-type header""" + content_type = response.headers.get("content-type", "").lower() + return any(pdf_type in content_type for pdf_type in PDF_MIME_TYPES) + + def start_playwright() -> Tuple[Playwright, BrowserContext]: playwright = sync_playwright().start() browser = playwright.chromium.launch(headless=True) - context = browser.new_context() + # Create a context with realistic browser properties + context = browser.new_context( + user_agent=DEFAULT_USER_AGENT, + viewport={"width": 1440, "height": 900}, + device_scale_factor=2.0, + locale="en-US", + timezone_id="America/Los_Angeles", + has_touch=False, + java_script_enabled=True, + color_scheme="light", + ) + + # Set additional headers to mimic a real browser + context.set_extra_http_headers( + { + "Accept": DEFAULT_HEADERS["Accept"], + "Accept-Language": DEFAULT_HEADERS["Accept-Language"], + "Sec-Fetch-Dest": DEFAULT_HEADERS["Sec-Fetch-Dest"], + "Sec-Fetch-Mode": DEFAULT_HEADERS["Sec-Fetch-Mode"], + "Sec-Fetch-Site": DEFAULT_HEADERS["Sec-Fetch-Site"], + "Sec-Fetch-User": DEFAULT_HEADERS["Sec-Fetch-User"], + } + ) if ( WEB_CONNECTOR_OAUTH_CLIENT_ID @@ -183,7 +245,7 @@ def start_playwright() -> Tuple[Playwright, BrowserContext]: def extract_urls_from_sitemap(sitemap_url: str) -> list[str]: try: - response = requests.get(sitemap_url) + response = requests.get(sitemap_url, headers=DEFAULT_HEADERS) response.raise_for_status() soup = BeautifulSoup(response.content, "html.parser") @@ -330,9 +392,15 @@ class WebConnector(LoadConnector): playwright, context = start_playwright() restart_playwright = False - if initial_url.split(".")[-1] == "pdf": + # First do a HEAD request to check content type without downloading the entire content + head_response = requests.head( + initial_url, headers=DEFAULT_HEADERS, allow_redirects=True + ) + is_pdf = is_pdf_content(head_response) + + if is_pdf or initial_url.lower().endswith(".pdf"): # PDF files are not checked for links - response = requests.get(initial_url) + response = requests.get(initial_url, headers=DEFAULT_HEADERS) page_text, metadata, images = read_pdf_file( file=io.BytesIO(response.content) ) diff --git a/web/src/app/chat/message/AgenticMessage.tsx b/web/src/app/chat/message/AgenticMessage.tsx index 1626006ae..5fa1516c6 100644 --- a/web/src/app/chat/message/AgenticMessage.tsx +++ b/web/src/app/chat/message/AgenticMessage.tsx @@ -43,6 +43,14 @@ import { LlmDescriptor } from "@/lib/hooks"; import { ContinueGenerating } from "./ContinueMessage"; import { MemoizedAnchor, MemoizedParagraph } from "./MemoizedTextComponents"; import { extractCodeText, preprocessLaTeX } from "./codeUtils"; +import { ThinkingBox } from "./ThinkingBox"; +import { + hasCompletedThinkingTokens, + hasPartialThinkingTokens, + extractThinkingContent, + isThinkingComplete, + removeThinkingTokens, +} from "../utils/thinkingTokens"; import remarkMath from "remark-math"; import rehypeKatex from "rehype-katex"; @@ -138,6 +146,11 @@ export const AgenticMessage = ({ let processed = incoming; + // Apply thinking tokens processing first + if (hasCompletedThinkingTokens(processed) || hasPartialThinkingTokens(processed)) { + processed = removeThinkingTokens(processed) as string; + } + const codeBlockRegex = /```(\w*)\n[\s\S]*?```|```[\s\S]*?$/g; const matches = processed.match(codeBlockRegex); if (matches) { @@ -175,6 +188,29 @@ export const AgenticMessage = ({ const finalContent = processContent(content) as string; const finalAlternativeContent = processContent(alternativeContent) as string; + // Check if content contains thinking tokens + const hasThinkingTokens = useMemo(() => { + return hasCompletedThinkingTokens(content) || hasPartialThinkingTokens(content); + }, [content]); + + // Extract thinking content + const thinkingContent = useMemo(() => { + if (!hasThinkingTokens) return ""; + return extractThinkingContent(content); + }, [content, hasThinkingTokens]); + + // Track if thinking is complete + const isThinkingTokenComplete = useMemo(() => { + return isThinkingComplete(thinkingContent); + }, [thinkingContent]); + + // Enable streaming when thinking tokens are detected + useEffect(() => { + if (hasThinkingTokens) { + setAllowStreaming(true); + } + }, [hasThinkingTokens]); + const [isViewingInitialAnswer, setIsViewingInitialAnswer] = useState(true); const [canShowResponse, setCanShowResponse] = useState(isComplete); @@ -454,6 +490,16 @@ export const AgenticMessage = ({ unToggle={false} /> )} + {/* Render thinking box if thinking tokens exist */} + {hasThinkingTokens && thinkingContent && ( +
+ +
+ )} {/* For debugging purposes */} {/* */} {/* */} diff --git a/web/src/app/chat/message/Messages.tsx b/web/src/app/chat/message/Messages.tsx index 829d8db2c..d82b9f4c5 100644 --- a/web/src/app/chat/message/Messages.tsx +++ b/web/src/app/chat/message/Messages.tsx @@ -73,6 +73,14 @@ import rehypeKatex from "rehype-katex"; import "katex/dist/katex.min.css"; import { copyAll, handleCopy } from "./copyingUtils"; import { transformLinkUri } from "@/lib/utils"; +import { ThinkingBox } from "./ThinkingBox"; +import { + hasCompletedThinkingTokens, + hasPartialThinkingTokens, + extractThinkingContent, + isThinkingComplete, + removeThinkingTokens, +} from "../utils/thinkingTokens"; import { FileResponse } from "../my-documents/DocumentsContext"; const TOOLS_WITH_CUSTOM_HANDLING = [ @@ -268,6 +276,47 @@ export const AIMessage = ({ }) => { const toolCallGenerating = toolCall && !toolCall.tool_result; + // Check if content contains thinking tokens (complete or partial) + const hasThinkingTokens = useMemo(() => { + return hasCompletedThinkingTokens(content) || hasPartialThinkingTokens(content); + }, [content]); + + // Extract thinking content + const thinkingContent = useMemo(() => { + if (!hasThinkingTokens) return ""; + return extractThinkingContent(content); + }, [content, hasThinkingTokens]); + + // Track if thinking is complete + const isThinkingTokenComplete = useMemo(() => { + return isThinkingComplete(thinkingContent); + }, [thinkingContent]); + + // Extract final content (remove thinking tokens) + const finalContent = useMemo(() => { + if (!hasThinkingTokens) return content; + return removeThinkingTokens(content); + }, [content, hasThinkingTokens]); + + // Only show the message content when we've completed the thinking section + // or there are no thinking tokens to begin with + const shouldShowContent = useMemo(() => { + if (!hasThinkingTokens) return true; + + // If the message is complete, we always show the content + if (isComplete) return true; + + // If thinking is not complete, we don't show the content yet + if (!isThinkingTokenComplete) return false; + + // If thinking is complete but we're not done with the message yet, + // only show the content if there's actually something to show + const cleanedContent = (typeof finalContent === 'string') ? + finalContent.trim() : finalContent; + + return !!cleanedContent && cleanedContent !== ''; + }, [hasThinkingTokens, isComplete, isThinkingTokenComplete, finalContent]); + const processContent = (content: string | JSX.Element) => { if (typeof content !== "string") { return content; @@ -299,7 +348,7 @@ export const AIMessage = ({ ); }; - const finalContent = processContent(content as string); + const finalContentProcessed = processContent(finalContent as string); const [isRegenerateDropdownVisible, setIsRegenerateDropdownVisible] = useState(false); @@ -403,7 +452,7 @@ export const AIMessage = ({ code: ({ node, className, children }: any) => { const codeText = extractCodeText( node, - finalContent as string, + finalContentProcessed as string, children ); @@ -414,15 +463,15 @@ export const AIMessage = ({ ); }, }), - [anchorCallback, paragraphCallback, finalContent] + [anchorCallback, paragraphCallback, finalContentProcessed] ); const markdownRef = useRef(null); // Process selection copying with HTML formatting const renderedMarkdown = useMemo(() => { - if (typeof finalContent !== "string") { - return finalContent; + if (typeof finalContentProcessed !== "string") { + return finalContentProcessed; } return ( @@ -433,10 +482,10 @@ export const AIMessage = ({ rehypePlugins={[[rehypePrism, { ignoreMissing: true }], rehypeKatex]} urlTransform={transformLinkUri} > - {finalContent} + {finalContentProcessed} ); - }, [finalContent, markdownComponents]); + }, [finalContentProcessed, markdownComponents]); const includeMessageSwitcher = currentMessageInd !== undefined && @@ -636,7 +685,20 @@ export const AIMessage = ({ )} - {content || files ? ( + + {/* Render thinking box if thinking tokens exist */} + {hasThinkingTokens && thinkingContent && ( +
+ +
+ )} + + {/* Only show the message content once thinking is complete or if there's no thinking */} + {shouldShowContent && (content || files) ? ( <> - copyAll(finalContent as string, markdownRef) + copyAll(finalContentProcessed as string, markdownRef) } /> @@ -778,7 +840,7 @@ export const AIMessage = ({ - copyAll(finalContent as string, markdownRef) + copyAll(finalContentProcessed as string, markdownRef) } /> diff --git a/web/src/app/chat/message/ThinkingBox.css b/web/src/app/chat/message/ThinkingBox.css new file mode 100644 index 000000000..2bca4f2f7 --- /dev/null +++ b/web/src/app/chat/message/ThinkingBox.css @@ -0,0 +1,305 @@ +/* ThinkingBox.css */ +:root { + --thinking-border-color: rgba(0, 0, 0, 0.1); + --thinking-bg-color: transparent; + --thinking-text-color: #6b7280; + --thinking-title-color: #374151; + --thinking-fade-start: rgba(249, 250, 251, 1); + --thinking-fade-end: rgba(249, 250, 251, 0); + --thinking-fade-start-rgb: 249, 250, 251; +} + +.dark { + --thinking-border-color: rgba(255, 255, 255, 0.1); + --thinking-bg-color: transparent; + --thinking-text-color: #9ca3af; + --thinking-title-color: #e5e7eb; + --thinking-fade-start: rgba(30, 41, 59, 1); + --thinking-fade-end: rgba(30, 41, 59, 0); + --thinking-fade-start-rgb: 30, 41, 59; +} + +.thinking-box { + width: 98%; + max-width: 100%; + margin: 0.75rem 0; + position: relative; +} + +.thinking-box__container { + border: 1px solid var(--thinking-border-color); + border-radius: 0.75rem; + background-color: var(--thinking-bg-color); + overflow: hidden; + transition: all 0.2s ease-in-out; + box-shadow: 0 1px 2px rgba(0, 0, 0, 0.05); + transform: translateZ(0); + backface-visibility: hidden; + perspective: 1000px; +} + +/* More subtle collapsed state */ +.thinking-box__container--collapsed { + border-color: var(--thinking-border-color); + opacity: 0.9; +} + +/* No preview - make the bottom border curved too */ +.thinking-box__container--no-preview { + border-bottom-left-radius: 0.75rem; + border-bottom-right-radius: 0.75rem; +} + +/* Remove the bottom border when there's no preview */ +.thinking-box__container--no-preview .thinking-box__header { + border-bottom: none; +} + +.thinking-box__header { + display: flex; + align-items: center; + justify-content: space-between; + padding: 0.75rem 1rem; + cursor: pointer; + transition: background-color 0.2s ease-in-out; + user-select: none; + border-bottom: 1px solid var(--thinking-border-color); +} + +.thinking-box__header:hover { + background-color: rgba(0, 0, 0, 0.02); +} + +.dark .thinking-box__header:hover { + background-color: rgba(255, 255, 255, 0.02); +} + +.thinking-box__title { + display: flex; + align-items: center; + gap: 0.5rem; + color: var(--thinking-title-color); +} + +.thinking-box__icon { + color: var(--thinking-text-color); + margin-right: 0.25rem; + animation: pulse 1.5s infinite ease-in-out; +} + +.thinking-box__title-text { + font-size: 0.8rem; + font-weight: 500; +} + +.thinking-box__timer { + font-size: 0.8rem; + color: var(--thinking-text-color); + margin-left: 0.25rem; +} + +.thinking-box__collapse-icon { + color: var(--thinking-text-color); + display: flex; + align-items: center; +} + +.thinking-box__content { + border-top: 1px solid var(--thinking-border-color); + padding: 1.25rem; + max-height: 400px; + overflow-y: auto; + color: var(--thinking-text-color); + animation: fadeIn 0.3s ease-in-out; +} + +.thinking-box__markdown { + font-size: 0.875rem; + color: var(--thinking-text-color); + line-height: 1.5; + overflow-wrap: break-word; +} + +/* Preview container (collapsed state) */ +.thinking-box__preview { + position: relative; + height: 2.5rem; + overflow: hidden; + width: 100%; + padding: 0.15rem 0; + transition: all 0.35s cubic-bezier(0.16, 1, 0.3, 1); +} + +/* Active animation styling - highlight active thinking */ +.thinking-box__preview--crawling { + height: 5rem; + transition: all 0.5s cubic-bezier(0.16, 1, 0.3, 1); + border-top: 1px solid var(--thinking-border-color); + background-color: rgba(0, 0, 0, 0.01); +} + +.dark .thinking-box__preview--crawling { + background-color: rgba(255, 255, 255, 0.025); +} + +.thinking-box__fade-container { + position: relative; + height: 100%; + overflow: hidden; + transition: all 0.4s cubic-bezier(0.16, 1, 0.3, 1); +} + +/* Create fade effect at top and bottom */ +.thinking-box__fade-container::before, +.thinking-box__fade-container::after { + content: ""; + position: absolute; + left: 0; + right: 0; + height: 0.85rem; /* Increased for more visible gradient */ + z-index: 10; + pointer-events: none; + transition: all 0.3s cubic-bezier(0.16, 1, 0.3, 1); + opacity: 0.95; +} + +/* Enhanced gradients with smoother transitions */ +.thinking-box__fade-container::before { + top: 0; + background: linear-gradient( + to bottom, + var(--thinking-fade-start), + rgba(var(--thinking-fade-start-rgb, 249, 250, 251), 0.85) 25%, + var(--thinking-fade-end) 100% + ); +} + +.thinking-box__fade-container::after { + bottom: 0; + background: linear-gradient( + to top, + var(--thinking-fade-start), + rgba(var(--thinking-fade-start-rgb, 249, 250, 251), 0.85) 25%, + var(--thinking-fade-end) 100% + ); +} + +.dark .thinking-box__fade-container::before { + background: linear-gradient( + to bottom, + var(--thinking-fade-start), + rgba(var(--thinking-fade-start-rgb, 30, 41, 59), 0.85) 25%, + var(--thinking-fade-end) 100% + ); +} + +.dark .thinking-box__fade-container::after { + background: linear-gradient( + to top, + var(--thinking-fade-start), + rgba(var(--thinking-fade-start-rgb, 30, 41, 59), 0.85) 25%, + var(--thinking-fade-end) 100% + ); +} + +/* Make gradients more visible during crawling */ +.thinking-box__preview--crawling .thinking-box__fade-container::before, +.thinking-box__preview--crawling .thinking-box__fade-container::after { + height: 1.5rem; + opacity: 0.95; +} + +.thinking-box__scroll-content { + padding: 0.75rem 1rem; + height: 100%; + width: 100%; + overflow-y: hidden; + will-change: transform; + transform: translateZ(0); + backface-visibility: hidden; + -webkit-font-smoothing: antialiased; + -webkit-mask-image: linear-gradient(to bottom, transparent, black 12%, black 88%, transparent); + mask-image: linear-gradient(to bottom, transparent, black 12%, black 88%, transparent); +} + +/* Enhanced text during crawling */ +.thinking-box__preview--crawling .thinking-box__preview-text { + opacity: 0.98; + font-size: 0.75rem; + line-height: 1.6; +} + +.thinking-box__expand-prompt { + display: none; +} + +/* Animation for thinking indicator */ +@keyframes pulse { + 0% { opacity: 0.5; } + 50% { opacity: 1; } + 100% { opacity: 0.5; } +} + +/* Fade in animation */ +@keyframes fadeIn { + from { opacity: 0; } + to { opacity: 1; } +} + +/* Smooth scrolling effect */ +@keyframes scrollText { + 0% { transform: translateY(0); } + 100% { transform: translateY(-100%); } +} + +.thinking-box__preview-text { + font-family: 'SFMono-Regular', Consolas, 'Liberation Mono', Menlo, monospace; + font-size: 0.7rem; + color: var(--thinking-text-color); + white-space: pre-wrap; + margin: 0; + display: block; + opacity: 0.85; + line-height: 1.4; + transition: all 0.3s ease; +} + +/* Enhanced masking during crawling */ +.thinking-box__preview--crawling .thinking-box__scroll-content { + -webkit-mask-image: linear-gradient(to bottom, transparent, black 8%, black 92%, transparent); + mask-image: linear-gradient(to bottom, transparent, black 8%, black 92%, transparent); + padding: 0.75rem 1rem; +} + +.thinking-box__icon--active { + animation: pulse 1.5s infinite ease-in-out; + opacity: 1; + color: var(--thinking-title-color); +} + +/* Make sure the preview adjusts immediately when new content arrives */ +.thinking-box__preview--crawling .thinking-box__scroll-content { + transition: height 0.3s ease-out; +} + +/* Enhance visibility of actual content */ +.thinking-box__preview--crawling .thinking-box__preview-text { + opacity: 0.98; + font-size: 0.75rem; + line-height: 1.7; + text-shadow: 0 0 0.1px rgba(0, 0, 0, 0.2); +} + +/* Faster transition for expanding preview */ +.thinking-box__preview { + transition: all 0.25s cubic-bezier(0.16, 1, 0.3, 1); +} + +/* Enhanced hover feedback for collapsed header */ +.thinking-box__container--collapsed .thinking-box__header:hover { + background-color: rgba(0, 0, 0, 0.03); +} + +.dark .thinking-box__container--collapsed .thinking-box__header:hover { + background-color: rgba(255, 255, 255, 0.03); +} \ No newline at end of file diff --git a/web/src/app/chat/message/ThinkingBox.tsx b/web/src/app/chat/message/ThinkingBox.tsx new file mode 100644 index 000000000..69c2497d5 --- /dev/null +++ b/web/src/app/chat/message/ThinkingBox.tsx @@ -0,0 +1,266 @@ +"use client"; + +import React, { useState, useRef, useEffect } from "react"; +import { FiChevronDown, FiChevronUp } from "react-icons/fi"; +import { TbBrain } from "react-icons/tb"; +import ReactMarkdown from "react-markdown"; +import remarkGfm from "remark-gfm"; +import remarkMath from "remark-math"; +import rehypePrism from "rehype-prism-plus"; +import rehypeKatex from "rehype-katex"; +import "katex/dist/katex.min.css"; +import { transformLinkUri } from "@/lib/utils"; +import { handleCopy } from "./copyingUtils"; +import { cleanThinkingContent, hasPartialThinkingTokens, isThinkingComplete } from "../utils/thinkingTokens"; +import "./ThinkingBox.css"; + +interface ThinkingBoxProps { + content: string; + isComplete: boolean; + isStreaming?: boolean; +} + +export const ThinkingBox: React.FC = ({ + content, + isComplete = false, + isStreaming = false, +}) => { + const [isExpanded, setIsExpanded] = useState(false); + const [elapsedTime, setElapsedTime] = useState(0); + + // DOM refs + const markdownRef = useRef(null); + const scrollContainerRef = useRef(null); + + // Timing refs + const startTimeRef = useRef(null); + + // Content tracking refs + const previousContentRef = useRef(""); + const contentLinesRef = useRef([]); + const lastLineCountRef = useRef(0); + + // Token state tracking - separate from streaming state + const hasOpeningTokenRef = useRef(false); + const hasClosingTokenRef = useRef(false); + const thinkingStoppedTimeRef = useRef(null); // Store the exact time when thinking stops + + // Smooth scrolling state + const targetScrollTopRef = useRef(0); + const currentScrollTopRef = useRef(0); + const scrollAnimationRef = useRef(null); + + // Clean the thinking content + const cleanedThinkingContent = cleanThinkingContent(content); + + // Smooth scroll to latest content + const scrollToLatestContent = () => { + if (!scrollContainerRef.current) { + scrollAnimationRef.current = null; + return; + } + + const container = scrollContainerRef.current; + + // Calculate how far to move this frame (15% of remaining distance) + const remainingDistance = targetScrollTopRef.current - currentScrollTopRef.current; + const step = remainingDistance * 0.15; + + // Update position + currentScrollTopRef.current += step; + container.scrollTop = Math.round(currentScrollTopRef.current); + + // Continue animation if we're not close enough yet + if (Math.abs(remainingDistance) > 1) { + scrollAnimationRef.current = requestAnimationFrame(scrollToLatestContent); + } else { + scrollAnimationRef.current = null; + } + }; + + // Detect thinking token states + useEffect(() => { + // For past messages with complete thinking tokens, initialize both as true + if (!hasOpeningTokenRef.current && !hasClosingTokenRef.current && (isComplete || isThinkingComplete(content))) { + hasOpeningTokenRef.current = true; + hasClosingTokenRef.current = true; + + // For past messages, set the elapsed time based on content length as an approximation + const approximateTimeInSeconds = Math.max( + 3, // Minimum 3 seconds + Math.min( + Math.floor(cleanedThinkingContent.length / 30), // ~30 chars per second as a rough estimate + 120 // Cap at 2 minutes + ) + ); + setElapsedTime(approximateTimeInSeconds); + return; + } + + // Check if we have the opening token + if (!hasOpeningTokenRef.current && hasPartialThinkingTokens(content)) { + hasOpeningTokenRef.current = true; + startTimeRef.current = Date.now(); // Only set start time when thinking actually begins + } + + // Check if we have the closing token + if (hasOpeningTokenRef.current && !hasClosingTokenRef.current && isThinkingComplete(content)) { + hasClosingTokenRef.current = true; + thinkingStoppedTimeRef.current = Date.now(); // Record exactly when thinking stopped + + // Immediately update elapsed time to final value + const finalElapsedTime = Math.floor((thinkingStoppedTimeRef.current - startTimeRef.current!) / 1000); + setElapsedTime(finalElapsedTime); + } + }, [content, cleanedThinkingContent, isComplete]); + + // Track content changes and new lines + useEffect(() => { + // Skip animation for past messages that are already complete + if (hasClosingTokenRef.current && (isComplete || isThinkingComplete(content))) { + // For past messages, just store the content lines without animating + const currentLines = cleanedThinkingContent.split('\n').filter(line => line.trim()); + contentLinesRef.current = currentLines; + previousContentRef.current = cleanedThinkingContent; + lastLineCountRef.current = currentLines.length; + return; + } + + // Don't process if thinking is not active + if (!hasOpeningTokenRef.current || hasClosingTokenRef.current) return; + + // Process content changes if we have new content + if (cleanedThinkingContent !== previousContentRef.current) { + const currentLines = cleanedThinkingContent.split('\n').filter(line => line.trim()); + contentLinesRef.current = currentLines; + + // If we have new lines, update scroll position to show them + if (currentLines.length > lastLineCountRef.current && scrollContainerRef.current) { + // Calculate position to show the latest content + const container = scrollContainerRef.current; + targetScrollTopRef.current = container.scrollHeight - container.clientHeight; + + // Start smooth scroll animation if not already running + if (!scrollAnimationRef.current) { + currentScrollTopRef.current = container.scrollTop; + scrollToLatestContent(); + } + } + + lastLineCountRef.current = currentLines.length; + previousContentRef.current = cleanedThinkingContent; + } + }, [cleanedThinkingContent, content, isComplete]); + + // Update elapsed time + useEffect(() => { + // Only count time while thinking is active and we have a start time + if (!hasOpeningTokenRef.current || hasClosingTokenRef.current || startTimeRef.current === null) return; + + const timer = setInterval(() => { + // If thinking has stopped, use the final time + if (thinkingStoppedTimeRef.current) { + setElapsedTime(Math.floor((thinkingStoppedTimeRef.current - startTimeRef.current!) / 1000)); + return; + } + + // Otherwise, use the current time + setElapsedTime(Math.floor((Date.now() - startTimeRef.current!) / 1000)); + }, 1000); + + return () => clearInterval(timer); + }, []); + + // Clean up animations on unmount + useEffect(() => { + return () => { + if (scrollAnimationRef.current) { + cancelAnimationFrame(scrollAnimationRef.current); + scrollAnimationRef.current = null; + } + }; + }, []); + + // Get suitable preview content for collapsed view + const getPeekContent = () => { + const lines = contentLinesRef.current; + + if (lines.length <= 3) return lines.join('\n'); + + // Show a combination of first and last lines with preference to recent content + const maxLines = 7; + const startIndex = Math.max(0, lines.length - maxLines); + const endIndex = lines.length; + + const previewLines = lines.slice(startIndex, endIndex); + return previewLines.join('\n'); + }; + + // Don't render anything if content is empty + if (!cleanedThinkingContent.trim()) return null; + + // Determine if thinking is active (has opening token but not closing token) + const isThinkingActive = hasOpeningTokenRef.current && !hasClosingTokenRef.current; + + // Determine if we should show the preview section + const shouldShowPreview = !isExpanded && cleanedThinkingContent.trim().length > 0; + const hasPreviewContent = getPeekContent().trim().length > 0; + + return ( +
+
+
setIsExpanded(!isExpanded)} + > +
+ + + {isThinkingActive ? "Thinking" : "Thought for"} + + + {elapsedTime}s + +
+
+ {isExpanded ? : } +
+
+ + {isExpanded ? ( +
+
handleCopy(e, markdownRef)} + > + + {cleanedThinkingContent} + +
+
+ ) : ( + shouldShowPreview && hasPreviewContent && ( +
+
+
+
{getPeekContent()}
+
+
+
+ ) + )} +
+
+ ); +}; + +export default ThinkingBox; \ No newline at end of file diff --git a/web/src/app/chat/utils/thinkingTokens.ts b/web/src/app/chat/utils/thinkingTokens.ts new file mode 100644 index 000000000..a840afdd4 --- /dev/null +++ b/web/src/app/chat/utils/thinkingTokens.ts @@ -0,0 +1,122 @@ +/** + * Utility functions to handle thinking tokens in AI messages + */ + +/** + * Check if a message contains complete thinking tokens + */ +export function hasCompletedThinkingTokens(content: string | JSX.Element): boolean { + if (typeof content !== 'string') return false; + + return /[\s\S]*?<\/think>/.test(content) || + /[\s\S]*?<\/thinking>/.test(content); +} + +/** + * Check if a message contains partial thinking tokens (streaming) + */ +export function hasPartialThinkingTokens(content: string | JSX.Element): boolean { + if (typeof content !== 'string') return false; + + // Count opening and closing tags + const thinkOpenCount = (content.match(//g) || []).length; + const thinkCloseCount = (content.match(/<\/think>/g) || []).length; + const thinkingOpenCount = (content.match(//g) || []).length; + const thinkingCloseCount = (content.match(/<\/thinking>/g) || []).length; + + // Return true if we have any unmatched tags + return thinkOpenCount > thinkCloseCount || thinkingOpenCount > thinkingCloseCount; +} + +/** + * Extract thinking content from a message + */ +export function extractThinkingContent(content: string | JSX.Element): string { + if (typeof content !== 'string') return ''; + + // For complete thinking tags, extract all sections + const completeThinkRegex = /[\s\S]*?<\/think>/g; + const completeThinkingRegex = /[\s\S]*?<\/thinking>/g; + + const thinkMatches = Array.from(content.matchAll(completeThinkRegex)); + const thinkingMatches = Array.from(content.matchAll(completeThinkingRegex)); + + if (thinkMatches.length > 0 || thinkingMatches.length > 0) { + // Combine all matches and sort by their position in the original string + const allMatches = [...thinkMatches, ...thinkingMatches] + .sort((a, b) => (a.index || 0) - (b.index || 0)); + return allMatches.map(match => match[0]).join('\n'); + } + + // For partial thinking tokens (streaming) + if (hasPartialThinkingTokens(content)) { + // Find the last opening tag position + const lastThinkPos = content.lastIndexOf(''); + const lastThinkingPos = content.lastIndexOf(''); + + // Use the position of whichever tag appears last + const startPos = Math.max(lastThinkPos, lastThinkingPos); + + if (startPos >= 0) { + // Extract everything from the last opening tag to the end + return content.substring(startPos); + } + } + + return ''; +} + +/** + * Check if thinking tokens are complete + */ +export function isThinkingComplete(content: string | JSX.Element): boolean { + if (typeof content !== 'string') return false; + + // Count opening and closing tags + const thinkOpenCount = (content.match(//g) || []).length; + const thinkCloseCount = (content.match(/<\/think>/g) || []).length; + const thinkingOpenCount = (content.match(//g) || []).length; + const thinkingCloseCount = (content.match(/<\/thinking>/g) || []).length; + + // All tags must be matched + return thinkOpenCount === thinkCloseCount && thinkingOpenCount === thinkingCloseCount; +} + +/** + * Remove thinking tokens from content + */ +export function removeThinkingTokens(content: string | JSX.Element): string | JSX.Element { + if (typeof content !== 'string') return content; + + // First, remove complete thinking blocks + let result = content.replace(/[\s\S]*?<\/think>/g, ''); + result = result.replace(/[\s\S]*?<\/thinking>/g, ''); + + // Handle case where there's an incomplete thinking token at the end + if (hasPartialThinkingTokens(result)) { + // Find the last opening tag position + const lastThinkPos = result.lastIndexOf(''); + const lastThinkingPos = result.lastIndexOf(''); + + // Use the position of whichever tag appears last + const startPos = Math.max(lastThinkPos, lastThinkingPos); + + if (startPos >= 0) { + // Only keep content before the last opening tag + result = result.substring(0, startPos); + } + } + + return result.trim(); +} + +// /** +// * Clean the extracted thinking content (remove tags) +// */ +export function cleanThinkingContent(thinkingContent: string): string { + if (!thinkingContent) return ''; + + return thinkingContent + .replace(/|<\/think>||<\/thinking>/g, '') + .trim(); +} \ No newline at end of file