diff --git a/backend/Dockerfile b/backend/Dockerfile index 2f8de6e79960..d77b4e8737e0 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -73,6 +73,7 @@ RUN apt-get update && \ rm -rf /var/lib/apt/lists/* && \ rm -f /usr/local/lib/python3.11/site-packages/tornado/test/test.key + # Pre-downloading models for setups with limited egress RUN python -c "from tokenizers import Tokenizer; \ Tokenizer.from_pretrained('nomic-ai/nomic-embed-text-v1')" diff --git a/backend/danswer/file_store/file_store.py b/backend/danswer/file_store/file_store.py index 9bc4c41d361e..e57b9222a1b9 100644 --- a/backend/danswer/file_store/file_store.py +++ b/backend/danswer/file_store/file_store.py @@ -59,6 +59,12 @@ class FileStore(ABC): Contents of the file and metadata dict """ + @abstractmethod + def read_file_record(self, file_name: str) -> PGFileStore: + """ + Read the file record by the name + """ + @abstractmethod def delete_file(self, file_name: str) -> None: """ diff --git a/backend/danswer/server/query_and_chat/chat_backend.py b/backend/danswer/server/query_and_chat/chat_backend.py index 954728c32a34..7b1413e1350c 100644 --- a/backend/danswer/server/query_and_chat/chat_backend.py +++ b/backend/danswer/server/query_and_chat/chat_backend.py @@ -707,14 +707,18 @@ def upload_files_for_chat( } -@router.get("/file/{file_id}") +@router.get("/file/{file_id:path}") def fetch_chat_file( file_id: str, db_session: Session = Depends(get_session), _: User | None = Depends(current_user), ) -> Response: file_store = get_default_file_store(db_session) + file_record = file_store.read_file_record(file_id) + if not file_record: + raise HTTPException(status_code=404, detail="File not found") + + media_type = file_record.file_type file_io = file_store.read_file(file_id, mode="b") - # NOTE: specifying "image/jpeg" here, but it still works for pngs - # TODO: do this properly - return Response(content=file_io.read(), media_type="image/jpeg") + + return StreamingResponse(file_io, media_type=media_type) diff --git a/backend/shared_configs/configs.py b/backend/shared_configs/configs.py index 2f558629defb..21448ccd66d9 100644 --- a/backend/shared_configs/configs.py +++ b/backend/shared_configs/configs.py @@ -163,47 +163,92 @@ SUPPORTED_EMBEDDING_MODELS = [ dim=1024, index_name="danswer_chunk_cohere_embed_english_v3_0", ), + SupportedEmbeddingModel( + name="cohere/embed-english-v3.0", + dim=1024, + index_name="danswer_chunk_embed_english_v3_0", + ), SupportedEmbeddingModel( name="cohere/embed-english-light-v3.0", dim=384, index_name="danswer_chunk_cohere_embed_english_light_v3_0", ), + SupportedEmbeddingModel( + name="cohere/embed-english-light-v3.0", + dim=384, + index_name="danswer_chunk_embed_english_light_v3_0", + ), SupportedEmbeddingModel( name="openai/text-embedding-3-large", dim=3072, index_name="danswer_chunk_openai_text_embedding_3_large", ), + SupportedEmbeddingModel( + name="openai/text-embedding-3-large", + dim=3072, + index_name="danswer_chunk_text_embedding_3_large", + ), SupportedEmbeddingModel( name="openai/text-embedding-3-small", dim=1536, index_name="danswer_chunk_openai_text_embedding_3_small", ), + SupportedEmbeddingModel( + name="openai/text-embedding-3-small", + dim=1536, + index_name="danswer_chunk_text_embedding_3_small", + ), SupportedEmbeddingModel( name="google/text-embedding-004", dim=768, index_name="danswer_chunk_google_text_embedding_004", ), + SupportedEmbeddingModel( + name="google/text-embedding-004", + dim=768, + index_name="danswer_chunk_text_embedding_004", + ), SupportedEmbeddingModel( name="google/textembedding-gecko@003", dim=768, index_name="danswer_chunk_google_textembedding_gecko_003", ), + SupportedEmbeddingModel( + name="google/textembedding-gecko@003", + dim=768, + index_name="danswer_chunk_textembedding_gecko_003", + ), SupportedEmbeddingModel( name="voyage/voyage-large-2-instruct", dim=1024, index_name="danswer_chunk_voyage_large_2_instruct", ), + SupportedEmbeddingModel( + name="voyage/voyage-large-2-instruct", + dim=1024, + index_name="danswer_chunk_large_2_instruct", + ), SupportedEmbeddingModel( name="voyage/voyage-light-2-instruct", dim=384, index_name="danswer_chunk_voyage_light_2_instruct", ), + SupportedEmbeddingModel( + name="voyage/voyage-light-2-instruct", + dim=384, + index_name="danswer_chunk_light_2_instruct", + ), # Self-hosted models SupportedEmbeddingModel( name="nomic-ai/nomic-embed-text-v1", dim=768, index_name="danswer_chunk_nomic_ai_nomic_embed_text_v1", ), + SupportedEmbeddingModel( + name="nomic-ai/nomic-embed-text-v1", + dim=768, + index_name="danswer_chunk_nomic_embed_text_v1", + ), SupportedEmbeddingModel( name="intfloat/e5-base-v2", dim=768, diff --git a/backend/tests/integration/common_utils/managers/file.py b/backend/tests/integration/common_utils/managers/file.py new file mode 100644 index 000000000000..461874f7ec5c --- /dev/null +++ b/backend/tests/integration/common_utils/managers/file.py @@ -0,0 +1,62 @@ +import mimetypes +from typing import cast +from typing import IO +from typing import List +from typing import Tuple + +import requests + +from danswer.file_store.models import FileDescriptor +from tests.integration.common_utils.constants import API_SERVER_URL +from tests.integration.common_utils.constants import GENERAL_HEADERS +from tests.integration.common_utils.test_models import DATestUser + + +class FileManager: + @staticmethod + def upload_files( + files: List[Tuple[str, IO]], + user_performing_action: DATestUser | None = None, + ) -> Tuple[List[FileDescriptor], str]: + headers = ( + user_performing_action.headers + if user_performing_action + else GENERAL_HEADERS + ) + headers.pop("Content-Type", None) + + files_param = [] + for filename, file_obj in files: + mime_type, _ = mimetypes.guess_type(filename) + if mime_type is None: + mime_type = "application/octet-stream" + files_param.append(("files", (filename, file_obj, mime_type))) + + response = requests.post( + f"{API_SERVER_URL}/chat/file", + files=files_param, + headers=headers, + ) + + if not response.ok: + return ( + cast(List[FileDescriptor], []), + f"Failed to upload files - {response.json().get('detail', 'Unknown error')}", + ) + + response_json = response.json() + return response_json.get("files", cast(List[FileDescriptor], [])), "" + + @staticmethod + def fetch_uploaded_file( + file_id: str, + user_performing_action: DATestUser | None = None, + ) -> bytes: + response = requests.get( + f"{API_SERVER_URL}/chat/file/{file_id}", + headers=user_performing_action.headers + if user_performing_action + else GENERAL_HEADERS, + ) + response.raise_for_status() + return response.content diff --git a/node_modules/.package-lock.json b/node_modules/.package-lock.json new file mode 100644 index 000000000000..b3aaf2c4dece --- /dev/null +++ b/node_modules/.package-lock.json @@ -0,0 +1,6 @@ +{ + "name": "danswer", + "lockfileVersion": 3, + "requires": true, + "packages": {} +} diff --git a/web/src/app/chat/ChatPage.tsx b/web/src/app/chat/ChatPage.tsx index 634dc0624b83..11a4dd0f2653 100644 --- a/web/src/app/chat/ChatPage.tsx +++ b/web/src/app/chat/ChatPage.tsx @@ -106,6 +106,7 @@ import { NoAssistantModal } from "@/components/modals/NoAssistantModal"; import { useAssistants } from "@/components/context/AssistantsContext"; import { Separator } from "@/components/ui/separator"; import AssistantBanner from "../../components/assistants/AssistantBanner"; +import TextView from "@/components/chat_search/TextView"; import AssistantSelector from "@/components/chat_search/AssistantSelector"; import { Modal } from "@/components/Modal"; @@ -279,6 +280,9 @@ export function ChatPage({ const [alternativeAssistant, setAlternativeAssistant] = useState(null); + const [presentingDocument, setPresentingDocument] = + useState(null); + const { visibleAssistants: assistants, recentAssistants, @@ -490,6 +494,7 @@ export function ChatPage({ clientScrollToBottom(true); } } + setIsFetchingChatMessages(false); // if this is a seeded chat, then kick off the AI message generation @@ -1649,7 +1654,6 @@ export function ChatPage({ scrollDist, endDivRef, debounceNumber, - waitForScrollRef, mobile: settings?.isMobile, enableAutoScroll: autoScrollEnabled, }); @@ -1946,6 +1950,7 @@ export function ChatPage({ {popup} + {currentFeedback && ( )} + {presentingDocument && ( + setPresentingDocument(null)} + /> + )} + {stackTraceModalContent && ( setStackTraceModalContent(null)} @@ -2127,6 +2140,7 @@ export function ChatPage({ `} > void; document: DanswerDocument; modal?: boolean; isSelected: boolean; handleSelect: (documentId: string) => void; tokenLimitReached: boolean; + setPresentingDocument: Dispatch>; } export function DocumentMetadataBlock({ @@ -55,11 +58,13 @@ export function DocumentMetadataBlock({ } export function ChatDocumentDisplay({ + closeSidebar, document, modal, isSelected, handleSelect, tokenLimitReached, + setPresentingDocument, }: DocumentDisplayProps) { const isInternet = document.is_internet; @@ -67,6 +72,18 @@ export function ChatDocumentDisplay({ return null; } + const handleViewFile = async () => { + if (document.link) { + window.open(document.link, "_blank"); + } else { + closeSidebar(); + + setTimeout(async () => { + setPresentingDocument(document); + }, 100); + } + }; + return ( ); diff --git a/web/src/app/chat/documentSidebar/ChatFilters.tsx b/web/src/app/chat/documentSidebar/ChatFilters.tsx index 616595abfc12..b6801b3489b6 100644 --- a/web/src/app/chat/documentSidebar/ChatFilters.tsx +++ b/web/src/app/chat/documentSidebar/ChatFilters.tsx @@ -3,7 +3,14 @@ import { ChatDocumentDisplay } from "./ChatDocumentDisplay"; import { usePopup } from "@/components/admin/connectors/Popup"; import { removeDuplicateDocs } from "@/lib/documentUtils"; import { Message } from "../interfaces"; -import { ForwardedRef, forwardRef, useEffect, useState } from "react"; +import { + Dispatch, + ForwardedRef, + forwardRef, + SetStateAction, + useEffect, + useState, +} from "react"; import { FilterManager } from "@/lib/hooks"; import { CCPairBasicInfo, DocumentSet, Tag } from "@/lib/types"; import { SourceSelector } from "../shared_chat_search/SearchFilters"; @@ -25,6 +32,7 @@ interface ChatFiltersProps { tags: Tag[]; documentSets: DocumentSet[]; showFilters: boolean; + setPresentingDocument: Dispatch>; } export const ChatFilters = forwardRef( @@ -43,6 +51,7 @@ export const ChatFilters = forwardRef( isOpen, ccPairs, tags, + setPresentingDocument, documentSets, showFilters, }, @@ -134,6 +143,8 @@ export const ChatFilters = forwardRef( }`} > ; - waitForScrollRef: RefObject; scrollDist: MutableRefObject; endDivRef: RefObject; debounceNumber: number; diff --git a/web/src/app/chat/message/MemoizedTextComponents.tsx b/web/src/app/chat/message/MemoizedTextComponents.tsx index 7c8144e8cedb..779b6ce8461e 100644 --- a/web/src/app/chat/message/MemoizedTextComponents.tsx +++ b/web/src/app/chat/message/MemoizedTextComponents.tsx @@ -6,45 +6,53 @@ import { ValidSources } from "@/lib/types"; import React, { memo } from "react"; import isEqual from "lodash/isEqual"; -export const MemoizedAnchor = memo(({ docs, children }: any) => { - console.log(children); - const value = children?.toString(); - if (value?.startsWith("[") && value?.endsWith("]")) { - const match = value.match(/\[(\d+)\]/); - if (match) { - const index = parseInt(match[1], 10) - 1; - const associatedDoc = docs && docs[index]; +export const MemoizedAnchor = memo( + ({ docs, updatePresentingDocument, children }: any) => { + const value = children?.toString(); + if (value?.startsWith("[") && value?.endsWith("]")) { + const match = value.match(/\[(\d+)\]/); + if (match) { + const index = parseInt(match[1], 10) - 1; + const associatedDoc = docs && docs[index]; - const url = associatedDoc?.link - ? new URL(associatedDoc.link).origin + "/favicon.ico" - : ""; + const url = associatedDoc?.link + ? new URL(associatedDoc.link).origin + "/favicon.ico" + : ""; - const getIcon = (sourceType: ValidSources, link: string) => { - return getSourceMetadata(sourceType).icon({ size: 18 }); - }; + const getIcon = (sourceType: ValidSources, link: string) => { + return getSourceMetadata(sourceType).icon({ size: 18 }); + }; - const icon = - associatedDoc?.source_type === "web" ? ( - - ) : ( - getIcon( - associatedDoc?.source_type || "web", - associatedDoc?.link || "" - ) + const icon = + associatedDoc?.source_type === "web" ? ( + + ) : ( + getIcon( + associatedDoc?.source_type || "web", + associatedDoc?.link || "" + ) + ); + + return ( + + {children} + ); - - return ( - - {children} - - ); + } } + return ( + + {children} + + ); } - return {children}; -}); +); export const MemoizedLink = memo((props: any) => { - const { node, document, ...rest } = props; + const { node, document, updatePresentingDocument, ...rest } = props; const value = rest.children; if (value?.toString().startsWith("*")) { @@ -58,22 +66,21 @@ export const MemoizedLink = memo((props: any) => { icon={document?.icon as React.ReactNode} link={rest?.href} document={document as LoadedDanswerDocument} + updatePresentingDocument={updatePresentingDocument} > {rest.children} ); - } else { - return ( - - rest.href ? window.open(rest.href, "_blank") : undefined - } - className="cursor-pointer text-link hover:text-link-hover" - > - {rest.children} - - ); } + + return ( + rest.href && window.open(rest.href, "_blank")} + className="cursor-pointer text-link hover:text-link-hover" + > + {rest.children} + + ); }); export const MemoizedParagraph = memo( diff --git a/web/src/app/chat/message/Messages.tsx b/web/src/app/chat/message/Messages.tsx index 0aa9ba82683b..103c6a5644fc 100644 --- a/web/src/app/chat/message/Messages.tsx +++ b/web/src/app/chat/message/Messages.tsx @@ -10,6 +10,7 @@ import { import { FeedbackType } from "../types"; import React, { memo, + ReactNode, useCallback, useContext, useEffect, @@ -21,6 +22,7 @@ import ReactMarkdown from "react-markdown"; import { DanswerDocument, FilteredDanswerDocument, + LoadedDanswerDocument, } from "@/lib/search/interfaces"; import { SearchSummary } from "./SearchSummary"; @@ -188,6 +190,7 @@ export const AIMessage = ({ currentPersona, otherMessagesCanSwitchTo, onMessageSelection, + setPresentingDocument, index, }: { index?: number; @@ -218,6 +221,7 @@ export const AIMessage = ({ retrievalDisabled?: boolean; overriddenModel?: string; regenerate?: (modelOverRide: LlmOverride) => Promise; + setPresentingDocument?: (document: DanswerDocument) => void; }) => { const toolCallGenerating = toolCall && !toolCall.tool_result; const processContent = (content: string | JSX.Element) => { @@ -308,7 +312,12 @@ export const AIMessage = ({ const anchorCallback = useCallback( (props: any) => ( - {props.children} + + {props.children} + ), [docs] ); diff --git a/web/src/app/chat/shared/[chatId]/SharedChatDisplay.tsx b/web/src/app/chat/shared/[chatId]/SharedChatDisplay.tsx index f0acaa0ace32..7f302904c648 100644 --- a/web/src/app/chat/shared/[chatId]/SharedChatDisplay.tsx +++ b/web/src/app/chat/shared/[chatId]/SharedChatDisplay.tsx @@ -17,6 +17,8 @@ import { SettingsContext } from "@/components/settings/SettingsProvider"; import { DanswerInitializingLoader } from "@/components/DanswerInitializingLoader"; import { Persona } from "@/app/admin/assistants/interfaces"; import { Button } from "@/components/ui/button"; +import { DanswerDocument } from "@/lib/search/interfaces"; +import TextView from "@/components/chat_search/TextView"; function BackToDanswerButton() { const router = useRouter(); @@ -41,6 +43,9 @@ export function SharedChatDisplay({ persona: Persona; }) { const [isReady, setIsReady] = useState(false); + const [presentingDocument, setPresentingDocument] = + useState(null); + useEffect(() => { Prism.highlightAll(); setIsReady(true); @@ -63,61 +68,70 @@ export function SharedChatDisplay({ ); return ( -
-
-
-
-
-

- {chatSession.description || - `Chat ${chatSession.chat_session_id}`} -

-

- {humanReadableFormat(chatSession.time_created)} -

+ <> + {presentingDocument && ( + setPresentingDocument(null)} + /> + )} +
+
+
+
+
+

+ {chatSession.description || + `Chat ${chatSession.chat_session_id}`} +

+

+ {humanReadableFormat(chatSession.time_created)} +

- -
- {isReady ? ( -
- {messages.map((message) => { - if (message.type === "user") { - return ( - - ); - } else { - return ( - - ); - } - })} +
- ) : ( -
-
- + {isReady ? ( +
+ {messages.map((message) => { + if (message.type === "user") { + return ( + + ); + } else { + return ( + + ); + } + })}
-
- )} + ) : ( +
+
+ +
+
+ )} +
-
- -
+ +
+ ); } diff --git a/web/src/components/chat_search/TextView.tsx b/web/src/components/chat_search/TextView.tsx new file mode 100644 index 000000000000..d4aec38a4c88 --- /dev/null +++ b/web/src/components/chat_search/TextView.tsx @@ -0,0 +1,173 @@ +"use client"; + +import { useState, useEffect, useCallback } from "react"; +import { Button } from "@/components/ui/button"; +import { + Dialog, + DialogContent, + DialogHeader, + DialogTitle, +} from "@/components/ui/dialog"; +import { Download, XIcon, ZoomIn, ZoomOut } from "lucide-react"; +import { DanswerDocument } from "@/lib/search/interfaces"; +import { MinimalMarkdown } from "./MinimalMarkdown"; + +interface TextViewProps { + presentingDocument: DanswerDocument; + onClose: () => void; +} +export default function TextView({ + presentingDocument, + onClose, +}: TextViewProps) { + const [zoom, setZoom] = useState(100); + const [fileContent, setFileContent] = useState(""); + const [fileUrl, setFileUrl] = useState(""); + const [fileName, setFileName] = useState(""); + const [isLoading, setIsLoading] = useState(true); + const [fileType, setFileType] = useState("application/octet-stream"); + + const isMarkdownFormat = (mimeType: string): boolean => { + const markdownFormats = [ + "text/markdown", + "text/x-markdown", + "text/plain", + "text/x-rst", + "text/x-org", + ]; + return markdownFormats.some((format) => mimeType.startsWith(format)); + }; + + const isSupportedIframeFormat = (mimeType: string): boolean => { + const supportedFormats = [ + "application/pdf", + "image/png", + "image/jpeg", + "image/gif", + "image/svg+xml", + ]; + return supportedFormats.some((format) => mimeType.startsWith(format)); + }; + + const fetchFile = useCallback(async () => { + setIsLoading(true); + const fileId = presentingDocument.document_id.split("__")[1]; + try { + const response = await fetch( + `/api/chat/file/${encodeURIComponent(fileId)}`, + { + method: "GET", + } + ); + const blob = await response.blob(); + const url = window.URL.createObjectURL(blob); + setFileUrl(url); + setFileName(presentingDocument.semantic_identifier || "document"); + const contentType = + response.headers.get("Content-Type") || "application/octet-stream"; + setFileType(contentType); + + if (isMarkdownFormat(blob.type)) { + const text = await blob.text(); + setFileContent(text); + } + } catch (error) { + console.error("Error fetching file:", error); + } finally { + setTimeout(() => { + setIsLoading(false); + }, 1000); + } + }, [presentingDocument]); + + useEffect(() => { + fetchFile(); + }, [fetchFile]); + + const handleDownload = () => { + const link = document.createElement("a"); + link.href = fileUrl; + link.download = fileName; + document.body.appendChild(link); + link.click(); + document.body.removeChild(link); + }; + + const handleZoomIn = () => setZoom((prev) => Math.min(prev + 25, 200)); + const handleZoomOut = () => setZoom((prev) => Math.max(prev - 25, 100)); + + return ( + + + + + {fileName} + +
+ + {zoom}% + + + +
+
+
+
+ {isLoading ? ( +
+
+

+ Loading document... +

+
+ ) : ( +
+ {isSupportedIframeFormat(fileType) ? ( +