Bugfix/chat images 2 (#4630)

* don't hardcode -1
* extra spaces
* fix binary data in blurb
* add note to binary handling

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
2025-09-14 02:29:15 +02:00 · 2025-04-29 18:29:10 -07:00
parent dd242c9926
commit 94de23fe87
6 changed files with 54 additions and 13 deletions
--- a/backend/onyx/chat/process_message.py
+++ b/backend/onyx/chat/process_message.py
@@ -96,9 +96,9 @@ from onyx.document_index.factory import get_default_document_index
 from onyx.file_store.models import ChatFileType
 from onyx.file_store.models import FileDescriptor
 from onyx.file_store.models import InMemoryChatFile
 from onyx.file_store.utils import get_user_files
 from onyx.file_store.utils import load_all_chat_files
-from onyx.file_store.utils import load_all_user_file_files
+from onyx.file_store.utils import load_in_memory_chat_files
 from onyx.file_store.utils import load_all_user_files
 from onyx.file_store.utils import save_files
 from onyx.llm.exceptions import GenAIDisabledException
 from onyx.llm.factory import get_llms_for_persona
@@ -849,12 +849,12 @@ def stream_chat_message_objects(
        user_file_files: list[UserFile] | None = None
        if user_file_ids or user_folder_ids:
            # Load user files
-            user_files = load_all_user_files(
+            user_files = load_in_memory_chat_files(
                user_file_ids or [],
                user_folder_ids or [],
                db_session,
            )
-            user_file_files = load_all_user_file_files(
+            user_file_files = get_user_files(
                user_file_ids or [],
                user_folder_ids or [],
                db_session,
--- a/backend/onyx/chat/prune_and_merge.py
+++ b/backend/onyx/chat/prune_and_merge.py
@@ -155,6 +155,7 @@ def _apply_pruning(
    section_idx_token_count: dict[int, int] = {}
    ind = 0
    final_section_ind = None
    total_tokens = 0
    for ind, section in enumerate(sections):
--- a/backend/onyx/db/chat.py
+++ b/backend/onyx/db/chat.py
@@ -870,7 +870,10 @@ def create_search_doc_from_user_file(
            content_sample = associated_chat_file.content[:100]
            # Remove null bytes which can cause SQL errors
            content_sample = content_sample.replace(b"\x00", b"")
-            blurb = content_sample.decode("utf-8", errors="replace")
+
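            # NOTE(rkuo): this used to be errors="replace" instead of "strict", but
            # "replace" never raises, so the binary-file fallback in the except
            # block below was never reached.
            # NOTE(review): the 100-byte slice above can cut a multi-byte UTF-8
            # character in half, in which case strict decoding also raises for
            # otherwise valid text -- confirm that is acceptable.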
            blurb = content_sample.decode("utf-8", errors="strict")
        except Exception:
            # If the sample is not valid UTF-8, provide a generic description
            blurb = f"[Binary file: {db_user_file.name}]"
--- a/backend/onyx/file_store/utils.py
+++ b/backend/onyx/file_store/utils.py
@@ -157,16 +157,32 @@ def load_user_file(file_id: int, db_session: Session) -> InMemoryChatFile:
        )
-def load_all_user_files(
+def load_in_memory_chat_files(
    user_file_ids: list[int],
    user_folder_ids: list[int],
    db_session: Session,
 ) -> list[InMemoryChatFile]:
    """
    Loads the actual content of user files specified by individual IDs and those
    within specified folder IDs into memory.
    Args:
        user_file_ids: A list of specific UserFile IDs to load.
        user_folder_ids: A list of UserFolder IDs. All UserFiles within these folders will be loaded.
        db_session: The SQLAlchemy database session.
    Returns:
        A list of InMemoryChatFile objects, each containing the file content (as bytes),
        file ID, file type, and filename. Prioritizes loading plaintext versions if available.
    """
    # Use parallel execution to load files concurrently
    return cast(
        list[InMemoryChatFile],
        run_functions_tuples_in_parallel(
            # 1. Load files specified by individual IDs
            [(load_user_file, (file_id, db_session)) for file_id in user_file_ids]
        )
        # 2. Load all files within specified folders
        + [
            file
            for folder_id in user_folder_ids
@@ -175,24 +191,47 @@ def load_all_user_files(
    )
-def load_all_user_file_files(
+def get_user_files(
    user_file_ids: list[int],
    user_folder_ids: list[int],
    db_session: Session,
 ) -> list[UserFile]:
    """
    Fetches UserFile database records based on provided file and folder IDs.
    Args:
        user_file_ids: A list of specific UserFile IDs to fetch.
        user_folder_ids: A list of UserFolder IDs. All UserFiles within these folders will be fetched.
        db_session: The SQLAlchemy database session.
    Returns:
        A list containing UserFile SQLAlchemy model objects corresponding to the
        specified file IDs and all files within the specified folder IDs.
        It does NOT return the actual file content.
    """
    user_files: list[UserFile] = []
    # 1. Fetch UserFile records for specific file IDs
    for user_file_id in user_file_ids:
        # Query the database for a UserFile with the matching ID
        user_file = (
            db_session.query(UserFile).filter(UserFile.id == user_file_id).first()
        )
        # If found, add it to the list
        if user_file is not None:
            user_files.append(user_file)
    # 2. Fetch UserFile records for all files within specified folder IDs
    for user_folder_id in user_folder_ids:
        # Query the database for all UserFiles belonging to the current folder ID
        # and extend the list with the results
        user_files.extend(
            db_session.query(UserFile)
            .filter(UserFile.folder_id == user_folder_id)
            .all()
        )
    # 3. Return the combined list of UserFile database objects
    return user_files
--- a/backend/onyx/server/user_documents/api.py
+++ b/backend/onyx/server/user_documents/api.py
@@ -42,6 +42,7 @@ from onyx.file_processing.html_utils import web_html_cleanup
 from onyx.server.documents.connector import trigger_indexing_for_cc_pair
 from onyx.server.documents.models import ConnectorBase
 from onyx.server.documents.models import CredentialBase
 from onyx.server.query_and_chat.chat_backend import RECENT_DOCS_FOLDER_ID
 from onyx.server.user_documents.models import MessageResponse
 from onyx.server.user_documents.models import UserFileSnapshot
 from onyx.server.user_documents.models import UserFolderSnapshot
@@ -141,9 +142,6 @@ def get_folder(
    return folder_snapshot
 RECENT_DOCS_FOLDER_ID = -1
@router.post("/user/file/upload")
 def upload_user_files(
    files: List[UploadFile] = File(...),
@@ -157,7 +155,7 @@ def upload_user_files(
    try:
        # Use our consolidated function that handles indexing properly
        user_files = upload_files_to_user_files_with_indexing(
-            files, folder_id or -1, user, db_session
+            files, folder_id or RECENT_DOCS_FOLDER_ID, user, db_session
        )
        return [UserFileSnapshot.from_model(user_file) for user_file in user_files]
--- a/web/src/components/chat/sources/SourceCard.tsx
+++ b/web/src/components/chat/sources/SourceCard.tsx
@@ -125,7 +125,7 @@ export function SeeMoreBlock({
    <button
      onClick={toggleDocumentSelection}
      className={`w-full ${fullWidth ? "w-full" : "max-w-[200px]"}
-        h-[80px] p-3 border border-[1.5px] border-new-background-light   text-left bg-accent-background hover:bg-accent-background-hovered dark:bg-accent-background-hovered dark:hover:bg-neutral-700/80 cursor-pointer rounded-lg flex flex-col justify-between overflow-hidden`}
+        h-[80px] p-3 border border-[1.5px] border-new-background-light text-left bg-accent-background hover:bg-accent-background-hovered dark:bg-accent-background-hovered dark:hover:bg-neutral-700/80 cursor-pointer rounded-lg flex flex-col justify-between overflow-hidden`}
    >
      <div className="flex items-center gap-1">
        {docs.length > 2 && iconsToRender.map((icon, index) => icon)}
@@ -204,7 +204,7 @@ export function FilesSeeMoreBlock({
    <button
      onClick={toggleDocumentSelection}
      className={`w-full ${fullWidth ? "w-full" : "max-w-[200px]"}
-        h-[80px] p-3 border border-[1.5px] border-new-background-light   text-left bg-accent-background hover:bg-accent-background-hovered dark:bg-accent-background-hovered dark:hover:bg-neutral-700/80 cursor-pointer rounded-lg flex flex-col justify-between overflow-hidden`}
+        h-[80px] p-3 border border-[1.5px] border-new-background-light text-left bg-accent-background hover:bg-accent-background-hovered dark:bg-accent-background-hovered dark:hover:bg-neutral-700/80 cursor-pointer rounded-lg flex flex-col justify-between overflow-hidden`}
    >
      <div className="flex items-center gap-1">
        {files.length > 2 && iconsToRender.map((icon, index) => icon)}