mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-09-14 02:29:15 +02:00
Bugfix/chat images 2 (#4630)
* don't hardcode -1
* extra spaces
* fix binary data in blurb
* add note to binary handling

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
This commit is contained in:
@@ -96,9 +96,9 @@ from onyx.document_index.factory import get_default_document_index
|
|||||||
from onyx.file_store.models import ChatFileType
|
from onyx.file_store.models import ChatFileType
|
||||||
from onyx.file_store.models import FileDescriptor
|
from onyx.file_store.models import FileDescriptor
|
||||||
from onyx.file_store.models import InMemoryChatFile
|
from onyx.file_store.models import InMemoryChatFile
|
||||||
|
from onyx.file_store.utils import get_user_files
|
||||||
from onyx.file_store.utils import load_all_chat_files
|
from onyx.file_store.utils import load_all_chat_files
|
||||||
from onyx.file_store.utils import load_all_user_file_files
|
from onyx.file_store.utils import load_in_memory_chat_files
|
||||||
from onyx.file_store.utils import load_all_user_files
|
|
||||||
from onyx.file_store.utils import save_files
|
from onyx.file_store.utils import save_files
|
||||||
from onyx.llm.exceptions import GenAIDisabledException
|
from onyx.llm.exceptions import GenAIDisabledException
|
||||||
from onyx.llm.factory import get_llms_for_persona
|
from onyx.llm.factory import get_llms_for_persona
|
||||||
@@ -849,12 +849,12 @@ def stream_chat_message_objects(
|
|||||||
user_file_files: list[UserFile] | None = None
|
user_file_files: list[UserFile] | None = None
|
||||||
if user_file_ids or user_folder_ids:
|
if user_file_ids or user_folder_ids:
|
||||||
# Load user files
|
# Load user files
|
||||||
user_files = load_all_user_files(
|
user_files = load_in_memory_chat_files(
|
||||||
user_file_ids or [],
|
user_file_ids or [],
|
||||||
user_folder_ids or [],
|
user_folder_ids or [],
|
||||||
db_session,
|
db_session,
|
||||||
)
|
)
|
||||||
user_file_files = load_all_user_file_files(
|
user_file_files = get_user_files(
|
||||||
user_file_ids or [],
|
user_file_ids or [],
|
||||||
user_folder_ids or [],
|
user_folder_ids or [],
|
||||||
db_session,
|
db_session,
|
||||||
|
@@ -155,6 +155,7 @@ def _apply_pruning(
|
|||||||
|
|
||||||
section_idx_token_count: dict[int, int] = {}
|
section_idx_token_count: dict[int, int] = {}
|
||||||
|
|
||||||
|
ind = 0
|
||||||
final_section_ind = None
|
final_section_ind = None
|
||||||
total_tokens = 0
|
total_tokens = 0
|
||||||
for ind, section in enumerate(sections):
|
for ind, section in enumerate(sections):
|
||||||
|
@@ -870,7 +870,10 @@ def create_search_doc_from_user_file(
|
|||||||
content_sample = associated_chat_file.content[:100]
|
content_sample = associated_chat_file.content[:100]
|
||||||
# Remove null bytes which can cause SQL errors
|
# Remove null bytes which can cause SQL errors
|
||||||
content_sample = content_sample.replace(b"\x00", b"")
|
content_sample = content_sample.replace(b"\x00", b"")
|
||||||
blurb = content_sample.decode("utf-8", errors="replace")
|
|
||||||
|
# NOTE(rkuo): this used to be "replace" instead of strict, but
|
||||||
|
# that would bypass the binary handling below
|
||||||
|
blurb = content_sample.decode("utf-8", errors="strict")
|
||||||
except Exception:
|
except Exception:
|
||||||
# If decoding fails completely, provide a generic description
|
# If decoding fails completely, provide a generic description
|
||||||
blurb = f"[Binary file: {db_user_file.name}]"
|
blurb = f"[Binary file: {db_user_file.name}]"
|
||||||
|
@@ -157,16 +157,32 @@ def load_user_file(file_id: int, db_session: Session) -> InMemoryChatFile:
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def load_all_user_files(
|
def load_in_memory_chat_files(
|
||||||
user_file_ids: list[int],
|
user_file_ids: list[int],
|
||||||
user_folder_ids: list[int],
|
user_folder_ids: list[int],
|
||||||
db_session: Session,
|
db_session: Session,
|
||||||
) -> list[InMemoryChatFile]:
|
) -> list[InMemoryChatFile]:
|
||||||
|
"""
|
||||||
|
Loads the actual content of user files specified by individual IDs and those
|
||||||
|
within specified folder IDs into memory.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
user_file_ids: A list of specific UserFile IDs to load.
|
||||||
|
user_folder_ids: A list of UserFolder IDs. All UserFiles within these folders will be loaded.
|
||||||
|
db_session: The SQLAlchemy database session.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
A list of InMemoryChatFile objects, each containing the file content (as bytes),
|
||||||
|
file ID, file type, and filename. Prioritizes loading plaintext versions if available.
|
||||||
|
"""
|
||||||
|
# Use parallel execution to load files concurrently
|
||||||
return cast(
|
return cast(
|
||||||
list[InMemoryChatFile],
|
list[InMemoryChatFile],
|
||||||
run_functions_tuples_in_parallel(
|
run_functions_tuples_in_parallel(
|
||||||
|
# 1. Load files specified by individual IDs
|
||||||
[(load_user_file, (file_id, db_session)) for file_id in user_file_ids]
|
[(load_user_file, (file_id, db_session)) for file_id in user_file_ids]
|
||||||
)
|
)
|
||||||
|
# 2. Load all files within specified folders
|
||||||
+ [
|
+ [
|
||||||
file
|
file
|
||||||
for folder_id in user_folder_ids
|
for folder_id in user_folder_ids
|
||||||
@@ -175,24 +191,47 @@ def load_all_user_files(
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def load_all_user_file_files(
|
def get_user_files(
|
||||||
user_file_ids: list[int],
|
user_file_ids: list[int],
|
||||||
user_folder_ids: list[int],
|
user_folder_ids: list[int],
|
||||||
db_session: Session,
|
db_session: Session,
|
||||||
) -> list[UserFile]:
|
) -> list[UserFile]:
|
||||||
|
"""
|
||||||
|
Fetches UserFile database records based on provided file and folder IDs.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
user_file_ids: A list of specific UserFile IDs to fetch.
|
||||||
|
user_folder_ids: A list of UserFolder IDs. All UserFiles within these folders will be fetched.
|
||||||
|
db_session: The SQLAlchemy database session.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
A list containing UserFile SQLAlchemy model objects corresponding to the
|
||||||
|
specified file IDs and all files within the specified folder IDs.
|
||||||
|
It does NOT return the actual file content.
|
||||||
|
"""
|
||||||
user_files: list[UserFile] = []
|
user_files: list[UserFile] = []
|
||||||
|
|
||||||
|
# 1. Fetch UserFile records for specific file IDs
|
||||||
for user_file_id in user_file_ids:
|
for user_file_id in user_file_ids:
|
||||||
|
# Query the database for a UserFile with the matching ID
|
||||||
user_file = (
|
user_file = (
|
||||||
db_session.query(UserFile).filter(UserFile.id == user_file_id).first()
|
db_session.query(UserFile).filter(UserFile.id == user_file_id).first()
|
||||||
)
|
)
|
||||||
|
# If found, add it to the list
|
||||||
if user_file is not None:
|
if user_file is not None:
|
||||||
user_files.append(user_file)
|
user_files.append(user_file)
|
||||||
|
|
||||||
|
# 2. Fetch UserFile records for all files within specified folder IDs
|
||||||
for user_folder_id in user_folder_ids:
|
for user_folder_id in user_folder_ids:
|
||||||
|
# Query the database for all UserFiles belonging to the current folder ID
|
||||||
|
# and extend the list with the results
|
||||||
user_files.extend(
|
user_files.extend(
|
||||||
db_session.query(UserFile)
|
db_session.query(UserFile)
|
||||||
.filter(UserFile.folder_id == user_folder_id)
|
.filter(UserFile.folder_id == user_folder_id)
|
||||||
.all()
|
.all()
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# 3. Return the combined list of UserFile database objects
|
||||||
return user_files
|
return user_files
|
||||||
|
|
||||||
|
|
||||||
|
@@ -42,6 +42,7 @@ from onyx.file_processing.html_utils import web_html_cleanup
|
|||||||
from onyx.server.documents.connector import trigger_indexing_for_cc_pair
|
from onyx.server.documents.connector import trigger_indexing_for_cc_pair
|
||||||
from onyx.server.documents.models import ConnectorBase
|
from onyx.server.documents.models import ConnectorBase
|
||||||
from onyx.server.documents.models import CredentialBase
|
from onyx.server.documents.models import CredentialBase
|
||||||
|
from onyx.server.query_and_chat.chat_backend import RECENT_DOCS_FOLDER_ID
|
||||||
from onyx.server.user_documents.models import MessageResponse
|
from onyx.server.user_documents.models import MessageResponse
|
||||||
from onyx.server.user_documents.models import UserFileSnapshot
|
from onyx.server.user_documents.models import UserFileSnapshot
|
||||||
from onyx.server.user_documents.models import UserFolderSnapshot
|
from onyx.server.user_documents.models import UserFolderSnapshot
|
||||||
@@ -141,9 +142,6 @@ def get_folder(
|
|||||||
return folder_snapshot
|
return folder_snapshot
|
||||||
|
|
||||||
|
|
||||||
RECENT_DOCS_FOLDER_ID = -1
|
|
||||||
|
|
||||||
|
|
||||||
@router.post("/user/file/upload")
|
@router.post("/user/file/upload")
|
||||||
def upload_user_files(
|
def upload_user_files(
|
||||||
files: List[UploadFile] = File(...),
|
files: List[UploadFile] = File(...),
|
||||||
@@ -157,7 +155,7 @@ def upload_user_files(
|
|||||||
try:
|
try:
|
||||||
# Use our consolidated function that handles indexing properly
|
# Use our consolidated function that handles indexing properly
|
||||||
user_files = upload_files_to_user_files_with_indexing(
|
user_files = upload_files_to_user_files_with_indexing(
|
||||||
files, folder_id or -1, user, db_session
|
files, folder_id or RECENT_DOCS_FOLDER_ID, user, db_session
|
||||||
)
|
)
|
||||||
|
|
||||||
return [UserFileSnapshot.from_model(user_file) for user_file in user_files]
|
return [UserFileSnapshot.from_model(user_file) for user_file in user_files]
|
||||||
|
@@ -125,7 +125,7 @@ export function SeeMoreBlock({
|
|||||||
<button
|
<button
|
||||||
onClick={toggleDocumentSelection}
|
onClick={toggleDocumentSelection}
|
||||||
className={`w-full ${fullWidth ? "w-full" : "max-w-[200px]"}
|
className={`w-full ${fullWidth ? "w-full" : "max-w-[200px]"}
|
||||||
h-[80px] p-3 border border-[1.5px] border-new-background-light text-left bg-accent-background hover:bg-accent-background-hovered dark:bg-accent-background-hovered dark:hover:bg-neutral-700/80 cursor-pointer rounded-lg flex flex-col justify-between overflow-hidden`}
|
h-[80px] p-3 border border-[1.5px] border-new-background-light text-left bg-accent-background hover:bg-accent-background-hovered dark:bg-accent-background-hovered dark:hover:bg-neutral-700/80 cursor-pointer rounded-lg flex flex-col justify-between overflow-hidden`}
|
||||||
>
|
>
|
||||||
<div className="flex items-center gap-1">
|
<div className="flex items-center gap-1">
|
||||||
{docs.length > 2 && iconsToRender.map((icon, index) => icon)}
|
{docs.length > 2 && iconsToRender.map((icon, index) => icon)}
|
||||||
@@ -204,7 +204,7 @@ export function FilesSeeMoreBlock({
|
|||||||
<button
|
<button
|
||||||
onClick={toggleDocumentSelection}
|
onClick={toggleDocumentSelection}
|
||||||
className={`w-full ${fullWidth ? "w-full" : "max-w-[200px]"}
|
className={`w-full ${fullWidth ? "w-full" : "max-w-[200px]"}
|
||||||
h-[80px] p-3 border border-[1.5px] border-new-background-light text-left bg-accent-background hover:bg-accent-background-hovered dark:bg-accent-background-hovered dark:hover:bg-neutral-700/80 cursor-pointer rounded-lg flex flex-col justify-between overflow-hidden`}
|
h-[80px] p-3 border border-[1.5px] border-new-background-light text-left bg-accent-background hover:bg-accent-background-hovered dark:bg-accent-background-hovered dark:hover:bg-neutral-700/80 cursor-pointer rounded-lg flex flex-col justify-between overflow-hidden`}
|
||||||
>
|
>
|
||||||
<div className="flex items-center gap-1">
|
<div className="flex items-center gap-1">
|
||||||
{files.length > 2 && iconsToRender.map((icon, index) => icon)}
|
{files.length > 2 && iconsToRender.map((icon, index) => icon)}
|
||||||
|
Reference in New Issue
Block a user