mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-09-13 18:12:14 +02:00
Bugfix/chat images 2 (#4630)
* don't hardcode -1
* extra spaces
* fix binary data in blurb
* add note to binary handling

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
This commit is contained in:
@@ -96,9 +96,9 @@ from onyx.document_index.factory import get_default_document_index
|
||||
from onyx.file_store.models import ChatFileType
|
||||
from onyx.file_store.models import FileDescriptor
|
||||
from onyx.file_store.models import InMemoryChatFile
|
||||
from onyx.file_store.utils import get_user_files
|
||||
from onyx.file_store.utils import load_all_chat_files
|
||||
from onyx.file_store.utils import load_all_user_file_files
|
||||
from onyx.file_store.utils import load_all_user_files
|
||||
from onyx.file_store.utils import load_in_memory_chat_files
|
||||
from onyx.file_store.utils import save_files
|
||||
from onyx.llm.exceptions import GenAIDisabledException
|
||||
from onyx.llm.factory import get_llms_for_persona
|
||||
@@ -849,12 +849,12 @@ def stream_chat_message_objects(
|
||||
user_file_files: list[UserFile] | None = None
|
||||
if user_file_ids or user_folder_ids:
|
||||
# Load user files
|
||||
user_files = load_all_user_files(
|
||||
user_files = load_in_memory_chat_files(
|
||||
user_file_ids or [],
|
||||
user_folder_ids or [],
|
||||
db_session,
|
||||
)
|
||||
user_file_files = load_all_user_file_files(
|
||||
user_file_files = get_user_files(
|
||||
user_file_ids or [],
|
||||
user_folder_ids or [],
|
||||
db_session,
|
||||
|
@@ -155,6 +155,7 @@ def _apply_pruning(
|
||||
|
||||
section_idx_token_count: dict[int, int] = {}
|
||||
|
||||
ind = 0
|
||||
final_section_ind = None
|
||||
total_tokens = 0
|
||||
for ind, section in enumerate(sections):
|
||||
|
@@ -870,7 +870,10 @@ def create_search_doc_from_user_file(
|
||||
content_sample = associated_chat_file.content[:100]
|
||||
# Remove null bytes which can cause SQL errors
|
||||
content_sample = content_sample.replace(b"\x00", b"")
|
||||
blurb = content_sample.decode("utf-8", errors="replace")
|
||||
|
||||
# NOTE(rkuo): this used to be "replace" instead of strict, but
|
||||
# that would bypass the binary handling below
|
||||
blurb = content_sample.decode("utf-8", errors="strict")
|
||||
except Exception:
|
||||
# If decoding fails completely, provide a generic description
|
||||
blurb = f"[Binary file: {db_user_file.name}]"
|
||||
|
@@ -157,16 +157,32 @@ def load_user_file(file_id: int, db_session: Session) -> InMemoryChatFile:
|
||||
)
|
||||
|
||||
|
||||
def load_all_user_files(
|
||||
def load_in_memory_chat_files(
|
||||
user_file_ids: list[int],
|
||||
user_folder_ids: list[int],
|
||||
db_session: Session,
|
||||
) -> list[InMemoryChatFile]:
|
||||
"""
|
||||
Loads the actual content of user files specified by individual IDs and those
|
||||
within specified folder IDs into memory.
|
||||
|
||||
Args:
|
||||
user_file_ids: A list of specific UserFile IDs to load.
|
||||
user_folder_ids: A list of UserFolder IDs. All UserFiles within these folders will be loaded.
|
||||
db_session: The SQLAlchemy database session.
|
||||
|
||||
Returns:
|
||||
A list of InMemoryChatFile objects, each containing the file content (as bytes),
|
||||
file ID, file type, and filename. Prioritizes loading plaintext versions if available.
|
||||
"""
|
||||
# Use parallel execution to load files concurrently
|
||||
return cast(
|
||||
list[InMemoryChatFile],
|
||||
run_functions_tuples_in_parallel(
|
||||
# 1. Load files specified by individual IDs
|
||||
[(load_user_file, (file_id, db_session)) for file_id in user_file_ids]
|
||||
)
|
||||
# 2. Load all files within specified folders
|
||||
+ [
|
||||
file
|
||||
for folder_id in user_folder_ids
|
||||
@@ -175,24 +191,47 @@ def load_all_user_files(
|
||||
)
|
||||
|
||||
|
||||
def load_all_user_file_files(
|
||||
def get_user_files(
    user_file_ids: list[int],
    user_folder_ids: list[int],
    db_session: Session,
) -> list[UserFile]:
    """
    Look up UserFile database rows by explicit file ID and by containing folder.

    Args:
        user_file_ids: IDs of individual UserFile rows to look up.
        user_folder_ids: IDs of folders; every UserFile whose folder_id matches
            one of these is included.
        db_session: The SQLAlchemy session the queries are issued on.

    Returns:
        UserFile model objects: first the rows found for user_file_ids (in the
        order given, silently omitting IDs with no matching row), followed by
        the rows of each folder in the order the folders were given. No
        de-duplication is performed, so a file that is requested by ID and also
        lives in a requested folder appears twice. Only database records are
        returned, not file contents.
    """
    # Per-ID lookups: one query per requested ID, evaluated in input order.
    looked_up = (
        db_session.query(UserFile).filter(UserFile.id == file_id).first()
        for file_id in user_file_ids
    )
    # IDs without a matching row yield None and are dropped here.
    records: list[UserFile] = [record for record in looked_up if record is not None]

    # Per-folder lookups: append every row belonging to each requested folder.
    for folder_id in user_folder_ids:
        folder_query = db_session.query(UserFile).filter(
            UserFile.folder_id == folder_id
        )
        records += folder_query.all()

    return records
|
||||
|
||||
|
||||
|
@@ -42,6 +42,7 @@ from onyx.file_processing.html_utils import web_html_cleanup
|
||||
from onyx.server.documents.connector import trigger_indexing_for_cc_pair
|
||||
from onyx.server.documents.models import ConnectorBase
|
||||
from onyx.server.documents.models import CredentialBase
|
||||
from onyx.server.query_and_chat.chat_backend import RECENT_DOCS_FOLDER_ID
|
||||
from onyx.server.user_documents.models import MessageResponse
|
||||
from onyx.server.user_documents.models import UserFileSnapshot
|
||||
from onyx.server.user_documents.models import UserFolderSnapshot
|
||||
@@ -141,9 +142,6 @@ def get_folder(
|
||||
return folder_snapshot
|
||||
|
||||
|
||||
RECENT_DOCS_FOLDER_ID = -1
|
||||
|
||||
|
||||
@router.post("/user/file/upload")
|
||||
def upload_user_files(
|
||||
files: List[UploadFile] = File(...),
|
||||
@@ -157,7 +155,7 @@ def upload_user_files(
|
||||
try:
|
||||
# Use our consolidated function that handles indexing properly
|
||||
user_files = upload_files_to_user_files_with_indexing(
|
||||
files, folder_id or -1, user, db_session
|
||||
files, folder_id or RECENT_DOCS_FOLDER_ID, user, db_session
|
||||
)
|
||||
|
||||
return [UserFileSnapshot.from_model(user_file) for user_file in user_files]
|
||||
|
Reference in New Issue
Block a user