Fix/add back search with files (#4767)

* Allow search w/ user files

* more

* More

* Fix

* Improve prompt

* Combine user files + regular uploaded files
Author: Chris Weaver
Date: 2025-05-24 15:44:39 -07:00
Committed by: GitHub
Parent: dad99cbec7
Commit: 0c7ba8e2ac
10 changed files with 303 additions and 349 deletions

View File

@@ -43,6 +43,7 @@ from onyx.chat.models import UserKnowledgeFilePacket
from onyx.chat.prompt_builder.answer_prompt_builder import AnswerPromptBuilder
from onyx.chat.prompt_builder.answer_prompt_builder import default_build_system_message
from onyx.chat.prompt_builder.answer_prompt_builder import default_build_user_message
from onyx.chat.user_files.parse_user_files import parse_user_files
from onyx.configs.chat_configs import CHAT_TARGET_CHUNK_PERCENTAGE
from onyx.configs.chat_configs import DISABLE_LLM_CHOOSE_SEARCH
from onyx.configs.chat_configs import MAX_CHUNKS_FED_TO_CHAT
@@ -52,11 +53,9 @@ from onyx.configs.constants import BASIC_KEY
from onyx.configs.constants import MessageType
from onyx.configs.constants import MilestoneRecordType
from onyx.configs.constants import NO_AUTH_USER_ID
from onyx.context.search.enums import LLMEvaluationType
from onyx.context.search.enums import OptionalSearchSetting
from onyx.context.search.enums import QueryFlow
from onyx.context.search.enums import SearchType
from onyx.context.search.models import BaseFilters
from onyx.context.search.models import InferenceSection
from onyx.context.search.models import RetrievalDetails
from onyx.context.search.retrieval.search_runner import (
@@ -95,9 +94,7 @@ from onyx.document_index.factory import get_default_document_index
from onyx.file_store.models import ChatFileType
from onyx.file_store.models import FileDescriptor
from onyx.file_store.models import InMemoryChatFile
from onyx.file_store.utils import get_user_files
from onyx.file_store.utils import load_all_chat_files
from onyx.file_store.utils import load_in_memory_chat_files
from onyx.file_store.utils import save_files
from onyx.llm.exceptions import GenAIDisabledException
from onyx.llm.factory import get_llms_for_persona
@@ -312,8 +309,7 @@ def _handle_internet_search_tool_response_summary(
def _get_force_search_settings(
new_msg_req: CreateChatMessageRequest,
tools: list[Tool],
user_file_ids: list[int],
user_folder_ids: list[int],
search_tool_override_kwargs: SearchToolOverrideKwargs | None,
) -> ForceUseTool:
internet_search_available = any(
isinstance(tool, InternetSearchTool) for tool in tools
@@ -321,45 +317,24 @@ def _get_force_search_settings(
search_tool_available = any(isinstance(tool, SearchTool) for tool in tools)
if not internet_search_available and not search_tool_available:
if new_msg_req.force_user_file_search:
return ForceUseTool(force_use=True, tool_name=SearchTool._NAME)
else:
# Does not matter much which tool is set here as force is false and neither tool is available
return ForceUseTool(force_use=False, tool_name=SearchTool._NAME)
tool_name = SearchTool._NAME if search_tool_available else InternetSearchTool._NAME
# Currently, the internet search tool does not support query override
args = (
{"query": new_msg_req.query_override}
if new_msg_req.query_override and tool_name == SearchTool._NAME
if new_msg_req.query_override and search_tool_available
else None
)
# Create override_kwargs for the search tool if user_file_ids are provided
override_kwargs = None
if (user_file_ids or user_folder_ids) and tool_name == SearchTool._NAME:
override_kwargs = SearchToolOverrideKwargs(
force_no_rerank=False,
alternate_db_session=None,
retrieved_sections_callback=None,
skip_query_analysis=False,
user_file_ids=user_file_ids,
user_folder_ids=user_folder_ids,
)
if new_msg_req.file_descriptors:
# If user has uploaded files they're using, don't run any of the search tools
return ForceUseTool(force_use=False, tool_name=tool_name)
should_force_search = any(
[
new_msg_req.force_user_file_search,
new_msg_req.retrieval_options
and new_msg_req.retrieval_options.run_search
== OptionalSearchSetting.ALWAYS,
new_msg_req.search_doc_ids,
new_msg_req.query_override is not None,
DISABLE_LLM_CHOOSE_SEARCH,
search_tool_override_kwargs is not None,
]
)
@@ -369,13 +344,18 @@ def _get_force_search_settings(
return ForceUseTool(
force_use=True,
tool_name=tool_name,
tool_name=SearchTool._NAME,
args=args,
override_kwargs=override_kwargs,
override_kwargs=search_tool_override_kwargs,
)
return ForceUseTool(
force_use=False, tool_name=tool_name, args=args, override_kwargs=override_kwargs
force_use=False,
tool_name=(
SearchTool._NAME if search_tool_available else InternetSearchTool._NAME
),
args=args,
override_kwargs=None,
)
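
Flattened out, the new decision flow in _get_force_search_settings reads roughly as below. This is a sketch condensed from the hunks above, not the verbatim file: imports are omitted, and the tool_name local is kept for readability even where the diff inlines the equivalent expression.

def _get_force_search_settings(
    new_msg_req: CreateChatMessageRequest,
    tools: list[Tool],
    search_tool_override_kwargs: SearchToolOverrideKwargs | None,
) -> ForceUseTool:
    internet_search_available = any(isinstance(t, InternetSearchTool) for t in tools)
    search_tool_available = any(isinstance(t, SearchTool) for t in tools)

    if not internet_search_available and not search_tool_available:
        # Neither tool is available; the tool name hardly matters since
        # force_use is False.
        return ForceUseTool(force_use=False, tool_name=SearchTool._NAME)

    tool_name = SearchTool._NAME if search_tool_available else InternetSearchTool._NAME

    # The internet search tool does not support query override.
    args = (
        {"query": new_msg_req.query_override}
        if new_msg_req.query_override and search_tool_available
        else None
    )

    if new_msg_req.file_descriptors:
        # Files attached directly to the message: don't run any search tool.
        return ForceUseTool(force_use=False, tool_name=tool_name)

    should_force_search = any(
        [
            new_msg_req.retrieval_options
            and new_msg_req.retrieval_options.run_search
            == OptionalSearchSetting.ALWAYS,
            new_msg_req.search_doc_ids,
            new_msg_req.query_override is not None,
            DISABLE_LLM_CHOOSE_SEARCH,
            # New in this commit: user files needing a retrieval pass force search.
            search_tool_override_kwargs is not None,
        ]
    )
    if should_force_search:
        return ForceUseTool(
            force_use=True,
            tool_name=SearchTool._NAME,
            args=args,
            override_kwargs=search_tool_override_kwargs,
        )
    return ForceUseTool(
        force_use=False, tool_name=tool_name, args=args, override_kwargs=None
    )
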
@@ -488,7 +468,6 @@ def _process_tool_response(
retrieval_options: RetrievalDetails | None,
user_file_files: list[UserFile] | None,
user_files: list[InMemoryChatFile] | None,
file_id_to_user_file: dict[str, InMemoryChatFile],
search_for_ordering_only: bool,
) -> Generator[ChatPacket, None, dict[SubQuestionKey, AnswerPostInfo]]:
level, level_question_num = (
@@ -540,7 +519,7 @@ def _process_tool_response(
yield from _get_user_knowledge_files(
info=info,
user_files=user_files,
file_id_to_user_file=file_id_to_user_file,
file_id_to_user_file={file.file_id: file for file in user_files},
)
yield info.qa_docs_response
@@ -665,8 +644,6 @@ def stream_chat_message_objects(
try:
# Move these variables inside the try block
file_id_to_user_file = {}
user_id = user.id if user is not None else None
chat_session = get_chat_session_by_id(
@@ -840,60 +817,22 @@ def stream_chat_message_objects(
for folder in persona.user_folders:
user_folder_ids.append(folder.id)
# Initialize flag for user file search
use_search_for_user_files = False
user_files: list[InMemoryChatFile] | None = None
search_for_ordering_only = False
user_file_files: list[UserFile] | None = None
if user_file_ids or user_folder_ids:
# Load user files
user_files = load_in_memory_chat_files(
user_file_ids or [],
user_folder_ids or [],
db_session,
)
user_file_files = get_user_files(
user_file_ids or [],
user_folder_ids or [],
db_session,
)
# Store mapping of file_id to file for later reordering
if user_files:
file_id_to_user_file = {file.file_id: file for file in user_files}
# Calculate token count for the files
from onyx.db.user_documents import calculate_user_files_token_count
from onyx.chat.prompt_builder.citations_prompt import (
compute_max_document_tokens_for_persona,
)
total_tokens = calculate_user_files_token_count(
user_file_ids or [],
user_folder_ids or [],
db_session,
)
# Calculate available tokens for documents based on prompt, user input, etc.
available_tokens = compute_max_document_tokens_for_persona(
# Load in user files into memory and create search tool override kwargs if needed
# if we have enough tokens and no folders, we don't need to use search
# we can just pass them into the prompt directly
(
in_memory_user_files,
user_file_models,
search_tool_override_kwargs_for_user_files,
) = parse_user_files(
user_file_ids=user_file_ids,
user_folder_ids=user_folder_ids,
db_session=db_session,
persona=persona,
actual_user_input=message_text, # Use the actual user message
actual_user_input=message_text,
)
logger.debug(
f"Total file tokens: {total_tokens}, Available tokens: {available_tokens}"
)
# ALWAYS use search for user files, but track if we need it for context or just ordering
use_search_for_user_files = True
# If files are small enough for context, we'll just use search for ordering
search_for_ordering_only = total_tokens <= available_tokens
if search_for_ordering_only:
# Add original user files to context since they fit
if user_files:
latest_query_files.extend(user_files)
if not search_tool_override_kwargs_for_user_files:
latest_query_files.extend(in_memory_user_files)
if user_message:
attach_files_to_chat_message(
@@ -1052,10 +991,13 @@ def stream_chat_message_objects(
prompt_config=prompt_config,
db_session=db_session,
user=user,
user_knowledge_present=bool(user_files or user_folder_ids),
llm=llm,
fast_llm=fast_llm,
use_file_search=new_msg_req.force_user_file_search,
run_search_setting=(
retrieval_options.run_search
if retrieval_options
else OptionalSearchSetting.AUTO
),
search_tool_config=SearchToolConfig(
answer_style_config=answer_style_config,
document_pruning_config=document_pruning_config,
@@ -1086,128 +1028,23 @@ def stream_chat_message_objects(
tools.extend(tool_list)
force_use_tool = _get_force_search_settings(
new_msg_req, tools, user_file_ids, user_folder_ids
new_msg_req, tools, search_tool_override_kwargs_for_user_files
)
# Set force_use if user files exceed token limit
if use_search_for_user_files:
try:
# Check if search tool is available in the tools list
search_tool_available = any(
isinstance(tool, SearchTool) for tool in tools
)
# If no search tool is available, add one
if not search_tool_available:
logger.info("No search tool available, creating one for user files")
# Create a basic search tool config
search_tool_config = SearchToolConfig(
answer_style_config=answer_style_config,
document_pruning_config=document_pruning_config,
retrieval_options=retrieval_options or RetrievalDetails(),
)
# Create and add the search tool
search_tool = SearchTool(
db_session=db_session,
user=user,
persona=persona,
retrieval_options=search_tool_config.retrieval_options,
prompt_config=prompt_config,
llm=llm,
fast_llm=fast_llm,
pruning_config=search_tool_config.document_pruning_config,
answer_style_config=search_tool_config.answer_style_config,
evaluation_type=(
LLMEvaluationType.BASIC
if persona.llm_relevance_filter
else LLMEvaluationType.SKIP
),
bypass_acl=bypass_acl,
)
# Add the search tool to the tools list
tools.append(search_tool)
logger.info(
"Added search tool for user files that exceed token limit"
)
# Now set force_use_tool.force_use to True
force_use_tool.force_use = True
force_use_tool.tool_name = SearchTool._NAME
# Set query argument if not already set
if not force_use_tool.args:
force_use_tool.args = {"query": final_msg.message}
# Pass the user file IDs to the search tool
if user_file_ids or user_folder_ids:
# Create a BaseFilters object with user_file_ids
if not retrieval_options:
retrieval_options = RetrievalDetails()
if not retrieval_options.filters:
retrieval_options.filters = BaseFilters()
# Set user file and folder IDs in the filters
retrieval_options.filters.user_file_ids = user_file_ids
retrieval_options.filters.user_folder_ids = user_folder_ids
# Create override kwargs for the search tool
override_kwargs = SearchToolOverrideKwargs(
force_no_rerank=search_for_ordering_only, # Skip reranking for ordering-only
alternate_db_session=None,
retrieved_sections_callback=None,
skip_query_analysis=search_for_ordering_only, # Skip query analysis for ordering-only
user_file_ids=user_file_ids,
user_folder_ids=user_folder_ids,
ordering_only=search_for_ordering_only, # Set ordering_only flag for fast path
)
# Set the override kwargs in the force_use_tool
force_use_tool.override_kwargs = override_kwargs
if search_for_ordering_only:
logger.info(
"Fast path: Configured search tool with optimized settings for ordering-only"
)
logger.info(
"Fast path: Skipping reranking and query analysis for ordering-only mode"
)
logger.info(
f"Using {len(user_file_ids or [])} files and {len(user_folder_ids or [])} folders"
)
else:
logger.info(
"Configured search tool to use ",
f"{len(user_file_ids or [])} files and {len(user_folder_ids or [])} folders",
)
except Exception as e:
logger.exception(
f"Error configuring search tool for user files: {str(e)}"
)
use_search_for_user_files = False
# TODO: unify message history with single message history
message_history = [
PreviousMessage.from_chat_message(msg, files) for msg in history_msgs
]
if not use_search_for_user_files and user_files:
if not search_tool_override_kwargs_for_user_files and in_memory_user_files:
yield UserKnowledgeFilePacket(
user_files=[
FileDescriptor(
id=str(file.file_id), type=ChatFileType.USER_KNOWLEDGE
id=str(file.file_id), type=file.file_type, name=file.filename
)
for file in user_files
for file in in_memory_user_files
]
)
if search_for_ordering_only:
logger.info(
"Performance: Forcing LLMEvaluationType.SKIP to prevent chunk evaluation for ordering-only search"
)
prompt_builder = AnswerPromptBuilder(
user_message=default_build_user_message(
user_query=final_msg.message,
@@ -1265,10 +1102,13 @@ def stream_chat_message_objects(
selected_db_search_docs=selected_db_search_docs,
info_by_subq=info_by_subq,
retrieval_options=retrieval_options,
user_file_files=user_file_files,
user_files=user_files,
file_id_to_user_file=file_id_to_user_file,
search_for_ordering_only=search_for_ordering_only,
user_file_files=user_file_models,
user_files=in_memory_user_files,
search_for_ordering_only=(
search_tool_override_kwargs_for_user_files is not None
and search_tool_override_kwargs_for_user_files.ordering_only
is True
),
)
elif isinstance(packet, StreamStopInfo):
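
Taken together, the user-file handling in stream_chat_message_objects collapses to the shape below, a condensed sketch of the hunks above (error handling and unrelated arguments omitted; every name appears in the diff):

(
    in_memory_user_files,
    user_file_models,
    search_tool_override_kwargs_for_user_files,
) = parse_user_files(
    user_file_ids=user_file_ids,
    user_folder_ids=user_folder_ids,
    db_session=db_session,
    persona=persona,
    actual_user_input=message_text,
)

# No override kwargs means the files fit in the context window:
# attach them to the prompt directly instead of routing through search.
if not search_tool_override_kwargs_for_user_files:
    latest_query_files.extend(in_memory_user_files)

force_use_tool = _get_force_search_settings(
    new_msg_req, tools, search_tool_override_kwargs_for_user_files
)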

View File

@@ -9,12 +9,12 @@ from onyx.context.search.models import InferenceChunk
from onyx.db.models import Persona
from onyx.db.prompts import get_default_prompt
from onyx.db.search_settings import get_multilingual_expansion
from onyx.file_store.models import InMemoryChatFile
from onyx.llm.factory import get_llms_for_persona
from onyx.llm.factory import get_main_llm_from_tuple
from onyx.llm.interfaces import LLMConfig
from onyx.llm.utils import build_content_with_imgs
from onyx.llm.utils import check_number_of_tokens
from onyx.llm.utils import message_to_prompt_and_imgs
from onyx.prompts.chat_prompts import REQUIRE_CITATION_STATEMENT
from onyx.prompts.constants import DEFAULT_IGNORE_STATEMENT
from onyx.prompts.direct_qa_prompts import CITATIONS_PROMPT
@@ -120,7 +120,8 @@ def build_citations_system_message(
def build_citations_user_message(
message: HumanMessage,
user_query: str,
files: list[InMemoryChatFile],
prompt_config: PromptConfig,
context_docs: list[LlmDoc] | list[InferenceChunk],
all_doc_useful: bool,
@@ -135,7 +136,6 @@ def build_citations_user_message(
history_block = (
HISTORY_BLOCK.format(history_str=history_message) if history_message else ""
)
query, img_urls = message_to_prompt_and_imgs(message)
if context_docs:
context_docs_str = build_complete_context_str(context_docs)
@@ -146,7 +146,7 @@
optional_ignore_statement=optional_ignore,
context_docs_str=context_docs_str,
task_prompt=task_prompt_with_reminder,
user_query=query,
user_query=user_query,
history_block=history_block,
)
else:
@@ -154,16 +154,17 @@
user_prompt = CITATIONS_PROMPT_FOR_TOOL_CALLING.format(
context_type=context_type,
task_prompt=task_prompt_with_reminder,
user_query=query,
user_query=user_query,
history_block=history_block,
)
user_prompt = user_prompt.strip()
tag_handled_prompt = handle_onyx_date_awareness(user_prompt, prompt_config)
user_msg = HumanMessage(
content=(
build_content_with_imgs(user_prompt, img_urls=img_urls)
if img_urls
else user_prompt
build_content_with_imgs(tag_handled_prompt, files)
if files
else tag_handled_prompt
)
)
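
With this change, build_citations_user_message takes the raw query string and the uploaded files instead of a pre-built HumanMessage, runs the prompt through handle_onyx_date_awareness, and attaches images itself via build_content_with_imgs. A call site now looks roughly like this, mirroring the build_next_prompt_for_search_like_tool hunk later in this diff (all_doc_useful stands in for the computed expression there; the remaining parameters are unchanged):

user_msg = build_citations_user_message(
    user_query=prompt_builder.raw_user_query,
    files=prompt_builder.raw_user_uploaded_files,
    prompt_config=prompt_config,
    context_docs=final_context_documents,
    all_doc_useful=all_doc_useful,
)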

View File

@@ -0,0 +1,102 @@
from sqlalchemy.orm import Session
from onyx.db.models import Persona
from onyx.db.models import UserFile
from onyx.file_store.models import InMemoryChatFile
from onyx.file_store.utils import get_user_files
from onyx.file_store.utils import load_in_memory_chat_files
from onyx.tools.models import SearchToolOverrideKwargs
from onyx.utils.logger import setup_logger
logger = setup_logger()
def parse_user_files(
user_file_ids: list[int],
user_folder_ids: list[int],
db_session: Session,
persona: Persona,
actual_user_input: str,
) -> tuple[list[InMemoryChatFile], list[UserFile], SearchToolOverrideKwargs | None]:
"""
Parse user files and folders into in-memory chat files and create search tool override kwargs.
Only creates SearchToolOverrideKwargs if token overflow occurs or folders are present.
Args:
user_file_ids: List of user file IDs to load
user_folder_ids: List of user folder IDs to load
db_session: Database session
persona: Persona to calculate available tokens
actual_user_input: User's input message for token calculation
Returns:
Tuple of (
loaded user files,
user file models,
search tool override kwargs if token
overflow or folders present
)
"""
# Return empty results if no files or folders specified
if not user_file_ids and not user_folder_ids:
return [], [], None
# Load user files from the database into memory
user_files = load_in_memory_chat_files(
user_file_ids or [],
user_folder_ids or [],
db_session,
)
user_file_models = get_user_files(
user_file_ids or [],
user_folder_ids or [],
db_session,
)
# Calculate token count for the files, need to import here to avoid circular import
# TODO: fix this
from onyx.db.user_documents import calculate_user_files_token_count
from onyx.chat.prompt_builder.citations_prompt import (
compute_max_document_tokens_for_persona,
)
total_tokens = calculate_user_files_token_count(
user_file_ids or [],
user_folder_ids or [],
db_session,
)
# Calculate available tokens for documents based on prompt, user input, etc.
available_tokens = compute_max_document_tokens_for_persona(
db_session=db_session,
persona=persona,
actual_user_input=actual_user_input,
)
logger.debug(
f"Total file tokens: {total_tokens}, Available tokens: {available_tokens}"
)
have_enough_tokens = total_tokens <= available_tokens
# If we have enough tokens and no folders, we don't need search
# we can just pass them into the prompt directly
if have_enough_tokens and not user_folder_ids:
# No search tool override needed - files can be passed directly
return user_files, user_file_models, None
# Token overflow or folders present - need to use search tool
search_for_ordering_only = have_enough_tokens
override_kwargs = SearchToolOverrideKwargs(
force_no_rerank=search_for_ordering_only,
alternate_db_session=None,
retrieved_sections_callback=None,
skip_query_analysis=search_for_ordering_only,
user_file_ids=user_file_ids,
user_folder_ids=user_folder_ids,
ordering_only=search_for_ordering_only,
)
return user_files, user_file_models, override_kwargs
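
The three-way return contract is the crux of the commit: callers branch on the override kwargs to decide how the files reach the LLM. A sketch of that branching, with hypothetical file IDs and db_session/persona supplied by the caller:

files, file_models, override = parse_user_files(
    user_file_ids=[12, 34],  # hypothetical IDs
    user_folder_ids=[],
    db_session=db_session,  # supplied by the caller
    persona=persona,
    actual_user_input="What changed in Q3?",
)
if override is None:
    # Everything fits in the context window: pass files straight into the prompt.
    ...
elif override.ordering_only:
    # Files fit but folders are present: search runs only to order sections,
    # with reranking and query analysis skipped (see the flags above).
    ...
else:
    # Token overflow: a full search pass decides which chunks make the context.
    ...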

View File

@@ -12,11 +12,11 @@ from onyx.db.engine import get_session_with_current_tenant
from onyx.db.models import ChatMessage
from onyx.db.models import UserFile
from onyx.db.models import UserFolder
from onyx.file_processing.extract_file_text import IMAGE_MEDIA_TYPES
from onyx.file_store.file_store import get_default_file_store
from onyx.file_store.models import ChatFileType
from onyx.file_store.models import FileDescriptor
from onyx.file_store.models import InMemoryChatFile
from onyx.server.query_and_chat.chat_utils import mime_type_to_chat_file_type
from onyx.utils.b64 import get_image_type
from onyx.utils.logger import setup_logger
from onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel
@@ -119,27 +119,37 @@ def load_user_file(file_id: int, db_session: Session) -> InMemoryChatFile:
if not user_file:
raise ValueError(f"User file with id {file_id} not found")
# Try to load plaintext version first
# Get the file record to determine the appropriate chat file type
file_store = get_default_file_store(db_session)
file_record = file_store.read_file_record(user_file.file_id)
# Determine appropriate chat file type based on the original file's MIME type
chat_file_type = mime_type_to_chat_file_type(file_record.file_type)
# Try to load plaintext version first
plaintext_file_name = user_file_id_to_plaintext_file_name(file_id)
# check for plain text normalized version first, then use original file otherwise
try:
file_io = file_store.read_file(plaintext_file_name, mode="b")
# For plaintext versions, use PLAIN_TEXT type (unless it's an image which doesn't have plaintext)
plaintext_chat_file_type = (
ChatFileType.PLAIN_TEXT
if chat_file_type != ChatFileType.IMAGE
else chat_file_type
)
chat_file = InMemoryChatFile(
file_id=str(user_file.file_id),
content=file_io.read(),
file_type=ChatFileType.USER_KNOWLEDGE,
file_type=plaintext_chat_file_type,
filename=user_file.name,
)
status = "plaintext"
return chat_file
except Exception:
except Exception as e:
logger.warning(f"Failed to load plaintext for user file {user_file.id}: {e}")
# Fall back to original file if plaintext not available
file_io = file_store.read_file(user_file.file_id, mode="b")
file_record = file_store.read_file_record(user_file.file_id)
if file_record.file_type in IMAGE_MEDIA_TYPES:
chat_file_type = ChatFileType.IMAGE
chat_file = InMemoryChatFile(
file_id=str(user_file.file_id),

View File

@@ -137,8 +137,6 @@ class CreateChatMessageRequest(ChunkContext):
# https://platform.openai.com/docs/guides/structured-outputs/introduction
structured_response_format: dict | None = None
force_user_file_search: bool = False
# If true, ignores most of the search options and uses pro search instead.
# TODO: decide how many of the above options we want to pass through to pro search
use_agentic_search: bool = False

View File

@@ -16,6 +16,7 @@ from onyx.configs.app_configs import AZURE_DALLE_DEPLOYMENT_NAME
from onyx.configs.chat_configs import BING_API_KEY
from onyx.configs.model_configs import GEN_AI_TEMPERATURE
from onyx.context.search.enums import LLMEvaluationType
from onyx.context.search.enums import OptionalSearchSetting
from onyx.context.search.models import InferenceSection
from onyx.context.search.models import RerankingDetails
from onyx.context.search.models import RetrievalDetails
@@ -141,12 +142,11 @@ def construct_tools(
user: User | None,
llm: LLM,
fast_llm: LLM,
use_file_search: bool,
run_search_setting: OptionalSearchSetting,
search_tool_config: SearchToolConfig | None = None,
internet_search_tool_config: InternetSearchToolConfig | None = None,
image_generation_tool_config: ImageGenerationToolConfig | None = None,
custom_tool_config: CustomToolConfig | None = None,
user_knowledge_present: bool = False,
) -> dict[int, list[Tool]]:
"""Constructs tools based on persona configuration and available APIs"""
tool_dict: dict[int, list[Tool]] = {}
@@ -163,7 +163,10 @@
)
# Handle Search Tool
if tool_cls.__name__ == SearchTool.__name__ and not user_knowledge_present:
if (
tool_cls.__name__ == SearchTool.__name__
and run_search_setting != OptionalSearchSetting.NEVER
):
if not search_tool_config:
search_tool_config = SearchToolConfig()
@@ -256,33 +259,6 @@
for tool_list in tool_dict.values():
tools.extend(tool_list)
if use_file_search:
search_tool_config = SearchToolConfig()
search_tool = SearchTool(
db_session=db_session,
user=user,
persona=persona,
retrieval_options=search_tool_config.retrieval_options,
prompt_config=prompt_config,
llm=llm,
fast_llm=fast_llm,
pruning_config=search_tool_config.document_pruning_config,
answer_style_config=search_tool_config.answer_style_config,
selected_sections=search_tool_config.selected_sections,
chunks_above=search_tool_config.chunks_above,
chunks_below=search_tool_config.chunks_below,
full_doc=search_tool_config.full_doc,
evaluation_type=(
LLMEvaluationType.BASIC
if persona.llm_relevance_filter
else LLMEvaluationType.SKIP
),
rerank_settings=search_tool_config.rerank_settings,
bypass_acl=search_tool_config.bypass_acl,
)
tool_dict[1] = [search_tool]
# factor in tool definition size when pruning
if search_tool_config:
search_tool_config.document_pruning_config.tool_num_tokens = (
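
Tool construction is now gated on the message's retrieval setting rather than a use_file_search / user_knowledge_present pair. The call site shown in the stream_chat_message_objects hunk earlier in this diff passes it roughly like so (a sketch; the remaining keyword arguments are unchanged):

tool_dict = construct_tools(
    persona=persona,
    prompt_config=prompt_config,
    db_session=db_session,
    user=user,
    llm=llm,
    fast_llm=fast_llm,
    run_search_setting=(
        retrieval_options.run_search
        if retrieval_options
        else OptionalSearchSetting.AUTO
    ),
    search_tool_config=search_tool_config,
)
# Inside construct_tools, the SearchTool is then built unless the request
# explicitly disabled search: run_search_setting != OptionalSearchSetting.NEVER.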

View File

@@ -1,7 +1,5 @@
from typing import cast
from langchain_core.messages import HumanMessage
from onyx.chat.models import AnswerStyleConfig
from onyx.chat.models import LlmDoc
from onyx.chat.models import PromptConfig
@@ -10,7 +8,6 @@ from onyx.chat.prompt_builder.citations_prompt import (
build_citations_system_message,
)
from onyx.chat.prompt_builder.citations_prompt import build_citations_user_message
from onyx.llm.utils import build_content_with_imgs
from onyx.tools.message import ToolCallSummary
from onyx.tools.models import ToolResponse
@@ -45,12 +42,8 @@ def build_next_prompt_for_search_like_tool(
build_citations_user_message(
# make sure to use the original user query here in order to avoid duplication
# of the task prompt
message=HumanMessage(
content=build_content_with_imgs(
prompt_builder.raw_user_query,
prompt_builder.raw_user_uploaded_files,
)
),
user_query=prompt_builder.raw_user_query,
files=prompt_builder.raw_user_uploaded_files,
prompt_config=prompt_config,
context_docs=final_context_documents,
all_doc_useful=(

View File

@@ -182,6 +182,7 @@ export function ChatPage({
addSelectedFile,
addSelectedFolder,
clearSelectedItems,
setSelectedFiles,
folders: userFolders,
files: allUserFiles,
uploadFile,
@@ -1110,6 +1111,14 @@ export function ChatPage({
const resetInputBar = () => {
setMessage("");
setCurrentMessageFiles([]);
// Reset selectedFiles if they're under the context limit, but preserve selectedFolders.
// If under the context limit, the files will be included in the chat history
// so we don't need to keep them around.
if (selectedDocumentTokens < maxTokens) {
setSelectedFiles([]);
}
if (endPaddingRef.current) {
endPaddingRef.current.style.height = `95px`;
}
@@ -1956,10 +1965,7 @@
}
};
const handleImageUpload = async (
acceptedFiles: File[],
intent: UploadIntent
) => {
const handleMessageSpecificFileUpload = async (acceptedFiles: File[]) => {
const [_, llmModel] = getFinalLLM(
llmProviders,
liveAssistant ?? null,
@@ -1982,8 +1988,6 @@
updateChatState("uploading", currentSessionId());
const newlyUploadedFileDescriptors: FileDescriptor[] = [];
for (let file of acceptedFiles) {
const formData = new FormData();
formData.append("files", file);
@@ -1992,9 +1996,6 @@
if (response.length > 0 && response[0] !== undefined) {
const uploadedFile = response[0];
if (intent == UploadIntent.ADD_TO_DOCUMENTS) {
addSelectedFile(uploadedFile);
} else {
const newFileDescriptor: FileDescriptor = {
// Use file_id (storage ID) if available, otherwise fallback to DB id
// Ensure it's a string as FileDescriptor expects
@@ -2009,7 +2010,6 @@
};
setCurrentMessageFiles((prev) => [...prev, newFileDescriptor]);
}
} else {
setPopup({
type: "error",
@@ -2616,10 +2616,7 @@
<Dropzone
key={currentSessionId()}
onDrop={(acceptedFiles) =>
handleImageUpload(
acceptedFiles,
UploadIntent.ATTACH_TO_MESSAGE
)
handleMessageSpecificFileUpload(acceptedFiles)
}
noClick
>
@@ -3354,7 +3351,7 @@
}
setAlternativeAssistant={setAlternativeAssistant}
setFiles={setCurrentMessageFiles}
handleFileUpload={handleImageUpload}
handleFileUpload={handleMessageSpecificFileUpload}
textAreaRef={textAreaRef}
/>
{enterpriseSettings &&

View File

@@ -188,7 +188,7 @@ interface ChatInputBarProps {
setAlternativeAssistant: (alternativeAssistant: Persona | null) => void;
toggleDocumentSidebar: () => void;
setFiles: (files: FileDescriptor[]) => void;
handleFileUpload: (files: File[], intent: UploadIntent) => void;
handleFileUpload: (files: File[]) => void;
textAreaRef: React.RefObject<HTMLTextAreaElement>;
filterManager: FilterManager;
availableSources: SourceMetadata[];
@@ -270,7 +270,7 @@ export function ChatInputBar({
}
if (pastedFiles.length > 0) {
event.preventDefault();
handleFileUpload(pastedFiles, UploadIntent.ATTACH_TO_MESSAGE);
handleFileUpload(pastedFiles);
}
}
};
@@ -444,6 +444,45 @@
}
};
// Combine selectedFiles and currentMessageFiles for unified rendering
const allFiles = useMemo(() => {
const combined: Array<{
id: string;
name: string;
chatFileType: ChatFileType;
isUploading?: boolean;
source: "selected" | "current";
originalFile: any;
}> = [];
// Add selected files (excluding those already in currentMessageFiles)
selectedFiles.forEach((file) => {
if (!currentMessageFileIds.has(String(file.file_id || file.id))) {
combined.push({
id: String(file.file_id || file.id),
name: file.name,
chatFileType: file.chat_file_type,
source: "selected",
originalFile: file,
});
}
});
// Add current message files
currentMessageFiles.forEach((file, index) => {
combined.push({
id: file.id,
name: file.name || `File${file.id}`,
chatFileType: file.type,
isUploading: file.isUploading,
source: "current",
originalFile: file,
});
});
return combined;
}, [selectedFiles, currentMessageFiles, currentMessageFileIds]);
return (
<div id="onyx-chat-input">
<div className="flex justify-center mx-auto">
@@ -669,18 +708,58 @@
/>
))}
{/* This is excluding image types because they get rendered differently via currentMessageFiles.map
Seems quite hacky ... all rendering should probably be done in one place? */}
{selectedFiles.map(
(file) =>
!currentMessageFileIds.has(
String(file.file_id || file.id)
) && (
{/* Unified file rendering section for both selected and current message files */}
{allFiles.map((file, index) =>
file.chatFileType === ChatFileType.IMAGE ? (
<SourceChip
key={file.id}
icon={<FileIcon size={16} />}
key={`${file.source}-${file.id}-${index}`}
icon={
file.isUploading ? (
<FiLoader className="animate-spin" />
) : (
<img
className="h-full py-.5 object-cover rounded-lg bg-background cursor-pointer"
src={buildImgUrl(file.id)}
alt={file.name || "File image"}
/>
)
}
title={file.name}
onRemove={() => removeSelectedFile(file)}
onRemove={() => {
if (file.source === "selected") {
removeSelectedFile(file.originalFile);
} else {
setCurrentMessageFiles(
currentMessageFiles.filter(
(fileInFilter) => fileInFilter.id !== file.id
)
);
}
}}
/>
) : (
<SourceChip
key={`${file.source}-${file.id}-${index}`}
icon={
<FileIcon
className={
file.source === "current" ? "text-red-500" : ""
}
size={16}
/>
}
title={file.name}
onRemove={() => {
if (file.source === "selected") {
removeSelectedFile(file.originalFile);
} else {
setCurrentMessageFiles(
currentMessageFiles.filter(
(fileInFilter) => fileInFilter.id !== file.id
)
);
}
}}
/>
)
)}
@@ -752,45 +831,6 @@
onRemove={removeDocs}
/>
)}
{currentMessageFiles.map((file, index) =>
file.type === ChatFileType.IMAGE ? (
<SourceChip
key={`file-${index}`}
icon={
file.isUploading ? (
<FiLoader className="animate-spin" />
) : (
<img
className="h-full py-.5 object-cover rounded-lg bg-background cursor-pointer"
src={buildImgUrl(file.id)}
alt={file.name || "Uploaded image"}
/>
)
}
title={file.name || "File" + file.id}
onRemove={() => {
setCurrentMessageFiles(
currentMessageFiles.filter(
(fileInFilter) => fileInFilter.id !== file.id
)
);
}}
/>
) : (
<SourceChip
key={`file-${index}`}
icon={<FileIcon className="text-red-500" size={16} />}
title={file.name || "File"}
onRemove={() => {
setCurrentMessageFiles(
currentMessageFiles.filter(
(fileInFilter) => fileInFilter.id !== file.id
)
);
}}
/>
)
)}
</div>
</div>
)}

View File

@@ -179,7 +179,6 @@ export interface SendMessageParams {
signal?: AbortSignal;
userFileIds?: number[];
userFolderIds?: number[];
forceUserFileSearch?: boolean;
useLanggraph?: boolean;
}
@@ -202,7 +201,6 @@
useExistingUserMessage,
alternateAssistantId,
signal,
forceUserFileSearch,
useLanggraph,
}: SendMessageParams): AsyncGenerator<PacketType, void, unknown> {
const documentsAreSelected =
@@ -217,7 +215,6 @@
// single assistant anyways
prompt_id: null,
search_doc_ids: documentsAreSelected ? selectedDocumentIds : null,
force_user_file_search: forceUserFileSearch,
file_descriptors: fileDescriptors,
user_file_ids: userFileIds,
user_folder_ids: userFolderIds,