From 6d67d472cd632639e99e219f709cd86c3dad2da7 Mon Sep 17 00:00:00 2001 From: pablodanswer Date: Sun, 4 Aug 2024 23:02:55 -0700 Subject: [PATCH] Add answers to search (#2020) --- .vscode/env_template.txt | 2 +- ...ename_index_origin_to_index_recursively.py | 4 +- backend/danswer/chat/models.py | 19 +- backend/danswer/chat/personas.yaml | 2 +- backend/danswer/chat/process_message.py | 33 ++- backend/danswer/configs/chat_configs.py | 16 +- backend/danswer/db/chat.py | 6 +- backend/danswer/llm/answering/answer.py | 1 + .../danswer/llm/answering/prune_and_merge.py | 19 +- .../one_shot_answer/answer_question.py | 38 ++-- backend/danswer/one_shot_answer/models.py | 10 +- backend/danswer/prompts/agentic_evaluation.py | 3 +- backend/danswer/search/enums.py | 21 +- backend/danswer/search/models.py | 6 +- backend/danswer/search/pipeline.py | 93 +++++--- .../search/postprocessing/postprocessing.py | 23 +- .../search/preprocessing/preprocessing.py | 27 ++- backend/danswer/search/utils.py | 41 ++++ .../secondary_llm_flows/agentic_evaluation.py | 36 +++- .../secondary_llm_flows/chunk_usefulness.py | 7 + .../server/query_and_chat/query_backend.py | 1 + .../tools/images/image_generation_tool.py | 51 +++-- backend/danswer/tools/search/search_tool.py | 26 ++- .../danswer/server/query_and_chat/models.py | 3 +- .../server/query_and_chat/query_backend.py | 27 ++- .../docker_compose/docker-compose.dev.yml | 6 +- .../docker_compose/docker-compose.gpu-dev.yml | 4 +- .../docker_compose/env.multilingual.template | 2 +- deployment/helm/values.yaml | 2 +- deployment/kubernetes/env-configmap.yaml | 2 +- .../modal/configuration/AssistantsTab.tsx | 1 + web/src/app/search/page.tsx | 4 +- web/src/components/icons/icons.tsx | 19 +- .../components/search/DateRangeSelector.tsx | 19 +- web/src/components/search/DocumentDisplay.tsx | 2 +- web/src/components/search/SearchBar.tsx | 96 +++++---- .../search/SearchResultsDisplay.tsx | 30 +-- web/src/components/search/SearchSection.tsx | 199 ++++++++++++++---- .../search/filtering/FilterDropdown.tsx | 9 +- .../components/search/filtering/Filters.tsx | 194 ++++++++++++++++- .../search/results/AnswerSection.tsx | 48 ++--- .../search/results/QuotesSection.tsx | 7 +- .../search/results/ResponseSection.tsx | 18 +- web/src/lib/constants.ts | 4 +- web/src/lib/search/interfaces.ts | 1 - web/src/lib/search/streamingQa.ts | 3 +- .../lib/search/streamingQuestionValidation.ts | 66 ------ 47 files changed, 806 insertions(+), 445 deletions(-) delete mode 100644 web/src/lib/search/streamingQuestionValidation.ts diff --git a/.vscode/env_template.txt b/.vscode/env_template.txt index 015672a22..b3fae8cee 100644 --- a/.vscode/env_template.txt +++ b/.vscode/env_template.txt @@ -15,7 +15,7 @@ LOG_LEVEL=debug # This passes top N results to LLM an additional time for reranking prior to answer generation # This step is quite heavy on token usage so we disable it for dev generally -DISABLE_LLM_CHUNK_FILTER=True +DISABLE_LLM_DOC_RELEVANCE=True # Useful if you want to toggle auth on/off (google_oauth/OIDC specifically) diff --git a/backend/alembic/versions/43cbbb3f5e6a_rename_index_origin_to_index_recursively.py b/backend/alembic/versions/43cbbb3f5e6a_rename_index_origin_to_index_recursively.py index aae6040a5..6aa2ffca0 100644 --- a/backend/alembic/versions/43cbbb3f5e6a_rename_index_origin_to_index_recursively.py +++ b/backend/alembic/versions/43cbbb3f5e6a_rename_index_origin_to_index_recursively.py @@ -10,8 +10,8 @@ from alembic import op # revision identifiers, used by Alembic. 
revision = "1d6ad76d1f37" down_revision = "e1392f05e840" -branch_labels = None -depends_on = None +branch_labels: None = None +depends_on: None = None def upgrade() -> None: diff --git a/backend/danswer/chat/models.py b/backend/danswer/chat/models.py index ffcf0e5f5..967648011 100644 --- a/backend/danswer/chat/models.py +++ b/backend/danswer/chat/models.py @@ -46,15 +46,22 @@ class LLMRelevanceFilterResponse(BaseModel): relevant_chunk_indices: list[int] -class RelevanceChunk(BaseModel): - # TODO make this document level. Also slight misnomer here as this is actually - # done at the section level currently rather than the chunk - relevant: bool | None = None +class RelevanceAnalysis(BaseModel): + relevant: bool content: str | None = None -class LLMRelevanceSummaryResponse(BaseModel): - relevance_summaries: dict[str, RelevanceChunk] +class SectionRelevancePiece(RelevanceAnalysis): + """LLM analysis mapped to an Inference Section""" + + document_id: str + chunk_id: int # ID of the center chunk for a given inference section + + +class DocumentRelevance(BaseModel): + """Contains all relevance information for a given search""" + + relevance_summaries: dict[str, RelevanceAnalysis] class DanswerAnswerPiece(BaseModel): diff --git a/backend/danswer/chat/personas.yaml b/backend/danswer/chat/personas.yaml index 8d5fcdb88..0aececcee 100644 --- a/backend/danswer/chat/personas.yaml +++ b/backend/danswer/chat/personas.yaml @@ -17,7 +17,7 @@ personas: num_chunks: 10 # Enable/Disable usage of the LLM chunk filter feature whereby each chunk is passed to the LLM to determine # if the chunk is useful or not towards the latest user query - # This feature can be overriden for all personas via DISABLE_LLM_CHUNK_FILTER env variable + # This feature can be overriden for all personas via DISABLE_LLM_DOC_RELEVANCE env variable llm_relevance_filter: true # Enable/Disable usage of the LLM to extract query time filters including source type and time range filters llm_filter_extraction: true diff --git a/backend/danswer/chat/process_message.py b/backend/danswer/chat/process_message.py index 8e24acc01..0df9807e2 100644 --- a/backend/danswer/chat/process_message.py +++ b/backend/danswer/chat/process_message.py @@ -52,6 +52,7 @@ from danswer.llm.factory import get_llms_for_persona from danswer.llm.factory import get_main_llm_from_tuple from danswer.llm.interfaces import LLMConfig from danswer.natural_language_processing.utils import get_tokenizer +from danswer.search.enums import LLMEvaluationType from danswer.search.enums import OptionalSearchSetting from danswer.search.enums import QueryFlow from danswer.search.enums import SearchType @@ -60,6 +61,7 @@ from danswer.search.retrieval.search_runner import inference_sections_from_ids from danswer.search.utils import chunks_or_sections_to_search_docs from danswer.search.utils import dedupe_documents from danswer.search.utils import drop_llm_indices +from danswer.search.utils import relevant_documents_to_indices from danswer.server.query_and_chat.models import ChatMessageDetail from danswer.server.query_and_chat.models import CreateChatMessageRequest from danswer.server.utils import get_json_line @@ -501,6 +503,9 @@ def stream_chat_message_objects( chunks_above=new_msg_req.chunks_above, chunks_below=new_msg_req.chunks_below, full_doc=new_msg_req.full_doc, + evaluation_type=LLMEvaluationType.BASIC + if persona.llm_relevance_filter + else LLMEvaluationType.SKIP, ) tool_dict[db_tool_model.id] = [search_tool] elif tool_cls.__name__ == ImageGenerationTool.__name__: @@ -629,18 
+634,28 @@ def stream_chat_message_objects( ) yield qa_docs_response elif packet.id == SECTION_RELEVANCE_LIST_ID: - chunk_indices = packet.response + relevance_sections = packet.response - if reference_db_search_docs is not None and dropped_indices: - chunk_indices = drop_llm_indices( - llm_indices=chunk_indices, - search_docs=reference_db_search_docs, - dropped_indices=dropped_indices, + if reference_db_search_docs is not None: + llm_indices = relevant_documents_to_indices( + relevance_sections=relevance_sections, + search_docs=[ + translate_db_search_doc_to_server_search_doc(doc) + for doc in reference_db_search_docs + ], + ) + + if dropped_indices: + llm_indices = drop_llm_indices( + llm_indices=llm_indices, + search_docs=reference_db_search_docs, + dropped_indices=dropped_indices, + ) + + yield LLMRelevanceFilterResponse( + relevant_chunk_indices=llm_indices ) - yield LLMRelevanceFilterResponse( - relevant_chunk_indices=chunk_indices - ) elif packet.id == IMAGE_GENERATION_RESPONSE_ID: img_generation_response = cast( list[ImageGenerationResponse], packet.response diff --git a/backend/danswer/configs/chat_configs.py b/backend/danswer/configs/chat_configs.py index 3ba8e7a42..529090eda 100644 --- a/backend/danswer/configs/chat_configs.py +++ b/backend/danswer/configs/chat_configs.py @@ -33,11 +33,6 @@ DISABLE_LLM_QUERY_ANSWERABILITY = QA_PROMPT_OVERRIDE == "weak" # Note this is not in any of the deployment configs yet CONTEXT_CHUNKS_ABOVE = int(os.environ.get("CONTEXT_CHUNKS_ABOVE") or 0) CONTEXT_CHUNKS_BELOW = int(os.environ.get("CONTEXT_CHUNKS_BELOW") or 0) -# Whether the LLM should evaluate all of the document chunks passed in for usefulness -# in relation to the user query -DISABLE_LLM_CHUNK_FILTER = ( - os.environ.get("DISABLE_LLM_CHUNK_FILTER", "").lower() == "true" -) # Whether the LLM should be used to decide if a search would help given the chat history DISABLE_LLM_CHOOSE_SEARCH = ( os.environ.get("DISABLE_LLM_CHOOSE_SEARCH", "").lower() == "true" @@ -64,6 +59,7 @@ HYBRID_ALPHA = max(0, min(1, float(os.environ.get("HYBRID_ALPHA") or 0.62))) TITLE_CONTENT_RATIO = max( 0, min(1, float(os.environ.get("TITLE_CONTENT_RATIO") or 0.20)) ) + # A list of languages passed to the LLM to rephase the query # For example "English,French,Spanish", be sure to use the "," separator MULTILINGUAL_QUERY_EXPANSION = os.environ.get("MULTILINGUAL_QUERY_EXPANSION") or None @@ -76,16 +72,16 @@ LANGUAGE_CHAT_NAMING_HINT = ( or "The name of the conversation must be in the same language as the user query." ) - # Agentic search takes significantly more tokens and therefore has much higher cost. # This configuration allows users to get a search-only experience with instant results # and no involvement from the LLM. # Additionally, some LLM providers have strict rate limits which may prohibit # sending many API requests at once (as is done in agentic search). 
-DISABLE_AGENTIC_SEARCH = (
-    os.environ.get("DISABLE_AGENTIC_SEARCH") or "false"
-).lower() == "true"
-
+# Whether the LLM should evaluate all of the document chunks passed in for usefulness
+# in relation to the user query
+DISABLE_LLM_DOC_RELEVANCE = (
+    os.environ.get("DISABLE_LLM_DOC_RELEVANCE", "").lower() == "true"
+)

 # Stops streaming answers back to the UI if this pattern is seen:
 STOP_STREAM_PAT = os.environ.get("STOP_STREAM_PAT") or None
diff --git a/backend/danswer/db/chat.py b/backend/danswer/db/chat.py
index c057aca6b..2ec04b96a 100644
--- a/backend/danswer/db/chat.py
+++ b/backend/danswer/db/chat.py
@@ -16,7 +16,7 @@ from sqlalchemy.orm import joinedload
 from sqlalchemy.orm import Session

 from danswer.auth.schemas import UserRole
-from danswer.chat.models import LLMRelevanceSummaryResponse
+from danswer.chat.models import DocumentRelevance
 from danswer.configs.chat_configs import HARD_DELETE_CHATS
 from danswer.configs.constants import MessageType
 from danswer.db.models import ChatMessage
@@ -541,11 +541,11 @@ def get_doc_query_identifiers_from_model(
 def update_search_docs_table_with_relevance(
     db_session: Session,
     reference_db_search_docs: list[SearchDoc],
-    relevance_summary: LLMRelevanceSummaryResponse,
+    relevance_summary: DocumentRelevance,
 ) -> None:
     for search_doc in reference_db_search_docs:
         relevance_data = relevance_summary.relevance_summaries.get(
-            f"{search_doc.document_id}-{search_doc.chunk_ind}"
+            search_doc.document_id
         )
         if relevance_data is not None:
             db_session.execute(
diff --git a/backend/danswer/llm/answering/answer.py b/backend/danswer/llm/answering/answer.py
index d7cf6ea37..da5ccc4e9 100644
--- a/backend/danswer/llm/answering/answer.py
+++ b/backend/danswer/llm/answering/answer.py
@@ -483,6 +483,7 @@ class Answer:
             ]
         elif message.id == FINAL_CONTEXT_DOCUMENTS:
             final_context_docs = cast(list[LlmDoc], message.response)
+
         elif (
             message.id == SEARCH_DOC_CONTENT_ID
             and not self._return_contexts
diff --git a/backend/danswer/llm/answering/prune_and_merge.py b/backend/danswer/llm/answering/prune_and_merge.py
index a31014b7d..b4bd4a348 100644
--- a/backend/danswer/llm/answering/prune_and_merge.py
+++ b/backend/danswer/llm/answering/prune_and_merge.py
@@ -28,6 +28,9 @@ logger = setup_logger()
 T = TypeVar("T", bound=LlmDoc | InferenceChunk | InferenceSection)

 _METADATA_TOKEN_ESTIMATE = 75
+# Title and additional tokens as part of the tool message json
+# this is only used to log a warning so we can be more forgiving with the buffer
+_OVERCOUNT_ESTIMATE = 256


 class PruningError(Exception):
@@ -179,10 +182,18 @@ def _apply_pruning(
             and section_token_count
             > DOC_EMBEDDING_CONTEXT_SIZE + _METADATA_TOKEN_ESTIMATE
         ):
-            logger.warning(
-                "Found more tokens in Section than expected, "
-                "likely mismatch between embedding and LLM tokenizers. Trimming content..."
-            )
+            # If the section is only slightly over, the overage is likely just the extra
+            # tool message tokens, so don't log it; the content is trimmed below either way
+            if (
+                section_token_count
+                > DOC_EMBEDDING_CONTEXT_SIZE
+                + _METADATA_TOKEN_ESTIMATE
+                + _OVERCOUNT_ESTIMATE
+            ):
+                logger.info(
+                    "Found more tokens in Section than expected, "
+                    "likely mismatch between embedding and LLM tokenizers. Trimming content..."
+                )
             section.combined_content = tokenizer_trim_content(
                 content=section.combined_content,
                 desired_length=DOC_EMBEDDING_CONTEXT_SIZE,
diff --git a/backend/danswer/one_shot_answer/answer_question.py b/backend/danswer/one_shot_answer/answer_question.py
index 63ac13012..2c090d248 100644
--- a/backend/danswer/one_shot_answer/answer_question.py
+++ b/backend/danswer/one_shot_answer/answer_question.py
@@ -9,10 +9,12 @@ from danswer.chat.models import CitationInfo
 from danswer.chat.models import DanswerAnswerPiece
 from danswer.chat.models import DanswerContexts
 from danswer.chat.models import DanswerQuotes
+from danswer.chat.models import DocumentRelevance
 from danswer.chat.models import LLMRelevanceFilterResponse
-from danswer.chat.models import LLMRelevanceSummaryResponse
 from danswer.chat.models import QADocsResponse
+from danswer.chat.models import RelevanceAnalysis
 from danswer.chat.models import StreamingError
+from danswer.configs.chat_configs import DISABLE_LLM_DOC_RELEVANCE
 from danswer.configs.chat_configs import MAX_CHUNKS_FED_TO_CHAT
 from danswer.configs.chat_configs import QA_TIMEOUT
 from danswer.configs.constants import MessageType
@@ -39,18 +41,17 @@ from danswer.one_shot_answer.models import DirectQARequest
 from danswer.one_shot_answer.models import OneShotQAResponse
 from danswer.one_shot_answer.models import QueryRephrase
 from danswer.one_shot_answer.qa_utils import combine_message_thread
+from danswer.search.enums import LLMEvaluationType
 from danswer.search.models import RerankMetricsContainer
 from danswer.search.models import RetrievalMetricsContainer
 from danswer.search.utils import chunks_or_sections_to_search_docs
 from danswer.search.utils import dedupe_documents
-from danswer.search.utils import drop_llm_indices
 from danswer.secondary_llm_flows.answer_validation import get_answer_validity
 from danswer.secondary_llm_flows.query_expansion import thread_based_query_rephrase
 from danswer.server.query_and_chat.models import ChatMessageDetail
 from danswer.server.utils import get_json_line
 from danswer.tools.force import ForceUseTool
 from danswer.tools.search.search_tool import SEARCH_DOC_CONTENT_ID
-from danswer.tools.search.search_tool import SEARCH_EVALUATION_ID
 from danswer.tools.search.search_tool import SEARCH_RESPONSE_SUMMARY_ID
 from danswer.tools.search.search_tool import SearchResponseSummary
 from danswer.tools.search.search_tool import SearchTool
@@ -74,7 +75,7 @@ AnswerObjectIterator = Iterator[
     | ChatMessageDetail
     | CitationInfo
     | ToolCallKickoff
-    | LLMRelevanceSummaryResponse
+    | DocumentRelevance
 ]


@@ -180,10 +181,13 @@ def stream_answer_objects(
         max_tokens=max_document_tokens,
         use_sections=query_req.chunks_above > 0 or query_req.chunks_below > 0,
     )

     search_tool = SearchTool(
         db_session=db_session,
         user=user,
+        evaluation_type=LLMEvaluationType.SKIP
+        if DISABLE_LLM_DOC_RELEVANCE
+        else query_req.evaluation_type,
         persona=chat_session.persona,
         retrieval_options=query_req.retrieval_options,
         prompt_config=prompt_config,
@@ -194,7 +200,6 @@ def stream_answer_objects(
         chunks_below=query_req.chunks_below,
         full_doc=query_req.full_doc,
         bypass_acl=bypass_acl,
-        llm_doc_eval=query_req.llm_doc_eval,
     )

     answer_config = AnswerStyleConfig(
@@ -223,7 +228,6 @@ def stream_answer_objects(
     )
     # won't be any ImageGenerationDisplay responses since that tool is never passed in
-    dropped_inds: list[int] = []
     for packet in cast(AnswerObjectIterator, answer.processed_streamed_output):
         # for one-shot flow, don't currently do anything
with these @@ -266,20 +270,18 @@ def stream_answer_objects( yield packet.response elif packet.id == SECTION_RELEVANCE_LIST_ID: - chunk_indices = packet.response + document_based_response = {} - if reference_db_search_docs is not None and dropped_inds: - chunk_indices = drop_llm_indices( - llm_indices=chunk_indices, - search_docs=reference_db_search_docs, - dropped_indices=dropped_inds, - ) + if packet.response is not None: + for evaluation in packet.response: + document_based_response[ + evaluation.document_id + ] = RelevanceAnalysis( + relevant=evaluation.relevant, content=evaluation.content + ) - yield LLMRelevanceFilterResponse(relevant_chunk_indices=packet.response) - - elif packet.id == SEARCH_EVALUATION_ID: - evaluation_response = LLMRelevanceSummaryResponse( - relevance_summaries=packet.response + evaluation_response = DocumentRelevance( + relevance_summaries=document_based_response ) if reference_db_search_docs is not None: update_search_docs_table_with_relevance( diff --git a/backend/danswer/one_shot_answer/models.py b/backend/danswer/one_shot_answer/models.py index d10986d21..c1a3ab0aa 100644 --- a/backend/danswer/one_shot_answer/models.py +++ b/backend/danswer/one_shot_answer/models.py @@ -9,6 +9,7 @@ from danswer.chat.models import DanswerContexts from danswer.chat.models import DanswerQuotes from danswer.chat.models import QADocsResponse from danswer.configs.constants import MessageType +from danswer.search.enums import LLMEvaluationType from danswer.search.models import ChunkContext from danswer.search.models import RetrievalDetails @@ -27,11 +28,11 @@ class DirectQARequest(ChunkContext): messages: list[ThreadMessage] prompt_id: int | None persona_id: int - agentic: bool | None = None retrieval_options: RetrievalDetails = Field(default_factory=RetrievalDetails) # This is to forcibly skip (or run) the step, if None it uses the system defaults skip_rerank: bool | None = None - skip_llm_chunk_filter: bool | None = None + evaluation_type: LLMEvaluationType = LLMEvaluationType.UNSPECIFIED + chain_of_thought: bool = False return_contexts: bool = False @@ -40,11 +41,6 @@ class DirectQARequest(ChunkContext): # will also disable Thread-based Rewording if specified query_override: str | None = None - # This is to toggle agentic evaluation: - # 1. Evaluates whether each response is relevant or not - # 2. 
Provides a summary of the document's relevance in the resulsts - llm_doc_eval: bool = False - # If True, skips generative an AI response to the search query skip_gen_ai_answer_generation: bool = False diff --git a/backend/danswer/prompts/agentic_evaluation.py b/backend/danswer/prompts/agentic_evaluation.py index bf8852cf8..546f40c7f 100644 --- a/backend/danswer/prompts/agentic_evaluation.py +++ b/backend/danswer/prompts/agentic_evaluation.py @@ -28,7 +28,8 @@ True or False """ AGENTIC_SEARCH_USER_PROMPT = """ -Document: + +Document Title: {title}{optional_metadata} ``` {content} ``` diff --git a/backend/danswer/search/enums.py b/backend/danswer/search/enums.py index 9ba44ada2..cced52d08 100644 --- a/backend/danswer/search/enums.py +++ b/backend/danswer/search/enums.py @@ -4,13 +4,6 @@ search/models.py imports from db/models.py.""" from enum import Enum -class OptionalSearchSetting(str, Enum): - ALWAYS = "always" - NEVER = "never" - # Determine whether to run search based on history and latest query - AUTO = "auto" - - class RecencyBiasSetting(str, Enum): FAVOR_RECENT = "favor_recent" # 2x decay rate BASE_DECAY = "base_decay" @@ -19,12 +12,26 @@ class RecencyBiasSetting(str, Enum): AUTO = "auto" +class OptionalSearchSetting(str, Enum): + ALWAYS = "always" + NEVER = "never" + # Determine whether to run search based on history and latest query + AUTO = "auto" + + class SearchType(str, Enum): KEYWORD = "keyword" SEMANTIC = "semantic" HYBRID = "hybrid" +class LLMEvaluationType(str, Enum): + AGENTIC = "agentic" # applies agentic evaluation + BASIC = "basic" # applies boolean evaluation + SKIP = "skip" # skips evaluation + UNSPECIFIED = "unspecified" # reverts to default + + class QueryFlow(str, Enum): SEARCH = "search" QUESTION_ANSWER = "question-answer" diff --git a/backend/danswer/search/models.py b/backend/danswer/search/models.py index a6eb3cd4d..8797d583f 100644 --- a/backend/danswer/search/models.py +++ b/backend/danswer/search/models.py @@ -6,13 +6,13 @@ from pydantic import validator from danswer.configs.chat_configs import CONTEXT_CHUNKS_ABOVE from danswer.configs.chat_configs import CONTEXT_CHUNKS_BELOW -from danswer.configs.chat_configs import DISABLE_LLM_CHUNK_FILTER from danswer.configs.chat_configs import HYBRID_ALPHA from danswer.configs.chat_configs import NUM_RERANKED_RESULTS from danswer.configs.chat_configs import NUM_RETURNED_HITS from danswer.configs.constants import DocumentSource from danswer.db.models import Persona from danswer.indexing.models import BaseChunk +from danswer.search.enums import LLMEvaluationType from danswer.search.enums import OptionalSearchSetting from danswer.search.enums import SearchType from shared_configs.configs import ENABLE_RERANKING_REAL_TIME_FLOW @@ -78,7 +78,7 @@ class SearchRequest(ChunkContext): hybrid_alpha: float = HYBRID_ALPHA # This is to forcibly skip (or run) the step, if None it uses the system defaults skip_rerank: bool | None = None - skip_llm_chunk_filter: bool | None = None + evaluation_type: LLMEvaluationType = LLMEvaluationType.UNSPECIFIED class Config: arbitrary_types_allowed = True @@ -88,11 +88,11 @@ class SearchQuery(ChunkContext): query: str filters: IndexFilters recency_bias_multiplier: float + evaluation_type: LLMEvaluationType num_hits: int = NUM_RETURNED_HITS offset: int = 0 search_type: SearchType = SearchType.HYBRID skip_rerank: bool = not ENABLE_RERANKING_REAL_TIME_FLOW - skip_llm_chunk_filter: bool = DISABLE_LLM_CHUNK_FILTER # Only used if not skip_rerank num_rerank: int | None = NUM_RERANKED_RESULTS # Only used if 
not skip_llm_chunk_filter diff --git a/backend/danswer/search/pipeline.py b/backend/danswer/search/pipeline.py index 7be81983b..7767f31d7 100644 --- a/backend/danswer/search/pipeline.py +++ b/backend/danswer/search/pipeline.py @@ -5,17 +5,19 @@ from typing import cast from sqlalchemy.orm import Session -from danswer.chat.models import RelevanceChunk -from danswer.configs.chat_configs import DISABLE_AGENTIC_SEARCH +from danswer.chat.models import SectionRelevancePiece +from danswer.configs.chat_configs import DISABLE_LLM_DOC_RELEVANCE from danswer.configs.chat_configs import MULTILINGUAL_QUERY_EXPANSION from danswer.db.embedding_model import get_current_db_embedding_model from danswer.db.models import User from danswer.document_index.factory import get_default_document_index from danswer.llm.answering.models import DocumentPruningConfig from danswer.llm.answering.models import PromptConfig +from danswer.llm.answering.prune_and_merge import _merge_sections from danswer.llm.answering.prune_and_merge import ChunkRange from danswer.llm.answering.prune_and_merge import merge_chunk_intervals from danswer.llm.interfaces import LLM +from danswer.search.enums import LLMEvaluationType from danswer.search.enums import QueryFlow from danswer.search.enums import SearchType from danswer.search.models import IndexFilters @@ -29,6 +31,7 @@ from danswer.search.postprocessing.postprocessing import search_postprocessing from danswer.search.preprocessing.preprocessing import retrieval_preprocessing from danswer.search.retrieval.search_runner import retrieve_chunks from danswer.search.utils import inference_section_from_chunks +from danswer.search.utils import relevant_sections_to_indices from danswer.secondary_llm_flows.agentic_evaluation import evaluate_inference_section from danswer.utils.logger import setup_logger from danswer.utils.threadpool_concurrency import FunctionCall @@ -84,11 +87,13 @@ class SearchPipeline: # Reranking and LLM section selection can be run together # If only LLM selection is on, the reranked chunks are yielded immediatly self._reranked_sections: list[InferenceSection] | None = None - self._relevant_section_indices: list[int] | None = None + self._final_context_sections: list[InferenceSection] | None = None + + self._section_relevance: list[SectionRelevancePiece] | None = None # Generates reranked chunks and LLM selections self._postprocessing_generator: ( - Iterator[list[InferenceSection] | list[int]] | None + Iterator[list[InferenceSection] | list[SectionRelevancePiece]] | None ) = None """Pre-processing""" @@ -332,44 +337,66 @@ class SearchPipeline: return self._reranked_sections @property - def relevant_section_indices(self) -> list[int]: - if self._relevant_section_indices is not None: - return self._relevant_section_indices + def final_context_sections(self) -> list[InferenceSection]: + if self._final_context_sections is not None: + return self._final_context_sections - self._relevant_section_indices = next( - cast(Iterator[list[int]], self._postprocessing_generator) - ) - return self._relevant_section_indices + self._final_context_sections = _merge_sections(sections=self.reranked_sections) + return self._final_context_sections @property - def relevance_summaries(self) -> dict[str, RelevanceChunk]: - if DISABLE_AGENTIC_SEARCH: + def section_relevance(self) -> list[SectionRelevancePiece] | None: + if self._section_relevance is not None: + return self._section_relevance + + if ( + self.search_query.evaluation_type == LLMEvaluationType.SKIP + or DISABLE_LLM_DOC_RELEVANCE + ): + 
return None
+
+        if self.search_query.evaluation_type == LLMEvaluationType.UNSPECIFIED:
             raise ValueError(
-                "Agentic saerch operation called while DISABLE_AGENTIC_SEARCH is toggled"
+                "Attempted to access section relevance scores on search query with evaluation type `UNSPECIFIED`. "
+                + "The search query evaluation type should have been specified."
             )
-        if len(self.reranked_sections) == 0:
-            logger.warning(
-                "No sections found in agentic search evalution. Returning empty dict."
+
+        if self.search_query.evaluation_type == LLMEvaluationType.AGENTIC:
+            sections = self.final_context_sections
+            functions = [
+                FunctionCall(
+                    evaluate_inference_section,
+                    (section, self.search_query.query, self.llm),
+                )
+                for section in sections
+            ]
+            results = run_functions_in_parallel(function_calls=functions)
+            self._section_relevance = list(results.values())
+
+        elif self.search_query.evaluation_type == LLMEvaluationType.BASIC:
+            if DISABLE_LLM_DOC_RELEVANCE:
+                raise ValueError(
+                    "Basic search evaluation operation called while DISABLE_LLM_DOC_RELEVANCE is enabled."
+                )
+            self._section_relevance = next(
+                cast(
+                    Iterator[list[SectionRelevancePiece]],
+                    self._postprocessing_generator,
+                )
             )
-            return {}

-        sections = self.reranked_sections
-        functions = [
-            FunctionCall(
-                evaluate_inference_section, (section, self.search_query.query, self.llm)
+        else:
+            # All other cases should have been handled above
+            raise ValueError(
+                f"Unexpected evaluation type: {self.search_query.evaluation_type}"
             )
-            for section in sections
-        ]
-        results = run_functions_in_parallel(function_calls=functions)
-
-        return {
-            next(iter(value)): value[next(iter(value))] for value in results.values()
-        }
+        return self._section_relevance

     @property
     def section_relevance_list(self) -> list[bool]:
-        return [
-            True if ind in self.relevant_section_indices else False
-            for ind in range(len(self.reranked_sections))
-        ]
+        llm_indices = relevant_sections_to_indices(
+            relevance_sections=self.section_relevance,
+            inference_sections=self.final_context_sections,
+        )
+        return [ind in llm_indices for ind in range(len(self.final_context_sections))]
diff --git a/backend/danswer/search/postprocessing/postprocessing.py b/backend/danswer/search/postprocessing/postprocessing.py
index 0aacce71a..5333ca3fd 100644
--- a/backend/danswer/search/postprocessing/postprocessing.py
+++ b/backend/danswer/search/postprocessing/postprocessing.py
@@ -4,6 +4,7 @@ from typing import cast

 import numpy

+from danswer.chat.models import SectionRelevancePiece
 from danswer.configs.app_configs import BLURB_SIZE
 from danswer.configs.constants import RETURN_SEPARATOR
 from danswer.configs.model_configs import CROSS_ENCODER_RANGE_MAX
@@ -15,6 +16,7 @@ from danswer.llm.interfaces import LLM
 from danswer.natural_language_processing.search_nlp_models import (
     CrossEncoderEnsembleModel,
 )
+from danswer.search.enums import LLMEvaluationType
 from danswer.search.models import ChunkMetric
 from danswer.search.models import InferenceChunk
 from danswer.search.models import InferenceChunkUncleaned
@@ -48,10 +50,6 @@ def should_rerank(query: SearchQuery) -> bool:
     return query.search_type != SearchType.KEYWORD and not query.skip_rerank


-def should_apply_llm_based_relevance_filter(query: SearchQuery) -> bool:
-    return not query.skip_llm_chunk_filter
-
-
 def cleanup_chunks(chunks: list[InferenceChunkUncleaned]) -> list[InferenceChunk]:
     def _remove_title(chunk: InferenceChunkUncleaned) -> str:
         if not chunk.title or not chunk.content:
@@ -233,7 +231,7 @@ def search_postprocessing(
     retrieved_sections:
list[InferenceSection],
     llm: LLM,
     rerank_metrics_callback: Callable[[RerankMetricsContainer], None] | None = None,
-) -> Iterator[list[InferenceSection] | list[int]]:
+) -> Iterator[list[InferenceSection] | list[SectionRelevancePiece]]:
     post_processing_tasks: list[FunctionCall] = []

     if not retrieved_sections:
@@ -265,7 +263,10 @@ (
     sections_yielded = True

     llm_filter_task_id = None
-    if should_apply_llm_based_relevance_filter(search_query):
+    if search_query.evaluation_type in [
+        LLMEvaluationType.BASIC,
+        LLMEvaluationType.UNSPECIFIED,
+    ]:
         post_processing_tasks.append(
             FunctionCall(
                 filter_sections,
@@ -306,7 +307,11 @@
     )

     yield [
-        index
-        for index, section in enumerate(reranked_sections or retrieved_sections)
-        if section.center_chunk.unique_id in llm_selected_section_ids
+        SectionRelevancePiece(
+            document_id=section.center_chunk.document_id,
+            chunk_id=section.center_chunk.chunk_id,
+            relevant=section.center_chunk.unique_id in llm_selected_section_ids,
+            content="",
+        )
+        for section in (reranked_sections or retrieved_sections)
     ]
diff --git a/backend/danswer/search/preprocessing/preprocessing.py b/backend/danswer/search/preprocessing/preprocessing.py
index bb2449efe..9d82f1d76 100644
--- a/backend/danswer/search/preprocessing/preprocessing.py
+++ b/backend/danswer/search/preprocessing/preprocessing.py
@@ -1,11 +1,12 @@
 from sqlalchemy.orm import Session

 from danswer.configs.chat_configs import BASE_RECENCY_DECAY
-from danswer.configs.chat_configs import DISABLE_LLM_CHUNK_FILTER
+from danswer.configs.chat_configs import DISABLE_LLM_DOC_RELEVANCE
 from danswer.configs.chat_configs import FAVOR_RECENT_DECAY_MULTIPLIER
 from danswer.configs.chat_configs import NUM_RETURNED_HITS
 from danswer.db.models import User
 from danswer.llm.interfaces import LLM
+from danswer.search.enums import LLMEvaluationType
 from danswer.search.enums import QueryFlow
 from danswer.search.enums import RecencyBiasSetting
 from danswer.search.models import BaseFilters
@@ -35,7 +36,6 @@ def retrieval_preprocessing(
     db_session: Session,
     bypass_acl: bool = False,
     include_query_intent: bool = True,
-    disable_llm_chunk_filter: bool = DISABLE_LLM_CHUNK_FILTER,
     base_recency_decay: float = BASE_RECENCY_DECAY,
     favor_recent_decay_multiplier: float = FAVOR_RECENT_DECAY_MULTIPLIER,
 ) -> tuple[SearchQuery, SearchType | None, QueryFlow | None]:
@@ -137,18 +137,23 @@ def retrieval_preprocessing(
         access_control_list=user_acl_filters,
     )

-    llm_chunk_filter = False
-    if search_request.skip_llm_chunk_filter is not None:
-        llm_chunk_filter = not search_request.skip_llm_chunk_filter
-    elif persona:
-        llm_chunk_filter = persona.llm_relevance_filter
+    llm_evaluation_type = LLMEvaluationType.BASIC
+    if search_request.evaluation_type is not LLMEvaluationType.UNSPECIFIED:
+        llm_evaluation_type = search_request.evaluation_type

-    if disable_llm_chunk_filter:
-        if llm_chunk_filter:
+    elif persona:
+        llm_evaluation_type = (
+            LLMEvaluationType.BASIC
+            if persona.llm_relevance_filter
+            else LLMEvaluationType.SKIP
+        )
+
+    if DISABLE_LLM_DOC_RELEVANCE:
+        if llm_evaluation_type != LLMEvaluationType.SKIP:
             logger.info(
                 "LLM chunk filtering would have run but has been globally disabled"
             )
-        llm_chunk_filter = False
+        llm_evaluation_type = LLMEvaluationType.SKIP

     skip_rerank = search_request.skip_rerank
     if skip_rerank is None:
@@ -176,7 +181,7 @@ def retrieval_preprocessing(
         num_hits=limit if limit is not None else NUM_RETURNED_HITS,
         offset=offset or 0,
         skip_rerank=skip_rerank,
-        skip_llm_chunk_filter=not llm_chunk_filter,
+
evaluation_type=llm_evaluation_type,
         chunks_above=search_request.chunks_above,
         chunks_below=search_request.chunks_below,
         full_doc=search_request.full_doc,
diff --git a/backend/danswer/search/utils.py b/backend/danswer/search/utils.py
index 8b138d2e9..38ca2559e 100644
--- a/backend/danswer/search/utils.py
+++ b/backend/danswer/search/utils.py
@@ -1,6 +1,7 @@
 from collections.abc import Sequence
 from typing import TypeVar

+from danswer.chat.models import SectionRelevancePiece
 from danswer.db.models import SearchDoc as DBSearchDoc
 from danswer.search.models import InferenceChunk
 from danswer.search.models import InferenceSection
@@ -37,6 +38,46 @@ def dedupe_documents(items: list[T]) -> tuple[list[T], list[int]]:
     return deduped_items, dropped_indices


+def relevant_sections_to_indices(
+    relevance_sections: list[SectionRelevancePiece] | None,
+    inference_sections: list[InferenceSection],
+) -> list[int]:
+    if relevance_sections is None:
+        return []
+
+    relevant_set = {
+        (chunk.document_id, chunk.chunk_id)
+        for chunk in relevance_sections
+        if chunk.relevant
+    }
+    relevant_indices = [
+        index
+        for index, section in enumerate(inference_sections)
+        if (section.center_chunk.document_id, section.center_chunk.chunk_id)
+        in relevant_set
+    ]
+    return relevant_indices
+
+
+def relevant_documents_to_indices(
+    relevance_sections: list[SectionRelevancePiece] | None, search_docs: list[SearchDoc]
+) -> list[int]:
+    if relevance_sections is None:
+        return []
+
+    relevant_set = {
+        (chunk.document_id, chunk.chunk_id)
+        for chunk in relevance_sections
+        if chunk.relevant
+    }
+
+    return [
+        index
+        for index, section in enumerate(search_docs)
+        if (section.document_id, section.chunk_ind) in relevant_set
+    ]
+
+
 def drop_llm_indices(
     llm_indices: list[int],
     search_docs: Sequence[DBSearchDoc | SavedSearchDoc],
diff --git a/backend/danswer/secondary_llm_flows/agentic_evaluation.py b/backend/danswer/secondary_llm_flows/agentic_evaluation.py
index c35bf0542..3de9db00b 100644
--- a/backend/danswer/secondary_llm_flows/agentic_evaluation.py
+++ b/backend/danswer/secondary_llm_flows/agentic_evaluation.py
@@ -1,6 +1,6 @@
 import re

-from danswer.chat.models import RelevanceChunk
+from danswer.chat.models import SectionRelevancePiece
 from danswer.llm.interfaces import LLM
 from danswer.llm.utils import dict_based_prompt_to_langchain_prompt
 from danswer.llm.utils import message_to_string
@@ -13,7 +13,7 @@ logger = setup_logger()


 def _get_agent_eval_messages(
-    title: str, content: str, query: str
+    title: str, content: str, query: str, center_metadata: str
 ) -> list[dict[str, str]]:
     messages = [
         {
@@ -23,7 +23,10 @@ def _get_agent_eval_messages(
         {
             "role": "user",
             "content": AGENTIC_SEARCH_USER_PROMPT.format(
-                title=title, content=content, query=query
+                title=title,
+                content=content,
+                query=query,
+                optional_metadata=center_metadata,
             ),
         },
     ]
@@ -32,16 +35,27 @@ def _get_agent_eval_messages(

 def evaluate_inference_section(
     document: InferenceSection, query: str, llm: LLM
-) -> dict[str, RelevanceChunk]:
-    results = {}
+) -> SectionRelevancePiece:
+    def _get_metadata_str(metadata: dict[str, str | list[str]]) -> str:
+        metadata_str = "\n\nMetadata:\n"
+        for key, value in metadata.items():
+            value_str = ", ".join(value) if isinstance(value, list) else value
+            metadata_str += f"{key} - {value_str}\n"
+
+        # Since there are now multiple sections, add this prefix for clarity
+        return metadata_str + "\nContent:"

     document_id = document.center_chunk.document_id
     semantic_id = document.center_chunk.semantic_identifier
     contents =
document.combined_content - chunk_id = document.center_chunk.chunk_id + center_metadata = document.center_chunk.metadata + center_metadata_str = _get_metadata_str(center_metadata) if center_metadata else "" messages = _get_agent_eval_messages( - title=semantic_id, content=contents, query=query + title=semantic_id, + content=contents, + query=query, + center_metadata=center_metadata_str, ) filled_llm_prompt = dict_based_prompt_to_langchain_prompt(messages) model_output = message_to_string(llm.invoke(filled_llm_prompt)) @@ -64,7 +78,9 @@ def evaluate_inference_section( ) relevant = last_line.strip().lower().startswith("true") - results[f"{document_id}-{chunk_id}"] = RelevanceChunk( - relevant=relevant, content=analysis + return SectionRelevancePiece( + document_id=document_id, + chunk_id=document.center_chunk.chunk_id, + relevant=relevant, + content=analysis, ) - return results diff --git a/backend/danswer/secondary_llm_flows/chunk_usefulness.py b/backend/danswer/secondary_llm_flows/chunk_usefulness.py index b672a563d..7dd0b93ec 100644 --- a/backend/danswer/secondary_llm_flows/chunk_usefulness.py +++ b/backend/danswer/secondary_llm_flows/chunk_usefulness.py @@ -1,5 +1,6 @@ from collections.abc import Callable +from danswer.configs.chat_configs import DISABLE_LLM_DOC_RELEVANCE from danswer.llm.interfaces import LLM from danswer.llm.utils import dict_based_prompt_to_langchain_prompt from danswer.llm.utils import message_to_string @@ -45,6 +46,12 @@ def llm_eval_section(query: str, section_content: str, llm: LLM) -> bool: def llm_batch_eval_sections( query: str, section_contents: list[str], llm: LLM, use_threads: bool = True ) -> list[bool]: + if DISABLE_LLM_DOC_RELEVANCE: + raise RuntimeError( + "LLM Doc Relevance is globally disabled, " + "this should have been caught upstream." 
+    )
+
     if use_threads:
         functions_with_args: list[tuple[Callable, tuple]] = [
             (llm_eval_section, (query, section_content, llm))
diff --git a/backend/danswer/server/query_and_chat/query_backend.py b/backend/danswer/server/query_and_chat/query_backend.py
index e7e1ca493..556d1b195 100644
--- a/backend/danswer/server/query_and_chat/query_backend.py
+++ b/backend/danswer/server/query_and_chat/query_backend.py
@@ -261,6 +261,7 @@ def get_answer_with_quote(
     query = query_request.messages[0].message
     logger.info(f"Received query for one shot answer with quotes: {query}")
+
     packets = stream_search_answer(
         query_req=query_request,
         user=user,
diff --git a/backend/danswer/tools/images/image_generation_tool.py b/backend/danswer/tools/images/image_generation_tool.py
index 3b798c43d..8b100df03 100644
--- a/backend/danswer/tools/images/image_generation_tool.py
+++ b/backend/danswer/tools/images/image_generation_tool.py
@@ -162,20 +162,43 @@ class ImageGenerationTool(Tool):
         )

     def _generate_image(self, prompt: str) -> ImageGenerationResponse:
-        response = image_generation(
-            prompt=prompt,
-            model=self.model,
-            api_key=self.api_key,
-            # need to pass in None rather than empty str
-            api_base=self.api_base or None,
-            api_version=self.api_version or None,
-            n=1,
-            extra_headers=build_llm_extra_headers(self.additional_headers),
-        )
-        return ImageGenerationResponse(
-            revised_prompt=response.data[0]["revised_prompt"],
-            url=response.data[0]["url"],
-        )
+        try:
+            response = image_generation(
+                prompt=prompt,
+                model=self.model,
+                api_key=self.api_key,
+                # need to pass in None rather than empty str
+                api_base=self.api_base or None,
+                api_version=self.api_version or None,
+                n=1,
+                extra_headers=build_llm_extra_headers(self.additional_headers),
+            )
+            return ImageGenerationResponse(
+                revised_prompt=response.data[0]["revised_prompt"],
+                url=response.data[0]["url"],
+            )
+        except Exception as e:
+            logger.debug(f"Error occurred during image generation: {e}")
+
+            error_message = str(e)
+            if "OpenAIException" in str(type(e)):
+                if (
+                    "Your request was rejected as a result of our safety system"
+                    in error_message
+                ):
+                    raise ValueError(
+                        "The image generation request was rejected due to OpenAI's content policy. Please try a different prompt."
+                    )
+                elif "Invalid image URL" in error_message:
+                    raise ValueError("Invalid image URL provided for image generation.")
+                elif "invalid_request_error" in error_message:
+                    raise ValueError(
+                        "Invalid request for image generation. Please check your input."
+                    )
+
+            raise ValueError(
+                "An error occurred during image generation. Please try again later."
+ ) def run(self, **kwargs: str) -> Generator[ToolResponse, None, None]: prompt = cast(str, kwargs["prompt"]) diff --git a/backend/danswer/tools/search/search_tool.py b/backend/danswer/tools/search/search_tool.py index 78be26e88..a0ae64c46 100644 --- a/backend/danswer/tools/search/search_tool.py +++ b/backend/danswer/tools/search/search_tool.py @@ -10,7 +10,6 @@ from danswer.chat.chat_utils import llm_doc_from_inference_section from danswer.chat.models import DanswerContext from danswer.chat.models import DanswerContexts from danswer.chat.models import LlmDoc -from danswer.configs.chat_configs import DISABLE_AGENTIC_SEARCH from danswer.db.models import Persona from danswer.db.models import User from danswer.dynamic_configs.interface import JSON_ro @@ -18,7 +17,9 @@ from danswer.llm.answering.models import DocumentPruningConfig from danswer.llm.answering.models import PreviousMessage from danswer.llm.answering.models import PromptConfig from danswer.llm.answering.prune_and_merge import prune_and_merge_sections +from danswer.llm.answering.prune_and_merge import prune_sections from danswer.llm.interfaces import LLM +from danswer.search.enums import LLMEvaluationType from danswer.search.enums import QueryFlow from danswer.search.enums import SearchType from danswer.search.models import IndexFilters @@ -78,6 +79,7 @@ class SearchTool(Tool): llm: LLM, fast_llm: LLM, pruning_config: DocumentPruningConfig, + evaluation_type: LLMEvaluationType, # if specified, will not actually run a search and will instead return these # sections. Used when the user selects specific docs to talk to selected_sections: list[InferenceSection] | None = None, @@ -85,7 +87,6 @@ class SearchTool(Tool): chunks_below: int = 0, full_doc: bool = False, bypass_acl: bool = False, - llm_doc_eval: bool = False, ) -> None: self.user = user self.persona = persona @@ -94,6 +95,7 @@ class SearchTool(Tool): self.llm = llm self.fast_llm = fast_llm self.pruning_config = pruning_config + self.evaluation_type = evaluation_type self.selected_sections = selected_sections @@ -102,7 +104,6 @@ class SearchTool(Tool): self.full_doc = full_doc self.bypass_acl = bypass_acl self.db_session = db_session - self.llm_doc_eval = llm_doc_eval @property def name(self) -> str: @@ -205,10 +206,12 @@ class SearchTool(Tool): question=query, document_pruning_config=self.pruning_config, ) + llm_docs = [ llm_doc_from_inference_section(section) for section in final_context_sections ] + yield ToolResponse(id=FINAL_CONTEXT_DOCUMENTS, response=llm_docs) def run(self, **kwargs: str) -> Generator[ToolResponse, None, None]: @@ -221,6 +224,7 @@ class SearchTool(Tool): search_pipeline = SearchPipeline( search_request=SearchRequest( query=query, + evaluation_type=self.evaluation_type, human_selected_filters=( self.retrieval_options.filters if self.retrieval_options else None ), @@ -251,7 +255,7 @@ class SearchTool(Tool): id=SEARCH_RESPONSE_SUMMARY_ID, response=SearchResponseSummary( rephrased_query=query, - top_sections=search_pipeline.reranked_sections, + top_sections=search_pipeline.final_context_sections, predicted_flow=search_pipeline.predicted_flow, predicted_search=search_pipeline.predicted_search_type, final_filters=search_pipeline.search_query.filters, @@ -276,11 +280,11 @@ class SearchTool(Tool): yield ToolResponse( id=SECTION_RELEVANCE_LIST_ID, - response=search_pipeline.relevant_section_indices, + response=search_pipeline.section_relevance, ) - final_context_sections = prune_and_merge_sections( - sections=search_pipeline.reranked_sections, + pruned_sections = 
prune_sections( + sections=search_pipeline.final_context_sections, section_relevance_list=search_pipeline.section_relevance_list, prompt_config=self.prompt_config, llm_config=self.llm.config, @@ -289,17 +293,11 @@ class SearchTool(Tool): ) llm_docs = [ - llm_doc_from_inference_section(section) - for section in final_context_sections + llm_doc_from_inference_section(section) for section in pruned_sections ] yield ToolResponse(id=FINAL_CONTEXT_DOCUMENTS, response=llm_docs) - if self.llm_doc_eval and not DISABLE_AGENTIC_SEARCH: - yield ToolResponse( - id=SEARCH_EVALUATION_ID, response=search_pipeline.relevance_summaries - ) - def final_result(self, *args: ToolResponse) -> JSON_ro: final_docs = cast( list[LlmDoc], diff --git a/backend/ee/danswer/server/query_and_chat/models.py b/backend/ee/danswer/server/query_and_chat/models.py index b0116f804..0c5f4a7ef 100644 --- a/backend/ee/danswer/server/query_and_chat/models.py +++ b/backend/ee/danswer/server/query_and_chat/models.py @@ -1,6 +1,7 @@ from pydantic import BaseModel from danswer.configs.constants import DocumentSource +from danswer.search.enums import LLMEvaluationType from danswer.search.enums import SearchType from danswer.search.models import ChunkContext from danswer.search.models import RetrievalDetails @@ -21,9 +22,9 @@ class DocumentSearchRequest(ChunkContext): search_type: SearchType retrieval_options: RetrievalDetails recency_bias_multiplier: float = 1.0 + evaluation_type: LLMEvaluationType # This is to forcibly skip (or run) the step, if None it uses the system defaults skip_rerank: bool | None = None - skip_llm_chunk_filter: bool | None = None class BasicCreateChatMessageRequest(ChunkContext): diff --git a/backend/ee/danswer/server/query_and_chat/query_backend.py b/backend/ee/danswer/server/query_and_chat/query_backend.py index 6edc067b6..8c0c23286 100644 --- a/backend/ee/danswer/server/query_and_chat/query_backend.py +++ b/backend/ee/danswer/server/query_and_chat/query_backend.py @@ -1,3 +1,5 @@ +from typing import cast + from fastapi import APIRouter from fastapi import Depends from fastapi import HTTPException @@ -9,7 +11,9 @@ from danswer.configs.danswerbot_configs import DANSWER_BOT_TARGET_CHUNK_PERCENTA from danswer.danswerbot.slack.handlers.handle_standard_answers import ( oneoff_standard_answers, ) +from danswer.db.chat import translate_db_search_doc_to_server_search_doc from danswer.db.engine import get_session +from danswer.db.models import SearchDoc from danswer.db.models import User from danswer.db.persona import get_persona_by_id from danswer.llm.answering.prompts.citations_prompt import ( @@ -27,6 +31,7 @@ from danswer.search.models import SearchRequest from danswer.search.pipeline import SearchPipeline from danswer.search.utils import dedupe_documents from danswer.search.utils import drop_llm_indices +from danswer.search.utils import relevant_documents_to_indices from danswer.utils.logger import setup_logger from ee.danswer.server.query_and_chat.models import DocumentSearchRequest from ee.danswer.server.query_and_chat.models import StandardAnswerRequest @@ -63,7 +68,7 @@ def handle_search_request( offset=search_request.retrieval_options.offset, limit=search_request.retrieval_options.limit, skip_rerank=search_request.skip_rerank, - skip_llm_chunk_filter=search_request.skip_llm_chunk_filter, + evaluation_type=search_request.evaluation_type, chunks_above=search_request.chunks_above, chunks_below=search_request.chunks_below, full_doc=search_request.full_doc, @@ -75,8 +80,7 @@ def handle_search_request( 
bypass_acl=False, ) top_sections = search_pipeline.reranked_sections - # If using surrounding context or full doc, this will be empty - relevant_section_indices = search_pipeline.relevant_section_indices + relevance_sections = search_pipeline.section_relevance top_docs = [ SavedSearchDocWithContent( document_id=section.center_chunk.document_id, @@ -105,19 +109,26 @@ def handle_search_request( # Deduping happens at the last step to avoid harming quality by dropping content early on deduped_docs = top_docs dropped_inds = None + if search_request.retrieval_options.dedupe_docs: deduped_docs, dropped_inds = dedupe_documents(top_docs) + llm_indices = relevant_documents_to_indices( + relevance_sections=relevance_sections, + search_docs=[ + translate_db_search_doc_to_server_search_doc(cast(SearchDoc, doc)) + for doc in deduped_docs + ], + ) + if dropped_inds: - relevant_section_indices = drop_llm_indices( - llm_indices=relevant_section_indices, + llm_indices = drop_llm_indices( + llm_indices=llm_indices, search_docs=deduped_docs, dropped_indices=dropped_inds, ) - return DocumentSearchResponse( - top_documents=deduped_docs, llm_indices=relevant_section_indices - ) + return DocumentSearchResponse(top_documents=deduped_docs, llm_indices=llm_indices) @basic_router.post("/answer-with-quote") diff --git a/deployment/docker_compose/docker-compose.dev.yml b/deployment/docker_compose/docker-compose.dev.yml index ec1c7b525..cd0d700c0 100644 --- a/deployment/docker_compose/docker-compose.dev.yml +++ b/deployment/docker_compose/docker-compose.dev.yml @@ -44,14 +44,13 @@ services: - GEN_AI_MAX_TOKENS=${GEN_AI_MAX_TOKENS:-} - QA_TIMEOUT=${QA_TIMEOUT:-} - MAX_CHUNKS_FED_TO_CHAT=${MAX_CHUNKS_FED_TO_CHAT:-} - - DISABLE_LLM_CHUNK_FILTER=${DISABLE_LLM_CHUNK_FILTER:-} - DISABLE_LLM_CHOOSE_SEARCH=${DISABLE_LLM_CHOOSE_SEARCH:-} - DISABLE_LLM_QUERY_REPHRASE=${DISABLE_LLM_QUERY_REPHRASE:-} - DISABLE_GENERATIVE_AI=${DISABLE_GENERATIVE_AI:-} - DISABLE_LITELLM_STREAMING=${DISABLE_LITELLM_STREAMING:-} - LITELLM_EXTRA_HEADERS=${LITELLM_EXTRA_HEADERS:-} - BING_API_KEY=${BING_API_KEY:-} - - DISABLE_AGENTIC_SEARCH=${DISABLE_AGENTIC_SEARCH:-} + - DISABLE_LLM_DOC_RELEVANCE=${DISABLE_LLM_DOC_RELEVANCE:-} # if set, allows for the use of the token budget system - TOKEN_BUDGET_GLOBALLY_ENABLED=${TOKEN_BUDGET_GLOBALLY_ENABLED:-} # Enables the use of bedrock models @@ -129,7 +128,6 @@ services: - GEN_AI_MAX_TOKENS=${GEN_AI_MAX_TOKENS:-} - QA_TIMEOUT=${QA_TIMEOUT:-} - MAX_CHUNKS_FED_TO_CHAT=${MAX_CHUNKS_FED_TO_CHAT:-} - - DISABLE_LLM_CHUNK_FILTER=${DISABLE_LLM_CHUNK_FILTER:-} - DISABLE_LLM_CHOOSE_SEARCH=${DISABLE_LLM_CHOOSE_SEARCH:-} - DISABLE_LLM_QUERY_REPHRASE=${DISABLE_LLM_QUERY_REPHRASE:-} - DISABLE_GENERATIVE_AI=${DISABLE_GENERATIVE_AI:-} @@ -230,7 +228,7 @@ services: - INTERNAL_URL=http://api_server:8080 - WEB_DOMAIN=${WEB_DOMAIN:-} - THEME_IS_DARK=${THEME_IS_DARK:-} - - DISABLE_AGENTIC_SEARCH=${DISABLE_AGENTIC_SEARCH:-} + - DISABLE_LLM_DOC_RELEVANCE=${DISABLE_LLM_DOC_RELEVANCE:-} # Enterprise Edition only - ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=${ENABLE_PAID_ENTERPRISE_EDITION_FEATURES:-false} diff --git a/deployment/docker_compose/docker-compose.gpu-dev.yml b/deployment/docker_compose/docker-compose.gpu-dev.yml index 2a0495958..ab277abde 100644 --- a/deployment/docker_compose/docker-compose.gpu-dev.yml +++ b/deployment/docker_compose/docker-compose.gpu-dev.yml @@ -41,7 +41,7 @@ services: - GEN_AI_MAX_TOKENS=${GEN_AI_MAX_TOKENS:-} - QA_TIMEOUT=${QA_TIMEOUT:-} - MAX_CHUNKS_FED_TO_CHAT=${MAX_CHUNKS_FED_TO_CHAT:-} - - 
DISABLE_LLM_CHUNK_FILTER=${DISABLE_LLM_CHUNK_FILTER:-} + - DISABLE_LLM_DOC_RELEVANCE=${DISABLE_LLM_DOC_RELEVANCE:-} - DISABLE_LLM_CHOOSE_SEARCH=${DISABLE_LLM_CHOOSE_SEARCH:-} - DISABLE_LLM_QUERY_REPHRASE=${DISABLE_LLM_QUERY_REPHRASE:-} - DISABLE_GENERATIVE_AI=${DISABLE_GENERATIVE_AI:-} @@ -122,7 +122,7 @@ services: - GEN_AI_MAX_TOKENS=${GEN_AI_MAX_TOKENS:-} - QA_TIMEOUT=${QA_TIMEOUT:-} - MAX_CHUNKS_FED_TO_CHAT=${MAX_CHUNKS_FED_TO_CHAT:-} - - DISABLE_LLM_CHUNK_FILTER=${DISABLE_LLM_CHUNK_FILTER:-} + - DISABLE_LLM_DOC_RELEVANCE=${DISABLE_LLM_DOC_RELEVANCE:-} - DISABLE_LLM_CHOOSE_SEARCH=${DISABLE_LLM_CHOOSE_SEARCH:-} - DISABLE_LLM_QUERY_REPHRASE=${DISABLE_LLM_QUERY_REPHRASE:-} - DISABLE_GENERATIVE_AI=${DISABLE_GENERATIVE_AI:-} diff --git a/deployment/docker_compose/env.multilingual.template b/deployment/docker_compose/env.multilingual.template index e6059c6ae..05e5dac51 100644 --- a/deployment/docker_compose/env.multilingual.template +++ b/deployment/docker_compose/env.multilingual.template @@ -24,7 +24,7 @@ NORMALIZE_EMBEDDINGS="True" # Use LLM to determine if chunks are relevant to the query # May not work well for languages that do not have much training data in the LLM training set # If using a common language like Spanish, French, Chinese, etc. this can be kept turned on -DISABLE_LLM_CHUNK_FILTER="True" +DISABLE_LLM_DOC_RELEVANCE="True" # The default reranking models are English first # There are no great quality French/English reranking models currently so turning this off diff --git a/deployment/helm/values.yaml b/deployment/helm/values.yaml index bb41a7511..c2195465e 100644 --- a/deployment/helm/values.yaml +++ b/deployment/helm/values.yaml @@ -402,7 +402,7 @@ configMap: GEN_AI_MAX_TOKENS: "" QA_TIMEOUT: "60" MAX_CHUNKS_FED_TO_CHAT: "" - DISABLE_LLM_CHUNK_FILTER: "" + DISABLE_LLM_DOC_RELEVANCE: "" DISABLE_LLM_CHOOSE_SEARCH: "" DISABLE_LLM_QUERY_REPHRASE: "" # Query Options diff --git a/deployment/kubernetes/env-configmap.yaml b/deployment/kubernetes/env-configmap.yaml index 81918c147..1a01c6b83 100644 --- a/deployment/kubernetes/env-configmap.yaml +++ b/deployment/kubernetes/env-configmap.yaml @@ -24,7 +24,7 @@ data: GEN_AI_MAX_TOKENS: "" QA_TIMEOUT: "60" MAX_CHUNKS_FED_TO_CHAT: "" - DISABLE_LLM_CHUNK_FILTER: "" + DISABLE_LLM_DOC_RELEVANCE: "" DISABLE_LLM_CHOOSE_SEARCH: "" DISABLE_LLM_QUERY_REPHRASE: "" # Query Options diff --git a/web/src/app/chat/modal/configuration/AssistantsTab.tsx b/web/src/app/chat/modal/configuration/AssistantsTab.tsx index 9ec402de4..a95296ef3 100644 --- a/web/src/app/chat/modal/configuration/AssistantsTab.tsx +++ b/web/src/app/chat/modal/configuration/AssistantsTab.tsx @@ -94,6 +94,7 @@ const AssistantCard = ({ ))} )} +
Default model:{" "} {getDisplayNameForModel( diff --git a/web/src/app/search/page.tsx b/web/src/app/search/page.tsx index fd242c1cb..884f6d2d2 100644 --- a/web/src/app/search/page.tsx +++ b/web/src/app/search/page.tsx @@ -36,7 +36,7 @@ import ToggleSearch from "./WrappedSearch"; import { AGENTIC_SEARCH_TYPE_COOKIE_NAME, NEXT_PUBLIC_DEFAULT_SIDEBAR_OPEN, - DISABLE_AGENTIC_SEARCH, + DISABLE_LLM_DOC_RELEVANCE, } from "@/lib/constants"; import WrappedSearch from "./WrappedSearch"; @@ -206,7 +206,7 @@ export default async function Home() { { - return ( - - - - ); + return ; }; + export const RobotIcon = ({ size = 16, className = defaultTailwindCSS, diff --git a/web/src/components/search/DateRangeSelector.tsx b/web/src/components/search/DateRangeSelector.tsx index 4ec2c4448..2dd83dc65 100644 --- a/web/src/components/search/DateRangeSelector.tsx +++ b/web/src/components/search/DateRangeSelector.tsx @@ -38,9 +38,11 @@ export const TODAY = "Today"; export function DateRangeSelector({ value, onValueChange, + isHoritontal, }: { value: DateRangePickerValue | null; onValueChange: (value: DateRangePickerValue | null) => void; + isHoritontal?: boolean; }) { return (
@@ -106,6 +108,7 @@ export function DateRangeSelector({ flex text-sm px-3 + line-clamp-1 py-1.5 rounded-lg border @@ -113,12 +116,16 @@ export function DateRangeSelector({ cursor-pointer hover:bg-hover`} > - {" "} - {value?.selectValue ? ( -
{value.selectValue}
- ) : ( - "Any time..." - )} + {" "} +

+ {value?.selectValue ? ( +

{value.selectValue}
+ ) : isHoritontal ? ( + "Date" + ) : ( + "Any time..." + )} +

{value?.selectValue ? (
void; onSearch: (fast?: boolean) => void; - searchState?: searchState; agentic?: boolean; toggleAgentic?: () => void; + ccPairs: CCPairBasicInfo[]; + documentSets: DocumentSet[]; + filterManager: any; // You might want to replace 'any' with a more specific type + finalAvailableDocumentSets: DocumentSet[]; + finalAvailableSources: string[]; + tags: Tag[]; } import { useState, useEffect, useRef } from "react"; @@ -18,6 +21,9 @@ import { Divider } from "@tremor/react"; import { CustomTooltip } from "../tooltip/CustomTooltip"; import KeyboardSymbol from "@/lib/browserUtilities"; import { SettingsContext } from "../settings/SettingsProvider"; +import { HorizontalSourceSelector, SourceSelector } from "./filtering/Filters"; +import { CCPairBasicInfo, DocumentSet, Tag } from "@/lib/types"; +import { SourceMetadata } from "@/lib/search/interfaces"; export const AnimatedToggle = ({ isOn, @@ -116,12 +122,17 @@ export const AnimatedToggle = ({ export default AnimatedToggle; export const FullSearchBar = ({ - searchState, query, setQuery, onSearch, agentic, toggleAgentic, + ccPairs, + documentSets, + filterManager, + finalAvailableDocumentSets, + finalAvailableSources, + tags, }: FullSearchBarProps) => { const handleChange = (event: ChangeEvent) => { const target = event.target; @@ -196,47 +207,44 @@ export const FullSearchBar = ({ suppressContentEditableWarning={true} /> -
- {searchState == "searching" && ( -
- Searching... -
- )} - - {searchState == "reading" && ( -
- - Reading{settings?.isMobile ? "" : " Documents"}... - -
- )} - - {searchState == "analyzing" && ( -
- - Generating{settings?.isMobile ? "" : " Analysis"}... - -
- )} - - {toggleAgentic && ( - - )} - -
- + )} +
+ {toggleAgentic && ( + + )} + +
+ +
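The FullSearchBar changes above drop the inline search-state badges and instead thread filter state down from SearchSection, so the bar itself can render the source/document-set/tag selectors (note the new HorizontalSourceSelector import). The filterManager prop is still typed as any in the diff; the sketch below is a hypothetical shape for it, inferred from how HorizontalSourceSelector destructures its SourceSelectorProps — the real type is whatever useFilters() in @/lib/hooks returns.

// Hypothetical FilterManager shape; field names mirror SourceSelectorProps
// as consumed by HorizontalSourceSelector. Not the actual exported type —
// DateRangePickerValue is assumed to come from @tremor/react, as elsewhere.
import type { Dispatch, SetStateAction } from "react";
import { Tag } from "@/lib/types";
import { SourceMetadata } from "@/lib/search/interfaces";
import { DateRangePickerValue } from "@tremor/react";

interface FilterManagerSketch {
  timeRange: DateRangePickerValue | null;
  setTimeRange: Dispatch<SetStateAction<DateRangePickerValue | null>>;
  selectedSources: SourceMetadata[];
  setSelectedSources: Dispatch<SetStateAction<SourceMetadata[]>>;
  selectedDocumentSets: string[];
  setSelectedDocumentSets: Dispatch<SetStateAction<string[]>>;
  selectedTags: Tag[];
  setSelectedTags: Dispatch<SetStateAction<Tag[]>>;
}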
diff --git a/web/src/components/search/SearchResultsDisplay.tsx b/web/src/components/search/SearchResultsDisplay.tsx index 41dd27bb4..2953bb900 100644 --- a/web/src/components/search/SearchResultsDisplay.tsx +++ b/web/src/components/search/SearchResultsDisplay.tsx @@ -1,24 +1,19 @@ "use client"; -import { removeDuplicateDocs } from "@/lib/documentUtils"; import { - DanswerDocument, DocumentRelevance, - FlowType, - Quote, - Relevance, SearchDanswerDocument, SearchDefaultOverrides, SearchResponse, - ValidQuestionResponse, } from "@/lib/search/interfaces"; import { usePopup } from "../admin/connectors/Popup"; import { AlertIcon, BroomIcon, UndoIcon } from "../icons/icons"; import { AgenticDocumentDisplay, DocumentDisplay } from "./DocumentDisplay"; import { searchState } from "./SearchSection"; -import { useEffect, useState } from "react"; +import { useContext, useEffect, useState } from "react"; import { Tooltip } from "../tooltip/Tooltip"; import KeyboardSymbol from "@/lib/browserUtilities"; +import { SettingsContext } from "../settings/SettingsProvider"; const getSelectedDocumentIds = ( documents: SearchDanswerDocument[], @@ -135,31 +130,17 @@ export const SearchResultsDisplay = ({ ); } - const dedupedQuotes: Quote[] = []; - const seen = new Set(); - if (quotes) { - quotes.forEach((quote) => { - if (!seen.has(quote.document_id)) { - dedupedQuotes.push(quote); - seen.add(quote.document_id); - } - }); - } - const selectedDocumentIds = getSelectedDocumentIds( documents || [], searchResponse.selectedDocIndices || [] ); - const relevantDocs = documents ? documents.filter((doc) => { return ( showAll || (searchResponse && searchResponse.additional_relevance && - searchResponse.additional_relevance[ - `${doc.document_id}-${doc.chunk_ind}` - ].relevant) || + searchResponse.additional_relevance[doc.document_id].relevant) || doc.is_relevant ); }) @@ -183,6 +164,7 @@ export const SearchResultsDisplay = ({ return ( <> {popup} + {documents && documents.length == 0 && (

No docs found! Ensure that you have enabled at least one connector @@ -248,9 +230,7 @@ export const SearchResultsDisplay = ({ {uniqueDocuments.map((document, ind) => { const relevance: DocumentRelevance | null = searchResponse.additional_relevance - ? searchResponse.additional_relevance[ - `${document.document_id}-${document.chunk_ind}` - ] + ? searchResponse.additional_relevance[document.document_id] : null; return agenticResults ? ( diff --git a/web/src/components/search/SearchSection.tsx b/web/src/components/search/SearchSection.tsx index 1295c9a2c..ac9ee074c 100644 --- a/web/src/components/search/SearchSection.tsx +++ b/web/src/components/search/SearchSection.tsx @@ -17,13 +17,11 @@ import { SearchDanswerDocument, } from "@/lib/search/interfaces"; import { searchRequestStreamed } from "@/lib/search/streamingQa"; - import { CancellationToken, cancellable } from "@/lib/search/cancellable"; import { useFilters, useObjectState } from "@/lib/hooks"; -import { questionValidationStreamed } from "@/lib/search/streamingQuestionValidation"; import { Persona } from "@/app/admin/assistants/interfaces"; import { computeAvailableFilters } from "@/lib/filters"; -import { redirect, useRouter, useSearchParams } from "next/navigation"; +import { useRouter, useSearchParams } from "next/navigation"; import { SettingsContext } from "../settings/SettingsProvider"; import { HistorySidebar } from "@/app/chat/sessionSidebar/HistorySidebar"; import { ChatSession, SearchSession } from "@/app/chat/interfaces"; @@ -33,13 +31,19 @@ import { SIDEBAR_TOGGLED_COOKIE_NAME } from "../resizable/constants"; import { AGENTIC_SEARCH_TYPE_COOKIE_NAME } from "@/lib/constants"; import Cookies from "js-cookie"; import FixedLogo from "@/app/chat/shared_chat_search/FixedLogo"; +import { AnswerSection } from "./results/AnswerSection"; +import { QuotesSection } from "./results/QuotesSection"; +import { QAFeedbackBlock } from "./QAFeedback"; +import { usePopup } from "../admin/connectors/Popup"; export type searchState = | "input" | "searching" | "reading" | "analyzing" - | "summarizing"; + | "summarizing" + | "generating" + | "citing"; const SEARCH_DEFAULT_OVERRIDES_START: SearchDefaultOverrides = { forceDisplayQA: false, @@ -48,7 +52,6 @@ const SEARCH_DEFAULT_OVERRIDES_START: SearchDefaultOverrides = { const VALID_QUESTION_RESPONSE_DEFAULT: ValidQuestionResponse = { reasoning: null, - answerable: null, error: null, }; @@ -223,35 +226,48 @@ export const SearchSection = ({ additional_relevance: undefined, }; // Streaming updates - const updateCurrentAnswer = (answer: string) => + const updateCurrentAnswer = (answer: string) => { setSearchResponse((prevState) => ({ ...(prevState || initialSearchResponse), answer, })); - const updateQuotes = (quotes: Quote[]) => + + setSearchState((searchState) => { + if (searchState != "input") { + return "generating"; + } + return "input"; + }); + }; + + const updateQuotes = (quotes: Quote[]) => { setSearchResponse((prevState) => ({ ...(prevState || initialSearchResponse), quotes, })); + setSearchState((searchState) => "input"); + }; const updateDocs = (documents: SearchDanswerDocument[]) => { - setTimeout(() => { - setSearchState((searchState) => { - if (searchState != "input") { - return "reading"; - } - return "input"; - }); - }, 1500); + if (agentic) { + setTimeout(() => { + setSearchState((searchState) => { + if (searchState != "input") { + return "reading"; + } + return "input"; + }); + }, 1500); - setTimeout(() => { - setSearchState((searchState) => { - if (searchState != "input") { - return 
"analyzing"; - } - return "input"; - }); - }, 4500); + setTimeout(() => { + setSearchState((searchState) => { + if (searchState != "input") { + return "analyzing"; + } + return "input"; + }); + }, 4500); + } setSearchResponse((prevState) => ({ ...(prevState || initialSearchResponse), @@ -294,8 +310,9 @@ export const SearchSection = ({ messageId, })); router.refresh(); - setSearchState("input"); + // setSearchState("input"); setIsFetching(false); + setSearchState((searchState) => "input"); // router.replace(`/search?searchId=${chat_session_id}`); }; @@ -309,7 +326,11 @@ export const SearchSection = ({ setContentEnriched(true); setIsFetching(false); - setSearchState("input"); + if (disabledAgentic) { + setSearchState("input"); + } else { + setSearchState("analyzing"); + } }; const updateComments = (comments: any) => { @@ -317,7 +338,9 @@ export const SearchSection = ({ }; const finishedSearching = () => { - setSearchState("input"); + if (disabledAgentic) { + setSearchState("input"); + } }; const resetInput = () => { @@ -414,15 +437,7 @@ export const SearchSection = ({ offset: offset ?? defaultOverrides.offset, }; - const questionValidationArgs = { - query, - update: setValidQuestionResponse, - }; - - await Promise.all([ - searchRequestStreamed(searchFnArgs), - questionValidationStreamed(questionValidationArgs), - ]); + await Promise.all([searchRequestStreamed(searchFnArgs)]); }; // handle redirect if search page is disabled @@ -481,6 +496,20 @@ export const SearchSection = ({ setShowDocSidebar, mobile: settings?.isMobile, }); + const { answer, quotes, documents, error, messageId } = searchResponse; + + const dedupedQuotes: Quote[] = []; + const seen = new Set(); + if (quotes) { + quotes.forEach((quote) => { + if (!seen.has(quote.document_id)) { + dedupedQuotes.push(quote); + seen.add(quote.document_id); + } + }); + } + + const { popup, setPopup } = usePopup(); return ( <> @@ -600,15 +629,113 @@ export const SearchSection = ({ disabledAgentic ? undefined : toggleAgentic } agentic={agentic} - searchState={searchState} query={query} setQuery={setQuery} onSearch={async (agentic?: boolean) => { setDefaultOverrides(SEARCH_DEFAULT_OVERRIDES_START); await onSearch({ agentic, offset: 0 }); }} + finalAvailableDocumentSets={finalAvailableDocumentSets} + finalAvailableSources={finalAvailableSources} + filterManager={filterManager} + documentSets={documentSets} + ccPairs={ccPairs} + tags={tags} />

+ {!firstSearch && ( +
+
+
+

+ AI Answer +

+ + {searchState == "generating" && ( +
+ + Generating response... + +
+ )} + + {searchState == "citing" && ( +
+ + Generating citations... + +
+ )} + + {searchState == "searching" && ( +
+ Searching... +
+ )} + + {searchState == "reading" && ( +
+ + Reading{settings?.isMobile ? "" : " Documents"} + ... + +
+ )} + + {searchState == "analyzing" && ( +
+ + Generating + {settings?.isMobile ? "" : " Analysis"}... + +
+ )} +
+ +
+ +
+ + {quotes !== null && answer && ( +
+ + + {searchResponse.messageId !== null && ( +
+ +
+ )} +
+ )} +
+
+ )} {!settings?.isMobile && (
diff --git a/web/src/components/search/filtering/FilterDropdown.tsx b/web/src/components/search/filtering/FilterDropdown.tsx index 1c3028d60..8d3333d88 100644 --- a/web/src/components/search/filtering/FilterDropdown.tsx +++ b/web/src/components/search/filtering/FilterDropdown.tsx @@ -6,22 +6,23 @@ interface Option { display: string | JSX.Element; displayName?: string; } - export function FilterDropdown({ options, selected, handleSelect, icon, defaultDisplay, + width = "w-64", }: { options: Option[]; selected: string[]; handleSelect: (option: Option) => void; icon: JSX.Element; defaultDisplay: string | JSX.Element; + width?: string; }) { return ( -
+
-
+

Filters

@@ -324,3 +331,184 @@ export function HorizontalFilters({
); } + +export function HorizontalSourceSelector({ + timeRange, + setTimeRange, + selectedSources, + setSelectedSources, + selectedDocumentSets, + setSelectedDocumentSets, + selectedTags, + setSelectedTags, + availableDocumentSets, + existingSources, + availableTags, +}: SourceSelectorProps) { + const handleSourceSelect = (source: SourceMetadata) => { + setSelectedSources((prev: SourceMetadata[]) => { + if (prev.map((s) => s.internalName).includes(source.internalName)) { + return prev.filter((s) => s.internalName !== source.internalName); + } else { + return [...prev, source]; + } + }); + }; + + const handleDocumentSetSelect = (documentSetName: string) => { + setSelectedDocumentSets((prev: string[]) => { + if (prev.includes(documentSetName)) { + return prev.filter((s) => s !== documentSetName); + } else { + return [...prev, documentSetName]; + } + }); + }; + + const handleTagSelect = (tag: Tag) => { + setSelectedTags((prev: Tag[]) => { + if ( + prev.some( + (t) => t.tag_key === tag.tag_key && t.tag_value === tag.tag_value + ) + ) { + return prev.filter( + (t) => !(t.tag_key === tag.tag_key && t.tag_value === tag.tag_value) + ); + } else { + return [...prev, tag]; + } + }); + }; + + return ( +
+
+
+ +
+ + {existingSources.length > 0 && ( + existingSources.includes(source.internalName)) + .map((source) => ({ + key: source.internalName, + display: ( + <> + + {source.displayName} + + ), + }))} + selected={selectedSources.map((source) => source.internalName)} + handleSelect={(option) => + handleSourceSelect( + listSourceMetadata().find((s) => s.internalName === option.key)! + ) + } + icon={} + defaultDisplay="Sources" + width="w-fit max-w-24 ellipsis truncate" + /> + )} + + {availableDocumentSets.length > 0 && ( + ({ + key: documentSet.name, + display: ( + <> + + {documentSet.name} + + ), + }))} + selected={selectedDocumentSets} + handleSelect={(option) => handleDocumentSetSelect(option.key)} + icon={} + defaultDisplay="Sets" + width="w-fit max-w-24 ellipsis" + /> + )} + + {availableTags.length > 0 && ( + ({ + key: `${tag.tag_key}=${tag.tag_value}`, + display: ( + + {tag.tag_key} + = + {tag.tag_value} + + ), + }))} + selected={selectedTags.map( + (tag) => `${tag.tag_key}=${tag.tag_value}` + )} + handleSelect={(option) => { + const [tag_key, tag_value] = option.key.split("="); + const selectedTag = availableTags.find( + (tag) => tag.tag_key === tag_key && tag.tag_value === tag_value + ); + if (selectedTag) { + handleTagSelect(selectedTag); + } + }} + icon={} + defaultDisplay="Tags" + width="w-fit max-w-24 ellipsis" + /> + )} +
+ + {/*
+ {timeRange && timeRange.selectValue && ( + setTimeRange(null)}> +
{timeRange.selectValue}
+
+ )} + {selectedSources.map((source) => ( + handleSourceSelect(source)} + > + <> + + {source.displayName} + + + ))} + {selectedDocumentSets.map((documentSetName) => ( + handleDocumentSetSelect(documentSetName)} + > + <> + + {documentSetName} + + + ))} + {selectedTags.map((tag) => ( + handleTagSelect(tag)} + > + + {tag.tag_key}={tag.tag_value} + + + ))} +
*/} +
+ ); +} diff --git a/web/src/components/search/results/AnswerSection.tsx b/web/src/components/search/results/AnswerSection.tsx index 08ce5c6bf..e68fdc4e5 100644 --- a/web/src/components/search/results/AnswerSection.tsx +++ b/web/src/components/search/results/AnswerSection.tsx @@ -26,31 +26,28 @@ interface AnswerSectionProps { answer: string | null; quotes: Quote[] | null; error: string | null; - nonAnswerableReason: string | null; isFetching: boolean; } export const AnswerSection = (props: AnswerSectionProps) => { let status = "in-progress" as StatusOptions; - let header = <>Building answer...; + let header = <>; let body = null; // finished answer if (props.quotes !== null || !props.isFetching) { status = "success"; - header = <>AI answer; - if (props.answer) { - body = ( - - {replaceNewlines(props.answer)} - - ); - } else { - body =
Information not found
; - } + header = <>; + + body = ( + + {replaceNewlines(props.answer || "")} + + ); + // error while building answer (NOTE: if error occurs during quote generation // the above if statement will hit and the error will not be displayed) } else if (props.error) { @@ -64,7 +61,7 @@ export const AnswerSection = (props: AnswerSectionProps) => { // answer is streaming } else if (props.answer) { status = "success"; - header = <>AI answer; + header = <>; body = ( { ); } - if (props.nonAnswerableReason) { - status = "warning"; - header = <>Building best effort AI answer...; - } return ( {
{header}
} - body={ -
- {body} - {props.nonAnswerableReason && !props.isFetching && ( -
- Warning: the AI did not think this - question was answerable.{" "} -
- {props.nonAnswerableReason} -
-
- )} -
- } + body={
{body}
} desiredOpenStatus={true} isNotControllable={true} /> diff --git a/web/src/components/search/results/QuotesSection.tsx b/web/src/components/search/results/QuotesSection.tsx index 92900bb46..4736e6eb7 100644 --- a/web/src/components/search/results/QuotesSection.tsx +++ b/web/src/components/search/results/QuotesSection.tsx @@ -65,7 +65,6 @@ const QuoteDisplay = ({ quoteInfo }: { quoteInfo: Quote }) => { interface QuotesSectionProps { quotes: Quote[] | null; - isAnswerable: boolean | null; isFetching: boolean; } @@ -110,11 +109,7 @@ export const QuotesSection = (props: QuotesSectionProps) => { let status: StatusOptions = "in-progress"; if (!props.isFetching) { if (props.quotes && props.quotes.length > 0) { - if (props.isAnswerable === false) { - status = "warning"; - } else { - status = "success"; - } + status = "success"; } else { status = "failed"; } diff --git a/web/src/components/search/results/ResponseSection.tsx b/web/src/components/search/results/ResponseSection.tsx index eb04ac889..30903ea79 100644 --- a/web/src/components/search/results/ResponseSection.tsx +++ b/web/src/components/search/results/ResponseSection.tsx @@ -7,6 +7,7 @@ import { } from "@/components/icons/icons"; import { useState } from "react"; import { Grid } from "react-loader-spinner"; +import { searchState } from "../SearchSection"; export type StatusOptions = "in-progress" | "failed" | "warning" | "success"; @@ -31,26 +32,13 @@ export const ResponseSection = ({ let icon = null; if (status === "in-progress") { - icon = ( -
- -
- ); + icon = <>; } if (status === "failed") { icon = ; } if (status === "success") { - icon = ; + icon = <>; } if (status === "warning") { icon = ; diff --git a/web/src/lib/constants.ts b/web/src/lib/constants.ts index 703655f1e..a694f157e 100644 --- a/web/src/lib/constants.ts +++ b/web/src/lib/constants.ts @@ -52,5 +52,5 @@ export const CUSTOM_ANALYTICS_ENABLED = process.env.CUSTOM_ANALYTICS_SECRET_KEY ? true : false; -export const DISABLE_AGENTIC_SEARCH = - process.env.DISABLE_AGENTIC_SEARCH?.toLowerCase() === "true"; +export const DISABLE_LLM_DOC_RELEVANCE = + process.env.DISABLE_LLM_DOC_RELEVANCE?.toLowerCase() === "true"; diff --git a/web/src/lib/search/interfaces.ts b/web/src/lib/search/interfaces.ts index 187d801c0..b33879055 100644 --- a/web/src/lib/search/interfaces.ts +++ b/web/src/lib/search/interfaces.ts @@ -158,7 +158,6 @@ export interface SearchRequestOverrides { } export interface ValidQuestionResponse { - answerable: boolean | null; reasoning: string | null; error: string | null; } diff --git a/web/src/lib/search/streamingQa.ts b/web/src/lib/search/streamingQa.ts index 74cd81825..51771222d 100644 --- a/web/src/lib/search/streamingQa.ts +++ b/web/src/lib/search/streamingQa.ts @@ -61,8 +61,7 @@ export const searchRequestStreamed = async ({ filters: filters, enable_auto_detect_filters: false, }, - llm_doc_eval: true, - skip_gen_ai_answer_generation: true, + evaluation_type: agentic ? "agentic" : "basic", }), headers: { "Content-Type": "application/json", diff --git a/web/src/lib/search/streamingQuestionValidation.ts b/web/src/lib/search/streamingQuestionValidation.ts deleted file mode 100644 index a3901ac49..000000000 --- a/web/src/lib/search/streamingQuestionValidation.ts +++ /dev/null @@ -1,66 +0,0 @@ -import { - AnswerPiecePacket, - ErrorMessagePacket, - ValidQuestionResponse, -} from "./interfaces"; -import { processRawChunkString } from "./streamingUtils"; - -export interface QuestionValidationArgs { - query: string; - update: (update: Partial) => void; -} - -export const questionValidationStreamed = async ({ - query, - update, -}: QuestionValidationArgs) => { - const response = await fetch("/api/query/stream-query-validation", { - method: "POST", - body: JSON.stringify({ - query, - }), - headers: { - "Content-Type": "application/json", - }, - }); - const reader = response.body?.getReader(); - const decoder = new TextDecoder("utf-8"); - - let reasoning = ""; - let previousPartialChunk: string | null = null; - while (true) { - const rawChunk = await reader?.read(); - if (!rawChunk) { - throw new Error("Unable to process chunk"); - } - const { done, value } = rawChunk; - if (done) { - break; - } - - const [completedChunks, partialChunk] = processRawChunkString< - AnswerPiecePacket | ValidQuestionResponse | ErrorMessagePacket - >(decoder.decode(value, { stream: true }), previousPartialChunk); - if (!completedChunks.length && !partialChunk) { - break; - } - previousPartialChunk = partialChunk as string | null; - - completedChunks.forEach((chunk) => { - if (Object.hasOwn(chunk, "answer_piece")) { - reasoning += (chunk as AnswerPiecePacket).answer_piece; - update({ - reasoning, - }); - } - - if (Object.hasOwn(chunk, "answerable")) { - update({ answerable: (chunk as ValidQuestionResponse).answerable }); - } - - if (Object.hasOwn(chunk, "error")) { - update({ error: (chunk as ErrorMessagePacket).error }); - } - }); - } -};
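Net effect on the query path: the standalone question-validation stream is deleted, and searchRequestStreamed now signals how documents should be judged through a single evaluation_type field ("agentic" or "basic"), replacing the old llm_doc_eval / skip_gen_ai_answer_generation pair, with DISABLE_LLM_DOC_RELEVANCE (constants.ts) gating the agentic toggle server-side. A minimal sketch of the changed part of the request body — only the fields visible in the streamingQa.ts diff are shown; the endpoint and the remaining fields are elided, and the helper name is hypothetical:

// Sketch of the payload change in searchRequestStreamed: one evaluation_type
// flag instead of two booleans. "filters" and "agentic" come from the caller.
function buildSearchBody(filters: unknown, agentic?: boolean): string {
  return JSON.stringify({
    // ...other request fields unchanged (query, persona, etc.)
    retrieval_options: {
      filters: filters,
      enable_auto_detect_filters: false,
    },
    evaluation_type: agentic ? "agentic" : "basic",
  });
}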