Mirror of https://github.com/danswer-ai/danswer.git, synced 2025-04-03 09:28:25 +02:00
Add answers to search (#2020)
parent 76b7792e69
commit 6d67d472cd
.vscode/env_template.txt (vendored)
@@ -15,7 +15,7 @@ LOG_LEVEL=debug
 # This passes top N results to LLM an additional time for reranking prior to answer generation
 # This step is quite heavy on token usage so we disable it for dev generally
-DISABLE_LLM_CHUNK_FILTER=True
+DISABLE_LLM_DOC_RELEVANCE=True

 # Useful if you want to toggle auth on/off (google_oauth/OIDC specifically)
@@ -10,8 +10,8 @@ from alembic import op
 # revision identifiers, used by Alembic.
 revision = "1d6ad76d1f37"
 down_revision = "e1392f05e840"
-branch_labels = None
-depends_on = None
+branch_labels: None = None
+depends_on: None = None


 def upgrade() -> None:
@@ -46,15 +46,22 @@ class LLMRelevanceFilterResponse(BaseModel):
     relevant_chunk_indices: list[int]


-class RelevanceChunk(BaseModel):
-    # TODO make this document level. Also slight misnomer here as this is actually
-    # done at the section level currently rather than the chunk
-    relevant: bool | None = None
+class RelevanceAnalysis(BaseModel):
+    relevant: bool
+    content: str | None = None


-class LLMRelevanceSummaryResponse(BaseModel):
-    relevance_summaries: dict[str, RelevanceChunk]
+class SectionRelevancePiece(RelevanceAnalysis):
+    """LLM analysis mapped to an Inference Section"""
+
+    document_id: str
+    chunk_id: int  # ID of the center chunk for a given inference section
+
+
+class DocumentRelevance(BaseModel):
+    """Contains all relevance information for a given search"""
+
+    relevance_summaries: dict[str, RelevanceAnalysis]


 class DanswerAnswerPiece(BaseModel):
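The commit replaces the chunk-keyed RelevanceChunk / LLMRelevanceSummaryResponse pair with a small hierarchy keyed by document. A standalone sketch of how the new models nest (not part of the commit; the sample IDs are invented):

# Minimal sketch of the new relevance models, mirroring the hunk above.
from pydantic import BaseModel


class RelevanceAnalysis(BaseModel):
    relevant: bool
    content: str | None = None


class SectionRelevancePiece(RelevanceAnalysis):
    document_id: str
    chunk_id: int  # center chunk of the inference section


class DocumentRelevance(BaseModel):
    relevance_summaries: dict[str, RelevanceAnalysis]


piece = SectionRelevancePiece(
    document_id="doc-123", chunk_id=4, relevant=True, content="Mentions the query term."
)
# Per-document summaries are keyed by document_id alone, no chunk suffix.
summary = DocumentRelevance(
    relevance_summaries={
        piece.document_id: RelevanceAnalysis(relevant=piece.relevant, content=piece.content)
    }
)
print(summary.relevance_summaries["doc-123"].relevant)  # True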
@@ -17,7 +17,7 @@ personas:
     num_chunks: 10
     # Enable/Disable usage of the LLM chunk filter feature whereby each chunk is passed to the LLM to determine
     # if the chunk is useful or not towards the latest user query
-    # This feature can be overriden for all personas via DISABLE_LLM_CHUNK_FILTER env variable
+    # This feature can be overridden for all personas via DISABLE_LLM_DOC_RELEVANCE env variable
     llm_relevance_filter: true
     # Enable/Disable usage of the LLM to extract query time filters including source type and time range filters
     llm_filter_extraction: true
@@ -52,6 +52,7 @@ from danswer.llm.factory import get_llms_for_persona
 from danswer.llm.factory import get_main_llm_from_tuple
 from danswer.llm.interfaces import LLMConfig
 from danswer.natural_language_processing.utils import get_tokenizer
+from danswer.search.enums import LLMEvaluationType
 from danswer.search.enums import OptionalSearchSetting
 from danswer.search.enums import QueryFlow
 from danswer.search.enums import SearchType
@@ -60,6 +61,7 @@ from danswer.search.retrieval.search_runner import inference_sections_from_ids
 from danswer.search.utils import chunks_or_sections_to_search_docs
 from danswer.search.utils import dedupe_documents
 from danswer.search.utils import drop_llm_indices
+from danswer.search.utils import relevant_documents_to_indices
 from danswer.server.query_and_chat.models import ChatMessageDetail
 from danswer.server.query_and_chat.models import CreateChatMessageRequest
 from danswer.server.utils import get_json_line
@@ -501,6 +503,9 @@ def stream_chat_message_objects(
                 chunks_above=new_msg_req.chunks_above,
                 chunks_below=new_msg_req.chunks_below,
                 full_doc=new_msg_req.full_doc,
+                evaluation_type=LLMEvaluationType.BASIC
+                if persona.llm_relevance_filter
+                else LLMEvaluationType.SKIP,
             )
             tool_dict[db_tool_model.id] = [search_tool]
         elif tool_cls.__name__ == ImageGenerationTool.__name__:
@@ -629,18 +634,28 @@ def stream_chat_message_objects(
                 )
                 yield qa_docs_response
             elif packet.id == SECTION_RELEVANCE_LIST_ID:
-                chunk_indices = packet.response
+                relevance_sections = packet.response

-                if reference_db_search_docs is not None and dropped_indices:
-                    chunk_indices = drop_llm_indices(
-                        llm_indices=chunk_indices,
-                        search_docs=reference_db_search_docs,
-                        dropped_indices=dropped_indices,
+                if reference_db_search_docs is not None:
+                    llm_indices = relevant_documents_to_indices(
+                        relevance_sections=relevance_sections,
+                        search_docs=[
+                            translate_db_search_doc_to_server_search_doc(doc)
+                            for doc in reference_db_search_docs
+                        ],
                     )

-                yield LLMRelevanceFilterResponse(
-                    relevant_chunk_indices=chunk_indices
-                )
+                    if dropped_indices:
+                        llm_indices = drop_llm_indices(
+                            llm_indices=llm_indices,
+                            search_docs=reference_db_search_docs,
+                            dropped_indices=dropped_indices,
+                        )
+
+                    yield LLMRelevanceFilterResponse(
+                        relevant_chunk_indices=llm_indices
+                    )
             elif packet.id == IMAGE_GENERATION_RESPONSE_ID:
                 img_generation_response = cast(
                     list[ImageGenerationResponse], packet.response
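In the chat flow above, LLM relevance results now arrive as section pieces, get translated into positions within the stored search docs, and are then shifted when deduplication dropped entries. A toy illustration of that index remapping (remap_after_drops is a hypothetical stand-in for drop_llm_indices, which lives in danswer.search.utils):

def remap_after_drops(llm_indices: list[int], dropped: list[int]) -> list[int]:
    remapped = []
    for ind in llm_indices:
        if ind in dropped:
            continue  # the selected doc itself was deduped away
        # every dropped position before `ind` shifts it left by one
        remapped.append(ind - sum(1 for d in dropped if d < ind))
    return remapped


docs = ["A", "B", "A", "C"]            # the doc at index 2 is a duplicate
deduped = ["A", "B", "C"]              # dedupe_documents dropped index 2
print(remap_after_drops([1, 3], [2]))  # [1, 2] -> "B" and "C" in the deduped list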
@@ -33,11 +33,6 @@ DISABLE_LLM_QUERY_ANSWERABILITY = QA_PROMPT_OVERRIDE == "weak"
 # Note this is not in any of the deployment configs yet
 CONTEXT_CHUNKS_ABOVE = int(os.environ.get("CONTEXT_CHUNKS_ABOVE") or 0)
 CONTEXT_CHUNKS_BELOW = int(os.environ.get("CONTEXT_CHUNKS_BELOW") or 0)
-# Whether the LLM should evaluate all of the document chunks passed in for usefulness
-# in relation to the user query
-DISABLE_LLM_CHUNK_FILTER = (
-    os.environ.get("DISABLE_LLM_CHUNK_FILTER", "").lower() == "true"
-)
 # Whether the LLM should be used to decide if a search would help given the chat history
 DISABLE_LLM_CHOOSE_SEARCH = (
     os.environ.get("DISABLE_LLM_CHOOSE_SEARCH", "").lower() == "true"
@@ -64,6 +59,7 @@ HYBRID_ALPHA = max(0, min(1, float(os.environ.get("HYBRID_ALPHA") or 0.62)))
 TITLE_CONTENT_RATIO = max(
     0, min(1, float(os.environ.get("TITLE_CONTENT_RATIO") or 0.20))
 )
+
 # A list of languages passed to the LLM to rephase the query
 # For example "English,French,Spanish", be sure to use the "," separator
 MULTILINGUAL_QUERY_EXPANSION = os.environ.get("MULTILINGUAL_QUERY_EXPANSION") or None
@@ -76,16 +72,16 @@ LANGUAGE_CHAT_NAMING_HINT = (
     or "The name of the conversation must be in the same language as the user query."
 )

-# Agentic search takes significantly more tokens and therefore has much higher cost.
-# This configuration allows users to get a search-only experience with instant results
-# and no involvement from the LLM.
-# Additionally, some LLM providers have strict rate limits which may prohibit
-# sending many API requests at once (as is done in agentic search).
-DISABLE_AGENTIC_SEARCH = (
-    os.environ.get("DISABLE_AGENTIC_SEARCH") or "false"
-).lower() == "true"
+# Whether the LLM should evaluate all of the document chunks passed in for usefulness
+# in relation to the user query
+DISABLE_LLM_DOC_RELEVANCE = (
+    os.environ.get("DISABLE_LLM_DOC_RELEVANCE", "").lower() == "true"
+)

 # Stops streaming answers back to the UI if this pattern is seen:
 STOP_STREAM_PAT = os.environ.get("STOP_STREAM_PAT") or None
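Both parsing idioms seen in this file (the removed flag used `or "false"`, the new one uses a `""` default) treat an unset or non-"true" value as disabled. A quick standalone check, assuming nothing else has set the variable:

import os

os.environ.pop("DISABLE_LLM_DOC_RELEVANCE", None)  # simulate an unset flag

via_default = os.environ.get("DISABLE_LLM_DOC_RELEVANCE", "").lower() == "true"
via_or = (os.environ.get("DISABLE_LLM_DOC_RELEVANCE") or "false").lower() == "true"
assert not via_default and not via_or  # both idioms read unset as False

os.environ["DISABLE_LLM_DOC_RELEVANCE"] = "True"
assert os.environ.get("DISABLE_LLM_DOC_RELEVANCE", "").lower() == "true"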
@@ -16,7 +16,7 @@ from sqlalchemy.orm import joinedload
 from sqlalchemy.orm import Session

 from danswer.auth.schemas import UserRole
-from danswer.chat.models import LLMRelevanceSummaryResponse
+from danswer.chat.models import DocumentRelevance
 from danswer.configs.chat_configs import HARD_DELETE_CHATS
 from danswer.configs.constants import MessageType
 from danswer.db.models import ChatMessage
@@ -541,11 +541,11 @@ def get_doc_query_identifiers_from_model(
 def update_search_docs_table_with_relevance(
     db_session: Session,
     reference_db_search_docs: list[SearchDoc],
-    relevance_summary: LLMRelevanceSummaryResponse,
+    relevance_summary: DocumentRelevance,
 ) -> None:
     for search_doc in reference_db_search_docs:
         relevance_data = relevance_summary.relevance_summaries.get(
-            f"{search_doc.document_id}-{search_doc.chunk_ind}"
+            search_doc.document_id
         )
         if relevance_data is not None:
             db_session.execute(
@@ -483,6 +483,7 @@ class Answer:
                 ]
             elif message.id == FINAL_CONTEXT_DOCUMENTS:
                 final_context_docs = cast(list[LlmDoc], message.response)
+
             elif (
                 message.id == SEARCH_DOC_CONTENT_ID
                 and not self._return_contexts
@@ -28,6 +28,9 @@ logger = setup_logger()
 T = TypeVar("T", bound=LlmDoc | InferenceChunk | InferenceSection)

 _METADATA_TOKEN_ESTIMATE = 75
+# Title and additional tokens as part of the tool message json
+# this is only used to log a warning so we can be more forgiving with the buffer
+_OVERCOUNT_ESTIMATE = 256


 class PruningError(Exception):
@@ -179,10 +182,18 @@ def _apply_pruning(
-            and section_token_count
-            > DOC_EMBEDDING_CONTEXT_SIZE + _METADATA_TOKEN_ESTIMATE
-        ):
-            logger.warning(
-                "Found more tokens in Section than expected, "
-                "likely mismatch between embedding and LLM tokenizers. Trimming content..."
-            )
+        if (
+            section_token_count
+            > DOC_EMBEDDING_CONTEXT_SIZE
+            + _METADATA_TOKEN_ESTIMATE
+            + _OVERCOUNT_ESTIMATE
+        ):
+            # If the section is just a little bit over, it is likely due to the additional tool message tokens
+            # no need to record this, the content will be trimmed just in case
+            logger.info(
+                "Found more tokens in Section than expected, "
+                "likely mismatch between embedding and LLM tokenizers. Trimming content..."
+            )
             section.combined_content = tokenizer_trim_content(
                 content=section.combined_content,
                 desired_length=DOC_EMBEDDING_CONTEXT_SIZE,
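The new branch only logs once a section overshoots the embedding context by both the metadata estimate and the new overcount buffer; smaller overshoots are trimmed quietly. A minimal sketch of the widened threshold, with a stand-in context size:

# Constants mirror the hunks above; token counting here is a stand-in.
DOC_EMBEDDING_CONTEXT_SIZE = 512
_METADATA_TOKEN_ESTIMATE = 75
_OVERCOUNT_ESTIMATE = 256


def needs_loud_warning(section_token_count: int) -> bool:
    return (
        section_token_count
        > DOC_EMBEDDING_CONTEXT_SIZE + _METADATA_TOKEN_ESTIMATE + _OVERCOUNT_ESTIMATE
    )


# A section slightly over the old threshold is now trimmed without a warning...
assert not needs_loud_warning(512 + 75 + 100)
# ...while a large overshoot still surfaces in the logs.
assert needs_loud_warning(1000)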
@@ -9,10 +9,12 @@ from danswer.chat.models import CitationInfo
 from danswer.chat.models import DanswerAnswerPiece
 from danswer.chat.models import DanswerContexts
 from danswer.chat.models import DanswerQuotes
+from danswer.chat.models import DocumentRelevance
 from danswer.chat.models import LLMRelevanceFilterResponse
-from danswer.chat.models import LLMRelevanceSummaryResponse
 from danswer.chat.models import QADocsResponse
+from danswer.chat.models import RelevanceAnalysis
 from danswer.chat.models import StreamingError
+from danswer.configs.chat_configs import DISABLE_LLM_DOC_RELEVANCE
 from danswer.configs.chat_configs import MAX_CHUNKS_FED_TO_CHAT
 from danswer.configs.chat_configs import QA_TIMEOUT
 from danswer.configs.constants import MessageType
@@ -39,18 +41,17 @@ from danswer.one_shot_answer.models import DirectQARequest
 from danswer.one_shot_answer.models import OneShotQAResponse
 from danswer.one_shot_answer.models import QueryRephrase
 from danswer.one_shot_answer.qa_utils import combine_message_thread
+from danswer.search.enums import LLMEvaluationType
 from danswer.search.models import RerankMetricsContainer
 from danswer.search.models import RetrievalMetricsContainer
 from danswer.search.utils import chunks_or_sections_to_search_docs
 from danswer.search.utils import dedupe_documents
-from danswer.search.utils import drop_llm_indices
 from danswer.secondary_llm_flows.answer_validation import get_answer_validity
 from danswer.secondary_llm_flows.query_expansion import thread_based_query_rephrase
 from danswer.server.query_and_chat.models import ChatMessageDetail
 from danswer.server.utils import get_json_line
 from danswer.tools.force import ForceUseTool
 from danswer.tools.search.search_tool import SEARCH_DOC_CONTENT_ID
 from danswer.tools.search.search_tool import SEARCH_EVALUATION_ID
 from danswer.tools.search.search_tool import SEARCH_RESPONSE_SUMMARY_ID
 from danswer.tools.search.search_tool import SearchResponseSummary
 from danswer.tools.search.search_tool import SearchTool
@@ -74,7 +75,7 @@ AnswerObjectIterator = Iterator[
     | ChatMessageDetail
     | CitationInfo
     | ToolCallKickoff
-    | LLMRelevanceSummaryResponse
+    | DocumentRelevance
 ]
@@ -180,10 +181,15 @@ def stream_answer_objects(
         max_tokens=max_document_tokens,
         use_sections=query_req.chunks_above > 0 or query_req.chunks_below > 0,
     )
+    print("EVALLLUATINO")
+    print(query_req.evaluation_type)

     search_tool = SearchTool(
         db_session=db_session,
         user=user,
+        evaluation_type=LLMEvaluationType.SKIP
+        if DISABLE_LLM_DOC_RELEVANCE
+        else query_req.evaluation_type,
         persona=chat_session.persona,
         retrieval_options=query_req.retrieval_options,
         prompt_config=prompt_config,
@@ -194,7 +200,6 @@ def stream_answer_objects(
         chunks_below=query_req.chunks_below,
         full_doc=query_req.full_doc,
         bypass_acl=bypass_acl,
-        llm_doc_eval=query_req.llm_doc_eval,
     )

     answer_config = AnswerStyleConfig(
@@ -223,7 +228,6 @@ def stream_answer_objects(
     )

     # won't be any ImageGenerationDisplay responses since that tool is never passed in
-    dropped_inds: list[int] = []

     for packet in cast(AnswerObjectIterator, answer.processed_streamed_output):
         # for one-shot flow, don't currently do anything with these
@@ -266,20 +270,18 @@ def stream_answer_objects(
             yield packet.response

         elif packet.id == SECTION_RELEVANCE_LIST_ID:
-            chunk_indices = packet.response
+            document_based_response = {}

-            if reference_db_search_docs is not None and dropped_inds:
-                chunk_indices = drop_llm_indices(
-                    llm_indices=chunk_indices,
-                    search_docs=reference_db_search_docs,
-                    dropped_indices=dropped_inds,
-                )
+            if packet.response is not None:
+                for evaluation in packet.response:
+                    document_based_response[
+                        evaluation.document_id
+                    ] = RelevanceAnalysis(
+                        relevant=evaluation.relevant, content=evaluation.content
+                    )

-            yield LLMRelevanceFilterResponse(relevant_chunk_indices=chunk_indices)
+            yield LLMRelevanceFilterResponse(relevant_chunk_indices=packet.response)

         elif packet.id == SEARCH_EVALUATION_ID:
-            evaluation_response = LLMRelevanceSummaryResponse(
-                relevance_summaries=packet.response
+            evaluation_response = DocumentRelevance(
+                relevance_summaries=document_based_response
             )
             if reference_db_search_docs is not None:
                 update_search_docs_table_with_relevance(
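The loop above folds the streamed SectionRelevancePiece objects into a per-document mapping before wrapping them in DocumentRelevance. A simplified standalone version, with the danswer types replaced by dataclasses; note that when two sections share a document_id, the later piece wins, matching the dict assignment above:

from dataclasses import dataclass


@dataclass
class Piece:
    document_id: str
    relevant: bool
    content: str | None


def to_document_map(pieces: list[Piece] | None) -> dict[str, dict]:
    document_based_response: dict[str, dict] = {}
    if pieces is not None:
        for evaluation in pieces:
            document_based_response[evaluation.document_id] = {
                "relevant": evaluation.relevant,
                "content": evaluation.content,
            }
    return document_based_response


pieces = [Piece("doc-a", True, "useful"), Piece("doc-b", False, None)]
print(to_document_map(pieces)["doc-a"]["relevant"])  # True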
@@ -9,6 +9,7 @@ from danswer.chat.models import DanswerContexts
 from danswer.chat.models import DanswerQuotes
 from danswer.chat.models import QADocsResponse
 from danswer.configs.constants import MessageType
+from danswer.search.enums import LLMEvaluationType
 from danswer.search.models import ChunkContext
 from danswer.search.models import RetrievalDetails
@@ -27,11 +28,11 @@ class DirectQARequest(ChunkContext):
     messages: list[ThreadMessage]
     prompt_id: int | None
     persona_id: int
+    agentic: bool | None = None
     retrieval_options: RetrievalDetails = Field(default_factory=RetrievalDetails)
-    # This is to forcibly skip (or run) the step, if None it uses the system defaults
-    skip_rerank: bool | None = None
-    skip_llm_chunk_filter: bool | None = None
+    evaluation_type: LLMEvaluationType = LLMEvaluationType.UNSPECIFIED

     chain_of_thought: bool = False
     return_contexts: bool = False
@@ -40,11 +41,6 @@ class DirectQARequest(ChunkContext):
     # will also disable Thread-based Rewording if specified
     query_override: str | None = None

-    # This is to toggle agentic evaluation:
-    # 1. Evaluates whether each response is relevant or not
-    # 2. Provides a summary of the document's relevance in the resulsts
-    llm_doc_eval: bool = False
-
     # If True, skips generative an AI response to the search query
     skip_gen_ai_answer_generation: bool = False
@@ -28,7 +28,8 @@ True or False
 """

 AGENTIC_SEARCH_USER_PROMPT = """
 Document:
-Document Title: {title}
+
+Document Title: {title}{optional_metadata}
 ```
 {content}
 ```
@@ -4,13 +4,6 @@ search/models.py imports from db/models.py."""
 from enum import Enum


-class OptionalSearchSetting(str, Enum):
-    ALWAYS = "always"
-    NEVER = "never"
-    # Determine whether to run search based on history and latest query
-    AUTO = "auto"
-
-
 class RecencyBiasSetting(str, Enum):
     FAVOR_RECENT = "favor_recent"  # 2x decay rate
     BASE_DECAY = "base_decay"
@@ -19,12 +12,26 @@ class RecencyBiasSetting(str, Enum):
     AUTO = "auto"


+class OptionalSearchSetting(str, Enum):
+    ALWAYS = "always"
+    NEVER = "never"
+    # Determine whether to run search based on history and latest query
+    AUTO = "auto"
+
+
 class SearchType(str, Enum):
     KEYWORD = "keyword"
     SEMANTIC = "semantic"
+    HYBRID = "hybrid"


+class LLMEvaluationType(str, Enum):
+    AGENTIC = "agentic"  # applies agentic evaluation
+    BASIC = "basic"  # applies boolean evaluation
+    SKIP = "skip"  # skips evaluation
+    UNSPECIFIED = "unspecified"  # reverts to default
+
+
 class QueryFlow(str, Enum):
     SEARCH = "search"
     QUESTION_ANSWER = "question-answer"
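How the four evaluation modes are intended to be read, per the enum comments above; the dispatch table below is an illustration, not danswer code:

from enum import Enum


class LLMEvaluationType(str, Enum):
    AGENTIC = "agentic"  # applies agentic evaluation
    BASIC = "basic"  # applies boolean evaluation
    SKIP = "skip"  # skips evaluation
    UNSPECIFIED = "unspecified"  # reverts to default


def describe(evaluation_type: LLMEvaluationType) -> str:
    return {
        LLMEvaluationType.AGENTIC: "LLM writes a per-section relevance analysis",
        LLMEvaluationType.BASIC: "LLM returns a boolean keep/drop per section",
        LLMEvaluationType.SKIP: "no LLM relevance call at all",
        LLMEvaluationType.UNSPECIFIED: "resolved later from persona/system defaults",
    }[evaluation_type]


print(describe(LLMEvaluationType.BASIC))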
@@ -6,13 +6,13 @@ from pydantic import validator

 from danswer.configs.chat_configs import CONTEXT_CHUNKS_ABOVE
 from danswer.configs.chat_configs import CONTEXT_CHUNKS_BELOW
-from danswer.configs.chat_configs import DISABLE_LLM_CHUNK_FILTER
 from danswer.configs.chat_configs import HYBRID_ALPHA
 from danswer.configs.chat_configs import NUM_RERANKED_RESULTS
 from danswer.configs.chat_configs import NUM_RETURNED_HITS
 from danswer.configs.constants import DocumentSource
 from danswer.db.models import Persona
 from danswer.indexing.models import BaseChunk
+from danswer.search.enums import LLMEvaluationType
 from danswer.search.enums import OptionalSearchSetting
 from danswer.search.enums import SearchType
 from shared_configs.configs import ENABLE_RERANKING_REAL_TIME_FLOW
@@ -78,7 +78,7 @@ class SearchRequest(ChunkContext):
     hybrid_alpha: float = HYBRID_ALPHA
     # This is to forcibly skip (or run) the step, if None it uses the system defaults
     skip_rerank: bool | None = None
-    skip_llm_chunk_filter: bool | None = None
+    evaluation_type: LLMEvaluationType = LLMEvaluationType.UNSPECIFIED

     class Config:
         arbitrary_types_allowed = True
@@ -88,11 +88,11 @@ class SearchQuery(ChunkContext):
     query: str
     filters: IndexFilters
     recency_bias_multiplier: float
+    evaluation_type: LLMEvaluationType
     num_hits: int = NUM_RETURNED_HITS
     offset: int = 0
     search_type: SearchType = SearchType.HYBRID
     skip_rerank: bool = not ENABLE_RERANKING_REAL_TIME_FLOW
-    skip_llm_chunk_filter: bool = DISABLE_LLM_CHUNK_FILTER
     # Only used if not skip_rerank
     num_rerank: int | None = NUM_RERANKED_RESULTS
     # Only used if not skip_llm_chunk_filter
@@ -5,17 +5,19 @@ from typing import cast

 from sqlalchemy.orm import Session

-from danswer.chat.models import RelevanceChunk
-from danswer.configs.chat_configs import DISABLE_AGENTIC_SEARCH
+from danswer.chat.models import SectionRelevancePiece
+from danswer.configs.chat_configs import DISABLE_LLM_DOC_RELEVANCE
 from danswer.configs.chat_configs import MULTILINGUAL_QUERY_EXPANSION
 from danswer.db.embedding_model import get_current_db_embedding_model
 from danswer.db.models import User
 from danswer.document_index.factory import get_default_document_index
 from danswer.llm.answering.models import DocumentPruningConfig
 from danswer.llm.answering.models import PromptConfig
+from danswer.llm.answering.prune_and_merge import _merge_sections
 from danswer.llm.answering.prune_and_merge import ChunkRange
 from danswer.llm.answering.prune_and_merge import merge_chunk_intervals
 from danswer.llm.interfaces import LLM
+from danswer.search.enums import LLMEvaluationType
 from danswer.search.enums import QueryFlow
 from danswer.search.enums import SearchType
 from danswer.search.models import IndexFilters
@@ -29,6 +31,7 @@ from danswer.search.postprocessing.postprocessing import search_postprocessing
 from danswer.search.preprocessing.preprocessing import retrieval_preprocessing
 from danswer.search.retrieval.search_runner import retrieve_chunks
 from danswer.search.utils import inference_section_from_chunks
+from danswer.search.utils import relevant_sections_to_indices
 from danswer.secondary_llm_flows.agentic_evaluation import evaluate_inference_section
 from danswer.utils.logger import setup_logger
 from danswer.utils.threadpool_concurrency import FunctionCall
@@ -84,11 +87,13 @@ class SearchPipeline:
         # Reranking and LLM section selection can be run together
         # If only LLM selection is on, the reranked chunks are yielded immediatly
         self._reranked_sections: list[InferenceSection] | None = None
-        self._relevant_section_indices: list[int] | None = None
+        self._final_context_sections: list[InferenceSection] | None = None
+
+        self._section_relevance: list[SectionRelevancePiece] | None = None

         # Generates reranked chunks and LLM selections
         self._postprocessing_generator: (
-            Iterator[list[InferenceSection] | list[int]] | None
+            Iterator[list[InferenceSection] | list[SectionRelevancePiece]] | None
         ) = None

         """Pre-processing"""
@@ -332,44 +337,66 @@ class SearchPipeline:
         return self._reranked_sections

     @property
-    def relevant_section_indices(self) -> list[int]:
-        if self._relevant_section_indices is not None:
-            return self._relevant_section_indices
+    def final_context_sections(self) -> list[InferenceSection]:
+        if self._final_context_sections is not None:
+            return self._final_context_sections

-        self._relevant_section_indices = next(
-            cast(Iterator[list[int]], self._postprocessing_generator)
-        )
-        return self._relevant_section_indices
+        self._final_context_sections = _merge_sections(sections=self.reranked_sections)
+        return self._final_context_sections

     @property
-    def relevance_summaries(self) -> dict[str, RelevanceChunk]:
-        if DISABLE_AGENTIC_SEARCH:
-            raise ValueError(
-                "Agentic saerch operation called while DISABLE_AGENTIC_SEARCH is toggled"
-            )
-        if len(self.reranked_sections) == 0:
-            logger.warning(
-                "No sections found in agentic search evalution. Returning empty dict."
-            )
-            return {}
-
-        sections = self.reranked_sections
-        functions = [
-            FunctionCall(
-                evaluate_inference_section, (section, self.search_query.query, self.llm)
-            )
-            for section in sections
-        ]
-
-        results = run_functions_in_parallel(function_calls=functions)
-
-        return {
-            next(iter(value)): value[next(iter(value))] for value in results.values()
-        }
+    def section_relevance(self) -> list[SectionRelevancePiece] | None:
+        if self._section_relevance is not None:
+            return self._section_relevance
+
+        if (
+            self.search_query.evaluation_type == LLMEvaluationType.SKIP
+            or DISABLE_LLM_DOC_RELEVANCE
+        ):
+            return None
+
+        if self.search_query.evaluation_type == LLMEvaluationType.UNSPECIFIED:
+            raise ValueError(
+                "Attempted to access section relevance scores on search query with evaluation type `UNSPECIFIED`."
+                + "The search query evaluation type should have been specified."
+            )
+
+        if self.search_query.evaluation_type == LLMEvaluationType.AGENTIC:
+            sections = self.final_context_sections
+            functions = [
+                FunctionCall(
+                    evaluate_inference_section,
+                    (section, self.search_query.query, self.llm),
+                )
+                for section in sections
+            ]
+            results = run_functions_in_parallel(function_calls=functions)
+            self._section_relevance = list(results.values())
+
+        elif self.search_query.evaluation_type == LLMEvaluationType.BASIC:
+            if DISABLE_LLM_DOC_RELEVANCE:
+                raise ValueError(
+                    "Basic search evaluation operation called while DISABLE_LLM_DOC_RELEVANCE is enabled."
+                )
+            self._section_relevance = next(
+                cast(
+                    Iterator[list[SectionRelevancePiece]],
+                    self._postprocessing_generator,
+                )
+            )
+
+        else:
+            # All other cases should have been handled above
+            raise ValueError(
+                f"Unexpected evaluation type: {self.search_query.evaluation_type}"
+            )
+
+        return self._section_relevance

     @property
     def section_relevance_list(self) -> list[bool]:
-        return [
-            True if ind in self.relevant_section_indices else False
-            for ind in range(len(self.reranked_sections))
-        ]
+        llm_indices = relevant_sections_to_indices(
+            relevance_sections=self.section_relevance,
+            inference_sections=self.final_context_sections,
+        )
+        return [ind in llm_indices for ind in range(len(self.final_context_sections))]
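The AGENTIC branch above fans one LLM call per section out in parallel via danswer's FunctionCall / run_functions_in_parallel helpers. A generic equivalent using only the standard library (evaluate is a placeholder for evaluate_inference_section):

from concurrent.futures import ThreadPoolExecutor


def evaluate(section: str, query: str) -> tuple[str, bool]:
    # stand-in for one blocking LLM call per section
    return section, query.lower() in section.lower()


def evaluate_all(sections: list[str], query: str) -> list[tuple[str, bool]]:
    # submit every section at once, then gather results in submission order
    with ThreadPoolExecutor(max_workers=8) as pool:
        futures = [pool.submit(evaluate, section, query) for section in sections]
        return [future.result() for future in futures]


print(evaluate_all(["alpha beta", "gamma"], "beta"))
# [('alpha beta', True), ('gamma', False)]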
@@ -4,6 +4,7 @@ from typing import cast

 import numpy

+from danswer.chat.models import SectionRelevancePiece
 from danswer.configs.app_configs import BLURB_SIZE
 from danswer.configs.constants import RETURN_SEPARATOR
 from danswer.configs.model_configs import CROSS_ENCODER_RANGE_MAX
@@ -15,6 +16,7 @@ from danswer.llm.interfaces import LLM
 from danswer.natural_language_processing.search_nlp_models import (
     CrossEncoderEnsembleModel,
 )
+from danswer.search.enums import LLMEvaluationType
 from danswer.search.models import ChunkMetric
 from danswer.search.models import InferenceChunk
 from danswer.search.models import InferenceChunkUncleaned
@@ -48,10 +50,6 @@ def should_rerank(query: SearchQuery) -> bool:
     return query.search_type != SearchType.KEYWORD and not query.skip_rerank


-def should_apply_llm_based_relevance_filter(query: SearchQuery) -> bool:
-    return not query.skip_llm_chunk_filter
-
-
 def cleanup_chunks(chunks: list[InferenceChunkUncleaned]) -> list[InferenceChunk]:
     def _remove_title(chunk: InferenceChunkUncleaned) -> str:
         if not chunk.title or not chunk.content:
@@ -233,7 +231,7 @@ def search_postprocessing(
     retrieved_sections: list[InferenceSection],
     llm: LLM,
     rerank_metrics_callback: Callable[[RerankMetricsContainer], None] | None = None,
-) -> Iterator[list[InferenceSection] | list[int]]:
+) -> Iterator[list[InferenceSection] | list[SectionRelevancePiece]]:
    post_processing_tasks: list[FunctionCall] = []

    if not retrieved_sections:
@@ -265,7 +263,10 @@ def search_postprocessing(
         sections_yielded = True

     llm_filter_task_id = None
-    if should_apply_llm_based_relevance_filter(search_query):
+    if search_query.evaluation_type in [
+        LLMEvaluationType.BASIC,
+        LLMEvaluationType.UNSPECIFIED,
+    ]:
         post_processing_tasks.append(
             FunctionCall(
                 filter_sections,
@@ -306,7 +307,11 @@ def search_postprocessing(
         )

     yield [
-        index
-        for index, section in enumerate(reranked_sections or retrieved_sections)
-        if section.center_chunk.unique_id in llm_selected_section_ids
+        SectionRelevancePiece(
+            document_id=section.center_chunk.document_id,
+            chunk_id=section.center_chunk.chunk_id,
+            relevant=section.center_chunk.unique_id in llm_selected_section_ids,
+            content="",
+        )
+        for section in (reranked_sections or retrieved_sections)
     ]
@@ -1,11 +1,12 @@
 from sqlalchemy.orm import Session

 from danswer.configs.chat_configs import BASE_RECENCY_DECAY
-from danswer.configs.chat_configs import DISABLE_LLM_CHUNK_FILTER
+from danswer.configs.chat_configs import DISABLE_LLM_DOC_RELEVANCE
 from danswer.configs.chat_configs import FAVOR_RECENT_DECAY_MULTIPLIER
 from danswer.configs.chat_configs import NUM_RETURNED_HITS
 from danswer.db.models import User
 from danswer.llm.interfaces import LLM
+from danswer.search.enums import LLMEvaluationType
 from danswer.search.enums import QueryFlow
 from danswer.search.enums import RecencyBiasSetting
 from danswer.search.models import BaseFilters
@@ -35,7 +36,6 @@ def retrieval_preprocessing(
     db_session: Session,
     bypass_acl: bool = False,
     include_query_intent: bool = True,
-    disable_llm_chunk_filter: bool = DISABLE_LLM_CHUNK_FILTER,
     base_recency_decay: float = BASE_RECENCY_DECAY,
     favor_recent_decay_multiplier: float = FAVOR_RECENT_DECAY_MULTIPLIER,
 ) -> tuple[SearchQuery, SearchType | None, QueryFlow | None]:
@@ -137,18 +137,23 @@ def retrieval_preprocessing(
         access_control_list=user_acl_filters,
     )

-    llm_chunk_filter = False
-    if search_request.skip_llm_chunk_filter is not None:
-        llm_chunk_filter = not search_request.skip_llm_chunk_filter
-    elif persona:
-        llm_chunk_filter = persona.llm_relevance_filter
+    llm_evaluation_type = LLMEvaluationType.BASIC
+    if search_request.evaluation_type is not LLMEvaluationType.UNSPECIFIED:
+        llm_evaluation_type = search_request.evaluation_type
+    elif persona:
+        llm_evaluation_type = (
+            LLMEvaluationType.BASIC
+            if persona.llm_relevance_filter
+            else LLMEvaluationType.SKIP
+        )

-    if disable_llm_chunk_filter:
-        if llm_chunk_filter:
+    if DISABLE_LLM_DOC_RELEVANCE:
+        if llm_evaluation_type:
             logger.info(
                 "LLM chunk filtering would have run but has been globally disabled"
             )
-        llm_chunk_filter = False
+        llm_evaluation_type = LLMEvaluationType.SKIP

     skip_rerank = search_request.skip_rerank
     if skip_rerank is None:
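The resolution order above: an explicit request value wins, then the persona's llm_relevance_filter flag, and the global DISABLE_LLM_DOC_RELEVANCE kill switch forces SKIP last. A condensed standalone sketch of that precedence (types simplified):

from enum import Enum


class Eval(str, Enum):
    BASIC = "basic"
    SKIP = "skip"
    UNSPECIFIED = "unspecified"


def resolve(requested: Eval, persona_filter: bool | None, globally_disabled: bool) -> Eval:
    resolved = Eval.BASIC
    if requested is not Eval.UNSPECIFIED:
        resolved = requested  # explicit request wins
    elif persona_filter is not None:
        resolved = Eval.BASIC if persona_filter else Eval.SKIP
    if globally_disabled:
        resolved = Eval.SKIP  # the global kill switch always wins
    return resolved


assert resolve(Eval.UNSPECIFIED, persona_filter=False, globally_disabled=False) is Eval.SKIP
assert resolve(Eval.BASIC, persona_filter=False, globally_disabled=True) is Eval.SKIP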
@@ -176,7 +181,7 @@ def retrieval_preprocessing(
         num_hits=limit if limit is not None else NUM_RETURNED_HITS,
         offset=offset or 0,
         skip_rerank=skip_rerank,
-        skip_llm_chunk_filter=not llm_chunk_filter,
+        evaluation_type=llm_evaluation_type,
         chunks_above=search_request.chunks_above,
         chunks_below=search_request.chunks_below,
         full_doc=search_request.full_doc,
@@ -1,6 +1,7 @@
 from collections.abc import Sequence
 from typing import TypeVar

+from danswer.chat.models import SectionRelevancePiece
 from danswer.db.models import SearchDoc as DBSearchDoc
 from danswer.search.models import InferenceChunk
 from danswer.search.models import InferenceSection
@@ -37,6 +38,46 @@ def dedupe_documents(items: list[T]) -> tuple[list[T], list[int]]:
     return deduped_items, dropped_indices


+def relevant_sections_to_indices(
+    relevance_sections: list[SectionRelevancePiece] | None,
+    inference_sections: list[InferenceSection],
+) -> list[int]:
+    if relevance_sections is None:
+        return []
+
+    relevant_set = {
+        (chunk.document_id, chunk.chunk_id)
+        for chunk in relevance_sections
+        if chunk.relevant
+    }
+    relevant_indices = [
+        index
+        for index, section in enumerate(inference_sections)
+        if (section.center_chunk.document_id, section.center_chunk.chunk_id)
+        in relevant_set
+    ]
+    return relevant_indices
+
+
+def relevant_documents_to_indices(
+    relevance_sections: list[SectionRelevancePiece] | None, search_docs: list[SearchDoc]
+) -> list[int]:
+    if relevance_sections is None:
+        return []
+
+    relevant_set = {
+        (chunk.document_id, chunk.chunk_id)
+        for chunk in relevance_sections
+        if chunk.relevant
+    }
+
+    return [
+        index
+        for index, section in enumerate(search_docs)
+        if (section.document_id, section.chunk_ind) in relevant_set
+    ]
+
+
 def drop_llm_indices(
     llm_indices: list[int],
     search_docs: Sequence[DBSearchDoc | SavedSearchDoc],
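Both helpers added above build the same set of relevant (document_id, chunk_id) pairs, then translate it into positions in whichever list the caller holds. A usage sketch with minimal stand-ins for the danswer types:

from dataclasses import dataclass


@dataclass
class Piece:  # stand-in for SectionRelevancePiece
    document_id: str
    chunk_id: int
    relevant: bool


@dataclass
class Doc:  # stand-in for SearchDoc
    document_id: str
    chunk_ind: int


pieces = [Piece("a", 0, True), Piece("b", 3, False)]
docs = [Doc("b", 3), Doc("a", 0)]

relevant_set = {(p.document_id, p.chunk_id) for p in pieces if p.relevant}
indices = [i for i, d in enumerate(docs) if (d.document_id, d.chunk_ind) in relevant_set]
print(indices)  # [1] -> only the "a" doc was judged relevant by the LLM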
@@ -1,6 +1,6 @@
 import re

-from danswer.chat.models import RelevanceChunk
+from danswer.chat.models import SectionRelevancePiece
 from danswer.llm.interfaces import LLM
 from danswer.llm.utils import dict_based_prompt_to_langchain_prompt
 from danswer.llm.utils import message_to_string
@@ -13,7 +13,7 @@ logger = setup_logger()


 def _get_agent_eval_messages(
-    title: str, content: str, query: str
+    title: str, content: str, query: str, center_metadata: str
 ) -> list[dict[str, str]]:
     messages = [
         {
@@ -23,7 +23,10 @@ def _get_agent_eval_messages(
         {
             "role": "user",
             "content": AGENTIC_SEARCH_USER_PROMPT.format(
-                title=title, content=content, query=query
+                title=title,
+                content=content,
+                query=query,
+                optional_metadata=center_metadata,
             ),
         },
     ]
@@ -32,16 +35,27 @@ def _get_agent_eval_messages(

 def evaluate_inference_section(
     document: InferenceSection, query: str, llm: LLM
-) -> dict[str, RelevanceChunk]:
-    results = {}
+) -> SectionRelevancePiece:
+    def _get_metadata_str(metadata: dict[str, str | list[str]]) -> str:
+        metadata_str = "\n\nMetadata:\n"
+        for key, value in metadata.items():
+            value_str = ", ".join(value) if isinstance(value, list) else value
+            metadata_str += f"{key} - {value_str}\n"
+
+        # Since there are now multiple sections, add this prefix for clarity
+        return metadata_str + "\nContent:"

     document_id = document.center_chunk.document_id
     semantic_id = document.center_chunk.semantic_identifier
     contents = document.combined_content
-    chunk_id = document.center_chunk.chunk_id
+    center_metadata = document.center_chunk.metadata
+    center_metadata_str = _get_metadata_str(center_metadata) if center_metadata else ""

     messages = _get_agent_eval_messages(
-        title=semantic_id, content=contents, query=query
+        title=semantic_id,
+        content=contents,
+        query=query,
+        center_metadata=center_metadata_str,
     )
     filled_llm_prompt = dict_based_prompt_to_langchain_prompt(messages)
     model_output = message_to_string(llm.invoke(filled_llm_prompt))
@@ -64,7 +78,9 @@ def evaluate_inference_section(
     )
     relevant = last_line.strip().lower().startswith("true")

-    results[f"{document_id}-{chunk_id}"] = RelevanceChunk(
-        relevant=relevant, content=analysis
+    return SectionRelevancePiece(
+        document_id=document_id,
+        chunk_id=document.center_chunk.chunk_id,
+        relevant=relevant,
+        content=analysis,
     )
-    return results
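What _get_metadata_str contributes to the prompt's {optional_metadata} slot, reproduced standalone (the sample metadata dict is invented):

def get_metadata_str(metadata: dict[str, str | list[str]]) -> str:
    metadata_str = "\n\nMetadata:\n"
    for key, value in metadata.items():
        value_str = ", ".join(value) if isinstance(value, list) else value
        metadata_str += f"{key} - {value_str}\n"
    # prefix the content so multiple sections stay visually separated
    return metadata_str + "\nContent:"


print(get_metadata_str({"author": "jane", "tags": ["search", "llm"]}))
# Metadata:
# author - jane
# tags - search, llm
#
# Content: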
@@ -1,5 +1,6 @@
 from collections.abc import Callable

+from danswer.configs.chat_configs import DISABLE_LLM_DOC_RELEVANCE
 from danswer.llm.interfaces import LLM
 from danswer.llm.utils import dict_based_prompt_to_langchain_prompt
 from danswer.llm.utils import message_to_string
@@ -45,6 +46,12 @@ def llm_eval_section(query: str, section_content: str, llm: LLM) -> bool:
 def llm_batch_eval_sections(
     query: str, section_contents: list[str], llm: LLM, use_threads: bool = True
 ) -> list[bool]:
+    if DISABLE_LLM_DOC_RELEVANCE:
+        raise RuntimeError(
+            "LLM Doc Relevance is globally disabled, "
+            "this should have been caught upstream."
+        )
+
     if use_threads:
         functions_with_args: list[tuple[Callable, tuple]] = [
             (llm_eval_section, (query, section_content, llm))
@@ -261,6 +261,7 @@ def get_answer_with_quote(
     query = query_request.messages[0].message

     logger.info(f"Received query for one shot answer with quotes: {query}")
+
     packets = stream_search_answer(
         query_req=query_request,
         user=user,
@@ -162,20 +162,43 @@ class ImageGenerationTool(Tool):
         )

     def _generate_image(self, prompt: str) -> ImageGenerationResponse:
-        response = image_generation(
-            prompt=prompt,
-            model=self.model,
-            api_key=self.api_key,
-            # need to pass in None rather than empty str
-            api_base=self.api_base or None,
-            api_version=self.api_version or None,
-            n=1,
-            extra_headers=build_llm_extra_headers(self.additional_headers),
-        )
-        return ImageGenerationResponse(
-            revised_prompt=response.data[0]["revised_prompt"],
-            url=response.data[0]["url"],
-        )
+        try:
+            response = image_generation(
+                prompt=prompt,
+                model=self.model,
+                api_key=self.api_key,
+                # need to pass in None rather than empty str
+                api_base=self.api_base or None,
+                api_version=self.api_version or None,
+                n=1,
+                extra_headers=build_llm_extra_headers(self.additional_headers),
+            )
+            return ImageGenerationResponse(
+                revised_prompt=response.data[0]["revised_prompt"],
+                url=response.data[0]["url"],
+            )
+        except Exception as e:
+            logger.debug(f"Error occurred during image generation: {e}")
+
+            error_message = str(e)
+            if "OpenAIException" in str(type(e)):
+                if (
+                    "Your request was rejected as a result of our safety system"
+                    in error_message
+                ):
+                    raise ValueError(
+                        "The image generation request was rejected due to OpenAI's content policy. Please try a different prompt."
+                    )
+                elif "Invalid image URL" in error_message:
+                    raise ValueError("Invalid image URL provided for image generation.")
+                elif "invalid_request_error" in error_message:
+                    raise ValueError(
+                        "Invalid request for image generation. Please check your input."
+                    )
+
+            raise ValueError(
+                "An error occurred during image generation. Please try again later."
+            )

     def run(self, **kwargs: str) -> Generator[ToolResponse, None, None]:
         prompt = cast(str, kwargs["prompt"])
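The wrapped call above follows a catch-once, match-on-message pattern: provider exceptions are translated into user-facing ValueErrors. A generic sketch of that mapping, not tied to litellm's actual exception classes:

def friendly_image_error(e: Exception) -> ValueError:
    message = str(e)
    if "safety system" in message:
        return ValueError(
            "The image generation request was rejected due to the provider's "
            "content policy. Please try a different prompt."
        )
    if "invalid_request_error" in message:
        return ValueError("Invalid request for image generation. Please check your input.")
    # fall through to a generic user-facing message
    return ValueError("An error occurred during image generation. Please try again later.")


try:
    raise RuntimeError("Your request was rejected as a result of our safety system")
except Exception as e:
    print(friendly_image_error(e))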
@@ -10,7 +10,6 @@ from danswer.chat.chat_utils import llm_doc_from_inference_section
 from danswer.chat.models import DanswerContext
 from danswer.chat.models import DanswerContexts
 from danswer.chat.models import LlmDoc
-from danswer.configs.chat_configs import DISABLE_AGENTIC_SEARCH
 from danswer.db.models import Persona
 from danswer.db.models import User
 from danswer.dynamic_configs.interface import JSON_ro
@@ -18,7 +17,9 @@ from danswer.llm.answering.models import DocumentPruningConfig
 from danswer.llm.answering.models import PreviousMessage
 from danswer.llm.answering.models import PromptConfig
-from danswer.llm.answering.prune_and_merge import prune_and_merge_sections
+from danswer.llm.answering.prune_and_merge import prune_sections
 from danswer.llm.interfaces import LLM
+from danswer.search.enums import LLMEvaluationType
 from danswer.search.enums import QueryFlow
 from danswer.search.enums import SearchType
 from danswer.search.models import IndexFilters
@@ -78,6 +79,7 @@ class SearchTool(Tool):
         llm: LLM,
         fast_llm: LLM,
         pruning_config: DocumentPruningConfig,
+        evaluation_type: LLMEvaluationType,
         # if specified, will not actually run a search and will instead return these
         # sections. Used when the user selects specific docs to talk to
         selected_sections: list[InferenceSection] | None = None,
@@ -85,7 +87,6 @@ class SearchTool(Tool):
         chunks_below: int = 0,
         full_doc: bool = False,
         bypass_acl: bool = False,
-        llm_doc_eval: bool = False,
     ) -> None:
         self.user = user
         self.persona = persona
@@ -94,6 +95,7 @@ class SearchTool(Tool):
         self.llm = llm
         self.fast_llm = fast_llm
         self.pruning_config = pruning_config
+        self.evaluation_type = evaluation_type

         self.selected_sections = selected_sections
@@ -102,7 +104,6 @@ class SearchTool(Tool):
         self.full_doc = full_doc
         self.bypass_acl = bypass_acl
         self.db_session = db_session
-        self.llm_doc_eval = llm_doc_eval

     @property
     def name(self) -> str:
@@ -205,10 +206,12 @@ class SearchTool(Tool):
             question=query,
             document_pruning_config=self.pruning_config,
         )
+
+        llm_docs = [
+            llm_doc_from_inference_section(section)
+            for section in final_context_sections
+        ]

         yield ToolResponse(id=FINAL_CONTEXT_DOCUMENTS, response=llm_docs)

     def run(self, **kwargs: str) -> Generator[ToolResponse, None, None]:
@@ -221,6 +224,7 @@ class SearchTool(Tool):
         search_pipeline = SearchPipeline(
             search_request=SearchRequest(
                 query=query,
+                evaluation_type=self.evaluation_type,
                 human_selected_filters=(
                     self.retrieval_options.filters if self.retrieval_options else None
                 ),
@@ -251,7 +255,7 @@ class SearchTool(Tool):
             id=SEARCH_RESPONSE_SUMMARY_ID,
             response=SearchResponseSummary(
                 rephrased_query=query,
-                top_sections=search_pipeline.reranked_sections,
+                top_sections=search_pipeline.final_context_sections,
                 predicted_flow=search_pipeline.predicted_flow,
                 predicted_search=search_pipeline.predicted_search_type,
                 final_filters=search_pipeline.search_query.filters,
@@ -276,11 +280,11 @@ class SearchTool(Tool):

         yield ToolResponse(
             id=SECTION_RELEVANCE_LIST_ID,
-            response=search_pipeline.relevant_section_indices,
+            response=search_pipeline.section_relevance,
         )

-        final_context_sections = prune_and_merge_sections(
-            sections=search_pipeline.reranked_sections,
+        pruned_sections = prune_sections(
+            sections=search_pipeline.final_context_sections,
             section_relevance_list=search_pipeline.section_relevance_list,
             prompt_config=self.prompt_config,
             llm_config=self.llm.config,
@@ -289,17 +293,11 @@ class SearchTool(Tool):
         )

         llm_docs = [
-            llm_doc_from_inference_section(section)
-            for section in final_context_sections
+            llm_doc_from_inference_section(section) for section in pruned_sections
         ]

         yield ToolResponse(id=FINAL_CONTEXT_DOCUMENTS, response=llm_docs)

-        if self.llm_doc_eval and not DISABLE_AGENTIC_SEARCH:
-            yield ToolResponse(
-                id=SEARCH_EVALUATION_ID, response=search_pipeline.relevance_summaries
-            )
-
     def final_result(self, *args: ToolResponse) -> JSON_ro:
         final_docs = cast(
             list[LlmDoc],
|
@ -1,6 +1,7 @@
|
||||
from pydantic import BaseModel
|
||||
|
||||
from danswer.configs.constants import DocumentSource
|
||||
from danswer.search.enums import LLMEvaluationType
|
||||
from danswer.search.enums import SearchType
|
||||
from danswer.search.models import ChunkContext
|
||||
from danswer.search.models import RetrievalDetails
|
||||
@@ -21,9 +22,9 @@ class DocumentSearchRequest(ChunkContext):
     search_type: SearchType
     retrieval_options: RetrievalDetails
     recency_bias_multiplier: float = 1.0
+    evaluation_type: LLMEvaluationType
     # This is to forcibly skip (or run) the step, if None it uses the system defaults
     skip_rerank: bool | None = None
-    skip_llm_chunk_filter: bool | None = None


 class BasicCreateChatMessageRequest(ChunkContext):
|
@ -1,3 +1,5 @@
|
||||
from typing import cast
|
||||
|
||||
from fastapi import APIRouter
|
||||
from fastapi import Depends
|
||||
from fastapi import HTTPException
|
||||
@@ -9,7 +11,9 @@ from danswer.configs.danswerbot_configs import DANSWER_BOT_TARGET_CHUNK_PERCENTA
 from danswer.danswerbot.slack.handlers.handle_standard_answers import (
     oneoff_standard_answers,
 )
+from danswer.db.chat import translate_db_search_doc_to_server_search_doc
 from danswer.db.engine import get_session
+from danswer.db.models import SearchDoc
 from danswer.db.models import User
 from danswer.db.persona import get_persona_by_id
 from danswer.llm.answering.prompts.citations_prompt import (
@@ -27,6 +31,7 @@ from danswer.search.models import SearchRequest
 from danswer.search.pipeline import SearchPipeline
 from danswer.search.utils import dedupe_documents
 from danswer.search.utils import drop_llm_indices
+from danswer.search.utils import relevant_documents_to_indices
 from danswer.utils.logger import setup_logger
 from ee.danswer.server.query_and_chat.models import DocumentSearchRequest
 from ee.danswer.server.query_and_chat.models import StandardAnswerRequest
@@ -63,7 +68,7 @@ def handle_search_request(
             offset=search_request.retrieval_options.offset,
             limit=search_request.retrieval_options.limit,
             skip_rerank=search_request.skip_rerank,
-            skip_llm_chunk_filter=search_request.skip_llm_chunk_filter,
+            evaluation_type=search_request.evaluation_type,
             chunks_above=search_request.chunks_above,
             chunks_below=search_request.chunks_below,
             full_doc=search_request.full_doc,
@@ -75,8 +80,7 @@ def handle_search_request(
         bypass_acl=False,
     )
     top_sections = search_pipeline.reranked_sections
-    # If using surrounding context or full doc, this will be empty
-    relevant_section_indices = search_pipeline.relevant_section_indices
+    relevance_sections = search_pipeline.section_relevance
     top_docs = [
         SavedSearchDocWithContent(
             document_id=section.center_chunk.document_id,
@@ -105,19 +109,26 @@ def handle_search_request(
     # Deduping happens at the last step to avoid harming quality by dropping content early on
     deduped_docs = top_docs
     dropped_inds = None
+
     if search_request.retrieval_options.dedupe_docs:
         deduped_docs, dropped_inds = dedupe_documents(top_docs)

+    llm_indices = relevant_documents_to_indices(
+        relevance_sections=relevance_sections,
+        search_docs=[
+            translate_db_search_doc_to_server_search_doc(cast(SearchDoc, doc))
+            for doc in deduped_docs
+        ],
+    )
+
     if dropped_inds:
-        relevant_section_indices = drop_llm_indices(
-            llm_indices=relevant_section_indices,
+        llm_indices = drop_llm_indices(
+            llm_indices=llm_indices,
             search_docs=deduped_docs,
             dropped_indices=dropped_inds,
         )

-    return DocumentSearchResponse(
-        top_documents=deduped_docs, llm_indices=relevant_section_indices
-    )
+    return DocumentSearchResponse(top_documents=deduped_docs, llm_indices=llm_indices)


 @basic_router.post("/answer-with-quote")
|
@ -44,14 +44,13 @@ services:
|
||||
- GEN_AI_MAX_TOKENS=${GEN_AI_MAX_TOKENS:-}
|
||||
- QA_TIMEOUT=${QA_TIMEOUT:-}
|
||||
- MAX_CHUNKS_FED_TO_CHAT=${MAX_CHUNKS_FED_TO_CHAT:-}
|
||||
- DISABLE_LLM_CHUNK_FILTER=${DISABLE_LLM_CHUNK_FILTER:-}
|
||||
- DISABLE_LLM_CHOOSE_SEARCH=${DISABLE_LLM_CHOOSE_SEARCH:-}
|
||||
- DISABLE_LLM_QUERY_REPHRASE=${DISABLE_LLM_QUERY_REPHRASE:-}
|
||||
- DISABLE_GENERATIVE_AI=${DISABLE_GENERATIVE_AI:-}
|
||||
- DISABLE_LITELLM_STREAMING=${DISABLE_LITELLM_STREAMING:-}
|
||||
- LITELLM_EXTRA_HEADERS=${LITELLM_EXTRA_HEADERS:-}
|
||||
- BING_API_KEY=${BING_API_KEY:-}
|
||||
- DISABLE_AGENTIC_SEARCH=${DISABLE_AGENTIC_SEARCH:-}
|
||||
- DISABLE_LLM_DOC_RELEVANCE=${DISABLE_LLM_DOC_RELEVANCE:-}
|
||||
# if set, allows for the use of the token budget system
|
||||
- TOKEN_BUDGET_GLOBALLY_ENABLED=${TOKEN_BUDGET_GLOBALLY_ENABLED:-}
|
||||
# Enables the use of bedrock models
|
||||
@@ -129,7 +128,6 @@ services:
       - GEN_AI_MAX_TOKENS=${GEN_AI_MAX_TOKENS:-}
       - QA_TIMEOUT=${QA_TIMEOUT:-}
       - MAX_CHUNKS_FED_TO_CHAT=${MAX_CHUNKS_FED_TO_CHAT:-}
-      - DISABLE_LLM_CHUNK_FILTER=${DISABLE_LLM_CHUNK_FILTER:-}
       - DISABLE_LLM_CHOOSE_SEARCH=${DISABLE_LLM_CHOOSE_SEARCH:-}
       - DISABLE_LLM_QUERY_REPHRASE=${DISABLE_LLM_QUERY_REPHRASE:-}
       - DISABLE_GENERATIVE_AI=${DISABLE_GENERATIVE_AI:-}
@@ -230,7 +228,7 @@ services:
       - INTERNAL_URL=http://api_server:8080
       - WEB_DOMAIN=${WEB_DOMAIN:-}
       - THEME_IS_DARK=${THEME_IS_DARK:-}
-      - DISABLE_AGENTIC_SEARCH=${DISABLE_AGENTIC_SEARCH:-}
+      - DISABLE_LLM_DOC_RELEVANCE=${DISABLE_LLM_DOC_RELEVANCE:-}

       # Enterprise Edition only
       - ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=${ENABLE_PAID_ENTERPRISE_EDITION_FEATURES:-false}
@@ -41,7 +41,7 @@ services:
       - GEN_AI_MAX_TOKENS=${GEN_AI_MAX_TOKENS:-}
       - QA_TIMEOUT=${QA_TIMEOUT:-}
       - MAX_CHUNKS_FED_TO_CHAT=${MAX_CHUNKS_FED_TO_CHAT:-}
-      - DISABLE_LLM_CHUNK_FILTER=${DISABLE_LLM_CHUNK_FILTER:-}
+      - DISABLE_LLM_DOC_RELEVANCE=${DISABLE_LLM_DOC_RELEVANCE:-}
       - DISABLE_LLM_CHOOSE_SEARCH=${DISABLE_LLM_CHOOSE_SEARCH:-}
       - DISABLE_LLM_QUERY_REPHRASE=${DISABLE_LLM_QUERY_REPHRASE:-}
       - DISABLE_GENERATIVE_AI=${DISABLE_GENERATIVE_AI:-}
@@ -122,7 +122,7 @@ services:
       - GEN_AI_MAX_TOKENS=${GEN_AI_MAX_TOKENS:-}
       - QA_TIMEOUT=${QA_TIMEOUT:-}
       - MAX_CHUNKS_FED_TO_CHAT=${MAX_CHUNKS_FED_TO_CHAT:-}
-      - DISABLE_LLM_CHUNK_FILTER=${DISABLE_LLM_CHUNK_FILTER:-}
+      - DISABLE_LLM_DOC_RELEVANCE=${DISABLE_LLM_DOC_RELEVANCE:-}
       - DISABLE_LLM_CHOOSE_SEARCH=${DISABLE_LLM_CHOOSE_SEARCH:-}
       - DISABLE_LLM_QUERY_REPHRASE=${DISABLE_LLM_QUERY_REPHRASE:-}
       - DISABLE_GENERATIVE_AI=${DISABLE_GENERATIVE_AI:-}
|
@ -24,7 +24,7 @@ NORMALIZE_EMBEDDINGS="True"
|
||||
# Use LLM to determine if chunks are relevant to the query
|
||||
# May not work well for languages that do not have much training data in the LLM training set
|
||||
# If using a common language like Spanish, French, Chinese, etc. this can be kept turned on
|
||||
DISABLE_LLM_CHUNK_FILTER="True"
|
||||
DISABLE_LLM_DOC_RELEVANCE="True"
|
||||
|
||||
# The default reranking models are English first
|
||||
# There are no great quality French/English reranking models currently so turning this off
|
||||
|
@ -402,7 +402,7 @@ configMap:
|
||||
GEN_AI_MAX_TOKENS: ""
|
||||
QA_TIMEOUT: "60"
|
||||
MAX_CHUNKS_FED_TO_CHAT: ""
|
||||
DISABLE_LLM_CHUNK_FILTER: ""
|
||||
DISABLE_LLM_DOC_RELEVANCE: ""
|
||||
DISABLE_LLM_CHOOSE_SEARCH: ""
|
||||
DISABLE_LLM_QUERY_REPHRASE: ""
|
||||
# Query Options
|
||||
|
@ -24,7 +24,7 @@ data:
|
||||
GEN_AI_MAX_TOKENS: ""
|
||||
QA_TIMEOUT: "60"
|
||||
MAX_CHUNKS_FED_TO_CHAT: ""
|
||||
DISABLE_LLM_CHUNK_FILTER: ""
|
||||
DISABLE_LLM_DOC_RELEVANCE: ""
|
||||
DISABLE_LLM_CHOOSE_SEARCH: ""
|
||||
DISABLE_LLM_QUERY_REPHRASE: ""
|
||||
# Query Options
|
||||
|
@ -94,6 +94,7 @@ const AssistantCard = ({
|
||||
))}
|
||||
</div>
|
||||
)}
|
||||
|
||||
<div className="text-xs text-subtle">
|
||||
<span className="font-semibold">Default model:</span>{" "}
|
||||
{getDisplayNameForModel(
|
||||
|
@ -36,7 +36,7 @@ import ToggleSearch from "./WrappedSearch";
|
||||
import {
|
||||
AGENTIC_SEARCH_TYPE_COOKIE_NAME,
|
||||
NEXT_PUBLIC_DEFAULT_SIDEBAR_OPEN,
|
||||
DISABLE_AGENTIC_SEARCH,
|
||||
DISABLE_LLM_DOC_RELEVANCE,
|
||||
} from "@/lib/constants";
|
||||
import WrappedSearch from "./WrappedSearch";
|
||||
|
||||
@ -206,7 +206,7 @@ export default async function Home() {
|
||||
|
||||
<InstantSSRAutoRefresh />
|
||||
<WrappedSearch
|
||||
disabledAgentic={DISABLE_AGENTIC_SEARCH}
|
||||
disabledAgentic={DISABLE_LLM_DOC_RELEVANCE}
|
||||
initiallyToggled={toggleSidebar}
|
||||
querySessions={querySessions}
|
||||
user={user}
|
||||
|
@ -1711,24 +1711,9 @@ export const ThumbsUpIcon = ({
|
||||
size = 16,
|
||||
className = defaultTailwindCSS,
|
||||
}: IconProps) => {
|
||||
return (
|
||||
<svg
|
||||
style={{ width: `${size}px`, height: `${size}px` }}
|
||||
className={`w-[${size}px] h-[${size}px] ` + className}
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
width="200"
|
||||
height="200"
|
||||
viewBox="0 0 20 20"
|
||||
>
|
||||
<path
|
||||
fill="currentColor"
|
||||
fillRule="evenodd"
|
||||
d="M10 2c-2.236 0-4.43.18-6.57.524C1.993 2.755 1 4.014 1 5.426v5.148c0 1.413.993 2.67 2.43 2.902c1.168.188 2.352.327 3.55.414c.28.02.521.18.642.413l1.713 3.293a.75.75 0 0 0 1.33 0l1.713-3.293a.783.783 0 0 1 .642-.413a41.102 41.102 0 0 0 3.55-.414c1.437-.231 2.43-1.49 2.43-2.902V5.426c0-1.413-.993-2.67-2.43-2.902A41.289 41.289 0 0 0 10 2ZM6.75 6a.75.75 0 0 0 0 1.5h6.5a.75.75 0 0 0 0-1.5h-6.5Zm0 2.5a.75.75 0 0 0 0 1.5h3.5a.75.75 0 0 0 0-1.5h-3.5Z"
|
||||
clipRule="evenodd"
|
||||
/>
|
||||
</svg>
|
||||
);
|
||||
return <FiThumbsUp size={size} className={className} />;
|
||||
};
|
||||
|
||||
export const RobotIcon = ({
|
||||
size = 16,
|
||||
className = defaultTailwindCSS,
|
||||
|
@ -38,9 +38,11 @@ export const TODAY = "Today";
|
||||
export function DateRangeSelector({
|
||||
value,
|
||||
onValueChange,
|
||||
isHoritontal,
|
||||
}: {
|
||||
value: DateRangePickerValue | null;
|
||||
onValueChange: (value: DateRangePickerValue | null) => void;
|
||||
isHoritontal?: boolean;
|
||||
}) {
|
||||
return (
|
||||
<div>
|
||||
@ -106,6 +108,7 @@ export function DateRangeSelector({
|
||||
flex
|
||||
text-sm
|
||||
px-3
|
||||
line-clamp-1
|
||||
py-1.5
|
||||
rounded-lg
|
||||
border
|
||||
@@ -113,12 +116,16 @@ export function DateRangeSelector({
             cursor-pointer
             hover:bg-hover`}
         >
-          <FiCalendar className="my-auto mr-2" />{" "}
-          {value?.selectValue ? (
-            <div className="text-emphasis">{value.selectValue}</div>
-          ) : (
-            "Any time..."
-          )}
+          <FiCalendar className="flex-none my-auto mr-2" />{" "}
+          <p className="line-clamp-1">
+            {value?.selectValue ? (
+              <div className="text-emphasis">{value.selectValue}</div>
+            ) : isHoritontal ? (
+              "Date"
+            ) : (
+              "Any time..."
+            )}
+          </p>
           {value?.selectValue ? (
             <div
               className="my-auto ml-auto p-0.5 rounded-full w-fit"
|
@ -16,7 +16,7 @@ import { BookIcon, CheckmarkIcon, LightBulbIcon, XIcon } from "../icons/icons";
|
||||
|
||||
import { FaStar } from "react-icons/fa";
|
||||
import { FiTag } from "react-icons/fi";
|
||||
import { DISABLE_AGENTIC_SEARCH } from "@/lib/constants";
|
||||
import { DISABLE_LLM_DOC_RELEVANCE } from "@/lib/constants";
|
||||
import { SettingsContext } from "../settings/SettingsProvider";
|
||||
|
||||
export const buildDocumentSummaryDisplay = (
|
||||
|
@ -1,15 +1,18 @@
|
||||
import React, { KeyboardEvent, ChangeEvent, useContext } from "react";
|
||||
import { searchState } from "./SearchSection";
|
||||
|
||||
import { MagnifyingGlass } from "@phosphor-icons/react";
|
||||
|
||||
interface FullSearchBarProps {
|
||||
query: string;
|
||||
setQuery: (query: string) => void;
|
||||
onSearch: (fast?: boolean) => void;
|
||||
searchState?: searchState;
|
||||
agentic?: boolean;
|
||||
toggleAgentic?: () => void;
|
||||
ccPairs: CCPairBasicInfo[];
|
||||
documentSets: DocumentSet[];
|
||||
filterManager: any; // You might want to replace 'any' with a more specific type
|
||||
finalAvailableDocumentSets: DocumentSet[];
|
||||
finalAvailableSources: string[];
|
||||
tags: Tag[];
|
||||
}
|
||||
|
||||
import { useState, useEffect, useRef } from "react";
|
||||
@ -18,6 +21,9 @@ import { Divider } from "@tremor/react";
|
||||
import { CustomTooltip } from "../tooltip/CustomTooltip";
|
||||
import KeyboardSymbol from "@/lib/browserUtilities";
|
||||
import { SettingsContext } from "../settings/SettingsProvider";
|
||||
import { HorizontalSourceSelector, SourceSelector } from "./filtering/Filters";
|
||||
import { CCPairBasicInfo, DocumentSet, Tag } from "@/lib/types";
|
||||
import { SourceMetadata } from "@/lib/search/interfaces";
|
||||
|
||||
export const AnimatedToggle = ({
|
||||
isOn,
|
||||
@ -116,12 +122,17 @@ export const AnimatedToggle = ({
|
||||
export default AnimatedToggle;
|
||||
|
||||
export const FullSearchBar = ({
|
||||
searchState,
|
||||
query,
|
||||
setQuery,
|
||||
onSearch,
|
||||
agentic,
|
||||
toggleAgentic,
|
||||
ccPairs,
|
||||
documentSets,
|
||||
filterManager,
|
||||
finalAvailableDocumentSets,
|
||||
finalAvailableSources,
|
||||
tags,
|
||||
}: FullSearchBarProps) => {
|
||||
const handleChange = (event: ChangeEvent<HTMLTextAreaElement>) => {
|
||||
const target = event.target;
|
||||
@ -196,47 +207,44 @@ export const FullSearchBar = ({
|
||||
suppressContentEditableWarning={true}
|
||||
/>
|
||||
|
||||
<div className="flex justify-end w-full items-center space-x-3 mr-12 px-4 pb-2">
|
||||
{searchState == "searching" && (
|
||||
<div key={"Reading"} className="mr-auto relative inline-block">
|
||||
<span className="loading-text">Searching...</span>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{searchState == "reading" && (
|
||||
<div key={"Reading"} className="mr-auto relative inline-block">
|
||||
<span className="loading-text">
|
||||
Reading{settings?.isMobile ? "" : " Documents"}...
|
||||
</span>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{searchState == "analyzing" && (
|
||||
<div key={"Generating"} className="mr-auto relative inline-block">
|
||||
<span className="loading-text">
|
||||
Generating{settings?.isMobile ? "" : " Analysis"}...
|
||||
</span>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{toggleAgentic && (
|
||||
<AnimatedToggle isOn={agentic!} handleToggle={toggleAgentic} />
|
||||
)}
|
||||
|
||||
<div className="my-auto pl-2">
|
||||
<button
|
||||
onClick={() => {
|
||||
onSearch(agentic);
|
||||
}}
|
||||
className="flex my-auto cursor-pointer"
|
||||
>
|
||||
<SendIcon
|
||||
size={28}
|
||||
className={`text-emphasis text-white p-1 rounded-full ${
|
||||
query ? "bg-background-800" : "bg-[#D7D7D7]"
|
||||
}`}
|
||||
<div
|
||||
className={`flex 2xl:justify-end justify-between w-full items-center space-x-3 px-4 pb-2`}
|
||||
>
|
||||
{/* <div className="absolute z-10 mobile:px-4 mobile:max-w-searchbar-max mobile:w-[90%] top-12 desktop:left-0 hidden 2xl:block mobile:left-1/2 mobile:transform mobile:-translate-x-1/2 desktop:w-52 3xl:w-64"> */}
|
||||
<div className="2xl:hidden">
|
||||
{(ccPairs.length > 0 || documentSets.length > 0) && (
|
||||
<HorizontalSourceSelector
|
||||
isHorizontal
|
||||
{...filterManager}
|
||||
showDocSidebar={false}
|
||||
availableDocumentSets={finalAvailableDocumentSets}
|
||||
existingSources={finalAvailableSources}
|
||||
availableTags={tags}
|
||||
/>
|
||||
</button>
|
||||
)}
|
||||
</div>
|
||||
{/* ccPairs, documentSets, filterManager, finalAvailableDocumentSets, finalAvailableSources, tags */}
|
||||
{/* </div>/ */}
|
||||
<div className="flex my-auto gap-x-3">
|
||||
{toggleAgentic && (
|
||||
<AnimatedToggle isOn={agentic!} handleToggle={toggleAgentic} />
|
||||
)}
|
||||
|
||||
<div className="my-auto pl-2">
|
||||
<button
|
||||
onClick={() => {
|
||||
onSearch(agentic);
|
||||
}}
|
||||
className="flex my-auto cursor-pointer"
|
||||
>
|
||||
<SendIcon
|
||||
size={28}
|
||||
className={`text-emphasis text-white p-1 rounded-full ${
|
||||
query ? "bg-background-800" : "bg-[#D7D7D7]"
|
||||
}`}
|
||||
/>
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div className="absolute bottom-2.5 right-10"></div>
|
||||
|
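The `{...filterManager}` spread above hands the filter state straight to `HorizontalSourceSelector`, but the `any` on the prop leaves its shape implicit. Inferred from the `SourceSelectorProps` consumed in Filters.tsx further down, it presumably looks like the sketch below (an assumption, not the codebase's actual type):

```ts
import type { Dispatch, SetStateAction } from "react";
import { DateRangePickerValue } from "@tremor/react";
import { Tag } from "@/lib/types";
import { SourceMetadata } from "@/lib/search/interfaces";

// Assumed shape of filterManager, reverse-engineered from the selector props;
// the setters use React's updater-function form, as the handlers below expect.
interface FilterManagerShape {
  timeRange: DateRangePickerValue | null;
  setTimeRange: (value: DateRangePickerValue | null) => void;
  selectedSources: SourceMetadata[];
  setSelectedSources: Dispatch<SetStateAction<SourceMetadata[]>>;
  selectedDocumentSets: string[];
  setSelectedDocumentSets: Dispatch<SetStateAction<string[]>>;
  selectedTags: Tag[];
  setSelectedTags: Dispatch<SetStateAction<Tag[]>>;
}
```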
@ -1,24 +1,19 @@
"use client";

import { removeDuplicateDocs } from "@/lib/documentUtils";
import {
  DanswerDocument,
  DocumentRelevance,
  FlowType,
  Quote,
  Relevance,
  SearchDanswerDocument,
  SearchDefaultOverrides,
  SearchResponse,
  ValidQuestionResponse,
} from "@/lib/search/interfaces";
import { usePopup } from "../admin/connectors/Popup";
import { AlertIcon, BroomIcon, UndoIcon } from "../icons/icons";
import { AgenticDocumentDisplay, DocumentDisplay } from "./DocumentDisplay";
import { searchState } from "./SearchSection";
import { useEffect, useState } from "react";
import { useContext, useEffect, useState } from "react";
import { Tooltip } from "../tooltip/Tooltip";
import KeyboardSymbol from "@/lib/browserUtilities";
import { SettingsContext } from "../settings/SettingsProvider";

const getSelectedDocumentIds = (
  documents: SearchDanswerDocument[],
@ -135,31 +130,17 @@ export const SearchResultsDisplay = ({
    );
  }

  const dedupedQuotes: Quote[] = [];
  const seen = new Set<string>();
  if (quotes) {
    quotes.forEach((quote) => {
      if (!seen.has(quote.document_id)) {
        dedupedQuotes.push(quote);
        seen.add(quote.document_id);
      }
    });
  }

  const selectedDocumentIds = getSelectedDocumentIds(
    documents || [],
    searchResponse.selectedDocIndices || []
  );

  const relevantDocs = documents
    ? documents.filter((doc) => {
        return (
          showAll ||
          (searchResponse &&
            searchResponse.additional_relevance &&
            searchResponse.additional_relevance[
              `${doc.document_id}-${doc.chunk_ind}`
            ].relevant) ||
            searchResponse.additional_relevance[doc.document_id].relevant) ||
          doc.is_relevant
        );
      })
@ -183,6 +164,7 @@ export const SearchResultsDisplay = ({
  return (
    <>
      {popup}

      {documents && documents.length == 0 && (
        <p className="flex text-lg font-bold">
          No docs found! Ensure that you have enabled at least one connector
@ -248,9 +230,7 @@ export const SearchResultsDisplay = ({
          {uniqueDocuments.map((document, ind) => {
            const relevance: DocumentRelevance | null =
              searchResponse.additional_relevance
                ? searchResponse.additional_relevance[
                    `${document.document_id}-${document.chunk_ind}`
                  ]
                ? searchResponse.additional_relevance[document.document_id]
                : null;

            return agenticResults ? (
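The two hunks above are the core of the change: the `additional_relevance` map is now keyed by `document_id` alone rather than `` `${document_id}-${chunk_ind}` ``, so relevance becomes a document-level judgment. A sketch of the lookup as a guard-friendly helper, assuming `Relevance` maps document_id to an object carrying a `relevant` flag:

```ts
import { Relevance, SearchDanswerDocument } from "@/lib/search/interfaces";

// Sketch: a document is displayed when the LLM analysis marks it relevant or
// the retrieval-time flag already did. Optional chaining guards the window
// where additional_relevance has not finished streaming in yet.
function isDocRelevant(
  doc: SearchDanswerDocument,
  additionalRelevance: Relevance | undefined
): boolean {
  return Boolean(
    additionalRelevance?.[doc.document_id]?.relevant || doc.is_relevant
  );
}
```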
@ -17,13 +17,11 @@ import {
  SearchDanswerDocument,
} from "@/lib/search/interfaces";
import { searchRequestStreamed } from "@/lib/search/streamingQa";

import { CancellationToken, cancellable } from "@/lib/search/cancellable";
import { useFilters, useObjectState } from "@/lib/hooks";
import { questionValidationStreamed } from "@/lib/search/streamingQuestionValidation";
import { Persona } from "@/app/admin/assistants/interfaces";
import { computeAvailableFilters } from "@/lib/filters";
import { redirect, useRouter, useSearchParams } from "next/navigation";
import { useRouter, useSearchParams } from "next/navigation";
import { SettingsContext } from "../settings/SettingsProvider";
import { HistorySidebar } from "@/app/chat/sessionSidebar/HistorySidebar";
import { ChatSession, SearchSession } from "@/app/chat/interfaces";
@ -33,13 +31,19 @@ import { SIDEBAR_TOGGLED_COOKIE_NAME } from "../resizable/constants";
import { AGENTIC_SEARCH_TYPE_COOKIE_NAME } from "@/lib/constants";
import Cookies from "js-cookie";
import FixedLogo from "@/app/chat/shared_chat_search/FixedLogo";
import { AnswerSection } from "./results/AnswerSection";
import { QuotesSection } from "./results/QuotesSection";
import { QAFeedbackBlock } from "./QAFeedback";
import { usePopup } from "../admin/connectors/Popup";

export type searchState =
  | "input"
  | "searching"
  | "reading"
  | "analyzing"
  | "summarizing";
  | "summarizing"
  | "generating"
  | "citing";

const SEARCH_DEFAULT_OVERRIDES_START: SearchDefaultOverrides = {
  forceDisplayQA: false,
@ -48,7 +52,6 @@ const SEARCH_DEFAULT_OVERRIDES_START: SearchDefaultOverrides = {

const VALID_QUESTION_RESPONSE_DEFAULT: ValidQuestionResponse = {
  reasoning: null,
  answerable: null,
  error: null,
};

@ -223,35 +226,48 @@ export const SearchSection = ({
    additional_relevance: undefined,
  };
  // Streaming updates
  const updateCurrentAnswer = (answer: string) =>
  const updateCurrentAnswer = (answer: string) => {
    setSearchResponse((prevState) => ({
      ...(prevState || initialSearchResponse),
      answer,
    }));
  const updateQuotes = (quotes: Quote[]) =>

    setSearchState((searchState) => {
      if (searchState != "input") {
        return "generating";
      }
      return "input";
    });
  };

  const updateQuotes = (quotes: Quote[]) => {
    setSearchResponse((prevState) => ({
      ...(prevState || initialSearchResponse),
      quotes,
    }));
    setSearchState((searchState) => "input");
  };

  const updateDocs = (documents: SearchDanswerDocument[]) => {
    setTimeout(() => {
      setSearchState((searchState) => {
        if (searchState != "input") {
          return "reading";
        }
        return "input";
      });
    }, 1500);
    if (agentic) {
      setTimeout(() => {
        setSearchState((searchState) => {
          if (searchState != "input") {
            return "reading";
          }
          return "input";
        });
      }, 1500);

      setTimeout(() => {
        setSearchState((searchState) => {
          if (searchState != "input") {
            return "analyzing";
          }
          return "input";
        });
      }, 4500);
      setTimeout(() => {
        setSearchState((searchState) => {
          if (searchState != "input") {
            return "analyzing";
          }
          return "input";
        });
      }, 4500);
    }

    setSearchResponse((prevState) => ({
      ...(prevState || initialSearchResponse),
@ -294,8 +310,9 @@ export const SearchSection = ({
      messageId,
    }));
    router.refresh();
    setSearchState("input");
    // setSearchState("input");
    setIsFetching(false);
    setSearchState((searchState) => "input");

    // router.replace(`/search?searchId=${chat_session_id}`);
  };
@ -309,7 +326,11 @@ export const SearchSection = ({
    setContentEnriched(true);

    setIsFetching(false);
    setSearchState("input");
    if (disabledAgentic) {
      setSearchState("input");
    } else {
      setSearchState("analyzing");
    }
  };

  const updateComments = (comments: any) => {
@ -317,7 +338,9 @@ export const SearchSection = ({
  };

  const finishedSearching = () => {
    setSearchState("input");
    if (disabledAgentic) {
      setSearchState("input");
    }
  };

  const resetInput = () => {
@ -414,15 +437,7 @@ export const SearchSection = ({
      offset: offset ?? defaultOverrides.offset,
    };

    const questionValidationArgs = {
      query,
      update: setValidQuestionResponse,
    };

    await Promise.all([
      searchRequestStreamed(searchFnArgs),
      questionValidationStreamed(questionValidationArgs),
    ]);
    await Promise.all([searchRequestStreamed(searchFnArgs)]);
  };

  // handle redirect if search page is disabled
@ -481,6 +496,20 @@ export const SearchSection = ({
    setShowDocSidebar,
    mobile: settings?.isMobile,
  });
  const { answer, quotes, documents, error, messageId } = searchResponse;

  const dedupedQuotes: Quote[] = [];
  const seen = new Set<string>();
  if (quotes) {
    quotes.forEach((quote) => {
      if (!seen.has(quote.document_id)) {
        dedupedQuotes.push(quote);
        seen.add(quote.document_id);
      }
    });
  }

  const { popup, setPopup } = usePopup();

  return (
    <>
@ -600,15 +629,113 @@ export const SearchSection = ({
            disabledAgentic ? undefined : toggleAgentic
          }
          agentic={agentic}
          searchState={searchState}
          query={query}
          setQuery={setQuery}
          onSearch={async (agentic?: boolean) => {
            setDefaultOverrides(SEARCH_DEFAULT_OVERRIDES_START);
            await onSearch({ agentic, offset: 0 });
          }}
          finalAvailableDocumentSets={finalAvailableDocumentSets}
          finalAvailableSources={finalAvailableSources}
          filterManager={filterManager}
          documentSets={documentSets}
          ccPairs={ccPairs}
          tags={tags}
        />
      </div>
      {!firstSearch && (
        <div className="my-4 min-h-[16rem] p-4 border-2 border-border rounded-lg relative">
          <div>
            <div className="flex gap-x-2 mb-1">
              <h2 className="text-emphasis font-bold my-auto mb-1 ">
                AI Answer
              </h2>

              {searchState == "generating" && (
                <div
                  key={"generating"}
                  className="relative inline-block"
                >
                  <span className="loading-text">
                    Generating response...
                  </span>
                </div>
              )}

              {searchState == "citing" && (
                <div
                  key={"citing"}
                  className="relative inline-block"
                >
                  <span className="loading-text">
                    Generating citations...
                  </span>
                </div>
              )}

              {searchState == "searching" && (
                <div
                  key={"Reading"}
                  className="relative inline-block"
                >
                  <span className="loading-text">Searching...</span>
                </div>
              )}

              {searchState == "reading" && (
                <div
                  key={"Reading"}
                  className="relative inline-block"
                >
                  <span className="loading-text">
                    Reading{settings?.isMobile ? "" : " Documents"}
                    ...
                  </span>
                </div>
              )}

              {searchState == "analyzing" && (
                <div
                  key={"Generating"}
                  className="relative inline-block"
                >
                  <span className="loading-text">
                    Generating
                    {settings?.isMobile ? "" : " Analysis"}...
                  </span>
                </div>
              )}
            </div>

            <div className="mb-2 pt-1 border-t border-border w-full">
              <AnswerSection
                answer={answer}
                quotes={quotes}
                error={error}
                isFetching={isFetching}
              />
            </div>

            {quotes !== null && answer && (
              <div className="pt-1 border-t border-border w-full">
                <QuotesSection
                  quotes={dedupedQuotes}
                  isFetching={isFetching}
                />

                {searchResponse.messageId !== null && (
                  <div className="absolute right-3 bottom-3">
                    <QAFeedbackBlock
                      messageId={searchResponse.messageId}
                      setPopup={setPopup}
                    />
                  </div>
                )}
              </div>
            )}
          </div>
        </div>
      )}

      {!settings?.isMobile && (
        <div className="mt-6">
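Editorial note on the handlers above: every `setSearchState` call repeats the same "advance unless the user is already back at the prompt" guard. Restated compactly as a sketch (not a refactor proposed by the commit; the local `SearchState` alias mirrors the exported lowercase `searchState` type):

```ts
import type { Dispatch, SetStateAction } from "react";

type SearchState =
  | "input" | "searching" | "reading" | "analyzing"
  | "summarizing" | "generating" | "citing";

// Sketch: advance the status machine only while a search is in flight;
// "input" is terminal until the next query starts.
const advanceUnlessIdle = (
  setState: Dispatch<SetStateAction<SearchState>>,
  next: SearchState
) => setState((current) => (current !== "input" ? next : "input"));

// e.g. the delayed agentic transitions in updateDocs become:
// setTimeout(() => advanceUnlessIdle(setSearchState, "reading"), 1500);
// setTimeout(() => advanceUnlessIdle(setSearchState, "analyzing"), 4500);
```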
@ -6,22 +6,23 @@ interface Option {
  display: string | JSX.Element;
  displayName?: string;
}

export function FilterDropdown({
  options,
  selected,
  handleSelect,
  icon,
  defaultDisplay,
  width = "w-64",
}: {
  options: Option[];
  selected: string[];
  handleSelect: (option: Option) => void;
  icon: JSX.Element;
  defaultDisplay: string | JSX.Element;
  width?: string;
}) {
  return (
    <div className="w-64">
    <div>
      <CustomDropdown
        dropdown={
          <div
@ -32,7 +33,7 @@ export function FilterDropdown({
            bg-background
            flex
            flex-col
            w-64
            ${width}
            max-h-96
            overflow-y-auto
            overscroll-contain`}
@ -76,7 +77,7 @@ export function FilterDropdown({
      <div
        className={`
          flex
          w-64
          ${width}
          text-sm
          px-3
          py-1.5
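The hard-coded `w-64` becomes a `width` prop defaulting to the old value, so existing call sites keep their layout while the new horizontal selector can shrink-to-fit. A usage sketch under that assumption (the filter state here is placeholder wiring, not code from the commit):

```tsx
import { FiFilter, FiMap } from "react-icons/fi";
import { FilterDropdown } from "./FilterDropdown";

// Sketch only: options/selected/handleSelect stand in for real filter state.
export const WidthDemo = (props: {
  options: { key: string; display: string }[];
  selected: string[];
  handleSelect: (option: { key: string }) => void;
}) => (
  <>
    {/* Default call site: unchanged fixed 16rem dropdown. */}
    <FilterDropdown
      options={props.options}
      selected={props.selected}
      handleSelect={props.handleSelect}
      icon={<FiFilter size={16} />}
      defaultDisplay="Sources"
    />
    {/* Compact variant, as HorizontalSourceSelector uses below. */}
    <FilterDropdown
      options={props.options}
      selected={props.selected}
      handleSelect={props.handleSelect}
      icon={<FiMap size={16} />}
      defaultDisplay="Sources"
      width="w-fit max-w-24 ellipsis truncate"
    />
  </>
);
```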
@ -3,7 +3,14 @@ import { DocumentSet, Tag, ValidSources } from "@/lib/types";
import { SourceMetadata } from "@/lib/search/interfaces";
import { InfoIcon, defaultTailwindCSS } from "../../icons/icons";
import { HoverPopup } from "../../HoverPopup";
import { FiBook, FiBookmark, FiFilter, FiMap, FiX } from "react-icons/fi";
import {
  FiBook,
  FiBookmark,
  FiFilter,
  FiMap,
  FiTag,
  FiX,
} from "react-icons/fi";
import { DateRangeSelector } from "../DateRangeSelector";
import { DateRangePickerValue } from "@tremor/react";
import { FilterDropdown } from "./FilterDropdown";
@ -72,9 +79,9 @@ export function SourceSelector({
    <div
      className={`hidden ${
        showDocSidebar ? "4xl:block" : "!block"
      } duration-1000 ease-out transition-all transform origin-top-right`}
      } duration-1000 flex ease-out transition-all transform origin-top-right`}
    >
      <div className="flex mb-4 pb-2 border-b border-border text-emphasis">
      <div className=" mb-4 pb-2 border-b border-border text-emphasis">
        <h2 className="font-bold my-auto">Filters</h2>
        <FiFilter className="my-auto ml-2" size="16" />
      </div>
@ -324,3 +331,184 @@ export function HorizontalFilters({
    </div>
  );
}

export function HorizontalSourceSelector({
  timeRange,
  setTimeRange,
  selectedSources,
  setSelectedSources,
  selectedDocumentSets,
  setSelectedDocumentSets,
  selectedTags,
  setSelectedTags,
  availableDocumentSets,
  existingSources,
  availableTags,
}: SourceSelectorProps) {
  const handleSourceSelect = (source: SourceMetadata) => {
    setSelectedSources((prev: SourceMetadata[]) => {
      if (prev.map((s) => s.internalName).includes(source.internalName)) {
        return prev.filter((s) => s.internalName !== source.internalName);
      } else {
        return [...prev, source];
      }
    });
  };

  const handleDocumentSetSelect = (documentSetName: string) => {
    setSelectedDocumentSets((prev: string[]) => {
      if (prev.includes(documentSetName)) {
        return prev.filter((s) => s !== documentSetName);
      } else {
        return [...prev, documentSetName];
      }
    });
  };

  const handleTagSelect = (tag: Tag) => {
    setSelectedTags((prev: Tag[]) => {
      if (
        prev.some(
          (t) => t.tag_key === tag.tag_key && t.tag_value === tag.tag_value
        )
      ) {
        return prev.filter(
          (t) => !(t.tag_key === tag.tag_key && t.tag_value === tag.tag_value)
        );
      } else {
        return [...prev, tag];
      }
    });
  };

  return (
    <div className="flex flex-col space-y-4">
      <div className="flex space-x-2">
        <div className="w-24">
          <DateRangeSelector
            isHorizontal
            value={timeRange}
            onValueChange={setTimeRange}
          />
        </div>

        {existingSources.length > 0 && (
          <FilterDropdown
            options={listSourceMetadata()
              .filter((source) => existingSources.includes(source.internalName))
              .map((source) => ({
                key: source.internalName,
                display: (
                  <>
                    <SourceIcon
                      sourceType={source.internalName}
                      iconSize={16}
                    />
                    <span className="ml-2 text-sm">{source.displayName}</span>
                  </>
                ),
              }))}
            selected={selectedSources.map((source) => source.internalName)}
            handleSelect={(option) =>
              handleSourceSelect(
                listSourceMetadata().find((s) => s.internalName === option.key)!
              )
            }
            icon={<FiMap size={16} />}
            defaultDisplay="Sources"
            width="w-fit max-w-24 ellipsis truncate"
          />
        )}

        {availableDocumentSets.length > 0 && (
          <FilterDropdown
            options={availableDocumentSets.map((documentSet) => ({
              key: documentSet.name,
              display: (
                <>
                  <FiBookmark />
                  <span className="ml-2 text-sm">{documentSet.name}</span>
                </>
              ),
            }))}
            selected={selectedDocumentSets}
            handleSelect={(option) => handleDocumentSetSelect(option.key)}
            icon={<FiBook size={16} />}
            defaultDisplay="Sets"
            width="w-fit max-w-24 ellipsis"
          />
        )}

        {availableTags.length > 0 && (
          <FilterDropdown
            options={availableTags.map((tag) => ({
              key: `${tag.tag_key}=${tag.tag_value}`,
              display: (
                <span className="text-sm">
                  {tag.tag_key}
                  <b>=</b>
                  {tag.tag_value}
                </span>
              ),
            }))}
            selected={selectedTags.map(
              (tag) => `${tag.tag_key}=${tag.tag_value}`
            )}
            handleSelect={(option) => {
              const [tag_key, tag_value] = option.key.split("=");
              const selectedTag = availableTags.find(
                (tag) => tag.tag_key === tag_key && tag.tag_value === tag_value
              );
              if (selectedTag) {
                handleTagSelect(selectedTag);
              }
            }}
            icon={<FiTag size={16} />}
            defaultDisplay="Tags"
            width="w-fit max-w-24 ellipsis"
          />
        )}
      </div>

      {/* <div className="flex flex-wrap gap-2">
        {timeRange && timeRange.selectValue && (
          <SelectedBubble onClick={() => setTimeRange(null)}>
            <div className="text-sm flex">{timeRange.selectValue}</div>
          </SelectedBubble>
        )}
        {selectedSources.map((source) => (
          <SelectedBubble
            key={source.internalName}
            onClick={() => handleSourceSelect(source)}
          >
            <>
              <SourceIcon sourceType={source.internalName} iconSize={16} />
              <span className="ml-2 text-sm">{source.displayName}</span>
            </>
          </SelectedBubble>
        ))}
        {selectedDocumentSets.map((documentSetName) => (
          <SelectedBubble
            key={documentSetName}
            onClick={() => handleDocumentSetSelect(documentSetName)}
          >
            <>
              <FiBookmark />
              <span className="ml-2 text-sm">{documentSetName}</span>
            </>
          </SelectedBubble>
        ))}
        {selectedTags.map((tag) => (
          <SelectedBubble
            key={`${tag.tag_key}=${tag.tag_value}`}
            onClick={() => handleTagSelect(tag)}
          >
            <span className="text-sm">
              {tag.tag_key}<b>=</b>{tag.tag_value}
            </span>
          </SelectedBubble>
        ))}
      </div> */}
    </div>
  );
}
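The three `handle*Select` callbacks above are the same toggle-membership pattern with different equality checks. A generic helper would collapse them; offered as a sketch, not code from the commit:

```ts
// Hypothetical helper: toggle `item` in `prev` using a caller-supplied
// equality test, matching the updater-function style used above.
function toggleBy<T>(prev: T[], item: T, eq: (a: T, b: T) => boolean): T[] {
  return prev.some((x) => eq(x, item))
    ? prev.filter((x) => !eq(x, item))
    : [...prev, item];
}

// e.g.:
// setSelectedTags((prev) =>
//   toggleBy(prev, tag, (a, b) =>
//     a.tag_key === b.tag_key && a.tag_value === b.tag_value
//   )
// );
```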
@ -26,31 +26,28 @@ interface AnswerSectionProps {
  answer: string | null;
  quotes: Quote[] | null;
  error: string | null;
  nonAnswerableReason: string | null;
  isFetching: boolean;
}

export const AnswerSection = (props: AnswerSectionProps) => {
  let status = "in-progress" as StatusOptions;
  let header = <>Building answer...</>;
  let header = <></>;
  let body = null;

  // finished answer
  if (props.quotes !== null || !props.isFetching) {
    status = "success";
    header = <>AI answer</>;
    if (props.answer) {
      body = (
        <ReactMarkdown
          className="prose text-sm max-w-full"
          remarkPlugins={[remarkGfm]}
        >
          {replaceNewlines(props.answer)}
        </ReactMarkdown>
      );
    } else {
      body = <div>Information not found</div>;
    }
    header = <></>;

    body = (
      <ReactMarkdown
        className="prose text-sm max-w-full"
        remarkPlugins={[remarkGfm]}
      >
        {replaceNewlines(props.answer || "")}
      </ReactMarkdown>
    );

    // error while building answer (NOTE: if error occurs during quote generation
    // the above if statement will hit and the error will not be displayed)
  } else if (props.error) {
@ -64,7 +61,7 @@ export const AnswerSection = (props: AnswerSectionProps) => {
    // answer is streaming
  } else if (props.answer) {
    status = "success";
    header = <>AI answer</>;
    header = <></>;
    body = (
      <ReactMarkdown
        className="prose text-sm max-w-full"
@ -74,10 +71,6 @@ export const AnswerSection = (props: AnswerSectionProps) => {
      </ReactMarkdown>
    );
  }
  if (props.nonAnswerableReason) {
    status = "warning";
    header = <>Building best effort AI answer...</>;
  }

  return (
    <ResponseSection
@ -87,20 +80,7 @@ export const AnswerSection = (props: AnswerSectionProps) => {
          <div className="ml-2 text-strong">{header}</div>
        </div>
      }
      body={
        <div className="">
          {body}
          {props.nonAnswerableReason && !props.isFetching && (
            <div className="mt-4 text-sm">
              <b className="font-medium">Warning:</b> the AI did not think this
              question was answerable.{" "}
              <div className="italic mt-1 ml-2">
                {props.nonAnswerableReason}
              </div>
            </div>
          )}
        </div>
      }
      body={<div className="">{body}</div>}
      desiredOpenStatus={true}
      isNotControllable={true}
    />
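With `nonAnswerableReason` removed, the status logic above collapses to three branches. Restated as a pure function for readability (a sketch, not code from the commit):

```ts
type StatusOptions = "in-progress" | "failed" | "warning" | "success";

// Same precedence as the component: a finished response (quotes present or
// fetch complete) wins, then an error, then a still-streaming answer.
function answerStatus(props: {
  answer: string | null;
  quotes: unknown[] | null;
  error: string | null;
  isFetching: boolean;
}): StatusOptions {
  if (props.quotes !== null || !props.isFetching) return "success";
  if (props.error) return "failed";
  if (props.answer) return "success";
  return "in-progress";
}
```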
@ -65,7 +65,6 @@ const QuoteDisplay = ({ quoteInfo }: { quoteInfo: Quote }) => {

interface QuotesSectionProps {
  quotes: Quote[] | null;
  isAnswerable: boolean | null;
  isFetching: boolean;
}

@ -110,11 +109,7 @@ export const QuotesSection = (props: QuotesSectionProps) => {
  let status: StatusOptions = "in-progress";
  if (!props.isFetching) {
    if (props.quotes && props.quotes.length > 0) {
      if (props.isAnswerable === false) {
        status = "warning";
      } else {
        status = "success";
      }
      status = "success";
    } else {
      status = "failed";
    }
@ -7,6 +7,7 @@ import {
} from "@/components/icons/icons";
import { useState } from "react";
import { Grid } from "react-loader-spinner";
import { searchState } from "../SearchSection";

export type StatusOptions = "in-progress" | "failed" | "warning" | "success";

@ -31,26 +32,13 @@ export const ResponseSection = ({

  let icon = null;
  if (status === "in-progress") {
    icon = (
      <div className="m-auto">
        <Grid
          height="12"
          width="12"
          color="#3b82f6"
          ariaLabel="grid-loading"
          radius="12.5"
          wrapperStyle={{}}
          wrapperClass=""
          visible={true}
        />
      </div>
    );
    icon = <></>;
  }
  if (status === "failed") {
    icon = <AlertIcon size={16} className="text-red-500" />;
  }
  if (status === "success") {
    icon = <CheckmarkIcon size={16} className="text-green-600" />;
    icon = <></>;
  }
  if (status === "warning") {
    icon = <TriangleAlertIcon size={16} className="text-yellow-600" />;
@ -52,5 +52,5 @@ export const CUSTOM_ANALYTICS_ENABLED = process.env.CUSTOM_ANALYTICS_SECRET_KEY
  ? true
  : false;

export const DISABLE_AGENTIC_SEARCH =
  process.env.DISABLE_AGENTIC_SEARCH?.toLowerCase() === "true";
export const DISABLE_LLM_DOC_RELEVANCE =
  process.env.DISABLE_LLM_DOC_RELEVANCE?.toLowerCase() === "true";
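The renamed flag keeps the same string-to-boolean parse. Extracted as a helper for clarity (a sketch; the codebase inlines the expression as shown above):

```ts
// Hypothetical helper mirroring the inline parse: any casing of "true"
// enables the flag; everything else, including unset, disables it.
const envFlag = (value: string | undefined): boolean =>
  value?.toLowerCase() === "true";

export const DISABLE_LLM_DOC_RELEVANCE = envFlag(
  process.env.DISABLE_LLM_DOC_RELEVANCE
);
```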
@ -158,7 +158,6 @@ export interface SearchRequestOverrides {
}

export interface ValidQuestionResponse {
  answerable: boolean | null;
  reasoning: string | null;
  error: string | null;
}
@ -61,8 +61,7 @@ export const searchRequestStreamed = async ({
      filters: filters,
      enable_auto_detect_filters: false,
    },
    llm_doc_eval: true,
    skip_gen_ai_answer_generation: true,
    evaluation_type: agentic ? "agentic" : "basic",
  }),
  headers: {
    "Content-Type": "application/json",
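The search request now lets the backend pick the relevance-evaluation mode per query instead of always forcing LLM doc evaluation and skipping answer generation. A minimal sketch of the changed POST body; the route and the surrounding fields are assumptions, only `evaluation_type` is grounded in this hunk:

```ts
// Sketch: build the JSON body for the streaming search request.
// The URL and the `messages` field shape are placeholders, not from this diff.
async function postSearch(query: string, agentic: boolean): Promise<Response> {
  return fetch("/api/query/stream-answer-with-quote", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({
      messages: [{ message: query }], // assumed field shape
      evaluation_type: agentic ? "agentic" : "basic",
    }),
  });
}
```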
@ -1,66 +0,0 @@
import {
  AnswerPiecePacket,
  ErrorMessagePacket,
  ValidQuestionResponse,
} from "./interfaces";
import { processRawChunkString } from "./streamingUtils";

export interface QuestionValidationArgs {
  query: string;
  update: (update: Partial<ValidQuestionResponse>) => void;
}

export const questionValidationStreamed = async <T>({
  query,
  update,
}: QuestionValidationArgs) => {
  const response = await fetch("/api/query/stream-query-validation", {
    method: "POST",
    body: JSON.stringify({
      query,
    }),
    headers: {
      "Content-Type": "application/json",
    },
  });
  const reader = response.body?.getReader();
  const decoder = new TextDecoder("utf-8");

  let reasoning = "";
  let previousPartialChunk: string | null = null;
  while (true) {
    const rawChunk = await reader?.read();
    if (!rawChunk) {
      throw new Error("Unable to process chunk");
    }
    const { done, value } = rawChunk;
    if (done) {
      break;
    }

    const [completedChunks, partialChunk] = processRawChunkString<
      AnswerPiecePacket | ValidQuestionResponse | ErrorMessagePacket
    >(decoder.decode(value, { stream: true }), previousPartialChunk);
    if (!completedChunks.length && !partialChunk) {
      break;
    }
    previousPartialChunk = partialChunk as string | null;

    completedChunks.forEach((chunk) => {
      if (Object.hasOwn(chunk, "answer_piece")) {
        reasoning += (chunk as AnswerPiecePacket).answer_piece;
        update({
          reasoning,
        });
      }

      if (Object.hasOwn(chunk, "answerable")) {
        update({ answerable: (chunk as ValidQuestionResponse).answerable });
      }

      if (Object.hasOwn(chunk, "error")) {
        update({ error: (chunk as ErrorMessagePacket).error });
      }
    });
  }
};