diff --git a/backend/ee/onyx/server/query_and_chat/chat_backend.py b/backend/ee/onyx/server/query_and_chat/chat_backend.py
index a2c08b543bf..2d96d0739e0 100644
--- a/backend/ee/onyx/server/query_and_chat/chat_backend.py
+++ b/backend/ee/onyx/server/query_and_chat/chat_backend.py
@@ -14,7 +14,6 @@ from ee.onyx.server.query_and_chat.models import (
     BasicCreateChatMessageWithHistoryRequest,
 )
 from ee.onyx.server.query_and_chat.models import ChatBasicResponse
-from ee.onyx.server.query_and_chat.models import SimpleDoc
 from onyx.auth.users import current_user
 from onyx.chat.chat_utils import combine_message_thread
 from onyx.chat.chat_utils import create_chat_chain
@@ -56,25 +55,6 @@ logger = setup_logger()
 router = APIRouter(prefix="/chat")
 
 
-def _translate_doc_response_to_simple_doc(
-    doc_response: QADocsResponse,
-) -> list[SimpleDoc]:
-    return [
-        SimpleDoc(
-            id=doc.document_id,
-            semantic_identifier=doc.semantic_identifier,
-            link=doc.link,
-            blurb=doc.blurb,
-            match_highlights=[
-                highlight for highlight in doc.match_highlights if highlight
-            ],
-            source_type=doc.source_type,
-            metadata=doc.metadata,
-        )
-        for doc in doc_response.top_documents
-    ]
-
-
 def _get_final_context_doc_indices(
     final_context_docs: list[LlmDoc] | None,
     top_docs: list[SavedSearchDoc] | None,
@@ -111,9 +91,6 @@ def _convert_packet_stream_to_response(
         elif isinstance(packet, QADocsResponse):
             response.top_documents = packet.top_documents
 
-            # TODO: deprecate `simple_search_docs`
-            response.simple_search_docs = _translate_doc_response_to_simple_doc(packet)
-
             # This is a no-op if agent_sub_questions hasn't already been filled
             if packet.level is not None and packet.level_question_num is not None:
                 id = (packet.level, packet.level_question_num)
diff --git a/backend/ee/onyx/server/query_and_chat/models.py b/backend/ee/onyx/server/query_and_chat/models.py
index 9268ac3439a..505624ac3ab 100644
--- a/backend/ee/onyx/server/query_and_chat/models.py
+++ b/backend/ee/onyx/server/query_and_chat/models.py
@@ -163,8 +163,6 @@ class ChatBasicResponse(BaseModel):
     cited_documents: dict[int, str] | None = None
 
     # FOR BACKWARDS COMPATIBILITY
-    # TODO: deprecate both of these
-    simple_search_docs: list[SimpleDoc] | None = None
     llm_chunks_indices: list[int] | None = None
 
     # agentic fields
diff --git a/backend/onyx/chat/prune_and_merge.py b/backend/onyx/chat/prune_and_merge.py
index 284a1e193fe..83f847e09be 100644
--- a/backend/onyx/chat/prune_and_merge.py
+++ b/backend/onyx/chat/prune_and_merge.py
@@ -301,6 +301,10 @@ def prune_sections(
 
 
 def _merge_doc_chunks(chunks: list[InferenceChunk]) -> InferenceSection:
+    assert (
+        len(set([chunk.document_id for chunk in chunks])) == 1
+    ), "One distinct document must be passed into merge_doc_chunks"
+
     # Assuming there are no duplicates by this point
     sorted_chunks = sorted(chunks, key=lambda x: x.chunk_id)
 
diff --git a/backend/onyx/context/search/pipeline.py b/backend/onyx/context/search/pipeline.py
index 40cc9c7925c..3c7043994e5 100644
--- a/backend/onyx/context/search/pipeline.py
+++ b/backend/onyx/context/search/pipeline.py
@@ -339,6 +339,12 @@ class SearchPipeline:
             self._retrieved_sections = self._get_sections()
         return self._retrieved_sections
 
+    @property
+    def merged_retrieved_sections(self) -> list[InferenceSection]:
+        """Should be used to display in the UI in order to prevent displaying
+        multiple sections for the same document as separate "documents"."""
+        return _merge_sections(sections=self.retrieved_sections)
+
     @property
     def reranked_sections(self) -> list[InferenceSection]:
         """Reranking is always done at the chunk level since section merging could create arbitrarily
diff --git a/backend/onyx/tools/tool_implementations/search/search_tool.py b/backend/onyx/tools/tool_implementations/search/search_tool.py
index 9dc7d5ab44f..00a4907d685 100644
--- a/backend/onyx/tools/tool_implementations/search/search_tool.py
+++ b/backend/onyx/tools/tool_implementations/search/search_tool.py
@@ -353,7 +353,8 @@ class SearchTool(Tool[SearchToolOverrideKwargs]):
         )
         yield from yield_search_responses(
             query=query,
-            get_retrieved_sections=lambda: search_pipeline.retrieved_sections,
+            # give back the merged sections to prevent duplicate docs from appearing in the UI
+            get_retrieved_sections=lambda: search_pipeline.merged_retrieved_sections,
             get_final_context_sections=lambda: search_pipeline.final_context_sections,
             search_query_info=search_query_info,
             get_section_relevance=lambda: search_pipeline.section_relevance,
diff --git a/backend/tests/integration/common_utils/managers/chat.py b/backend/tests/integration/common_utils/managers/chat.py
index 2fdef68abfc..e4a8802e940 100644
--- a/backend/tests/integration/common_utils/managers/chat.py
+++ b/backend/tests/integration/common_utils/managers/chat.py
@@ -5,6 +5,7 @@ import requests
 from requests.models import Response
 
 from onyx.context.search.models import RetrievalDetails
+from onyx.context.search.models import SavedSearchDoc
 from onyx.file_store.models import FileDescriptor
 from onyx.llm.override_models import LLMOverride
 from onyx.llm.override_models import PromptOverride
@@ -97,17 +98,24 @@ class ChatSessionManager:
         for data in response_data:
             if "rephrased_query" in data:
                 analyzed.rephrased_query = data["rephrased_query"]
-            elif "tool_name" in data:
+            if "tool_name" in data:
                 analyzed.tool_name = data["tool_name"]
                 analyzed.tool_result = (
                     data.get("tool_result")
                     if analyzed.tool_name == "run_search"
                     else None
                 )
-            elif "relevance_summaries" in data:
+            if "relevance_summaries" in data:
                 analyzed.relevance_summaries = data["relevance_summaries"]
-            elif "answer_piece" in data and data["answer_piece"]:
+            if "answer_piece" in data and data["answer_piece"]:
                 analyzed.full_message += data["answer_piece"]
+            if "top_documents" in data:
+                assert (
+                    analyzed.top_documents is None
+                ), "top_documents should only be set once"
+                analyzed.top_documents = [
+                    SavedSearchDoc(**doc) for doc in data["top_documents"]
+                ]
 
         return analyzed
 
diff --git a/backend/tests/integration/common_utils/test_models.py b/backend/tests/integration/common_utils/test_models.py
index 329363cb862..01a2b07980b 100644
--- a/backend/tests/integration/common_utils/test_models.py
+++ b/backend/tests/integration/common_utils/test_models.py
@@ -10,6 +10,7 @@ from pydantic import Field
 from onyx.auth.schemas import UserRole
 from onyx.configs.constants import QAFeedbackType
 from onyx.context.search.enums import RecencyBiasSetting
+from onyx.context.search.models import SavedSearchDoc
 from onyx.db.enums import AccessType
 from onyx.server.documents.models import DocumentSource
 from onyx.server.documents.models import IndexAttemptSnapshot
@@ -157,7 +158,7 @@ class StreamedResponse(BaseModel):
     full_message: str = ""
     rephrased_query: str | None = None
     tool_name: str | None = None
-    top_documents: list[dict[str, Any]] | None = None
+    top_documents: list[SavedSearchDoc] | None = None
     relevance_summaries: list[dict[str, Any]] | None = None
     tool_result: Any | None = None
     user: str | None = None
diff --git a/backend/tests/integration/tests/dev_apis/test_simple_chat_api.py b/backend/tests/integration/tests/dev_apis/test_simple_chat_api.py
index cadb848a97d..d8c818a5519 100644
--- a/backend/tests/integration/tests/dev_apis/test_simple_chat_api.py
+++ b/backend/tests/integration/tests/dev_apis/test_simple_chat_api.py
@@ -16,10 +16,7 @@ from tests.integration.common_utils.test_models import DATestCCPair
 from tests.integration.common_utils.test_models import DATestUser
 
 
-def test_send_message_simple_with_history(reset: None) -> None:
-    # Creating an admin user (first user created is automatically an admin)
-    admin_user: DATestUser = UserManager.create(name="admin_user")
-
+def test_send_message_simple_with_history(reset: None, admin_user: DATestUser) -> None:
     # create connectors
     cc_pair_1: DATestCCPair = CCPairManager.create_from_scratch(
         user_performing_action=admin_user,
     )
@@ -53,13 +50,13 @@
     response_json = response.json()
 
     # Check that the top document is the correct document
-    assert response_json["simple_search_docs"][0]["id"] == cc_pair_1.documents[0].id
     assert response_json["top_documents"][0]["document_id"] == cc_pair_1.documents[0].id
 
     # assert that the metadata is correct
     for doc in cc_pair_1.documents:
         found_doc = next(
-            (x for x in response_json["simple_search_docs"] if x["id"] == doc.id), None
+            (x for x in response_json["top_documents"] if x["document_id"] == doc.id),
+            None,
         )
         assert found_doc
         assert found_doc["metadata"]["document_id"] == doc.id
diff --git a/backend/tests/integration/tests/streaming_endpoints/conftest.py b/backend/tests/integration/tests/streaming_endpoints/conftest.py
new file mode 100644
index 00000000000..17da0871fec
--- /dev/null
+++ b/backend/tests/integration/tests/streaming_endpoints/conftest.py
@@ -0,0 +1,42 @@
+from collections.abc import Callable
+
+import pytest
+
+from onyx.configs.constants import DocumentSource
+from tests.integration.common_utils.managers.api_key import APIKeyManager
+from tests.integration.common_utils.managers.cc_pair import CCPairManager
+from tests.integration.common_utils.managers.document import DocumentManager
+from tests.integration.common_utils.test_models import DATestAPIKey
+from tests.integration.common_utils.test_models import DATestUser
+from tests.integration.common_utils.test_models import SimpleTestDocument
+
+
+DocumentBuilderType = Callable[[list[str]], list[SimpleTestDocument]]
+
+
+@pytest.fixture
+def document_builder(admin_user: DATestUser) -> DocumentBuilderType:
+    api_key: DATestAPIKey = APIKeyManager.create(
+        user_performing_action=admin_user,
+    )
+
+    # create connector
+    cc_pair_1 = CCPairManager.create_from_scratch(
+        source=DocumentSource.INGESTION_API,
+        user_performing_action=admin_user,
+    )
+
+    def _document_builder(contents: list[str]) -> list[SimpleTestDocument]:
+        # seed documents
+        docs: list[SimpleTestDocument] = [
+            DocumentManager.seed_doc_with_content(
+                cc_pair=cc_pair_1,
+                content=content,
+                api_key=api_key,
+            )
+            for content in contents
+        ]
+
+        return docs
+
+    return _document_builder
diff --git a/backend/tests/integration/tests/streaming_endpoints/test_chat_stream.py b/backend/tests/integration/tests/streaming_endpoints/test_chat_stream.py
index cc507b1374c..08a3dae0ac1 100644
--- a/backend/tests/integration/tests/streaming_endpoints/test_chat_stream.py
+++ b/backend/tests/integration/tests/streaming_endpoints/test_chat_stream.py
@@ -5,12 +5,11 @@ import pytest
 from tests.integration.common_utils.constants import API_SERVER_URL
 from tests.integration.common_utils.managers.chat import ChatSessionManager
 from tests.integration.common_utils.managers.llm_provider import LLMProviderManager
-from tests.integration.common_utils.managers.user import UserManager
 from tests.integration.common_utils.test_models import DATestUser
+from tests.integration.tests.streaming_endpoints.conftest import DocumentBuilderType
 
 
-def test_send_message_simple_with_history(reset: None) -> None:
-    admin_user: DATestUser = UserManager.create(name="admin_user")
+def test_send_message_simple_with_history(reset: None, admin_user: DATestUser) -> None:
     LLMProviderManager.create(user_performing_action=admin_user)
 
     test_chat_session = ChatSessionManager.create(user_performing_action=admin_user)
@@ -24,6 +23,44 @@
     assert len(response.full_message) > 0
 
 
+def test_send_message__basic_searches(
+    reset: None, admin_user: DATestUser, document_builder: DocumentBuilderType
+) -> None:
+    MESSAGE = "run a search for 'test'"
+    SHORT_DOC_CONTENT = "test"
+    LONG_DOC_CONTENT = "blah blah blah blah" * 100
+
+    LLMProviderManager.create(user_performing_action=admin_user)
+
+    short_doc = document_builder([SHORT_DOC_CONTENT])[0]
+
+    test_chat_session = ChatSessionManager.create(user_performing_action=admin_user)
+    response = ChatSessionManager.send_message(
+        chat_session_id=test_chat_session.id,
+        message=MESSAGE,
+        user_performing_action=admin_user,
+    )
+    assert response.top_documents is not None
+    assert len(response.top_documents) == 1
+    assert response.top_documents[0].document_id == short_doc.id
+
+    # make sure this doc is really long so that it will be split into multiple chunks
+    long_doc = document_builder([LONG_DOC_CONTENT])[0]
+
+    # new chat session for simplicity
+    test_chat_session = ChatSessionManager.create(user_performing_action=admin_user)
+    response = ChatSessionManager.send_message(
+        chat_session_id=test_chat_session.id,
+        message=MESSAGE,
+        user_performing_action=admin_user,
+    )
+    assert response.top_documents is not None
+    assert len(response.top_documents) == 2
+    # short doc should be more relevant and thus first
+    assert response.top_documents[0].document_id == short_doc.id
+    assert response.top_documents[1].document_id == long_doc.id
+
+
 @pytest.mark.skip(
     reason="enable for autorun when we have a testing environment with semantically useful data"
 )
diff --git a/web/src/components/search/DocumentDisplay.tsx b/web/src/components/search/DocumentDisplay.tsx
index f0a861c4ae9..68f839250df 100644
--- a/web/src/components/search/DocumentDisplay.tsx
+++ b/web/src/components/search/DocumentDisplay.tsx
@@ -26,7 +26,13 @@ export const buildDocumentSummaryDisplay = (
   matchHighlights: string[],
   blurb: string
 ) => {
-  if (!matchHighlights || matchHighlights.length === 0) {
+  // if there are no match highlights, or if it's really short, just use the blurb
+  // this is to prevent the UI from showing something like `...` for the summary
+  const MIN_MATCH_HIGHLIGHT_LENGTH = 5;
+  if (
+    !matchHighlights ||
+    matchHighlights.length <= MIN_MATCH_HIGHLIGHT_LENGTH
+  ) {
     return blurb;
   }
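For context on the de-duplication idea the patch relies on, below is a minimal illustrative sketch, not part of the patch, of grouping retrieved chunks by document before merging, in the spirit of the new merged_retrieved_sections property and the assertion added to _merge_doc_chunks. The Chunk dataclass and helper names here are hypothetical stand-ins for Onyx's InferenceChunk and _merge_sections, not the real implementations.

# Illustrative sketch only (assumed names, not Onyx code).
from collections import defaultdict
from dataclasses import dataclass


@dataclass
class Chunk:
    document_id: str
    chunk_id: int
    content: str


def merge_one_document(chunks: list[Chunk]) -> str:
    # Mirrors the added assertion: each merge call sees exactly one distinct document.
    assert len({c.document_id for c in chunks}) == 1, "expected a single document"
    ordered = sorted(chunks, key=lambda c: c.chunk_id)
    return " ".join(c.content for c in ordered)


def merge_by_document(chunks: list[Chunk]) -> dict[str, str]:
    # Group chunks by document so the UI shows one entry per document,
    # instead of one entry per retrieved chunk/section.
    grouped: dict[str, list[Chunk]] = defaultdict(list)
    for chunk in chunks:
        grouped[chunk.document_id].append(chunk)
    return {doc_id: merge_one_document(doc_chunks) for doc_id, doc_chunks in grouped.items()}


if __name__ == "__main__":
    chunks = [
        Chunk("doc-a", 1, "second part"),
        Chunk("doc-a", 0, "first part"),
        Chunk("doc-b", 0, "only part"),
    ]
    # Two merged documents come back rather than three separate "documents".
    print(merge_by_document(chunks))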