Remove Redundant Dedupe Logic (#1577)

This commit is contained in:
Yuhong Sun 2024-06-06 14:36:41 -07:00 committed by GitHub
parent da43bac456
commit 09da456bba
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 5 additions and 10 deletions

View File

@@ -208,15 +208,14 @@ def stream_answer_objects(
search_response_summary.top_sections
)
# Deduping happens at the last step to avoid harming quality by dropping content early on
deduped_docs = top_docs
if query_req.retrieval_options.dedupe_docs:
deduped_docs, dropped_inds = dedupe_documents(top_docs)
reference_db_search_docs = [
create_db_search_doc(
server_search_doc=top_doc, db_session=db_session
)
for top_doc in deduped_docs
create_db_search_doc(server_search_doc=doc, db_session=db_session)
for doc in deduped_docs
]
response_docs = [

View File

@@ -78,9 +78,6 @@ class SearchRequest(ChunkContext):
skip_rerank: bool | None = None
skip_llm_chunk_filter: bool | None = None
# If this is set, only the highest matching chunk (or merged chunks) is returned
dedupe_docs: bool = False
class Config:
arbitrary_types_allowed = True
@@ -118,6 +115,8 @@ class RetrievalDetails(ChunkContext):
# if None, no offset / limit
offset: int | None = None
limit: int | None = None
# If this is set, only the highest matching chunk (or merged chunks) is returned
dedupe_docs: bool = False

View File

@@ -202,9 +202,6 @@ class SearchTool(Tool):
chunks_above=self.chunks_above,
chunks_below=self.chunks_below,
full_doc=self.full_doc,
dedupe_docs=self.retrieval_options.dedupe_docs
if self.retrieval_options
else False,
),
user=self.user,
db_session=self.db_session,