mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-04-11 21:39:31 +02:00
Remove Redundant Dedupe Logic (#1577)
This commit is contained in:
parent
da43bac456
commit
09da456bba
@ -208,15 +208,14 @@ def stream_answer_objects(
|
||||
search_response_summary.top_sections
|
||||
)
|
||||
|
||||
# Deduping happens at the last step to avoid harming quality by dropping content early on
|
||||
deduped_docs = top_docs
|
||||
if query_req.retrieval_options.dedupe_docs:
|
||||
deduped_docs, dropped_inds = dedupe_documents(top_docs)
|
||||
|
||||
reference_db_search_docs = [
|
||||
create_db_search_doc(
|
||||
server_search_doc=top_doc, db_session=db_session
|
||||
)
|
||||
for top_doc in deduped_docs
|
||||
create_db_search_doc(server_search_doc=doc, db_session=db_session)
|
||||
for doc in deduped_docs
|
||||
]
|
||||
|
||||
response_docs = [
|
||||
|
@ -78,9 +78,6 @@ class SearchRequest(ChunkContext):
|
||||
skip_rerank: bool | None = None
|
||||
skip_llm_chunk_filter: bool | None = None
|
||||
|
||||
# If this is set, only the highest matching chunk (or merged chunks) is returned
|
||||
dedupe_docs: bool = False
|
||||
|
||||
class Config:
|
||||
arbitrary_types_allowed = True
|
||||
|
||||
@ -118,6 +115,8 @@ class RetrievalDetails(ChunkContext):
|
||||
# if None, no offset / limit
|
||||
offset: int | None = None
|
||||
limit: int | None = None
|
||||
|
||||
# If this is set, only the highest matching chunk (or merged chunks) is returned
|
||||
dedupe_docs: bool = False
|
||||
|
||||
|
||||
|
@ -202,9 +202,6 @@ class SearchTool(Tool):
|
||||
chunks_above=self.chunks_above,
|
||||
chunks_below=self.chunks_below,
|
||||
full_doc=self.full_doc,
|
||||
dedupe_docs=self.retrieval_options.dedupe_docs
|
||||
if self.retrieval_options
|
||||
else False,
|
||||
),
|
||||
user=self.user,
|
||||
db_session=self.db_session,
|
||||
|
Loading…
x
Reference in New Issue
Block a user