From d8b050026d34e11efaf4c4ff640a5339027a499f Mon Sep 17 00:00:00 2001 From: joachim-danswer Date: Tue, 22 Apr 2025 20:18:57 -0700 Subject: [PATCH] removal of keyword 1st phase --- .../context/search/retrieval/search_runner.py | 35 +++---------------- 1 file changed, 4 insertions(+), 31 deletions(-) diff --git a/backend/onyx/context/search/retrieval/search_runner.py b/backend/onyx/context/search/retrieval/search_runner.py index 12a7c442d8..f77a08334b 100644 --- a/backend/onyx/context/search/retrieval/search_runner.py +++ b/backend/onyx/context/search/retrieval/search_runner.py @@ -164,9 +164,7 @@ def doc_index_retrieval( top_base_chunks_standard_ranking_thread: ( TimeoutThread[list[InferenceChunkUncleaned]] | None ) = None - top_base_chunks_keyword_ranking_thread: ( - TimeoutThread[list[InferenceChunkUncleaned]] | None - ) = None + top_semantic_chunks_thread: TimeoutThread[list[InferenceChunkUncleaned]] | None = ( None ) @@ -190,20 +188,6 @@ def doc_index_retrieval( query.offset, ) - # same query but with 1st vespa phase as keyword retrieval - top_base_chunks_keyword_ranking_thread = run_in_background( - document_index.hybrid_retrieval, - query.query, - query_embedding, - query.processed_keywords, - query.filters, - query.hybrid_alpha, - query.recency_bias_multiplier, - query.num_hits, - QueryExpansionType.KEYWORD, - query.offset, - ) - if ( query.expanded_queries and query.expanded_queries.keywords_expansions @@ -264,9 +248,6 @@ def doc_index_retrieval( top_base_chunks_standard_ranking = wait_on_background( top_base_chunks_standard_ranking_thread ) - top_base_chunks_keyword_ranking = wait_on_background( - top_base_chunks_keyword_ranking_thread - ) top_keyword_chunks = wait_on_background(top_keyword_chunks_thread) @@ -274,11 +255,7 @@ def doc_index_retrieval( assert top_semantic_chunks_thread is not None top_semantic_chunks = wait_on_background(top_semantic_chunks_thread) - all_top_chunks = ( - top_base_chunks_standard_ranking - + top_base_chunks_keyword_ranking - + top_keyword_chunks - ) + all_top_chunks = top_base_chunks_standard_ranking + top_keyword_chunks # use all three retrieval methods to retrieve top chunks @@ -293,12 +270,8 @@ def doc_index_retrieval( top_base_chunks_standard_ranking = wait_on_background( top_base_chunks_standard_ranking_thread ) - top_base_chunks_keyword_ranking = wait_on_background( - top_base_chunks_keyword_ranking_thread - ) - top_chunks = _dedupe_chunks( - top_base_chunks_standard_ranking + top_base_chunks_keyword_ranking - ) + + top_chunks = _dedupe_chunks(top_base_chunks_standard_ranking) logger.info(f"Overall number of top initial retrieval chunks: {len(top_chunks)}")