From d8b050026d34e11efaf4c4ff640a5339027a499f Mon Sep 17 00:00:00 2001
From: joachim-danswer <joachim@danswer.ai>
Date: Tue, 22 Apr 2025 20:18:57 -0700
Subject: [PATCH] Remove the extra hybrid retrieval pass that used keyword first-phase ranking

---
 .../context/search/retrieval/search_runner.py | 35 +++----------------
 1 file changed, 4 insertions(+), 31 deletions(-)

diff --git a/backend/onyx/context/search/retrieval/search_runner.py b/backend/onyx/context/search/retrieval/search_runner.py
index 12a7c442d8..f77a08334b 100644
--- a/backend/onyx/context/search/retrieval/search_runner.py
+++ b/backend/onyx/context/search/retrieval/search_runner.py
@@ -164,9 +164,7 @@ def doc_index_retrieval(
     top_base_chunks_standard_ranking_thread: (
         TimeoutThread[list[InferenceChunkUncleaned]] | None
     ) = None
-    top_base_chunks_keyword_ranking_thread: (
-        TimeoutThread[list[InferenceChunkUncleaned]] | None
-    ) = None
+
     top_semantic_chunks_thread: TimeoutThread[list[InferenceChunkUncleaned]] | None = (
         None
     )
@@ -190,20 +188,6 @@ def doc_index_retrieval(
         query.offset,
     )
 
-    # same query but with 1st vespa phase as keyword retrieval
-    top_base_chunks_keyword_ranking_thread = run_in_background(
-        document_index.hybrid_retrieval,
-        query.query,
-        query_embedding,
-        query.processed_keywords,
-        query.filters,
-        query.hybrid_alpha,
-        query.recency_bias_multiplier,
-        query.num_hits,
-        QueryExpansionType.KEYWORD,
-        query.offset,
-    )
-
     if (
         query.expanded_queries
         and query.expanded_queries.keywords_expansions
@@ -264,9 +248,6 @@ def doc_index_retrieval(
         top_base_chunks_standard_ranking = wait_on_background(
             top_base_chunks_standard_ranking_thread
         )
-        top_base_chunks_keyword_ranking = wait_on_background(
-            top_base_chunks_keyword_ranking_thread
-        )
 
         top_keyword_chunks = wait_on_background(top_keyword_chunks_thread)
 
@@ -274,11 +255,7 @@ def doc_index_retrieval(
             assert top_semantic_chunks_thread is not None
             top_semantic_chunks = wait_on_background(top_semantic_chunks_thread)
 
-        all_top_chunks = (
-            top_base_chunks_standard_ranking
-            + top_base_chunks_keyword_ranking
-            + top_keyword_chunks
-        )
+        all_top_chunks = top_base_chunks_standard_ranking + top_keyword_chunks
 
         # use all three retrieval methods to retrieve top chunks
 
@@ -293,12 +270,8 @@ def doc_index_retrieval(
         top_base_chunks_standard_ranking = wait_on_background(
             top_base_chunks_standard_ranking_thread
         )
-        top_base_chunks_keyword_ranking = wait_on_background(
-            top_base_chunks_keyword_ranking_thread
-        )
-        top_chunks = _dedupe_chunks(
-            top_base_chunks_standard_ranking + top_base_chunks_keyword_ranking
-        )
+
+        top_chunks = _dedupe_chunks(top_base_chunks_standard_ranking)
 
     logger.info(f"Overall number of top initial retrieval chunks: {len(top_chunks)}")