diff --git a/backend/onyx/agents/agent_search/orchestration/nodes/choose_tool.py b/backend/onyx/agents/agent_search/orchestration/nodes/choose_tool.py
index 5b46ca29ad..79be206359 100644
--- a/backend/onyx/agents/agent_search/orchestration/nodes/choose_tool.py
+++ b/backend/onyx/agents/agent_search/orchestration/nodes/choose_tool.py
@@ -293,6 +293,10 @@ def choose_tool(
         semantic_expansions=[semantic_expansion],
     )
 
+    logger.info(f"Original query: {agent_config.inputs.search_request.query}")
+    logger.info(f"Expanded keyword queries: {keyword_expansion}")
+    logger.info(f"Expanded semantic queries: {semantic_expansion}")
+
     return ToolChoiceUpdate(
         tool_choice=ToolChoice(
             tool=selected_tool,
diff --git a/backend/onyx/chat/prune_and_merge.py b/backend/onyx/chat/prune_and_merge.py
index 56e7378a94..d333c98e9e 100644
--- a/backend/onyx/chat/prune_and_merge.py
+++ b/backend/onyx/chat/prune_and_merge.py
@@ -213,8 +213,12 @@ def _apply_pruning(
     try:
         logger.debug(f"Number of documents after pruning: {ind + 1}")
         logger.debug("Number of tokens per document (pruned):")
+
+        log_tokens_per_document: dict[int, int] = {}
         for x, y in section_idx_token_count.items():
-            logger.debug(f"{x + 1}: {y}")
+            log_tokens_per_document[x + 1] = y
+        logger.debug(f"Tokens per document: {log_tokens_per_document}")
+
     except Exception as e:
         logger.error(f"Error logging prune statistics: {e}")
 
@@ -421,8 +425,14 @@ def _merge_sections(sections: list[InferenceSection]) -> list[InferenceSection]:
 
         )
         logger.debug("Number of chunks per document (new ranking):")
+
+        log_chunks_per_document: dict[int, int] = {}
+
         for x, y in enumerate(new_sections):
-            logger.debug(f"{x + 1}: {len(y.chunks)}")
+            log_chunks_per_document[x + 1] = len(y.chunks)
+
+        logger.debug(f"Chunks per document: {log_chunks_per_document}")
+
     except Exception as e:
         logger.error(f"Error logging merge statistics: {e}")
 
diff --git a/backend/onyx/context/search/retrieval/search_runner.py b/backend/onyx/context/search/retrieval/search_runner.py
index acfd415c82..12a7c442d8 100644
--- a/backend/onyx/context/search/retrieval/search_runner.py
+++ b/backend/onyx/context/search/retrieval/search_runner.py
@@ -161,8 +161,12 @@ def doc_index_retrieval(
     keyword_embeddings_thread: TimeoutThread[list[Embedding]] | None = None
     semantic_embeddings_thread: TimeoutThread[list[Embedding]] | None = None
-    top_base_chunks_thread: TimeoutThread[list[InferenceChunkUncleaned]] | None = None
-
+    top_base_chunks_standard_ranking_thread: (
+        TimeoutThread[list[InferenceChunkUncleaned]] | None
+    ) = None
+    top_base_chunks_keyword_ranking_thread: (
+        TimeoutThread[list[InferenceChunkUncleaned]] | None
+    ) = None
     top_semantic_chunks_thread: TimeoutThread[list[InferenceChunkUncleaned]] | None = (
         None
     )
 
@@ -173,7 +177,7 @@ def doc_index_retrieval(
     top_semantic_chunks: list[InferenceChunkUncleaned] | None = None
 
     # original retrieval method
-    top_base_chunks_thread = run_in_background(
+    top_base_chunks_standard_ranking_thread = run_in_background(
         document_index.hybrid_retrieval,
         query.query,
         query_embedding,
@@ -182,7 +186,21 @@ def doc_index_retrieval(
         query.hybrid_alpha,
         query.recency_bias_multiplier,
         query.num_hits,
-        "semantic",
+        QueryExpansionType.SEMANTIC,
         query.offset,
     )
+
+    # same query but with 1st vespa phase as keyword retrieval
+    top_base_chunks_keyword_ranking_thread = run_in_background(
+        document_index.hybrid_retrieval,
+        query.query,
+        query_embedding,
+        query.processed_keywords,
+        query.filters,
+        query.hybrid_alpha,
+        query.recency_bias_multiplier,
+        query.num_hits,
+        QueryExpansionType.KEYWORD,
+        query.offset,
+    )
 
@@ -243,7 +261,12 @@ def doc_index_retrieval(
             query.offset,
         )
 
-        top_base_chunks = wait_on_background(top_base_chunks_thread)
+        top_base_chunks_standard_ranking = wait_on_background(
+            top_base_chunks_standard_ranking_thread
+        )
+        top_base_chunks_keyword_ranking = wait_on_background(
+            top_base_chunks_keyword_ranking_thread
+        )
 
         top_keyword_chunks = wait_on_background(top_keyword_chunks_thread)
 
@@ -251,7 +274,11 @@ def doc_index_retrieval(
 
         assert top_semantic_chunks_thread is not None
         top_semantic_chunks = wait_on_background(top_semantic_chunks_thread)
 
-        all_top_chunks = top_base_chunks + top_keyword_chunks
+        all_top_chunks = (
+            top_base_chunks_standard_ranking
+            + top_base_chunks_keyword_ranking
+            + top_keyword_chunks
+        )
 
         # use all three retrieval methods to retrieve top chunks
@@ -263,8 +290,17 @@ def doc_index_retrieval(
 
     else:
-        top_base_chunks = wait_on_background(top_base_chunks_thread)
-        top_chunks = _dedupe_chunks(top_base_chunks)
+        top_base_chunks_standard_ranking = wait_on_background(
+            top_base_chunks_standard_ranking_thread
+        )
+        top_base_chunks_keyword_ranking = wait_on_background(
+            top_base_chunks_keyword_ranking_thread
+        )
+        top_chunks = _dedupe_chunks(
+            top_base_chunks_standard_ranking + top_base_chunks_keyword_ranking
+        )
+
+    logger.info(f"Overall number of top initial retrieval chunks: {len(top_chunks)}")
 
     retrieval_requests: list[VespaChunkRequest] = []
     normal_chunks: list[InferenceChunkUncleaned] = []
diff --git a/backend/onyx/document_index/vespa/chunk_retrieval.py b/backend/onyx/document_index/vespa/chunk_retrieval.py
index 1ef37155e9..ad3cf73bb2 100644
--- a/backend/onyx/document_index/vespa/chunk_retrieval.py
+++ b/backend/onyx/document_index/vespa/chunk_retrieval.py
@@ -358,7 +358,7 @@ def query_vespa(
         num_retrieved_document_ids = len(
             set([chunk.document_id for chunk in inference_chunks])
         )
-        logger.debug(
+        logger.info(
             f"Retrieved {num_retrieved_inference_chunks} inference chunks for {num_retrieved_document_ids} documents"
         )
     except Exception as e:
diff --git a/backend/onyx/document_index/vespa/index.py b/backend/onyx/document_index/vespa/index.py
index 9805e2aff4..46838c4372 100644
--- a/backend/onyx/document_index/vespa/index.py
+++ b/backend/onyx/document_index/vespa/index.py
@@ -822,6 +822,8 @@ class VespaIndex(DocumentIndex):
         else:
             ranking_profile = f"hybrid_search_semantic_base_{len(query_embedding)}"
 
+        logger.info(f"Selected ranking profile: {ranking_profile}")
+
         logger.debug(f"Query YQL: {yql}")
 
         params: dict[str, str | int | float] = {
diff --git a/backend/onyx/prompts/chat_prompts.py b/backend/onyx/prompts/chat_prompts.py
index 8656dfa106..5b0eb7ac5a 100644
--- a/backend/onyx/prompts/chat_prompts.py
+++ b/backend/onyx/prompts/chat_prompts.py
@@ -289,15 +289,21 @@ Rephrased query for search engine:
 
 
 QUERY_KEYWORD_EXPANSION_WITHOUT_HISTORY_PROMPT = """
-Please rephrase the following user question as a keyword query that would be appropriate for a \
-search engine.
+Please rephrase the following user question as a pure keyword query that would be appropriate for a \
+search engine. IMPORTANT: the rephrased query MUST ONLY use EXISTING KEYWORDS from the original query \
+(exception: critical verbs that are converted to nouns)!
+Also, keywords are usually nouns or adjectives, so you will likely need to drop \
+any verbs. IF AND ONLY IF you really think that a verb would be critical to FINDING the document, \
+convert the verb to a noun. \
+This will be rare though. Verbs like 'find', 'summarize', 'describe', etc. would NOT fall into this category, \
+for example, and should be omitted from the rephrased keyword query.
 
 Here is the user question:
 {question}
 
-Respond with EXACTLY and ONLY one rephrased query.
+Respond with EXACTLY and ONLY one rephrased keyword query.
 
-Rephrased query for search engine:
+Rephrased keyword query for search engine:
 """.strip()
 
 
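Note on the search_runner.py change above: doc_index_retrieval now fans the same hybrid query out to Vespa twice in parallel, once with a semantic first-phase ranking and once with a keyword first-phase ranking, then concatenates and dedupes the two candidate lists before further processing. Below is a minimal self-contained sketch of that fan-out pattern, using concurrent.futures in place of Onyx's run_in_background/wait_on_background helpers; Chunk, hybrid_retrieval, and the keep-highest-score dedupe policy are illustrative assumptions, not the project's actual types or the real behavior of _dedupe_chunks.

# Sketch of the dual first-phase ranking pattern from search_runner.py.
# ThreadPoolExecutor stands in for Onyx's run_in_background/wait_on_background;
# Chunk, hybrid_retrieval, and the dedupe policy are simplified assumptions.
from concurrent.futures import ThreadPoolExecutor
from dataclasses import dataclass
from enum import Enum


class QueryExpansionType(Enum):
    KEYWORD = "keyword"
    SEMANTIC = "semantic"


@dataclass(frozen=True)
class Chunk:
    document_id: str
    chunk_id: int
    score: float


def hybrid_retrieval(query: str, first_phase: QueryExpansionType) -> list[Chunk]:
    # Placeholder: a real implementation would run the hybrid Vespa query
    # with the ranking profile implied by `first_phase`.
    return []


def dedupe_chunks(chunks: list[Chunk]) -> list[Chunk]:
    # Keep one instance of each (document_id, chunk_id) pair; retaining the
    # highest-scoring copy is an assumption about _dedupe_chunks' behavior.
    best: dict[tuple[str, int], Chunk] = {}
    for chunk in chunks:
        key = (chunk.document_id, chunk.chunk_id)
        if key not in best or chunk.score > best[key].score:
            best[key] = chunk
    return list(best.values())


def retrieve_with_dual_ranking(query: str) -> list[Chunk]:
    # Fan the same query out to both first-phase rankings in parallel,
    # then merge and dedupe the union, mirroring the else-branch above.
    with ThreadPoolExecutor(max_workers=2) as pool:
        semantic_future = pool.submit(
            hybrid_retrieval, query, QueryExpansionType.SEMANTIC
        )
        keyword_future = pool.submit(
            hybrid_retrieval, query, QueryExpansionType.KEYWORD
        )
        return dedupe_chunks(semantic_future.result() + keyword_future.result())

The likely rationale for the second pass: a semantic first phase can push exact keyword matches below the num_hits cutoff, so adding a keyword-ranked pass before dedupe and reranking presumably widens recall at the cost of one extra Vespa query per request.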