diff --git a/backend/onyx/agents/agent_search/orchestration/nodes/choose_tool.py b/backend/onyx/agents/agent_search/orchestration/nodes/choose_tool.py
index ba23808d8d..5b46ca29ad 100644
--- a/backend/onyx/agents/agent_search/orchestration/nodes/choose_tool.py
+++ b/backend/onyx/agents/agent_search/orchestration/nodes/choose_tool.py
@@ -1,6 +1,8 @@
 from typing import cast
 from uuid import uuid4
 
+from langchain_core.messages import AIMessage
+from langchain_core.messages import HumanMessage
 from langchain_core.messages import ToolCall
 from langchain_core.runnables.config import RunnableConfig
 from langgraph.types import StreamWriter
@@ -10,13 +12,21 @@ from onyx.agents.agent_search.models import GraphConfig
 from onyx.agents.agent_search.orchestration.states import ToolChoice
 from onyx.agents.agent_search.orchestration.states import ToolChoiceState
 from onyx.agents.agent_search.orchestration.states import ToolChoiceUpdate
+from onyx.agents.agent_search.shared_graph_utils.models import QueryExpansionType
 from onyx.chat.prompt_builder.answer_prompt_builder import AnswerPromptBuilder
 from onyx.chat.tool_handling.tool_response_handler import get_tool_by_name
 from onyx.chat.tool_handling.tool_response_handler import (
     get_tool_call_for_non_tool_calling_llm_impl,
 )
+from onyx.configs.chat_configs import USE_SEMANTIC_KEYWORD_EXPANSIONS_BASIC_SEARCH
 from onyx.context.search.preprocessing.preprocessing import query_analysis
 from onyx.context.search.retrieval.search_runner import get_query_embedding
+from onyx.llm.factory import get_default_llms
+from onyx.prompts.chat_prompts import QUERY_KEYWORD_EXPANSION_WITH_HISTORY_PROMPT
+from onyx.prompts.chat_prompts import QUERY_KEYWORD_EXPANSION_WITHOUT_HISTORY_PROMPT
+from onyx.prompts.chat_prompts import QUERY_SEMANTIC_EXPANSION_WITH_HISTORY_PROMPT
+from onyx.prompts.chat_prompts import QUERY_SEMANTIC_EXPANSION_WITHOUT_HISTORY_PROMPT
+from onyx.tools.models import QueryExpansions
 from onyx.tools.models import SearchToolOverrideKwargs
 from onyx.tools.tool import Tool
 from onyx.tools.tool_implementations.search.search_tool import SearchTool
@@ -30,6 +40,49 @@ from shared_configs.model_server_models import Embedding
 logger = setup_logger()
 
 
+def _create_history_str(prompt_builder: AnswerPromptBuilder) -> str:
+    # TODO: Add trimming logic
+    history_segments = []
+    for msg in prompt_builder.message_history:
+        if isinstance(msg, HumanMessage):
+            role = "User"
+        elif isinstance(msg, AIMessage):
+            role = "Assistant"
+        else:
+            continue
+        history_segments.append(f"{role}:\n {msg.content}\n\n")
+    return "\n".join(history_segments)
+
+
+def _expand_query(
+    query: str,
+    expansion_type: QueryExpansionType,
+    prompt_builder: AnswerPromptBuilder,
+) -> str:
+
+    history_str = _create_history_str(prompt_builder)
+
+    if history_str:
+        if expansion_type == QueryExpansionType.KEYWORD:
+            base_prompt = QUERY_KEYWORD_EXPANSION_WITH_HISTORY_PROMPT
+        else:
+            base_prompt = QUERY_SEMANTIC_EXPANSION_WITH_HISTORY_PROMPT
+        expansion_prompt = base_prompt.format(question=query, history=history_str)
+    else:
+        if expansion_type == QueryExpansionType.KEYWORD:
+            base_prompt = QUERY_KEYWORD_EXPANSION_WITHOUT_HISTORY_PROMPT
+        else:
+            base_prompt = QUERY_SEMANTIC_EXPANSION_WITHOUT_HISTORY_PROMPT
+        expansion_prompt = base_prompt.format(question=query)
+
+    msg = HumanMessage(content=expansion_prompt)
+    primary_llm, _ = get_default_llms()
+    response = primary_llm.invoke([msg])
+    rephrased_query: str = cast(str, response.content)
+
+    return rephrased_query
+
+
 # TODO: break this out into an implementation function
 # and a function that handles extracting the necessary fields
 # from the state and config
@@ -52,7 +105,16 @@ def choose_tool(
     embedding_thread: TimeoutThread[Embedding] | None = None
     keyword_thread: TimeoutThread[tuple[bool, list[str]]] | None = None
+    expanded_keyword_thread: TimeoutThread[str] | None = None
+    expanded_semantic_thread: TimeoutThread[str] | None = None
     override_kwargs: SearchToolOverrideKwargs | None = None
+
+    using_tool_calling_llm = agent_config.tooling.using_tool_calling_llm
+    prompt_builder = state.prompt_snapshot or agent_config.inputs.prompt_builder
+
+    llm = agent_config.tooling.primary_llm
+    skip_gen_ai_answer_generation = agent_config.behavior.skip_gen_ai_answer_generation
+
     if (
         not agent_config.behavior.use_agentic_search
         and agent_config.tooling.search_tool is not None
@@ -72,11 +134,20 @@ def choose_tool(
             agent_config.inputs.search_request.query,
         )
 
-    using_tool_calling_llm = agent_config.tooling.using_tool_calling_llm
-    prompt_builder = state.prompt_snapshot or agent_config.inputs.prompt_builder
+        if USE_SEMANTIC_KEYWORD_EXPANSIONS_BASIC_SEARCH:
 
-    llm = agent_config.tooling.primary_llm
-    skip_gen_ai_answer_generation = agent_config.behavior.skip_gen_ai_answer_generation
+            expanded_keyword_thread = run_in_background(
+                _expand_query,
+                agent_config.inputs.search_request.query,
+                QueryExpansionType.KEYWORD,
+                prompt_builder,
+            )
+            expanded_semantic_thread = run_in_background(
+                _expand_query,
+                agent_config.inputs.search_request.query,
+                QueryExpansionType.SEMANTIC,
+                prompt_builder,
+            )
 
     structured_response_format = agent_config.inputs.structured_response_format
     tools = [
@@ -209,6 +280,19 @@ def choose_tool(
         override_kwargs.precomputed_is_keyword = is_keyword
         override_kwargs.precomputed_keywords = keywords
 
+    if (
+        selected_tool.name == SearchTool._NAME
+        and expanded_keyword_thread
+        and expanded_semantic_thread
+    ):
+        keyword_expansion = wait_on_background(expanded_keyword_thread)
+        semantic_expansion = wait_on_background(expanded_semantic_thread)
+        assert override_kwargs is not None, "must have override kwargs"
+        override_kwargs.expanded_queries = QueryExpansions(
+            keywords_expansions=[keyword_expansion],
+            semantic_expansions=[semantic_expansion],
+        )
+
     return ToolChoiceUpdate(
         tool_choice=ToolChoice(
             tool=selected_tool,
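Reviewer note: the two expansion LLM calls are spawned before the tool-choice LLM call and joined only if the search tool is actually selected, so their latency overlaps with tool selection. A minimal sketch of the pattern, with a hypothetical query string and with _expand_query and prompt_builder as in the new code above:

    from onyx.agents.agent_search.shared_graph_utils.models import QueryExpansionType
    from onyx.context.search.models import QueryExpansions
    from onyx.utils.threadpool_concurrency import run_in_background, wait_on_background

    # Spawn both expansions without blocking tool selection.
    kw_thread = run_in_background(
        _expand_query, "pto policy", QueryExpansionType.KEYWORD, prompt_builder
    )
    sem_thread = run_in_background(
        _expand_query, "pto policy", QueryExpansionType.SEMANTIC, prompt_builder
    )

    # ... the tool-choice LLM call happens here ...

    # Join only once the search tool has won; the result rides along in the override kwargs.
    expansions = QueryExpansions(
        keywords_expansions=[wait_on_background(kw_thread)],
        semantic_expansions=[wait_on_background(sem_thread)],
    )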
diff --git a/backend/onyx/agents/agent_search/shared_graph_utils/models.py b/backend/onyx/agents/agent_search/shared_graph_utils/models.py
index f22bf162d9..812f97bd6a 100644
--- a/backend/onyx/agents/agent_search/shared_graph_utils/models.py
+++ b/backend/onyx/agents/agent_search/shared_graph_utils/models.py
@@ -1,3 +1,4 @@
+from enum import Enum
 from typing import Any
 
 from pydantic import BaseModel
@@ -153,3 +154,8 @@ class AnswerGenerationDocuments(BaseModel):
 
 
 BaseMessage_Content = str | list[str | dict[str, Any]]
+
+
+class QueryExpansionType(Enum):
+    KEYWORD = "keyword"
+    SEMANTIC = "semantic"
diff --git a/backend/onyx/configs/chat_configs.py b/backend/onyx/configs/chat_configs.py
index 45d80b6e22..56612b39e7 100644
--- a/backend/onyx/configs/chat_configs.py
+++ b/backend/onyx/configs/chat_configs.py
@@ -96,3 +96,9 @@ BING_API_KEY = os.environ.get("BING_API_KEY") or None
 ENABLE_CONNECTOR_CLASSIFIER = os.environ.get("ENABLE_CONNECTOR_CLASSIFIER", False)
 
 VESPA_SEARCHER_THREADS = int(os.environ.get("VESPA_SEARCHER_THREADS") or 2)
+
+# Whether to use semantic & keyword query expansions for Basic Search
+USE_SEMANTIC_KEYWORD_EXPANSIONS_BASIC_SEARCH = (
+    os.environ.get("USE_SEMANTIC_KEYWORD_EXPANSIONS_BASIC_SEARCH", "false").lower()
+    == "true"
+)
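The flag is parsed with a strict string comparison, so only the literal "true" (any casing) enables it; values like "1" or "yes" leave the feature off. A quick illustration of the parse rule used above:

    # Mirrors the config parse: only a case-insensitive "true" enables the feature.
    for raw in ("true", "TRUE", "1", "yes", ""):
        print(repr(raw), "->", raw.lower() == "true")
    # 'true' -> True, 'TRUE' -> True, '1' -> False, 'yes' -> False, '' -> False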
diff --git a/backend/onyx/context/search/models.py b/backend/onyx/context/search/models.py
index ef82c6bb54..47cfca563f 100644
--- a/backend/onyx/context/search/models.py
+++ b/backend/onyx/context/search/models.py
@@ -18,11 +18,17 @@ from onyx.indexing.models import IndexingSetting
 from shared_configs.enums import RerankerProvider
 from shared_configs.model_server_models import Embedding
 
+
 MAX_METRICS_CONTENT = (
     200  # Just need enough characters to identify where in the doc the chunk is
 )
 
 
+class QueryExpansions(BaseModel):
+    keywords_expansions: list[str] | None = None
+    semantic_expansions: list[str] | None = None
+
+
 class RerankingDetails(BaseModel):
     # If model is None (or num_rerank is 0), then reranking is turned off
     rerank_model_name: str | None
@@ -139,6 +145,8 @@ class ChunkContext(BaseModel):
 class SearchRequest(ChunkContext):
     query: str
 
+    expanded_queries: QueryExpansions | None = None
+
     search_type: SearchType = SearchType.SEMANTIC
 
     human_selected_filters: BaseFilters | None = None
@@ -187,6 +195,8 @@ class SearchQuery(ChunkContext):
 
     precomputed_query_embedding: Embedding | None = None
 
+    expanded_queries: QueryExpansions | None = None
+
 
 class RetrievalDetails(ChunkContext):
     # Use LLM to determine whether to do a retrieval or only rely on existing history
diff --git a/backend/onyx/context/search/preprocessing/preprocessing.py b/backend/onyx/context/search/preprocessing/preprocessing.py
index a8b22a983d..16402a360e 100644
--- a/backend/onyx/context/search/preprocessing/preprocessing.py
+++ b/backend/onyx/context/search/preprocessing/preprocessing.py
@@ -20,7 +20,7 @@ from onyx.context.search.models import SearchRequest
 from onyx.context.search.preprocessing.access_filters import (
     build_access_filters_for_user,
 )
-from onyx.context.search.retrieval.search_runner import (
+from onyx.context.search.utils import (
     remove_stop_words_and_punctuation,
 )
 from onyx.db.models import User
@@ -36,7 +36,6 @@ from onyx.utils.timing import log_function_time
 from shared_configs.configs import MULTI_TENANT
 from shared_configs.contextvars import get_current_tenant_id
 
-
 logger = setup_logger()
@@ -264,4 +263,5 @@ def retrieval_preprocessing(
         chunks_below=chunks_below,
         full_doc=search_request.full_doc,
         precomputed_query_embedding=search_request.precomputed_query_embedding,
+        expanded_queries=search_request.expanded_queries,
     )
diff --git a/backend/onyx/context/search/retrieval/search_runner.py b/backend/onyx/context/search/retrieval/search_runner.py
index f69b2c3b23..acfd415c82 100644
--- a/backend/onyx/context/search/retrieval/search_runner.py
+++ b/backend/onyx/context/search/retrieval/search_runner.py
@@ -2,10 +2,10 @@ import string
 from collections.abc import Callable
 
 import nltk  # type:ignore
-from nltk.corpus import stopwords  # type:ignore
-from nltk.tokenize import word_tokenize  # type:ignore
 from sqlalchemy.orm import Session
 
+from onyx.agents.agent_search.shared_graph_utils.models import QueryExpansionType
+from onyx.context.search.enums import SearchType
 from onyx.context.search.models import ChunkMetric
 from onyx.context.search.models import IndexFilters
 from onyx.context.search.models import InferenceChunk
@@ -15,6 +15,8 @@ from onyx.context.search.models import MAX_METRICS_CONTENT
 from onyx.context.search.models import RetrievalMetricsContainer
 from onyx.context.search.models import SearchQuery
 from onyx.context.search.postprocessing.postprocessing import cleanup_chunks
+from onyx.context.search.preprocessing.preprocessing import HYBRID_ALPHA
+from onyx.context.search.preprocessing.preprocessing import HYBRID_ALPHA_KEYWORD
 from onyx.context.search.utils import inference_section_from_chunks
 from onyx.db.search_settings import get_current_search_settings
 from onyx.db.search_settings import get_multilingual_expansion
@@ -27,6 +29,9 @@ from onyx.natural_language_processing.search_nlp_models import EmbeddingModel
 from onyx.secondary_llm_flows.query_expansion import multilingual_query_expansion
 from onyx.utils.logger import setup_logger
 from onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel
+from onyx.utils.threadpool_concurrency import run_in_background
+from onyx.utils.threadpool_concurrency import TimeoutThread
+from onyx.utils.threadpool_concurrency import wait_on_background
 from onyx.utils.timing import log_function_time
 from shared_configs.configs import MODEL_SERVER_HOST
 from shared_configs.configs import MODEL_SERVER_PORT
@@ -36,6 +41,23 @@ from shared_configs.model_server_models import Embedding
 logger = setup_logger()
 
 
+def _dedupe_chunks(
+    chunks: list[InferenceChunkUncleaned],
+) -> list[InferenceChunkUncleaned]:
+    used_chunks: dict[tuple[str, int], InferenceChunkUncleaned] = {}
+    for chunk in chunks:
+        key = (chunk.document_id, chunk.chunk_id)
+        if key not in used_chunks:
+            used_chunks[key] = chunk
+        else:
+            stored_chunk_score = used_chunks[key].score or 0
+            this_chunk_score = chunk.score or 0
+            if stored_chunk_score < this_chunk_score:
+                used_chunks[key] = chunk
+
+    return list(used_chunks.values())
+
+
 def download_nltk_data() -> None:
     resources = {
         "stopwords": "corpora/stopwords",
@@ -69,22 +91,6 @@ def lemmatize_text(keywords: list[str]) -> list[str]:
 #     return keywords
 
 
-def remove_stop_words_and_punctuation(keywords: list[str]) -> list[str]:
-    try:
-        # Re-tokenize using the NLTK tokenizer for better matching
-        query = " ".join(keywords)
-        stop_words = set(stopwords.words("english"))
-        word_tokens = word_tokenize(query)
-        text_trimmed = [
-            word
-            for word in word_tokens
-            if (word.casefold() not in stop_words and word not in string.punctuation)
-        ]
-        return text_trimmed or word_tokens
-    except Exception:
-        return keywords
-
-
 def combine_retrieval_results(
     chunk_sets: list[list[InferenceChunk]],
 ) -> list[InferenceChunk]:
@@ -123,6 +129,20 @@ def get_query_embedding(query: str, db_session: Session) -> Embedding:
     return query_embedding
 
 
+def get_query_embeddings(queries: list[str], db_session: Session) -> list[Embedding]:
+    search_settings = get_current_search_settings(db_session)
+
+    model = EmbeddingModel.from_db_model(
+        search_settings=search_settings,
+        # The below are globally set, this flow always uses the indexing one
+        server_host=MODEL_SERVER_HOST,
+        server_port=MODEL_SERVER_PORT,
+    )
+
+    query_embeddings = model.encode(queries, text_type=EmbedTextType.QUERY)
+    return query_embeddings
+
+
 @log_function_time(print_only=True)
 def doc_index_retrieval(
     query: SearchQuery,
@@ -139,17 +159,113 @@ def doc_index_retrieval(
         query.query, db_session
     )
 
-    top_chunks = document_index.hybrid_retrieval(
-        query=query.query,
-        query_embedding=query_embedding,
-        final_keywords=query.processed_keywords,
-        filters=query.filters,
-        hybrid_alpha=query.hybrid_alpha,
-        time_decay_multiplier=query.recency_bias_multiplier,
-        num_to_retrieve=query.num_hits,
-        offset=query.offset,
+    keyword_embeddings_thread: TimeoutThread[list[Embedding]] | None = None
+    semantic_embeddings_thread: TimeoutThread[list[Embedding]] | None = None
+    top_base_chunks_thread: TimeoutThread[list[InferenceChunkUncleaned]] | None = None
+
+    top_semantic_chunks_thread: TimeoutThread[list[InferenceChunkUncleaned]] | None = (
+        None
     )
+
+    keyword_embeddings: list[Embedding] | None = None
+    semantic_embeddings: list[Embedding] | None = None
+
+    top_semantic_chunks: list[InferenceChunkUncleaned] | None = None
+
+    # original retrieval method
+    top_base_chunks_thread = run_in_background(
+        document_index.hybrid_retrieval,
+        query.query,
+        query_embedding,
+        query.processed_keywords,
+        query.filters,
+        query.hybrid_alpha,
+        query.recency_bias_multiplier,
+        query.num_hits,
+        QueryExpansionType.SEMANTIC,
+        query.offset,
+    )
+
+    if (
+        query.expanded_queries
+        and query.expanded_queries.keywords_expansions
+        and query.expanded_queries.semantic_expansions
+    ):
+
+        keyword_embeddings_thread = run_in_background(
+            get_query_embeddings,
+            query.expanded_queries.keywords_expansions,
+            db_session,
+        )
+
+        if query.search_type == SearchType.SEMANTIC:
+            semantic_embeddings_thread = run_in_background(
+                get_query_embeddings,
+                query.expanded_queries.semantic_expansions,
+                db_session,
+            )
+
+        keyword_embeddings = wait_on_background(keyword_embeddings_thread)
+
+        if query.search_type == SearchType.SEMANTIC:
+            assert semantic_embeddings_thread is not None
+            semantic_embeddings = wait_on_background(semantic_embeddings_thread)
+
+        # Use original query embedding for keyword retrieval embedding
+        keyword_embeddings = [query_embedding]
+
+        # Note: earlier steps prepare for multiple expansions, but for now only the first is used.
+        top_keyword_chunks_thread = run_in_background(
+            document_index.hybrid_retrieval,
+            query.expanded_queries.keywords_expansions[0],
+            keyword_embeddings[0],
+            query.processed_keywords,
+            query.filters,
+            HYBRID_ALPHA_KEYWORD,
+            query.recency_bias_multiplier,
+            query.num_hits,
+            QueryExpansionType.KEYWORD,
+            query.offset,
+        )
+
+        if query.search_type == SearchType.SEMANTIC:
+            assert semantic_embeddings is not None
+
+            top_semantic_chunks_thread = run_in_background(
+                document_index.hybrid_retrieval,
+                query.expanded_queries.semantic_expansions[0],
+                semantic_embeddings[0],
+                query.processed_keywords,
+                query.filters,
+                HYBRID_ALPHA,
+                query.recency_bias_multiplier,
+                query.num_hits,
+                QueryExpansionType.SEMANTIC,
+                query.offset,
+            )
+
+        top_base_chunks = wait_on_background(top_base_chunks_thread)
+
+        top_keyword_chunks = wait_on_background(top_keyword_chunks_thread)
+
+        if query.search_type == SearchType.SEMANTIC:
+            assert top_semantic_chunks_thread is not None
+            top_semantic_chunks = wait_on_background(top_semantic_chunks_thread)
+
+        all_top_chunks = top_base_chunks + top_keyword_chunks
+
+        # combine the results of all three retrieval runs
+        if query.search_type == SearchType.SEMANTIC and top_semantic_chunks is not None:
+            all_top_chunks += top_semantic_chunks
+
+        top_chunks = _dedupe_chunks(all_top_chunks)
+
+    else:
+        top_base_chunks = wait_on_background(top_base_chunks_thread)
+        top_chunks = _dedupe_chunks(top_base_chunks)
 
     retrieval_requests: list[VespaChunkRequest] = []
     normal_chunks: list[InferenceChunkUncleaned] = []
     referenced_chunk_scores: dict[tuple[str, int], float] = {}
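The three retrieval runs can return the same chunk more than once; _dedupe_chunks keys on (document_id, chunk_id) and keeps the copy with the higher score, so a chunk found by both the keyword and the semantic run carries its best score forward. A toy illustration with hypothetical stand-in objects (not real InferenceChunkUncleaned instances):

    from dataclasses import dataclass

    @dataclass
    class FakeChunk:  # stand-in for InferenceChunkUncleaned
        document_id: str
        chunk_id: int
        score: float | None

    base = [FakeChunk("doc-a", 0, 0.41), FakeChunk("doc-b", 3, 0.35)]
    keyword = [FakeChunk("doc-a", 0, 0.58)]  # same chunk, higher keyword-run score

    deduped = _dedupe_chunks(base + keyword)  # type: ignore[arg-type]
    # -> 2 chunks; ("doc-a", 0) survives with score 0.58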
diff --git a/backend/onyx/context/search/utils.py b/backend/onyx/context/search/utils.py
index 91724ffb32..22e6b0f8df 100644
--- a/backend/onyx/context/search/utils.py
+++ b/backend/onyx/context/search/utils.py
@@ -1,6 +1,10 @@
+import string
 from collections.abc import Sequence
 from typing import TypeVar
 
+from nltk.corpus import stopwords  # type:ignore
+from nltk.tokenize import word_tokenize  # type:ignore
+
 from onyx.chat.models import SectionRelevancePiece
 from onyx.context.search.models import InferenceChunk
 from onyx.context.search.models import InferenceSection
@@ -136,3 +140,19 @@ def chunks_or_sections_to_search_docs(
     ]
 
     return search_docs
+
+
+def remove_stop_words_and_punctuation(keywords: list[str]) -> list[str]:
+    try:
+        # Re-tokenize using the NLTK tokenizer for better matching
+        query = " ".join(keywords)
+        stop_words = set(stopwords.words("english"))
+        word_tokens = word_tokenize(query)
+        text_trimmed = [
+            word
+            for word in word_tokens
+            if (word.casefold() not in stop_words and word not in string.punctuation)
+        ]
+        return text_trimmed or word_tokens
+    except Exception:
+        return keywords
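This helper is moved verbatim out of search_runner.py; the relocation presumably breaks the import cycle that would otherwise arise, since search_runner.py now imports HYBRID_ALPHA from preprocessing.py while preprocessing.py previously imported this function from search_runner.py. Expected behavior on hypothetical inputs:

    remove_stop_words_and_punctuation(["what", "is", "the", "pto", "policy", "?"])
    # -> ["pto", "policy"]  (stopwords and punctuation dropped)

    remove_stop_words_and_punctuation(["the", "of"])
    # -> ["the", "of"]  (falls back to the tokenized words when everything would be trimmed)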
diff --git a/backend/onyx/document_index/interfaces.py b/backend/onyx/document_index/interfaces.py
index 66912a971c..668898c149 100644
--- a/backend/onyx/document_index/interfaces.py
+++ b/backend/onyx/document_index/interfaces.py
@@ -4,6 +4,8 @@ from datetime import datetime
 from typing import Any
 
 from onyx.access.models import DocumentAccess
+from onyx.agents.agent_search.shared_graph_utils.models import QueryExpansionType
+from onyx.configs.chat_configs import TITLE_CONTENT_RATIO
 from onyx.context.search.models import IndexFilters
 from onyx.context.search.models import InferenceChunkUncleaned
 from onyx.db.enums import EmbeddingPrecision
@@ -351,7 +353,9 @@ class HybridCapable(abc.ABC):
         hybrid_alpha: float,
         time_decay_multiplier: float,
         num_to_retrieve: int,
+        ranking_profile_type: QueryExpansionType,
         offset: int = 0,
+        title_content_ratio: float | None = TITLE_CONTENT_RATIO,
     ) -> list[InferenceChunkUncleaned]:
         """
         Run hybrid search and return a list of inference chunks.
diff --git a/backend/onyx/document_index/vespa/app_config/schemas/danswer_chunk.sd b/backend/onyx/document_index/vespa/app_config/schemas/danswer_chunk.sd
index d3fcf73a76..c068234935 100644
--- a/backend/onyx/document_index/vespa/app_config/schemas/danswer_chunk.sd
+++ b/backend/onyx/document_index/vespa/app_config/schemas/danswer_chunk.sd
@@ -176,7 +176,7 @@ schema DANSWER_CHUNK_NAME {
         match-features: recency_bias
     }
 
-    rank-profile hybrid_searchVARIABLE_DIM inherits default, default_rank {
+    rank-profile hybrid_search_semantic_base_VARIABLE_DIM inherits default, default_rank {
         inputs {
             query(query_embedding) tensor(x[VARIABLE_DIM])
         }
@@ -192,7 +192,75 @@ schema DANSWER_CHUNK_NAME {
 
         # First phase must be vector to allow hits that have no keyword matches
         first-phase {
-            expression: closeness(field, embeddings)
+            expression: query(title_content_ratio) * closeness(field, title_embedding) + (1 - query(title_content_ratio)) * closeness(field, embeddings)
+        }
+
+        # Weighted average between Vector Search and BM-25
+        global-phase {
+            expression {
+                (
+                    # Weighted Vector Similarity Score
+                    (
+                        query(alpha) * (
+                            (query(title_content_ratio) * normalize_linear(title_vector_score))
+                            +
+                            ((1 - query(title_content_ratio)) * normalize_linear(closeness(field, embeddings)))
+                        )
+                    )
+                    +
+                    # Weighted Keyword Similarity Score
+                    # Note: the BM25 title score requires decent stopword removal in the query,
+                    # so that irrelevant titles are not normalized to a score of 1
+                    (
+                        (1 - query(alpha)) * (
+                            (query(title_content_ratio) * normalize_linear(bm25(title)))
+                            +
+                            ((1 - query(title_content_ratio)) * normalize_linear(bm25(content)))
+                        )
+                    )
+                )
+                # Boost based on user feedback
+                * document_boost
+                # Decay factor based on time document was last updated
+                * recency_bias
+                # Boost based on aggregated boost calculation
+                * aggregated_chunk_boost
+            }
+            rerank-count: 1000
+        }
+
+        match-features {
+            bm25(title)
+            bm25(content)
+            closeness(field, title_embedding)
+            closeness(field, embeddings)
+            document_boost
+            recency_bias
+            aggregated_chunk_boost
+            closest(embeddings)
+        }
+    }
+
+
+    rank-profile hybrid_search_keyword_base_VARIABLE_DIM inherits default, default_rank {
+        inputs {
+            query(query_embedding) tensor(x[VARIABLE_DIM])
+        }
+
+        function title_vector_score() {
+            expression {
+                # If no good matching titles, then it should use the context embeddings rather than having some
+                # irrelevant title have a vector score of 1. This way at least it will be the doc with the highest
+                # matching content score getting the full score
+                max(closeness(field, embeddings), closeness(field, title_embedding))
+            }
+        }
+
+        # First phase ranks purely by keyword (BM25) scores; the shared global-phase below still blends in vector scores
+        first-phase {
+            expression: query(title_content_ratio) * bm25(title) + (1 - query(title_content_ratio)) * bm25(content)
         }
 
         # Weighted average between Vector Search and BM-25
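For reviewers unfamiliar with Vespa rank expressions: both new profiles share the same global-phase, a convex blend of normalized vector and BM25 signals (weighted by query(alpha) and query(title_content_ratio)) multiplied by the boost and recency factors; they differ only in the first phase that selects candidates (embedding closeness vs. BM25). A plain-Python restatement of the global-phase formula, treating Vespa's normalize_linear as already applied:

    def global_phase_score(
        alpha: float,            # query(alpha): weight of vector vs keyword signal
        title_ratio: float,      # query(title_content_ratio)
        title_vec: float,        # normalized title_vector_score
        content_vec: float,      # normalized closeness(field, embeddings)
        bm25_title: float,       # normalized bm25(title)
        bm25_content: float,     # normalized bm25(content)
        document_boost: float,   # user-feedback boost
        recency_bias: float,     # time-decay factor
        aggregated_chunk_boost: float,
    ) -> float:
        vector = title_ratio * title_vec + (1 - title_ratio) * content_vec
        keyword = title_ratio * bm25_title + (1 - title_ratio) * bm25_content
        blended = alpha * vector + (1 - alpha) * keyword
        return blended * document_boost * recency_bias * aggregated_chunk_boost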
diff --git a/backend/onyx/document_index/vespa/index.py b/backend/onyx/document_index/vespa/index.py
index 92210d712e..213d8aab4e 100644
--- a/backend/onyx/document_index/vespa/index.py
+++ b/backend/onyx/document_index/vespa/index.py
@@ -19,6 +19,7 @@ import httpx  # type: ignore
 import requests  # type: ignore
 from retry import retry
 
+from onyx.agents.agent_search.shared_graph_utils.models import QueryExpansionType
 from onyx.configs.chat_configs import DOC_TIME_DECAY
 from onyx.configs.chat_configs import NUM_RETURNED_HITS
 from onyx.configs.chat_configs import TITLE_CONTENT_RATIO
@@ -800,12 +801,14 @@ class VespaIndex(DocumentIndex):
         hybrid_alpha: float,
         time_decay_multiplier: float,
         num_to_retrieve: int,
+        ranking_profile_type: QueryExpansionType,
         offset: int = 0,
         title_content_ratio: float | None = TITLE_CONTENT_RATIO,
     ) -> list[InferenceChunkUncleaned]:
         vespa_where_clauses = build_vespa_filters(filters)
         # Needs to be at least as much as the value set in Vespa schema config
         target_hits = max(10 * num_to_retrieve, 1000)
+
         yql = (
             YQL_BASE.format(index_name=self.index_name)
             + vespa_where_clauses
@@ -817,6 +820,11 @@ class VespaIndex(DocumentIndex):
 
         final_query = " ".join(final_keywords) if final_keywords else query
 
+        if ranking_profile_type == QueryExpansionType.KEYWORD:
+            ranking_profile = f"hybrid_search_keyword_base_{len(query_embedding)}"
+        else:
+            ranking_profile = f"hybrid_search_semantic_base_{len(query_embedding)}"
+
         logger.debug(f"Query YQL: {yql}")
 
         params: dict[str, str | int | float] = {
@@ -832,7 +840,7 @@ class VespaIndex(DocumentIndex):
             ),
             "hits": num_to_retrieve,
             "offset": offset,
-            "ranking.profile": f"hybrid_search{len(query_embedding)}",
+            "ranking.profile": ranking_profile,
             "timeout": VESPA_TIMEOUT,
         }
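The ranking profile is still derived from the query embedding dimension, with the expansion type now choosing between the two schema profiles added above. For a hypothetical 768-dimension embedding:

    query_embedding = [0.0] * 768  # hypothetical query vector
    # QueryExpansionType.KEYWORD  -> "hybrid_search_keyword_base_768"
    # QueryExpansionType.SEMANTIC -> "hybrid_search_semantic_base_768"
    profile = f"hybrid_search_semantic_base_{len(query_embedding)}"

Note that nothing requests the old hybrid_search<dim> profile anymore, so the updated Vespa schema presumably needs to be deployed with (or before) this code change.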
diff --git a/backend/onyx/prompts/chat_prompts.py b/backend/onyx/prompts/chat_prompts.py
index 65c4f9e859..8656dfa106 100644
--- a/backend/onyx/prompts/chat_prompts.py
+++ b/backend/onyx/prompts/chat_prompts.py
@@ -246,3 +246,75 @@ Please give a short succinct summary of the entire document. Answer only with the \
 summary and nothing else.
 """
 DOCUMENT_SUMMARY_TOKEN_ESTIMATE = 29
+
+
+QUERY_SEMANTIC_EXPANSION_WITHOUT_HISTORY_PROMPT = """
+Please rephrase the following user question/query as a semantic query that would be appropriate for a \
+search engine.
+
+Note:
+ - do not change the meaning of the question! Specifically, if the query is an instruction, keep it \
+as an instruction!
+
+Here is the user question/query:
+{question}
+
+Respond with EXACTLY and ONLY one rephrased question/query.
+
+Rephrased question/query for search engine:
+""".strip()
+
+
+QUERY_SEMANTIC_EXPANSION_WITH_HISTORY_PROMPT = """
+Following a previous message history, a user created a follow-up question/query.
+Please rephrase that question/query as a semantic query \
+that would be appropriate for a SEARCH ENGINE. Only use information from the \
+history that provides relevant context for the search, so that the rephrased \
+query works as a stand-alone search query.
+
+Note:
+ - do not change the meaning of the question! Specifically, if the query is an instruction, keep it \
+as an instruction!
+
+Here is the relevant previous message history:
+{history}
+
+Here is the user question:
+{question}
+
+Respond with EXACTLY and ONLY one rephrased query.
+
+Rephrased query for search engine:
+""".strip()
+
+
+QUERY_KEYWORD_EXPANSION_WITHOUT_HISTORY_PROMPT = """
+Please rephrase the following user question as a keyword query that would be appropriate for a \
+search engine.
+
+Here is the user question:
+{question}
+
+Respond with EXACTLY and ONLY one rephrased query.
+
+Rephrased query for search engine:
+""".strip()
+
+
+QUERY_KEYWORD_EXPANSION_WITH_HISTORY_PROMPT = """
+Following a previous message history, a user created a follow-up question/query.
+Please rephrase that question/query as a keyword query \
+that would be appropriate for a SEARCH ENGINE. Only use information from the \
+history that provides relevant context for the search, so that the rephrased \
+query works as a stand-alone search query.
+
+Here is the relevant previous message history:
+{history}
+
+Here is the user question:
+{question}
+
+Respond with EXACTLY and ONLY one rephrased query.
+
+Rephrased query for search engine:
+""".strip()
diff --git a/backend/onyx/tools/models.py b/backend/onyx/tools/models.py
index 2d1459ef43..f6d5f1f881 100644
--- a/backend/onyx/tools/models.py
+++ b/backend/onyx/tools/models.py
@@ -11,6 +11,7 @@ from onyx.configs.constants import DocumentSource
 from onyx.context.search.enums import SearchType
 from onyx.context.search.models import IndexFilters
 from onyx.context.search.models import InferenceSection
+from onyx.context.search.models import QueryExpansions
 from shared_configs.model_server_models import Embedding
 
 
@@ -79,6 +80,7 @@ class SearchToolOverrideKwargs(BaseModel):
     )
     document_sources: list[DocumentSource] | None = None
     time_cutoff: datetime | None = None
+    expanded_queries: QueryExpansions | None = None
 
     class Config:
         arbitrary_types_allowed = True
diff --git a/backend/onyx/tools/tool_implementations/search/search_tool.py b/backend/onyx/tools/tool_implementations/search/search_tool.py
index 9fcc64cd7a..f6fdfd417c 100644
--- a/backend/onyx/tools/tool_implementations/search/search_tool.py
+++ b/backend/onyx/tools/tool_implementations/search/search_tool.py
@@ -295,6 +295,7 @@ class SearchTool(Tool[SearchToolOverrideKwargs]):
         ordering_only = False
         document_sources = None
         time_cutoff = None
+        expanded_queries = None
         if override_kwargs:
             force_no_rerank = use_alt_not_None(override_kwargs.force_no_rerank, False)
             alternate_db_session = override_kwargs.alternate_db_session
@@ -307,6 +308,7 @@ class SearchTool(Tool[SearchToolOverrideKwargs]):
             ordering_only = use_alt_not_None(override_kwargs.ordering_only, False)
             document_sources = override_kwargs.document_sources
             time_cutoff = override_kwargs.time_cutoff
+            expanded_queries = override_kwargs.expanded_queries
 
         # Fast path for ordering-only search
         if ordering_only:
@@ -391,6 +393,8 @@ class SearchTool(Tool[SearchToolOverrideKwargs]):
                 precomputed_query_embedding=precomputed_query_embedding,
                 precomputed_is_keyword=precomputed_is_keyword,
                 precomputed_keywords=precomputed_keywords,
+                # query expansions generated during tool selection, if enabled
+                expanded_queries=expanded_queries,
             ),
             user=self.user,
             llm=self.llm,
diff --git a/backend/scripts/query_time_check/test_query_times.py b/backend/scripts/query_time_check/test_query_times.py
index 4ea6cf0161..6825afe24e 100644
--- a/backend/scripts/query_time_check/test_query_times.py
+++ b/backend/scripts/query_time_check/test_query_times.py
@@ -5,6 +5,7 @@ RUN THIS AFTER SEED_DUMMY_DOCS.PY
 import random
 import time
 
+from onyx.agents.agent_search.shared_graph_utils.models import QueryExpansionType
 from onyx.configs.constants import DocumentSource
 from onyx.configs.model_configs import DOC_EMBEDDING_DIM
 from onyx.context.search.models import IndexFilters
@@ -96,6 +97,7 @@ def test_hybrid_retrieval_times(
         hybrid_alpha=0.5,
         time_decay_multiplier=1.0,
         num_to_retrieve=50,
+        ranking_profile_type=QueryExpansionType.SEMANTIC,
         offset=0,
         title_content_ratio=0.5,
     )
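Since ranking_profile_type is now a required parameter on HybridCapable.hybrid_retrieval, implementations and call sites must change together; the script update above is the minimal call-site fix. A sketch of a conforming call, with a hypothetical embedding and a filters object built elsewhere:

    chunks = vespa_index.hybrid_retrieval(
        query="pto policy",
        query_embedding=[0.0] * 768,  # hypothetical 768-dim query vector
        final_keywords=["pto", "policy"],
        filters=filters,  # an IndexFilters for the current user/tenant
        hybrid_alpha=0.5,
        time_decay_multiplier=1.0,
        num_to_retrieve=50,
        ranking_profile_type=QueryExpansionType.SEMANTIC,  # selects the semantic rank profile
        offset=0,
    )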