diff --git a/backend/onyx/agents/agent_search/orchestration/nodes/choose_tool.py b/backend/onyx/agents/agent_search/orchestration/nodes/choose_tool.py
index ba23808d8d..5b46ca29ad 100644
--- a/backend/onyx/agents/agent_search/orchestration/nodes/choose_tool.py
+++ b/backend/onyx/agents/agent_search/orchestration/nodes/choose_tool.py
@@ -1,6 +1,8 @@
 from typing import cast
 from uuid import uuid4
 
+from langchain_core.messages import AIMessage
+from langchain_core.messages import HumanMessage
 from langchain_core.messages import ToolCall
 from langchain_core.runnables.config import RunnableConfig
 from langgraph.types import StreamWriter
@@ -10,13 +12,21 @@ from onyx.agents.agent_search.models import GraphConfig
 from onyx.agents.agent_search.orchestration.states import ToolChoice
 from onyx.agents.agent_search.orchestration.states import ToolChoiceState
 from onyx.agents.agent_search.orchestration.states import ToolChoiceUpdate
+from onyx.agents.agent_search.shared_graph_utils.models import QueryExpansionType
 from onyx.chat.prompt_builder.answer_prompt_builder import AnswerPromptBuilder
 from onyx.chat.tool_handling.tool_response_handler import get_tool_by_name
 from onyx.chat.tool_handling.tool_response_handler import (
     get_tool_call_for_non_tool_calling_llm_impl,
 )
+from onyx.configs.chat_configs import USE_SEMANTIC_KEYWORD_EXPANSIONS_BASIC_SEARCH
 from onyx.context.search.preprocessing.preprocessing import query_analysis
 from onyx.context.search.retrieval.search_runner import get_query_embedding
+from onyx.llm.factory import get_default_llms
+from onyx.prompts.chat_prompts import QUERY_KEYWORD_EXPANSION_WITH_HISTORY_PROMPT
+from onyx.prompts.chat_prompts import QUERY_KEYWORD_EXPANSION_WITHOUT_HISTORY_PROMPT
+from onyx.prompts.chat_prompts import QUERY_SEMANTIC_EXPANSION_WITH_HISTORY_PROMPT
+from onyx.prompts.chat_prompts import QUERY_SEMANTIC_EXPANSION_WITHOUT_HISTORY_PROMPT
+from onyx.tools.models import QueryExpansions
 from onyx.tools.models import SearchToolOverrideKwargs
 from onyx.tools.tool import Tool
 from onyx.tools.tool_implementations.search.search_tool import SearchTool
@@ -30,6 +40,49 @@ from shared_configs.model_server_models import Embedding
 logger = setup_logger()
 
 
+def _create_history_str(prompt_builder: AnswerPromptBuilder) -> str:
+    # TODO: Add trimming logic
+    history_segments = []
+    for msg in prompt_builder.message_history:
+        if isinstance(msg, HumanMessage):
+            role = "User"
+        elif isinstance(msg, AIMessage):
+            role = "Assistant"
+        else:
+            continue
+        history_segments.append(f"{role}:\n {msg.content}\n\n")
+    return "\n".join(history_segments)
+
+
+def _expand_query(
+    query: str,
+    expansion_type: QueryExpansionType,
+    prompt_builder: AnswerPromptBuilder,
+) -> str:
+
+    history_str = _create_history_str(prompt_builder)
+
+    if history_str:
+        if expansion_type == QueryExpansionType.KEYWORD:
+            base_prompt = QUERY_KEYWORD_EXPANSION_WITH_HISTORY_PROMPT
+        else:
+            base_prompt = QUERY_SEMANTIC_EXPANSION_WITH_HISTORY_PROMPT
+        expansion_prompt = base_prompt.format(question=query, history=history_str)
+    else:
+        if expansion_type == QueryExpansionType.KEYWORD:
+            base_prompt = QUERY_KEYWORD_EXPANSION_WITHOUT_HISTORY_PROMPT
+        else:
+            base_prompt = QUERY_SEMANTIC_EXPANSION_WITHOUT_HISTORY_PROMPT
+        expansion_prompt = base_prompt.format(question=query)
+
+    msg = HumanMessage(content=expansion_prompt)
+    primary_llm, _ = get_default_llms()
+    response = primary_llm.invoke([msg])
+    rephrased_query: str = cast(str, response.content)
+
+    return rephrased_query
+
+
 # TODO: break this out into an implementation function
 # and a function that handles extracting the necessary fields
 # from the state and config
@@ -52,7 +105,16 @@ def choose_tool(
     embedding_thread: TimeoutThread[Embedding] | None = None
     keyword_thread: TimeoutThread[tuple[bool, list[str]]] | None = None
+    expanded_keyword_thread: TimeoutThread[str] | None = None
+    expanded_semantic_thread: TimeoutThread[str] | None = None
     override_kwargs: SearchToolOverrideKwargs | None = None
+
+    using_tool_calling_llm = agent_config.tooling.using_tool_calling_llm
+    prompt_builder = state.prompt_snapshot or agent_config.inputs.prompt_builder
+
+    llm = agent_config.tooling.primary_llm
+    skip_gen_ai_answer_generation = agent_config.behavior.skip_gen_ai_answer_generation
+
     if (
         not agent_config.behavior.use_agentic_search
         and agent_config.tooling.search_tool is not None
@@ -72,11 +134,20 @@ def choose_tool(
             agent_config.inputs.search_request.query,
         )
 
-    using_tool_calling_llm = agent_config.tooling.using_tool_calling_llm
-    prompt_builder = state.prompt_snapshot or agent_config.inputs.prompt_builder
+        if USE_SEMANTIC_KEYWORD_EXPANSIONS_BASIC_SEARCH:
 
-    llm = agent_config.tooling.primary_llm
-    skip_gen_ai_answer_generation = agent_config.behavior.skip_gen_ai_answer_generation
+            expanded_keyword_thread = run_in_background(
+                _expand_query,
+                agent_config.inputs.search_request.query,
+                QueryExpansionType.KEYWORD,
+                prompt_builder,
+            )
+            expanded_semantic_thread = run_in_background(
+                _expand_query,
+                agent_config.inputs.search_request.query,
+                QueryExpansionType.SEMANTIC,
+                prompt_builder,
+            )
 
     structured_response_format = agent_config.inputs.structured_response_format
     tools = [
@@ -209,6 +280,19 @@ def choose_tool(
         override_kwargs.precomputed_is_keyword = is_keyword
         override_kwargs.precomputed_keywords = keywords
 
+    if (
+        selected_tool.name == SearchTool._NAME
+        and expanded_keyword_thread
+        and expanded_semantic_thread
+    ):
+        keyword_expansion = wait_on_background(expanded_keyword_thread)
+        semantic_expansion = wait_on_background(expanded_semantic_thread)
+        assert override_kwargs is not None, "must have override kwargs"
+        override_kwargs.expanded_queries = QueryExpansions(
+            keywords_expansions=[keyword_expansion],
+            semantic_expansions=[semantic_expansion],
+        )
+
     return ToolChoiceUpdate(
         tool_choice=ToolChoice(
             tool=selected_tool,
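Reviewer note: the two expansion LLM calls are spawned before the tool-choice LLM call and joined only if the search tool is actually selected, so their latency overlaps with tool selection. A minimal sketch of the pattern, with a hypothetical query string and with _expand_query and prompt_builder as in the new code above:

    from onyx.agents.agent_search.shared_graph_utils.models import QueryExpansionType
    from onyx.context.search.models import QueryExpansions
    from onyx.utils.threadpool_concurrency import run_in_background, wait_on_background

    # Spawn both expansions without blocking tool selection.
    kw_thread = run_in_background(
        _expand_query, "pto policy", QueryExpansionType.KEYWORD, prompt_builder
    )
    sem_thread = run_in_background(
        _expand_query, "pto policy", QueryExpansionType.SEMANTIC, prompt_builder
    )

    # ... the tool-choice LLM call happens here ...

    # Join only once the search tool has won; the result rides along in the override kwargs.
    expansions = QueryExpansions(
        keywords_expansions=[wait_on_background(kw_thread)],
        semantic_expansions=[wait_on_background(sem_thread)],
    )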
diff --git a/backend/onyx/agents/agent_search/shared_graph_utils/models.py b/backend/onyx/agents/agent_search/shared_graph_utils/models.py
index f22bf162d9..812f97bd6a 100644
--- a/backend/onyx/agents/agent_search/shared_graph_utils/models.py
+++ b/backend/onyx/agents/agent_search/shared_graph_utils/models.py
@@ -1,3 +1,4 @@
+from enum import Enum
 from typing import Any
 
 from pydantic import BaseModel
@@ -153,3 +154,8 @@ class AnswerGenerationDocuments(BaseModel):
 
 
 BaseMessage_Content = str | list[str | dict[str, Any]]
+
+
+class QueryExpansionType(Enum):
+    KEYWORD = "keyword"
+    SEMANTIC = "semantic"
diff --git a/backend/onyx/configs/chat_configs.py b/backend/onyx/configs/chat_configs.py
index 45d80b6e22..56612b39e7 100644
--- a/backend/onyx/configs/chat_configs.py
+++ b/backend/onyx/configs/chat_configs.py
@@ -96,3 +96,9 @@ BING_API_KEY = os.environ.get("BING_API_KEY") or None
 ENABLE_CONNECTOR_CLASSIFIER = os.environ.get("ENABLE_CONNECTOR_CLASSIFIER", False)
 
 VESPA_SEARCHER_THREADS = int(os.environ.get("VESPA_SEARCHER_THREADS") or 2)
+
+# Whether to use semantic & keyword query expansions for Basic Search
+USE_SEMANTIC_KEYWORD_EXPANSIONS_BASIC_SEARCH = (
+    os.environ.get("USE_SEMANTIC_KEYWORD_EXPANSIONS_BASIC_SEARCH", "false").lower()
+    == "true"
+)
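The flag is parsed with a strict string comparison, so only the literal "true" (any casing) enables it; values like "1" or "yes" leave the feature off. A quick illustration of the parse rule used above:

    # Mirrors the config parse: only a case-insensitive "true" enables the feature.
    for raw in ("true", "TRUE", "1", "yes", ""):
        print(repr(raw), "->", raw.lower() == "true")
    # 'true' -> True, 'TRUE' -> True, '1' -> False, 'yes' -> False, '' -> False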
diff --git a/backend/onyx/context/search/models.py b/backend/onyx/context/search/models.py
index ef82c6bb54..47cfca563f 100644
--- a/backend/onyx/context/search/models.py
+++ b/backend/onyx/context/search/models.py
@@ -18,11 +18,17 @@ from onyx.indexing.models import IndexingSetting
 from shared_configs.enums import RerankerProvider
 from shared_configs.model_server_models import Embedding
 
+
 MAX_METRICS_CONTENT = (
     200  # Just need enough characters to identify where in the doc the chunk is
 )
 
 
+class QueryExpansions(BaseModel):
+    keywords_expansions: list[str] | None = None
+    semantic_expansions: list[str] | None = None
+
+
 class RerankingDetails(BaseModel):
     # If model is None (or num_rerank is 0), then reranking is turned off
     rerank_model_name: str | None
@@ -139,6 +145,8 @@ class ChunkContext(BaseModel):
 class SearchRequest(ChunkContext):
     query: str
 
+    expanded_queries: QueryExpansions | None = None
+
     search_type: SearchType = SearchType.SEMANTIC
 
     human_selected_filters: BaseFilters | None = None
@@ -187,6 +195,8 @@ class SearchQuery(ChunkContext):
 
     precomputed_query_embedding: Embedding | None = None
 
+    expanded_queries: QueryExpansions | None = None
+
 
 class RetrievalDetails(ChunkContext):
     # Use LLM to determine whether to do a retrieval or only rely on existing history
diff --git a/backend/onyx/context/search/preprocessing/preprocessing.py b/backend/onyx/context/search/preprocessing/preprocessing.py
index a8b22a983d..16402a360e 100644
--- a/backend/onyx/context/search/preprocessing/preprocessing.py
+++ b/backend/onyx/context/search/preprocessing/preprocessing.py
@@ -20,7 +20,7 @@ from onyx.context.search.models import SearchRequest
 from onyx.context.search.preprocessing.access_filters import (
     build_access_filters_for_user,
 )
-from onyx.context.search.retrieval.search_runner import (
+from onyx.context.search.utils import (
     remove_stop_words_and_punctuation,
 )
 from onyx.db.models import User
@@ -36,7 +36,6 @@ from onyx.utils.timing import log_function_time
 from shared_configs.configs import MULTI_TENANT
 from shared_configs.contextvars import get_current_tenant_id
 
-
 logger = setup_logger()
@@ -264,4 +263,5 @@ def retrieval_preprocessing(
         chunks_below=chunks_below,
         full_doc=search_request.full_doc,
         precomputed_query_embedding=search_request.precomputed_query_embedding,
+        expanded_queries=search_request.expanded_queries,
     )
diff --git a/backend/onyx/context/search/retrieval/search_runner.py b/backend/onyx/context/search/retrieval/search_runner.py
index f69b2c3b23..acfd415c82 100644
--- a/backend/onyx/context/search/retrieval/search_runner.py
+++ b/backend/onyx/context/search/retrieval/search_runner.py
@@ -2,10 +2,10 @@ import string
 from collections.abc import Callable
 
 import nltk  # type:ignore
-from nltk.corpus import stopwords  # type:ignore
-from nltk.tokenize import word_tokenize  # type:ignore
 from sqlalchemy.orm import Session
 
+from onyx.agents.agent_search.shared_graph_utils.models import QueryExpansionType
+from onyx.context.search.enums import SearchType
 from onyx.context.search.models import ChunkMetric
 from onyx.context.search.models import IndexFilters
 from onyx.context.search.models import InferenceChunk
@@ -15,6 +15,8 @@ from onyx.context.search.models import MAX_METRICS_CONTENT
 from onyx.context.search.models import RetrievalMetricsContainer
 from onyx.context.search.models import SearchQuery
 from onyx.context.search.postprocessing.postprocessing import cleanup_chunks
+from onyx.context.search.preprocessing.preprocessing import HYBRID_ALPHA
+from onyx.context.search.preprocessing.preprocessing import HYBRID_ALPHA_KEYWORD
 from onyx.context.search.utils import inference_section_from_chunks
 from onyx.db.search_settings import get_current_search_settings
 from onyx.db.search_settings import get_multilingual_expansion
@@ -27,6 +29,9 @@ from onyx.natural_language_processing.search_nlp_models import EmbeddingModel
 from onyx.secondary_llm_flows.query_expansion import multilingual_query_expansion
 from onyx.utils.logger import setup_logger
 from onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel
+from onyx.utils.threadpool_concurrency import run_in_background
+from onyx.utils.threadpool_concurrency import TimeoutThread
+from onyx.utils.threadpool_concurrency import wait_on_background
 from onyx.utils.timing import log_function_time
 from shared_configs.configs import MODEL_SERVER_HOST
 from shared_configs.configs import MODEL_SERVER_PORT
@@ -36,6 +41,23 @@ from shared_configs.model_server_models import Embedding
 logger = setup_logger()
 
 
+def _dedupe_chunks(
+    chunks: list[InferenceChunkUncleaned],
+) -> list[InferenceChunkUncleaned]:
+    used_chunks: dict[tuple[str, int], InferenceChunkUncleaned] = {}
+    for chunk in chunks:
+        key = (chunk.document_id, chunk.chunk_id)
+        if key not in used_chunks:
+            used_chunks[key] = chunk
+        else:
+            stored_chunk_score = used_chunks[key].score or 0
+            this_chunk_score = chunk.score or 0
+            if stored_chunk_score < this_chunk_score:
+                used_chunks[key] = chunk
+
+    return list(used_chunks.values())
+
+
 def download_nltk_data() -> None:
     resources = {
         "stopwords": "corpora/stopwords",
@@ -69,22 +91,6 @@ def lemmatize_text(keywords: list[str]) -> list[str]:
 #     return keywords
 
 
-def remove_stop_words_and_punctuation(keywords: list[str]) -> list[str]:
-    try:
-        # Re-tokenize using the NLTK tokenizer for better matching
-        query = " ".join(keywords)
-        stop_words = set(stopwords.words("english"))
-        word_tokens = word_tokenize(query)
-        text_trimmed = [
-            word
-            for word in word_tokens
-            if (word.casefold() not in stop_words and word not in string.punctuation)
-        ]
-        return text_trimmed or word_tokens
-    except Exception:
-        return keywords
-
-
 def combine_retrieval_results(
     chunk_sets: list[list[InferenceChunk]],
 ) -> list[InferenceChunk]:
@@ -123,6 +129,20 @@ def get_query_embedding(query: str, db_session: Session) -> Embedding:
     return query_embedding
 
 
+def get_query_embeddings(queries: list[str], db_session: Session) -> list[Embedding]:
+    search_settings = get_current_search_settings(db_session)
+
+    model = EmbeddingModel.from_db_model(
+        search_settings=search_settings,
+        # The below are globally set, this flow always uses the indexing one
+        server_host=MODEL_SERVER_HOST,
+        server_port=MODEL_SERVER_PORT,
+    )
+
+    query_embeddings = model.encode(queries, text_type=EmbedTextType.QUERY)
+    return query_embeddings
+
+
 @log_function_time(print_only=True)
 def doc_index_retrieval(
     query: SearchQuery,
@@ -139,17 +159,113 @@ def doc_index_retrieval(
         query.query, db_session
     )
 
-    top_chunks = document_index.hybrid_retrieval(
-        query=query.query,
-        query_embedding=query_embedding,
-        final_keywords=query.processed_keywords,
-        filters=query.filters,
-        hybrid_alpha=query.hybrid_alpha,
-        time_decay_multiplier=query.recency_bias_multiplier,
-        num_to_retrieve=query.num_hits,
-        offset=query.offset,
+    keyword_embeddings_thread: TimeoutThread[list[Embedding]] | None = None
+    semantic_embeddings_thread: TimeoutThread[list[Embedding]] | None = None
+    top_base_chunks_thread: TimeoutThread[list[InferenceChunkUncleaned]] | None = None
+
+    top_semantic_chunks_thread: TimeoutThread[list[InferenceChunkUncleaned]] | None = (
+        None
     )
+
+    keyword_embeddings: list[Embedding] | None = None
+    semantic_embeddings: list[Embedding] | None = None
+
+    top_semantic_chunks: list[InferenceChunkUncleaned] | None = None
+
+    # original retrieval method
+    top_base_chunks_thread = run_in_background(
+        document_index.hybrid_retrieval,
+        query.query,
+        query_embedding,
+        query.processed_keywords,
+        query.filters,
+        query.hybrid_alpha,
+        query.recency_bias_multiplier,
+        query.num_hits,
+        QueryExpansionType.SEMANTIC,
+        query.offset,
+    )
+
+    if (
+        query.expanded_queries
+        and query.expanded_queries.keywords_expansions
+        and query.expanded_queries.semantic_expansions
+    ):
+
+        keyword_embeddings_thread = run_in_background(
+            get_query_embeddings,
+            query.expanded_queries.keywords_expansions,
+            db_session,
+        )
+
+        if query.search_type == SearchType.SEMANTIC:
+            semantic_embeddings_thread = run_in_background(
+                get_query_embeddings,
+                query.expanded_queries.semantic_expansions,
+                db_session,
+            )
+
+        keyword_embeddings = wait_on_background(keyword_embeddings_thread)
+
+        if query.search_type == SearchType.SEMANTIC:
+            assert semantic_embeddings_thread is not None
+            semantic_embeddings = wait_on_background(semantic_embeddings_thread)
+
+        # Use original query embedding for keyword retrieval embedding
+        keyword_embeddings = [query_embedding]
+
+        # Note: earlier steps prepare for multiple expansions, but for now only the first is used.
+        top_keyword_chunks_thread = run_in_background(
+            document_index.hybrid_retrieval,
+            query.expanded_queries.keywords_expansions[0],
+            keyword_embeddings[0],
+            query.processed_keywords,
+            query.filters,
+            HYBRID_ALPHA_KEYWORD,
+            query.recency_bias_multiplier,
+            query.num_hits,
+            QueryExpansionType.KEYWORD,
+            query.offset,
+        )
+
+        if query.search_type == SearchType.SEMANTIC:
+            assert semantic_embeddings is not None
+
+            top_semantic_chunks_thread = run_in_background(
+                document_index.hybrid_retrieval,
+                query.expanded_queries.semantic_expansions[0],
+                semantic_embeddings[0],
+                query.processed_keywords,
+                query.filters,
+                HYBRID_ALPHA,
+                query.recency_bias_multiplier,
+                query.num_hits,
+                QueryExpansionType.SEMANTIC,
+                query.offset,
+            )
+
+        top_base_chunks = wait_on_background(top_base_chunks_thread)
+
+        top_keyword_chunks = wait_on_background(top_keyword_chunks_thread)
+
+        if query.search_type == SearchType.SEMANTIC:
+            assert top_semantic_chunks_thread is not None
+            top_semantic_chunks = wait_on_background(top_semantic_chunks_thread)
+
+        all_top_chunks = top_base_chunks + top_keyword_chunks
+
+        # combine the results of all three retrieval runs
+        if query.search_type == SearchType.SEMANTIC and top_semantic_chunks is not None:
+            all_top_chunks += top_semantic_chunks
+
+        top_chunks = _dedupe_chunks(all_top_chunks)
+
+    else:
+        top_base_chunks = wait_on_background(top_base_chunks_thread)
+        top_chunks = _dedupe_chunks(top_base_chunks)
 
     retrieval_requests: list[VespaChunkRequest] = []
     normal_chunks: list[InferenceChunkUncleaned] = []
     referenced_chunk_scores: dict[tuple[str, int], float] = {}
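The three retrieval runs can return the same chunk more than once; _dedupe_chunks keys on (document_id, chunk_id) and keeps the copy with the higher score, so a chunk found by both the keyword and the semantic run carries its best score forward. A toy illustration with hypothetical stand-in objects (not real InferenceChunkUncleaned instances):

    from dataclasses import dataclass

    @dataclass
    class FakeChunk:  # stand-in for InferenceChunkUncleaned
        document_id: str
        chunk_id: int
        score: float | None

    base = [FakeChunk("doc-a", 0, 0.41), FakeChunk("doc-b", 3, 0.35)]
    keyword = [FakeChunk("doc-a", 0, 0.58)]  # same chunk, higher keyword-run score

    deduped = _dedupe_chunks(base + keyword)  # type: ignore[arg-type]
    # -> 2 chunks; ("doc-a", 0) survives with score 0.58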
diff --git a/backend/onyx/context/search/utils.py b/backend/onyx/context/search/utils.py
index 91724ffb32..22e6b0f8df 100644
--- a/backend/onyx/context/search/utils.py
+++ b/backend/onyx/context/search/utils.py
@@ -1,6 +1,10 @@
+import string
 from collections.abc import Sequence
 from typing import TypeVar
 
+from nltk.corpus import stopwords  # type:ignore
+from nltk.tokenize import word_tokenize  # type:ignore
+
 from onyx.chat.models import SectionRelevancePiece
 from onyx.context.search.models import InferenceChunk
 from onyx.context.search.models import InferenceSection
@@ -136,3 +140,19 @@ def chunks_or_sections_to_search_docs(
     ]
 
     return search_docs
+
+
+def remove_stop_words_and_punctuation(keywords: list[str]) -> list[str]:
+    try:
+        # Re-tokenize using the NLTK tokenizer for better matching
+        query = " ".join(keywords)
+        stop_words = set(stopwords.words("english"))
+        word_tokens = word_tokenize(query)
+        text_trimmed = [
+            word
+            for word in word_tokens
+            if (word.casefold() not in stop_words and word not in string.punctuation)
+        ]
+        return text_trimmed or word_tokens
+    except Exception:
+        return keywords
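This helper is moved verbatim out of search_runner.py; the relocation presumably breaks the import cycle that would otherwise arise, since search_runner.py now imports HYBRID_ALPHA from preprocessing.py while preprocessing.py previously imported this function from search_runner.py. Expected behavior on hypothetical inputs:

    remove_stop_words_and_punctuation(["what", "is", "the", "pto", "policy", "?"])
    # -> ["pto", "policy"]  (stopwords and punctuation dropped)

    remove_stop_words_and_punctuation(["the", "of"])
    # -> ["the", "of"]  (falls back to the tokenized words when everything would be trimmed)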
diff --git a/backend/onyx/document_index/interfaces.py b/backend/onyx/document_index/interfaces.py
index 66912a971c..668898c149 100644
--- a/backend/onyx/document_index/interfaces.py
+++ b/backend/onyx/document_index/interfaces.py
@@ -4,6 +4,8 @@ from datetime import datetime
 from typing import Any
 
 from onyx.access.models import DocumentAccess
+from onyx.agents.agent_search.shared_graph_utils.models import QueryExpansionType
+from onyx.configs.chat_configs import TITLE_CONTENT_RATIO
 from onyx.context.search.models import IndexFilters
 from onyx.context.search.models import InferenceChunkUncleaned
 from onyx.db.enums import EmbeddingPrecision
@@ -351,7 +353,9 @@ class HybridCapable(abc.ABC):
         hybrid_alpha: float,
         time_decay_multiplier: float,
         num_to_retrieve: int,
+        ranking_profile_type: QueryExpansionType,
         offset: int = 0,
+        title_content_ratio: float | None = TITLE_CONTENT_RATIO,
     ) -> list[InferenceChunkUncleaned]:
         """
         Run hybrid search and return a list of inference chunks.
diff --git a/backend/onyx/document_index/vespa/app_config/schemas/danswer_chunk.sd b/backend/onyx/document_index/vespa/app_config/schemas/danswer_chunk.sd
index d3fcf73a76..c068234935 100644
--- a/backend/onyx/document_index/vespa/app_config/schemas/danswer_chunk.sd
+++ b/backend/onyx/document_index/vespa/app_config/schemas/danswer_chunk.sd
@@ -176,7 +176,7 @@ schema DANSWER_CHUNK_NAME {
         match-features: recency_bias
     }
 
-    rank-profile hybrid_searchVARIABLE_DIM inherits default, default_rank {
+    rank-profile hybrid_search_semantic_base_VARIABLE_DIM inherits default, default_rank {
         inputs {
             query(query_embedding) tensor(x[VARIABLE_DIM])
         }
@@ -192,7 +192,75 @@ schema DANSWER_CHUNK_NAME {
 
         # First phase must be vector to allow hits that have no keyword matches
         first-phase {
-            expression: closeness(field, embeddings)
+            expression: query(title_content_ratio) * closeness(field, title_embedding) + (1 - query(title_content_ratio)) * closeness(field, embeddings)
+        }
+
+        # Weighted average between Vector Search and BM-25
+        global-phase {
+            expression {
+                (
+                    # Weighted Vector Similarity Score
+                    (
+                        query(alpha) * (
+                            (query(title_content_ratio) * normalize_linear(title_vector_score))
+                            +
+                            ((1 - query(title_content_ratio)) * normalize_linear(closeness(field, embeddings)))
+                        )
+                    )
+                    +
+                    # Weighted Keyword Similarity Score
+                    # Note: the BM25 title score requires decent stopword removal in the query,
+                    # so that irrelevant titles are not normalized to a score of 1
+                    (
+                        (1 - query(alpha)) * (
+                            (query(title_content_ratio) * normalize_linear(bm25(title)))
+                            +
+                            ((1 - query(title_content_ratio)) * normalize_linear(bm25(content)))
+                        )
+                    )
+                )
+                # Boost based on user feedback
+                * document_boost
+                # Decay factor based on time document was last updated
+                * recency_bias
+                # Boost based on aggregated boost calculation
+                * aggregated_chunk_boost
+            }
+            rerank-count: 1000
+        }
+
+        match-features {
+            bm25(title)
+            bm25(content)
+            closeness(field, title_embedding)
+            closeness(field, embeddings)
+            document_boost
+            recency_bias
+            aggregated_chunk_boost
+            closest(embeddings)
+        }
+    }
+
+
+    rank-profile hybrid_search_keyword_base_VARIABLE_DIM inherits default, default_rank {
+        inputs {
+            query(query_embedding) tensor(x[VARIABLE_DIM])
+        }
+
+        function title_vector_score() {
+            expression {
+                # If no good matching titles, then it should use the context embeddings rather than having some
+                # irrelevant title have a vector score of 1. This way at least it will be the doc with the highest
+                # matching content score getting the full score
+                max(closeness(field, embeddings), closeness(field, title_embedding))
+            }
+        }
+
+        # First phase ranks purely by keyword (BM25) scores; the shared global-phase below still blends in vector scores
+        first-phase {
+            expression: query(title_content_ratio) * bm25(title) + (1 - query(title_content_ratio)) * bm25(content)
         }
 
         # Weighted average between Vector Search and BM-25
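For reviewers unfamiliar with Vespa rank expressions: both new profiles share the same global-phase, a convex blend of normalized vector and BM25 signals (weighted by query(alpha) and query(title_content_ratio)) multiplied by the boost and recency factors; they differ only in the first phase that selects candidates (embedding closeness vs. BM25). A plain-Python restatement of the global-phase formula, treating Vespa's normalize_linear as already applied:

    def global_phase_score(
        alpha: float,            # query(alpha): weight of vector vs keyword signal
        title_ratio: float,      # query(title_content_ratio)
        title_vec: float,        # normalized title_vector_score
        content_vec: float,      # normalized closeness(field, embeddings)
        bm25_title: float,       # normalized bm25(title)
        bm25_content: float,     # normalized bm25(content)
        document_boost: float,   # user-feedback boost
        recency_bias: float,     # time-decay factor
        aggregated_chunk_boost: float,
    ) -> float:
        vector = title_ratio * title_vec + (1 - title_ratio) * content_vec
        keyword = title_ratio * bm25_title + (1 - title_ratio) * bm25_content
        blended = alpha * vector + (1 - alpha) * keyword
        return blended * document_boost * recency_bias * aggregated_chunk_boost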
diff --git a/backend/onyx/document_index/vespa/index.py b/backend/onyx/document_index/vespa/index.py
index 92210d712e..213d8aab4e 100644
--- a/backend/onyx/document_index/vespa/index.py
+++ b/backend/onyx/document_index/vespa/index.py
@@ -19,6 +19,7 @@ import httpx  # type: ignore
 import requests  # type: ignore
 from retry import retry
 
+from onyx.agents.agent_search.shared_graph_utils.models import QueryExpansionType
 from onyx.configs.chat_configs import DOC_TIME_DECAY
 from onyx.configs.chat_configs import NUM_RETURNED_HITS
 from onyx.configs.chat_configs import TITLE_CONTENT_RATIO
@@ -800,12 +801,14 @@ class VespaIndex(DocumentIndex):
         hybrid_alpha: float,
         time_decay_multiplier: float,
         num_to_retrieve: int,
+        ranking_profile_type: QueryExpansionType,
         offset: int = 0,
         title_content_ratio: float | None = TITLE_CONTENT_RATIO,
     ) -> list[InferenceChunkUncleaned]:
         vespa_where_clauses = build_vespa_filters(filters)
         # Needs to be at least as much as the value set in Vespa schema config
         target_hits = max(10 * num_to_retrieve, 1000)
+
         yql = (
             YQL_BASE.format(index_name=self.index_name)
             + vespa_where_clauses
@@ -817,6 +820,11 @@ class VespaIndex(DocumentIndex):
 
         final_query = " ".join(final_keywords) if final_keywords else query
 
+        if ranking_profile_type == QueryExpansionType.KEYWORD:
+            ranking_profile = f"hybrid_search_keyword_base_{len(query_embedding)}"
+        else:
+            ranking_profile = f"hybrid_search_semantic_base_{len(query_embedding)}"
+
         logger.debug(f"Query YQL: {yql}")
 
         params: dict[str, str | int | float] = {
@@ -832,7 +840,7 @@ class VespaIndex(DocumentIndex):
             ),
             "hits": num_to_retrieve,
             "offset": offset,
-            "ranking.profile": f"hybrid_search{len(query_embedding)}",
+            "ranking.profile": ranking_profile,
             "timeout": VESPA_TIMEOUT,
         }
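The ranking profile is still derived from the query embedding dimension, with the expansion type now choosing between the two schema profiles added above. For a hypothetical 768-dimension embedding:

    query_embedding = [0.0] * 768  # hypothetical query vector
    # QueryExpansionType.KEYWORD  -> "hybrid_search_keyword_base_768"
    # QueryExpansionType.SEMANTIC -> "hybrid_search_semantic_base_768"
    profile = f"hybrid_search_semantic_base_{len(query_embedding)}"

Note that nothing requests the old hybrid_search<dim> profile anymore, so the updated Vespa schema presumably needs to be deployed with (or before) this code change.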
diff --git a/backend/onyx/prompts/chat_prompts.py b/backend/onyx/prompts/chat_prompts.py
index 65c4f9e859..8656dfa106 100644
--- a/backend/onyx/prompts/chat_prompts.py
+++ b/backend/onyx/prompts/chat_prompts.py
@@ -246,3 +246,75 @@ Please give a short succinct summary of the entire document. Answer only with the \
 summary and nothing else.
 """
 DOCUMENT_SUMMARY_TOKEN_ESTIMATE = 29
+
+
+QUERY_SEMANTIC_EXPANSION_WITHOUT_HISTORY_PROMPT = """
+Please rephrase the following user question/query as a semantic query that would be appropriate for a \
+search engine.
+
+Note:
+ - do not change the meaning of the question! Specifically, if the query is an instruction, keep it \
+as an instruction!
+
+Here is the user question/query:
+{question}
+
+Respond with EXACTLY and ONLY one rephrased question/query.
+
+Rephrased question/query for search engine:
+""".strip()
+
+
+QUERY_SEMANTIC_EXPANSION_WITH_HISTORY_PROMPT = """
+Following a previous message history, a user created a follow-up question/query.
+Please rephrase that question/query as a semantic query \
+that would be appropriate for a SEARCH ENGINE. Only use information from the \
+history that provides relevant context for the search, so that the rephrased \
+query works as a stand-alone search query.
+
+Note:
+ - do not change the meaning of the question! Specifically, if the query is an instruction, keep it \
+as an instruction!
+
+Here is the relevant previous message history:
+{history}
+
+Here is the user question:
+{question}
+
+Respond with EXACTLY and ONLY one rephrased query.
+
+Rephrased query for search engine:
+""".strip()
+
+
+QUERY_KEYWORD_EXPANSION_WITHOUT_HISTORY_PROMPT = """
+Please rephrase the following user question as a keyword query that would be appropriate for a \
+search engine.
+
+Here is the user question:
+{question}
+
+Respond with EXACTLY and ONLY one rephrased query.
+
+Rephrased query for search engine:
+""".strip()
+
+
+QUERY_KEYWORD_EXPANSION_WITH_HISTORY_PROMPT = """
+Following a previous message history, a user created a follow-up question/query.
+Please rephrase that question/query as a keyword query \
+that would be appropriate for a SEARCH ENGINE. Only use information from the \
+history that provides relevant context for the search, so that the rephrased \
+query works as a stand-alone search query.
+
+Here is the relevant previous message history:
+{history}
+
+Here is the user question:
+{question}
+
+Respond with EXACTLY and ONLY one rephrased query.
+
+Rephrased query for search engine:
+""".strip()
diff --git a/backend/onyx/tools/models.py b/backend/onyx/tools/models.py
index 2d1459ef43..f6d5f1f881 100644
--- a/backend/onyx/tools/models.py
+++ b/backend/onyx/tools/models.py
@@ -11,6 +11,7 @@ from onyx.configs.constants import DocumentSource
 from onyx.context.search.enums import SearchType
 from onyx.context.search.models import IndexFilters
 from onyx.context.search.models import InferenceSection
+from onyx.context.search.models import QueryExpansions
 from shared_configs.model_server_models import Embedding
 
 
@@ -79,6 +80,7 @@ class SearchToolOverrideKwargs(BaseModel):
     )
     document_sources: list[DocumentSource] | None = None
     time_cutoff: datetime | None = None
+    expanded_queries: QueryExpansions | None = None
 
     class Config:
         arbitrary_types_allowed = True
diff --git a/backend/onyx/tools/tool_implementations/search/search_tool.py b/backend/onyx/tools/tool_implementations/search/search_tool.py
index 9fcc64cd7a..f6fdfd417c 100644
--- a/backend/onyx/tools/tool_implementations/search/search_tool.py
+++ b/backend/onyx/tools/tool_implementations/search/search_tool.py
@@ -295,6 +295,7 @@ class SearchTool(Tool[SearchToolOverrideKwargs]):
         ordering_only = False
         document_sources = None
         time_cutoff = None
+        expanded_queries = None
         if override_kwargs:
             force_no_rerank = use_alt_not_None(override_kwargs.force_no_rerank, False)
             alternate_db_session = override_kwargs.alternate_db_session
@@ -307,6 +308,7 @@ class SearchTool(Tool[SearchToolOverrideKwargs]):
             ordering_only = use_alt_not_None(override_kwargs.ordering_only, False)
             document_sources = override_kwargs.document_sources
             time_cutoff = override_kwargs.time_cutoff
+            expanded_queries = override_kwargs.expanded_queries
 
         # Fast path for ordering-only search
         if ordering_only:
@@ -391,6 +393,8 @@ class SearchTool(Tool[SearchToolOverrideKwargs]):
                 precomputed_query_embedding=precomputed_query_embedding,
                 precomputed_is_keyword=precomputed_is_keyword,
                 precomputed_keywords=precomputed_keywords,
+                # query expansions generated during tool selection, if enabled
+                expanded_queries=expanded_queries,
             ),
             user=self.user,
             llm=self.llm,
diff --git a/backend/scripts/query_time_check/test_query_times.py b/backend/scripts/query_time_check/test_query_times.py
index 4ea6cf0161..6825afe24e 100644
--- a/backend/scripts/query_time_check/test_query_times.py
+++ b/backend/scripts/query_time_check/test_query_times.py
@@ -5,6 +5,7 @@ RUN THIS AFTER SEED_DUMMY_DOCS.PY
 import random
 import time
 
+from onyx.agents.agent_search.shared_graph_utils.models import QueryExpansionType
 from onyx.configs.constants import DocumentSource
 from onyx.configs.model_configs import DOC_EMBEDDING_DIM
 from onyx.context.search.models import IndexFilters
@@ -96,6 +97,7 @@ def test_hybrid_retrieval_times(
         hybrid_alpha=0.5,
         time_decay_multiplier=1.0,
         num_to_retrieve=50,
+        ranking_profile_type=QueryExpansionType.SEMANTIC,
         offset=0,
         title_content_ratio=0.5,
     )
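Since ranking_profile_type is now a required parameter on HybridCapable.hybrid_retrieval, implementations and call sites must change together; the script update above is the minimal call-site fix. A sketch of a conforming call, with a hypothetical embedding and a filters object built elsewhere:

    chunks = vespa_index.hybrid_retrieval(
        query="pto policy",
        query_embedding=[0.0] * 768,  # hypothetical 768-dim query vector
        final_keywords=["pto", "policy"],
        filters=filters,  # an IndexFilters for the current user/tenant
        hybrid_alpha=0.5,
        time_decay_multiplier=1.0,
        num_to_retrieve=50,
        ranking_profile_type=QueryExpansionType.SEMANTIC,  # selects the semantic rank profile
        offset=0,
    )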