diff --git a/backend/onyx/agents/agent_search/basic/utils.py b/backend/onyx/agents/agent_search/basic/utils.py
index cd0c63afa..bf7f5eaa6 100644
--- a/backend/onyx/agents/agent_search/basic/utils.py
+++ b/backend/onyx/agents/agent_search/basic/utils.py
@@ -60,5 +60,5 @@ def process_llm_stream(
         writer,
     )
 
-    logger.info(f"Full answer: {full_answer}")
+    logger.debug(f"Full answer: {full_answer}")
     return cast(AIMessageChunk, tool_call_chunk)
diff --git a/backend/onyx/agents/agent_search/deep_search/initial/general_sub_answers/nodes/format_initial_sub_answers.py b/backend/onyx/agents/agent_search/deep_search/initial/general_sub_answers/nodes/format_initial_sub_answers.py
index dfc7e2d2a..7da233b95 100644
--- a/backend/onyx/agents/agent_search/deep_search/initial/general_sub_answers/nodes/format_initial_sub_answers.py
+++ b/backend/onyx/agents/agent_search/deep_search/initial/general_sub_answers/nodes/format_initial_sub_answers.py
@@ -17,11 +17,11 @@ def format_initial_sub_answers(
 ) -> SubQuestionResultsUpdate:
     now_start = datetime.now()
 
-    logger.info(f"--------{now_start}--------INGEST ANSWERS---")
+    logger.debug(f"--------{now_start}--------INGEST ANSWERS---")
     documents = []
     context_documents = []
     cited_documents = []
-    answer_results = state.answer_results if hasattr(state, "answer_results") else []
+    answer_results = state.answer_results
     for answer_result in answer_results:
         documents.extend(answer_result.verified_reranked_documents)
         context_documents.extend(answer_result.context_documents)
diff --git a/backend/onyx/agents/agent_search/deep_search/initial/generate_individual_sub_answer/nodes/generate_sub_answer.py b/backend/onyx/agents/agent_search/deep_search/initial/generate_individual_sub_answer/nodes/generate_sub_answer.py
index e40265756..aade2a189 100644
--- a/backend/onyx/agents/agent_search/deep_search/initial/generate_individual_sub_answer/nodes/generate_sub_answer.py
+++ b/backend/onyx/agents/agent_search/deep_search/initial/generate_individual_sub_answer/nodes/generate_sub_answer.py
@@ -29,6 +29,7 @@ from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
 from onyx.chat.models import AgentAnswerPiece
 from onyx.chat.models import StreamStopInfo
 from onyx.chat.models import StreamStopReason
+from onyx.chat.models import StreamType
 from onyx.configs.agent_configs import AGENT_MAX_ANSWER_CONTEXT_DOCS
 from onyx.utils.logger import setup_logger
 
@@ -45,7 +46,7 @@ def generate_sub_answer(
     graph_config = cast(GraphConfig, config["metadata"]["config"])
     question = state.question
     state.verified_reranked_documents
-    level, question_nr = parse_question_id(state.question_id)
+    level, question_num = parse_question_id(state.question_id)
     context_docs = state.context_documents[:AGENT_MAX_ANSWER_CONTEXT_DOCS]
     persona_contextualized_prompt = get_persona_agent_prompt_expressions(
         graph_config.inputs.search_request.persona
@@ -58,7 +59,7 @@ def generate_sub_answer(
             AgentAnswerPiece(
                 answer_piece=answer_str,
                 level=level,
-                level_question_nr=question_nr,
+                level_question_num=question_num,
                 answer_type="agent_sub_answer",
             ),
             writer,
@@ -90,7 +91,7 @@ def generate_sub_answer(
                 AgentAnswerPiece(
                     answer_piece=content,
                     level=level,
-                    level_question_nr=question_nr,
+                    level_question_num=question_num,
                     answer_type="agent_sub_answer",
                 ),
                 writer,
@@ -113,9 +114,9 @@ def generate_sub_answer(
 
     stop_event = StreamStopInfo(
         stop_reason=StreamStopReason.FINISHED,
-        stream_type="sub_answer",
+        stream_type=StreamType.SUB_ANSWER,
         level=level,
-        level_question_nr=question_nr,
+        level_question_num=question_num,
     )
     write_custom_event("stream_finished", stop_event, writer)
diff --git a/backend/onyx/agents/agent_search/deep_search/initial/generate_initial_answer/edges.py b/backend/onyx/agents/agent_search/deep_search/initial/generate_initial_answer/edges.py
index 55b1fe385..0ef0526b3 100644
--- a/backend/onyx/agents/agent_search/deep_search/initial/generate_initial_answer/edges.py
+++ b/backend/onyx/agents/agent_search/deep_search/initial/generate_initial_answer/edges.py
@@ -35,13 +35,13 @@ def parallelize_initial_sub_question_answering(
                 "answer_query_subgraph",
                 AnswerQuestionInput(
                     question=question,
-                    question_id=make_question_id(0, question_nr + 1),
+                    question_id=make_question_id(0, question_num + 1),
                     log_messages=[
                         f"{edge_start_time} -- Main Edge - Parallelize Initial Sub-question Answering"
                     ],
                 ),
             )
-            for question_nr, question in enumerate(state.initial_sub_questions)
+            for question_num, question in enumerate(state.initial_sub_questions)
         ]
 
     else:
diff --git a/backend/onyx/agents/agent_search/deep_search/initial/generate_initial_answer/nodes/generate_initial_answer.py b/backend/onyx/agents/agent_search/deep_search/initial/generate_initial_answer/nodes/generate_initial_answer.py
index 7a3f8a2f5..fc1d29431 100644
--- a/backend/onyx/agents/agent_search/deep_search/initial/generate_initial_answer/nodes/generate_initial_answer.py
+++ b/backend/onyx/agents/agent_search/deep_search/initial/generate_initial_answer/nodes/generate_initial_answer.py
@@ -112,7 +112,7 @@ def generate_initial_answer(
                     id=tool_response.id,
                     response=tool_response.response,
                     level=0,
-                    level_question_nr=0,  # 0, 0 is the base question
+                    level_question_num=0,  # 0, 0 is the base question
                 ),
                 writer,
             )
@@ -123,7 +123,7 @@ def generate_initial_answer(
             AgentAnswerPiece(
                 answer_piece=UNKNOWN_ANSWER,
                 level=0,
-                level_question_nr=0,
+                level_question_num=0,
                 answer_type="agent_level_answer",
             ),
             writer,
@@ -142,7 +142,7 @@ def generate_initial_answer(
 
         good_qa_list: list[str] = []
 
-        sub_question_nr = 1
+        sub_question_num = 1
 
         for decomp_answer_result in decomp_answer_results:
             decomp_questions.append(decomp_answer_result.question)
@@ -155,10 +155,10 @@ def generate_initial_answer(
                     SUB_QUESTION_ANSWER_TEMPLATE.format(
                         sub_question=decomp_answer_result.question,
                         sub_answer=decomp_answer_result.answer,
-                        sub_question_nr=sub_question_nr,
+                        sub_question_num=sub_question_num,
                     )
                 )
-            sub_question_nr += 1
+            sub_question_num += 1
 
         # Determine which base prompt to use given the sub-question information
         if len(good_qa_list) > 0:
@@ -212,7 +212,7 @@ def generate_initial_answer(
                     AgentAnswerPiece(
                         answer_piece=content,
                         level=0,
-                        level_question_nr=0,
+                        level_question_num=0,
                         answer_type="agent_level_answer",
                     ),
                     writer,
diff --git a/backend/onyx/agents/agent_search/deep_search/initial/generate_sub_answers/edges.py b/backend/onyx/agents/agent_search/deep_search/initial/generate_sub_answers/edges.py
index aa564b4a0..852a2ba50 100644
--- a/backend/onyx/agents/agent_search/deep_search/initial/generate_sub_answers/edges.py
+++ b/backend/onyx/agents/agent_search/deep_search/initial/generate_sub_answers/edges.py
@@ -35,13 +35,13 @@ def parallelize_initial_sub_question_answering(
                 "answer_sub_question_subgraphs",
                 AnswerQuestionInput(
                     question=question,
-                    question_id=make_question_id(0, question_nr + 1),
+                    question_id=make_question_id(0, question_num + 1),
                     log_messages=[
                         f"{edge_start_time} -- Main Edge - Parallelize Initial Sub-question Answering"
                     ],
                 ),
             )
-            for question_nr, question in enumerate(state.initial_sub_questions)
+            for question_num, question in enumerate(state.initial_sub_questions)
         ]
 
     else:
diff --git a/backend/onyx/agents/agent_search/deep_search/initial/generate_sub_answers/nodes/decompose_orig_question.py b/backend/onyx/agents/agent_search/deep_search/initial/generate_sub_answers/nodes/decompose_orig_question.py
index ef600c452..e5cc3e115 100644
--- a/backend/onyx/agents/agent_search/deep_search/initial/generate_sub_answers/nodes/decompose_orig_question.py
+++ b/backend/onyx/agents/agent_search/deep_search/initial/generate_sub_answers/nodes/decompose_orig_question.py
@@ -35,6 +35,7 @@ from onyx.agents.agent_search.shared_graph_utils.utils import (
 from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
 from onyx.chat.models import StreamStopInfo
 from onyx.chat.models import StreamStopReason
+from onyx.chat.models import StreamType
 from onyx.chat.models import SubQuestionPiece
 from onyx.configs.agent_configs import AGENT_NUM_DOCS_FOR_DECOMPOSITION
 
@@ -55,7 +56,6 @@ def decompose_orig_question(
     history = build_history_prompt(graph_config, question)
 
     # Use the initial search results to inform the decomposition
-    sample_doc_str = state.sample_doc_str if hasattr(state, "sample_doc_str") else ""
 
     agent_start_time = datetime.now()
 
     # Initial search to inform decomposition. Just get top 3 fits
@@ -91,7 +91,7 @@ def decompose_orig_question(
             SubQuestionPiece(
                 sub_question=question,
                 level=0,
-                level_question_nr=0,
+                level_question_num=0,
             ),
             writer,
         )
@@ -102,7 +102,7 @@ def decompose_orig_question(
 
     stop_event = StreamStopInfo(
         stop_reason=StreamStopReason.FINISHED,
-        stream_type="sub_questions",
+        stream_type=StreamType.SUB_QUESTIONS,
         level=0,
     )
     write_custom_event("stream_finished", stop_event, writer)
diff --git a/backend/onyx/agents/agent_search/deep_search/initial/generate_sub_answers/nodes/format_initial_sub_answers.py b/backend/onyx/agents/agent_search/deep_search/initial/generate_sub_answers/nodes/format_initial_sub_answers.py
index 2d845be84..101b09e19 100644
--- a/backend/onyx/agents/agent_search/deep_search/initial/generate_sub_answers/nodes/format_initial_sub_answers.py
+++ b/backend/onyx/agents/agent_search/deep_search/initial/generate_sub_answers/nodes/format_initial_sub_answers.py
@@ -22,7 +22,7 @@ def format_initial_sub_answers(
     documents = []
     context_documents = []
     cited_documents = []
-    answer_results = state.answer_results if hasattr(state, "answer_results") else []
+    answer_results = state.answer_results
     for answer_result in answer_results:
         documents.extend(answer_result.verified_reranked_documents)
         context_documents.extend(answer_result.context_documents)
diff --git a/backend/onyx/agents/agent_search/deep_search/main/edges.py b/backend/onyx/agents/agent_search/deep_search/main/edges.py
index 9af3c5995..5c09cac0b 100644
--- a/backend/onyx/agents/agent_search/deep_search/main/edges.py
+++ b/backend/onyx/agents/agent_search/deep_search/main/edges.py
@@ -60,13 +60,13 @@ def parallelize_initial_sub_question_answering(
                 "answer_query_subgraph",
                 AnswerQuestionInput(
                     question=question,
-                    question_id=make_question_id(0, question_nr + 1),
+                    question_id=make_question_id(0, question_num + 1),
                     log_messages=[
                         f"{edge_start_time} -- Main Edge - Parallelize Initial Sub-question Answering"
                     ],
                 ),
             )
-            for question_nr, question in enumerate(state.initial_sub_questions)
+            for question_num, question in enumerate(state.initial_sub_questions)
         ]
 
     else:
@@ -100,13 +100,13 @@ def parallelize_refined_sub_question_answering(
                 "answer_refined_question_subgraphs",
                 AnswerQuestionInput(
                     question=question_data.sub_question,
-                    question_id=make_question_id(1, question_nr),
+                    question_id=make_question_id(1, question_num),
                     log_messages=[
                         f"{edge_start_time} -- Main Edge - Parallelize Refined Sub-question Answering"
                     ],
                 ),
             )
-            for question_nr, question_data in state.refined_sub_questions.items()
+            for question_num, question_data in state.refined_sub_questions.items()
         ]
 
     else:
diff --git a/backend/onyx/agents/agent_search/deep_search/main/nodes/create_refined_sub_questions.py b/backend/onyx/agents/agent_search/deep_search/main/nodes/create_refined_sub_questions.py
index 92cae1505..46e447a59 100644
--- a/backend/onyx/agents/agent_search/deep_search/main/nodes/create_refined_sub_questions.py
+++ b/backend/onyx/agents/agent_search/deep_search/main/nodes/create_refined_sub_questions.py
@@ -103,16 +103,16 @@ def create_refined_sub_questions(
         raise ValueError("LLM response is not a string")
 
     refined_sub_question_dict = {}
-    for sub_question_nr, sub_question in enumerate(parsed_response):
+    for sub_question_num, sub_question in enumerate(parsed_response):
         refined_sub_question = FollowUpSubQuestion(
             sub_question=sub_question,
-            sub_question_id=make_question_id(1, sub_question_nr + 1),
+            sub_question_id=make_question_id(1, sub_question_num + 1),
             verified=False,
             answered=False,
             answer="",
         )
 
-        refined_sub_question_dict[sub_question_nr + 1] = refined_sub_question
+        refined_sub_question_dict[sub_question_num + 1] = refined_sub_question
 
     return RefinedQuestionDecompositionUpdate(
         refined_sub_questions=refined_sub_question_dict,
diff --git a/backend/onyx/agents/agent_search/deep_search/main/nodes/extract_entities_terms.py b/backend/onyx/agents/agent_search/deep_search/main/nodes/extract_entities_terms.py
index 2145815d7..33676ddb5 100644
--- a/backend/onyx/agents/agent_search/deep_search/main/nodes/extract_entities_terms.py
+++ b/backend/onyx/agents/agent_search/deep_search/main/nodes/extract_entities_terms.py
@@ -26,6 +26,7 @@ from onyx.agents.agent_search.shared_graph_utils.utils import format_docs
 from onyx.agents.agent_search.shared_graph_utils.utils import (
     get_langgraph_node_log_string,
 )
+from onyx.configs.constants import NUM_EXPLORATORY_DOCS
 
 
 def extract_entities_terms(
@@ -53,7 +54,7 @@ def extract_entities_terms(
     # first four lines duplicates from generate_initial_answer
     question = graph_config.inputs.search_request.query
-    initial_search_docs = state.exploratory_search_results[:15]
+    initial_search_docs = state.exploratory_search_results[:NUM_EXPLORATORY_DOCS]
 
     # start with the entity/term/extraction
     doc_context = format_docs(initial_search_docs)
diff --git a/backend/onyx/agents/agent_search/deep_search/main/nodes/generate_refined_answer.py b/backend/onyx/agents/agent_search/deep_search/main/nodes/generate_refined_answer.py
index 57edf07b3..b6b2a3f05 100644
--- a/backend/onyx/agents/agent_search/deep_search/main/nodes/generate_refined_answer.py
+++ b/backend/onyx/agents/agent_search/deep_search/main/nodes/generate_refined_answer.py
@@ -112,7 +112,7 @@ def generate_refined_answer(
                     id=tool_response.id,
                     response=tool_response.response,
                     level=1,
-                    level_question_nr=0,  # 0, 0 is the base question
+                    level_question_num=0,  # 0, 0 is the base question
                 ),
                 writer,
             )
@@ -132,10 +132,10 @@ def generate_refined_answer(
     initial_good_sub_questions: list[str] = []
     new_revised_good_sub_questions: list[str] = []
 
-    sub_question_nr = 1
+    sub_question_num = 1
 
     for decomp_answer_result in decomp_answer_results:
-        question_level, question_nr = parse_question_id(
+        question_level, question_num = parse_question_id(
             decomp_answer_result.question_id
         )
 
@@ -155,12 +155,12 @@ def generate_refined_answer(
                 SUB_QUESTION_ANSWER_TEMPLATE_REVISED.format(
                     sub_question=decomp_answer_result.question,
                     sub_answer=decomp_answer_result.answer,
-                    sub_question_nr=sub_question_nr,
+                    sub_question_num=sub_question_num,
                     sub_question_type=sub_question_type,
                 )
             )
 
-        sub_question_nr += 1
+        sub_question_num += 1
 
     initial_good_sub_questions = list(set(initial_good_sub_questions))
     new_revised_good_sub_questions = list(set(new_revised_good_sub_questions))
@@ -239,7 +239,7 @@ def generate_refined_answer(
                 AgentAnswerPiece(
                     answer_piece=content,
                     level=1,
-                    level_question_nr=0,
+                    level_question_num=0,
                     answer_type="agent_level_answer",
                 ),
                 writer,
diff --git a/backend/onyx/agents/agent_search/deep_search/main/nodes/ingest_refined_answers.py b/backend/onyx/agents/agent_search/deep_search/main/nodes/ingest_refined_answers.py
index 6b92defd2..89bbd884c 100644
--- a/backend/onyx/agents/agent_search/deep_search/main/nodes/ingest_refined_answers.py
+++ b/backend/onyx/agents/agent_search/deep_search/main/nodes/ingest_refined_answers.py
@@ -20,7 +20,7 @@ def ingest_refined_answers(
     node_start_time = datetime.now()
 
     documents = []
-    answer_results = state.answer_results if hasattr(state, "answer_results") else []
+    answer_results = state.answer_results
     for answer_result in answer_results:
         documents.extend(answer_result.verified_reranked_documents)
diff --git a/backend/onyx/agents/agent_search/deep_search/main/operations.py b/backend/onyx/agents/agent_search/deep_search/main/operations.py
index af55cd4a8..903e07060 100644
--- a/backend/onyx/agents/agent_search/deep_search/main/operations.py
+++ b/backend/onyx/agents/agent_search/deep_search/main/operations.py
@@ -20,13 +20,13 @@ logger = setup_logger()
 def dispatch_subquestion(
     level: int, writer: StreamWriter
 ) -> Callable[[str, int], None]:
-    def _helper(sub_question_part: str, num: int) -> None:
+    def _helper(sub_question_part: str, sep_num: int) -> None:
         write_custom_event(
             "decomp_qs",
             SubQuestionPiece(
                 sub_question=sub_question_part,
                 level=level,
-                level_question_nr=num,
+                level_question_num=sep_num,
             ),
             writer,
         )
diff --git a/backend/onyx/agents/agent_search/deep_search/shared/expanded_retrieval/nodes/expand_queries.py b/backend/onyx/agents/agent_search/deep_search/shared/expanded_retrieval/nodes/expand_queries.py
index 9306069b6..2f8e8b501 100644
--- a/backend/onyx/agents/agent_search/deep_search/shared/expanded_retrieval/nodes/expand_queries.py
+++ b/backend/onyx/agents/agent_search/deep_search/shared/expanded_retrieval/nodes/expand_queries.py
@@ -41,9 +41,9 @@ def expand_queries(
     llm = graph_config.tooling.fast_llm
     sub_question_id = state.sub_question_id
     if sub_question_id is None:
-        level, question_nr = 0, 0
+        level, question_num = 0, 0
     else:
-        level, question_nr = parse_question_id(sub_question_id)
+        level, question_num = parse_question_id(sub_question_id)
 
     msg = [
         HumanMessage(
@@ -52,7 +52,7 @@ def expand_queries(
     ]
 
     llm_response_list = dispatch_separated(
-        llm.stream(prompt=msg), dispatch_subquery(level, question_nr, writer)
+        llm.stream(prompt=msg), dispatch_subquery(level, question_num, writer)
     )
 
     llm_response = merge_message_runs(llm_response_list, chunk_separator="")[0].content
diff --git a/backend/onyx/agents/agent_search/deep_search/shared/expanded_retrieval/nodes/format_results.py b/backend/onyx/agents/agent_search/deep_search/shared/expanded_retrieval/nodes/format_results.py
index c7a5b6b92..07caace7a 100644
--- a/backend/onyx/agents/agent_search/deep_search/shared/expanded_retrieval/nodes/format_results.py
+++ b/backend/onyx/agents/agent_search/deep_search/shared/expanded_retrieval/nodes/format_results.py
@@ -30,7 +30,7 @@ def format_results(
     config: RunnableConfig,
     writer: StreamWriter = lambda _: None,
 ) -> ExpandedRetrievalUpdate:
-    level, question_nr = parse_question_id(state.sub_question_id or "0_0")
+    level, question_num = parse_question_id(state.sub_question_id or "0_0")
     query_info = get_query_info(state.query_retrieval_results)
 
     graph_config = cast(GraphConfig, config["metadata"]["config"])
@@ -38,7 +38,7 @@ def format_results(
 
     reranked_documents = state.reranked_documents
 
-    if not (level == 0 and question_nr == 0):
+    if not (level == 0 and question_num == 0):
         if len(reranked_documents) == 0:
             # The sub-question is used as the last query. If no verified documents are found, stream
             # the top 3 for that one. We may want to revisit this.
@@ -63,7 +63,7 @@ def format_results(
                 id=tool_response.id,
                 response=tool_response.response,
                 level=level,
-                level_question_nr=question_nr,
+                level_question_num=question_num,
             ),
             writer,
         )
diff --git a/backend/onyx/agents/agent_search/deep_search/shared/expanded_retrieval/operations.py b/backend/onyx/agents/agent_search/deep_search/shared/expanded_retrieval/operations.py
index 79471ee6d..3e759e7fc 100644
--- a/backend/onyx/agents/agent_search/deep_search/shared/expanded_retrieval/operations.py
+++ b/backend/onyx/agents/agent_search/deep_search/shared/expanded_retrieval/operations.py
@@ -15,7 +15,7 @@ logger = setup_logger()
 
 
 def dispatch_subquery(
-    level: int, question_nr: int, writer: StreamWriter
+    level: int, question_num: int, writer: StreamWriter
 ) -> Callable[[str, int], None]:
     def helper(token: str, num: int) -> None:
         write_custom_event(
@@ -23,7 +23,7 @@ def dispatch_subquery(
             SubQueryPiece(
                 sub_query=token,
                 level=level,
-                level_question_nr=question_nr,
+                level_question_num=question_num,
                 query_id=num,
             ),
             writer,
diff --git a/backend/onyx/agents/agent_search/orchestration/nodes/llm_tool_choice.py b/backend/onyx/agents/agent_search/orchestration/nodes/llm_tool_choice.py
index 8a5e0bb2a..1b2e3eed5 100644
--- a/backend/onyx/agents/agent_search/orchestration/nodes/llm_tool_choice.py
+++ b/backend/onyx/agents/agent_search/orchestration/nodes/llm_tool_choice.py
@@ -112,7 +112,7 @@ def llm_tool_choice(
     # If no tool calls are emitted by the LLM, we should not choose a tool
     if len(tool_message.tool_calls) == 0:
-        logger.info("No tool calls emitted by LLM")
+        logger.debug("No tool calls emitted by LLM")
         return ToolChoiceUpdate(
             tool_choice=None,
         )
@@ -142,7 +142,7 @@ def llm_tool_choice(
         f"Tool call attempted with tool {selected_tool}, request {selected_tool_call_request}"
     )
 
-    logger.info(f"Selected tool: {selected_tool.name}")
+    logger.debug(f"Selected tool: {selected_tool.name}")
     logger.debug(f"Selected tool call request: {selected_tool_call_request}")
 
     return ToolChoiceUpdate(
diff --git a/backend/onyx/agents/agent_search/run_graph.py b/backend/onyx/agents/agent_search/run_graph.py
index c44e9a4de..f415a4739 100644
--- a/backend/onyx/agents/agent_search/run_graph.py
+++ b/backend/onyx/agents/agent_search/run_graph.py
@@ -29,6 +29,7 @@ from onyx.configs.agent_configs import ALLOW_REFINEMENT
 from onyx.configs.agent_configs import INITIAL_SEARCH_DECOMPOSITION_ENABLED
 from onyx.context.search.models import SearchRequest
 from onyx.db.engine import get_session_context_manager
+from onyx.llm.factory import get_default_llms
 from onyx.tools.tool_runner import ToolCallKickoff
 from onyx.utils.logger import setup_logger
 
@@ -144,8 +145,6 @@ def run_basic_graph(
 
 
 if __name__ == "__main__":
-    from onyx.llm.factory import get_default_llms
-
     for _ in range(1):
         query_start_time = datetime.now()
         logger.debug(f"Start at {query_start_time}")
@@ -188,29 +187,29 @@ if __name__ == "__main__":
                 elif isinstance(output, ExtendedToolResponse):
                     tool_responses.append(output.response)
                     logger.info(
-                        f" ---- ET {output.level} - {output.level_question_nr} | "
+                        f" ---- ET {output.level} - {output.level_question_num} | "
                     )
                 elif isinstance(output, SubQueryPiece):
                     logger.info(
-                        f"Sq {output.level} - {output.level_question_nr} - {output.sub_query} | "
+                        f"Sq {output.level} - {output.level_question_num} - {output.sub_query} | "
                     )
                 elif isinstance(output, SubQuestionPiece):
                     logger.info(
-                        f"SQ {output.level} - {output.level_question_nr} - {output.sub_question} | "
+                        f"SQ {output.level} - {output.level_question_num} - {output.sub_question} | "
                     )
                 elif (
                     isinstance(output, AgentAnswerPiece)
                     and output.answer_type == "agent_sub_answer"
                 ):
                     logger.info(
-                        f" ---- SA {output.level} - {output.level_question_nr} {output.answer_piece} | "
+                        f" ---- SA {output.level} - {output.level_question_num} {output.answer_piece} | "
                     )
                 elif (
                     isinstance(output, AgentAnswerPiece)
                     and output.answer_type == "agent_level_answer"
                 ):
                     logger.info(
-                        f" ---------- FA {output.level} - {output.level_question_nr} {output.answer_piece} | "
+                        f" ---------- FA {output.level} - {output.level_question_num} {output.answer_piece} | "
                     )
                 elif isinstance(output, RefinedAnswerImprovement):
                     logger.info(
diff --git a/backend/onyx/agents/agent_search/shared_graph_utils/agent_prompt_ops.py b/backend/onyx/agents/agent_search/shared_graph_utils/agent_prompt_ops.py
index f2774183a..3398c6b81 100644
--- a/backend/onyx/agents/agent_search/shared_graph_utils/agent_prompt_ops.py
+++ b/backend/onyx/agents/agent_search/shared_graph_utils/agent_prompt_ops.py
@@ -10,7 +10,6 @@ from onyx.agents.agent_search.shared_graph_utils.prompts import HISTORY_PROMPT
 from onyx.agents.agent_search.shared_graph_utils.utils import (
     get_persona_agent_prompt_expressions,
 )
-from onyx.agents.agent_search.shared_graph_utils.utils import get_today_prompt
 from onyx.agents.agent_search.shared_graph_utils.utils import remove_document_citations
 from onyx.agents.agent_search.shared_graph_utils.utils import summarize_history
 from onyx.configs.agent_configs import AGENT_MAX_STATIC_HISTORY_WORD_LENGTH
@@ -19,6 +18,7 @@ from onyx.llm.interfaces import LLMConfig
 from onyx.llm.utils import get_max_input_tokens
 from onyx.natural_language_processing.utils import get_tokenizer
 from onyx.natural_language_processing.utils import tokenizer_trim_content
+from onyx.prompts.prompt_utils import build_date_time_string
 
 
 def build_sub_question_answer_prompt(
@@ -32,12 +32,12 @@ def build_sub_question_answer_prompt(
         content=persona_specification,
     )
 
-    date_str = get_today_prompt()
+    date_str = build_date_time_string()
 
     docs_format_list = [
-        f"""Document Number: [D{doc_nr + 1}]\n
+        f"""Document Number: [D{doc_num + 1}]\n
          Content: {doc.combined_content}\n\n"""
-        for doc_nr, doc in enumerate(docs)
+        for doc_num, doc in enumerate(docs)
     ]
 
     docs_str = "\n\n".join(docs_format_list)
@@ -126,7 +126,7 @@ def get_prompt_enrichment_components(
 
     history = build_history_prompt(config, config.inputs.search_request.query)
 
-    date_str = get_today_prompt()
+    date_str = build_date_time_string()
 
     return AgentPromptEnrichmentComponents(
         persona_prompts=persona_prompts,
diff --git a/backend/onyx/agents/agent_search/shared_graph_utils/prompts.py b/backend/onyx/agents/agent_search/shared_graph_utils/prompts.py
index 2ce6c4c5a..09723f8a7 100644
--- a/backend/onyx/agents/agent_search/shared_graph_utils/prompts.py
+++ b/backend/onyx/agents/agent_search/shared_graph_utils/prompts.py
@@ -765,11 +765,11 @@ You are an assistant for question-answering tasks. Here is more information abou
 """
 
 SUB_QUESTION_ANSWER_TEMPLATE = """\n
-Sub-Question: Q{sub_question_nr}\n Sub-Question:\n - \n{sub_question}\n --\nAnswer:\n -\n {sub_answer}\n\n
+Sub-Question: Q{sub_question_num}\n Sub-Question:\n - \n{sub_question}\n --\nAnswer:\n -\n {sub_answer}\n\n
 """
 
 SUB_QUESTION_ANSWER_TEMPLATE_REVISED = """\n
-Sub-Question: Q{sub_question_nr}\n
+Sub-Question: Q{sub_question_num}\n
 Type: \n----\n {sub_question_type}
@@ -787,12 +787,12 @@ Sub-Question:
 
 SUB_QUESTION_ANSWER_TEMPLATE_REVISED = """\n
-Sub-Question: Q{sub_question_nr}\n Type: {sub_question_type}\n Sub-Question:\n
+Sub-Question: Q{sub_question_num}\n Type: {sub_question_type}\n Sub-Question:\n
 - \n{sub_question}\n --\nAnswer:\n -\n {sub_answer}\n\n
 """
 
 SUB_QUESTION_SEARCH_RESULTS_TEMPLATE = """\n
-Sub-Question: Q{sub_question_nr}\n Sub-Question:\n - \n{sub_question}\n --\nRelevant Documents:\n
+Sub-Question: Q{sub_question_num}\n Sub-Question:\n - \n{sub_question}\n --\nRelevant Documents:\n
 -\n {formatted_sub_question_docs}\n\n
 """
diff --git a/backend/onyx/agents/agent_search/shared_graph_utils/utils.py b/backend/onyx/agents/agent_search/shared_graph_utils/utils.py
index 023533651..50114ba87 100644
--- a/backend/onyx/agents/agent_search/shared_graph_utils/utils.py
+++ b/backend/onyx/agents/agent_search/shared_graph_utils/utils.py
@@ -33,7 +33,6 @@ from onyx.agents.agent_search.shared_graph_utils.prompts import (
 from onyx.agents.agent_search.shared_graph_utils.prompts import (
     ASSISTANT_SYSTEM_PROMPT_PERSONA,
 )
-from onyx.agents.agent_search.shared_graph_utils.prompts import DATE_PROMPT
 from onyx.agents.agent_search.shared_graph_utils.prompts import (
     HISTORY_CONTEXT_SUMMARY_PROMPT,
 )
@@ -45,11 +44,13 @@ from onyx.chat.models import PromptConfig
 from onyx.chat.models import SectionRelevancePiece
 from onyx.chat.models import StreamStopInfo
 from onyx.chat.models import StreamStopReason
+from onyx.chat.models import StreamType
 from onyx.chat.prompt_builder.answer_prompt_builder import AnswerPromptBuilder
 from onyx.configs.chat_configs import CHAT_TARGET_CHUNK_PERCENTAGE
 from onyx.configs.chat_configs import MAX_CHUNKS_FED_TO_CHAT
 from onyx.configs.constants import DEFAULT_PERSONA_ID
 from onyx.configs.constants import DISPATCH_SEP_CHAR
+from onyx.configs.constants import FORMAT_DOCS_SEPARATOR
 from onyx.context.search.enums import LLMEvaluationType
 from onyx.context.search.models import InferenceSection
 from onyx.context.search.models import RetrievalDetails
@@ -81,10 +82,10 @@ def normalize_whitespace(text: str) -> str:
 def format_docs(docs: Sequence[InferenceSection]) -> str:
     formatted_doc_list = []
 
-    for doc_nr, doc in enumerate(docs):
-        formatted_doc_list.append(f"Document D{doc_nr + 1}:\n{doc.combined_content}")
+    for doc_num, doc in enumerate(docs):
+        formatted_doc_list.append(f"Document D{doc_num + 1}:\n{doc.combined_content}")
 
-    return "\n\n".join(formatted_doc_list)
+    return FORMAT_DOCS_SEPARATOR.join(formatted_doc_list)
 
 
 def format_docs_content_flat(docs: Sequence[InferenceSection]) -> str:
@@ -93,7 +94,7 @@ def format_docs_content_flat(docs: Sequence[InferenceSection]) -> str:
     for _, doc in enumerate(docs):
         formatted_doc_list.append(f"\n...{doc.combined_content}\n")
 
-    return "\n\n".join(formatted_doc_list)
+    return FORMAT_DOCS_SEPARATOR.join(formatted_doc_list)
 
 
 def clean_and_parse_list_string(json_string: str) -> list[dict]:
@@ -289,20 +290,27 @@ def get_persona_agent_prompt_expressions(persona: Persona | None) -> PersonaExpr
     )
 
 
-def make_question_id(level: int, question_nr: int) -> str:
-    return f"{level}_{question_nr}"
+def make_question_id(level: int, question_num: int) -> str:
+    return f"{level}_{question_num}"
 
 
 def parse_question_id(question_id: str) -> tuple[int, int]:
-    level, question_nr = question_id.split("_")
-    return int(level), int(question_nr)
+    level, question_num = question_id.split("_")
+    return int(level), int(question_num)
 
 
 def _dispatch_nonempty(
-    content: str, dispatch_event: Callable[[str, int], None], num: int
+    content: str, dispatch_event: Callable[[str, int], None], sep_num: int
 ) -> None:
+    """
+    Dispatch a content string if it is not empty using the given callback.
+    This function is used in the context of dispatching some arbitrary number
+    of similar objects which are separated by a separator during the LLM stream.
+    The callback expects a sep_num denoting which object is being dispatched; these
+    numbers go from 1 to however many strings the LLM decides to stream.
+    """
     if content != "":
-        dispatch_event(content, num)
+        dispatch_event(content, sep_num)
 
 
 def dispatch_separated(
@@ -331,16 +339,12 @@ def dispatch_separated(
 def dispatch_main_answer_stop_info(level: int, writer: StreamWriter) -> None:
     stop_event = StreamStopInfo(
         stop_reason=StreamStopReason.FINISHED,
-        stream_type="main_answer",
+        stream_type=StreamType.MAIN_ANSWER,
         level=level,
     )
     write_custom_event("stream_finished", stop_event, writer)
 
 
-def get_today_prompt() -> str:
-    return DATE_PROMPT.format(date=datetime.now().strftime("%A, %B %d, %Y"))
-
-
 def retrieve_search_docs(
     search_tool: SearchTool, question: str
 ) -> list[InferenceSection]:
@@ -379,13 +383,8 @@ def summarize_history(
     )
 
     history_response = model.invoke(history_context_prompt)
-
-    if isinstance(history_response.content, str):
-        history_context_response_str = history_response.content
-    else:
-        history_context_response_str = ""
-
-    return history_context_response_str
+    assert isinstance(history_response.content, str)
+    return history_response.content
 
 
 # taken from langchain_core.runnables.schema
diff --git a/backend/onyx/chat/answer.py b/backend/onyx/chat/answer.py
index d5ca27512..88be1a9f7 100644
--- a/backend/onyx/chat/answer.py
+++ b/backend/onyx/chat/answer.py
@@ -19,6 +19,7 @@ from onyx.chat.models import CitationInfo
 from onyx.chat.models import OnyxAnswerPiece
 from onyx.chat.models import StreamStopInfo
 from onyx.chat.models import StreamStopReason
+from onyx.chat.models import SubQuestionKey
 from onyx.chat.prompt_builder.answer_prompt_builder import AnswerPromptBuilder
 from onyx.configs.constants import BASIC_KEY
 from onyx.context.search.models import SearchRequest
@@ -32,6 +33,8 @@ from onyx.utils.logger import setup_logger
 
 logger = setup_logger()
 
+BASIC_SQ_KEY = SubQuestionKey(level=BASIC_KEY[0], question_num=BASIC_KEY[1])
+
 
 class Answer:
     def __init__(
@@ -164,6 +167,7 @@ class Answer:
                 and packet.answer_piece
                 and packet.answer_type == "agent_level_answer"
             ):
+                assert packet.level is not None
                 answer_by_level[packet.level] += packet.answer_piece
             elif isinstance(packet, OnyxAnswerPiece) and packet.answer_piece:
                 answer_by_level[BASIC_KEY[0]] += packet.answer_piece
@@ -178,19 +182,20 @@ class Answer:
 
         return citations
 
-    # TODO: replace tuple of ints with SubQuestionId EVERYWHERE
-    def citations_by_subquestion(self) -> dict[tuple[int, int], list[CitationInfo]]:
+    def citations_by_subquestion(self) -> dict[SubQuestionKey, list[CitationInfo]]:
         citations_by_subquestion: dict[
-            tuple[int, int], list[CitationInfo]
+            SubQuestionKey, list[CitationInfo]
         ] = defaultdict(list)
         for packet in self.processed_streamed_output:
             if isinstance(packet, CitationInfo):
-                if packet.level_question_nr is not None and packet.level is not None:
+                if packet.level_question_num is not None and packet.level is not None:
                     citations_by_subquestion[
-                        (packet.level, packet.level_question_nr)
+                        SubQuestionKey(
+                            level=packet.level, question_num=packet.level_question_num
+                        )
                     ].append(packet)
                 elif packet.level is None:
-                    citations_by_subquestion[BASIC_KEY].append(packet)
+                    citations_by_subquestion[BASIC_SQ_KEY].append(packet)
         return citations_by_subquestion
 
     def is_cancelled(self) -> bool:
diff --git a/backend/onyx/chat/models.py b/backend/onyx/chat/models.py
index 78a2b15da..4ba3d742a 100644
--- a/backend/onyx/chat/models.py
+++ b/backend/onyx/chat/models.py
@@ -16,6 +16,8 @@ from onyx.context.search.enums import QueryFlow
 from onyx.context.search.enums import RecencyBiasSetting
 from onyx.context.search.enums import SearchType
 from onyx.context.search.models import RetrievalDocs
+from onyx.db.models import SearchDoc as DbSearchDoc
+from onyx.file_store.models import FileDescriptor
 from onyx.llm.override_models import PromptOverride
 from onyx.tools.models import ToolCallFinalResult
 from onyx.tools.models import ToolCallKickoff
@@ -41,16 +43,19 @@ class LlmDoc(BaseModel):
     match_highlights: list[str] | None
 
 
+class SubQuestionIdentifier(BaseModel):
+    level: int | None = None
+    level_question_num: int | None = None
+
+
 # First chunk of info for streaming QA
-class QADocsResponse(RetrievalDocs):
+class QADocsResponse(RetrievalDocs, SubQuestionIdentifier):
     rephrased_query: str | None = None
     predicted_flow: QueryFlow | None
     predicted_search: SearchType | None
     applied_source_filters: list[DocumentSource] | None
     applied_time_cutoff: datetime | None
     recency_bias_multiplier: float
-    level: int | None = None
-    level_question_nr: int | None = None
 
     def model_dump(self, *args: list, **kwargs: dict[str, Any]) -> dict[str, Any]:  # type: ignore
         initial_dict = super().model_dump(mode="json", *args, **kwargs)  # type: ignore
@@ -67,13 +72,16 @@ class StreamStopReason(Enum):
     FINISHED = "finished"
 
 
-class StreamStopInfo(BaseModel):
+class StreamType(Enum):
+    SUB_QUESTIONS = "sub_questions"
+    SUB_ANSWER = "sub_answer"
+    MAIN_ANSWER = "main_answer"
+
+
+class StreamStopInfo(SubQuestionIdentifier):
     stop_reason: StreamStopReason
 
-    stream_type: Literal["", "sub_questions", "sub_answer", "main_answer"] = ""
-    # used to identify the stream that was stopped for agent search
-    level: int | None = None
-    level_question_nr: int | None = None
+    stream_type: StreamType = StreamType.MAIN_ANSWER
 
     def model_dump(self, *args: list, **kwargs: dict[str, Any]) -> dict[str, Any]:  # type: ignore
         data = super().model_dump(mode="json", *args, **kwargs)  # type: ignore
@@ -114,11 +122,9 @@ class OnyxAnswerPiece(BaseModel):
 
 # An intermediate representation of citations, later translated into
 # a mapping of the citation [n] number to SearchDoc
-class CitationInfo(BaseModel):
+class CitationInfo(SubQuestionIdentifier):
     citation_num: int
     document_id: str
-    level: int | None = None
-    level_question_nr: int | None = None
 
 
 class AllCitations(BaseModel):
@@ -310,29 +316,22 @@ class PromptConfig(BaseModel):
     model_config = ConfigDict(frozen=True)
 
 
-class SubQueryPiece(BaseModel):
+class SubQueryPiece(SubQuestionIdentifier):
     sub_query: str
-    level: int
-    level_question_nr: int
     query_id: int
 
 
-class AgentAnswerPiece(BaseModel):
+class AgentAnswerPiece(SubQuestionIdentifier):
     answer_piece: str
-    level: int
-    level_question_nr: int
     answer_type: Literal["agent_sub_answer", "agent_level_answer"]
 
 
-class SubQuestionPiece(BaseModel):
+class SubQuestionPiece(SubQuestionIdentifier):
     sub_question: str
-    level: int
-    level_question_nr: int
 
 
-class ExtendedToolResponse(ToolResponse):
-    level: int
-    level_question_nr: int
+class ExtendedToolResponse(ToolResponse, SubQuestionIdentifier):
+    pass
 
 
 class RefinedAnswerImprovement(BaseModel):
@@ -363,3 +362,29 @@ ResponsePart = (
 )
 
 AnswerStream = Iterator[AnswerPacket]
+
+
+class AnswerPostInfo(BaseModel):
+    ai_message_files: list[FileDescriptor]
+    qa_docs_response: QADocsResponse | None = None
+    reference_db_search_docs: list[DbSearchDoc] | None = None
+    dropped_indices: list[int] | None = None
+    tool_result: ToolCallFinalResult | None = None
+    message_specific_citations: MessageSpecificCitations | None = None
+
+    class Config:
+        arbitrary_types_allowed = True
+
+
+class SubQuestionKey(BaseModel):
+    level: int
+    question_num: int
+
+    def __hash__(self) -> int:
+        return hash((self.level, self.question_num))
+
+    def __eq__(self, other: object) -> bool:
+        return isinstance(other, SubQuestionKey) and (
+            self.level,
+            self.question_num,
+        ) == (other.level, other.question_num)
diff --git a/backend/onyx/chat/process_message.py b/backend/onyx/chat/process_message.py
index f4e06cf28..aaab9f9b2 100644
--- a/backend/onyx/chat/process_message.py
+++ b/backend/onyx/chat/process_message.py
@@ -2,7 +2,6 @@ import traceback
 from collections import defaultdict
 from collections.abc import Callable
 from collections.abc import Iterator
-from dataclasses import dataclass
 from functools import partial
 from typing import cast
 
@@ -13,6 +12,7 @@ from onyx.chat.chat_utils import create_chat_chain
 from onyx.chat.chat_utils import create_temporary_persona
 from onyx.chat.models import AgentSearchPacket
 from onyx.chat.models import AllCitations
+from onyx.chat.models import AnswerPostInfo
 from onyx.chat.models import AnswerStyleConfig
 from onyx.chat.models import ChatOnyxBotResponse
 from onyx.chat.models import CitationConfig
@@ -33,6 +33,7 @@ from onyx.chat.models import RefinedAnswerImprovement
 from onyx.chat.models import StreamingError
 from onyx.chat.models import StreamStopInfo
 from onyx.chat.models import StreamStopReason
+from onyx.chat.models import SubQuestionKey
 from onyx.chat.prompt_builder.answer_prompt_builder import AnswerPromptBuilder
 from onyx.chat.prompt_builder.answer_prompt_builder import default_build_system_message
 from onyx.chat.prompt_builder.answer_prompt_builder import default_build_user_message
@@ -196,9 +197,9 @@ def _handle_search_tool_response_summary(
         for db_search_doc in reference_db_search_docs
     ]
 
-    level, question_nr = None, None
+    level, question_num = None, None
     if isinstance(packet, ExtendedToolResponse):
-        level, question_nr = packet.level, packet.level_question_nr
+        level, question_num = packet.level, packet.level_question_num
     return (
         QADocsResponse(
             rephrased_query=response_sumary.rephrased_query,
@@ -209,7 +210,7 @@ def _handle_search_tool_response_summary(
             applied_time_cutoff=response_sumary.final_filters.time_cutoff,
             recency_bias_multiplier=response_sumary.recency_bias_multiplier,
             level=level,
-            level_question_nr=question_nr,
+            level_question_num=question_num,
         ),
         reference_db_search_docs,
         dropped_inds,
@@ -310,17 +311,6 @@ ChatPacket = (
 ChatPacketStream = Iterator[ChatPacket]
 
 
-# can't store a DbSearchDoc in a Pydantic BaseModel
-@dataclass
-class AnswerPostInfo:
-    ai_message_files: list[FileDescriptor]
-    qa_docs_response: QADocsResponse | None = None
-    reference_db_search_docs: list[DbSearchDoc] | None = None
-    dropped_indices: list[int] | None = None
-    tool_result: ToolCallFinalResult | None = None
-    message_specific_citations: MessageSpecificCitations | None = None
-
-
 def stream_chat_message_objects(
     new_msg_req: CreateChatMessageRequest,
     user: User | None,
@@ -794,18 +784,22 @@ def stream_chat_message_objects(
         # tool_result = None
 
         # TODO: different channels for stored info when it's coming from the agent flow
-        info_by_subq: dict[tuple[int, int], AnswerPostInfo] = defaultdict(
+        info_by_subq: dict[SubQuestionKey, AnswerPostInfo] = defaultdict(
             lambda: AnswerPostInfo(ai_message_files=[])
         )
        refined_answer_improvement = True
        for packet in answer.processed_streamed_output:
            if isinstance(packet, ToolResponse):
-                level, level_question_nr = (
-                    (packet.level, packet.level_question_nr)
+                level, level_question_num = (
+                    (packet.level, packet.level_question_num)
                     if isinstance(packet, ExtendedToolResponse)
                     else BASIC_KEY
                 )
-                info = info_by_subq[(level, level_question_nr)]
+                assert level is not None
+                assert level_question_num is not None
+                info = info_by_subq[
+                    SubQuestionKey(level=level, question_num=level_question_num)
+                ]
                 # TODO: don't need to dedupe here when we do it in agent flow
                 if packet.id == SEARCH_RESPONSE_SUMMARY_ID:
                     (
@@ -928,13 +922,15 @@ def stream_chat_message_objects(
                 yield packet
             else:
                 if isinstance(packet, ToolCallFinalResult):
-                    level, level_question_nr = (
-                        (packet.level, packet.level_question_nr)
+                    level, level_question_num = (
+                        (packet.level, packet.level_question_num)
                         if packet.level is not None
-                        and packet.level_question_nr is not None
+                        and packet.level_question_num is not None
                         else BASIC_KEY
                     )
-                    info = info_by_subq[(level, level_question_nr)]
+                    info = info_by_subq[
+                        SubQuestionKey(level=level, question_num=level_question_num)
+                    ]
                     info.tool_result = packet
                 yield cast(ChatPacket, packet)
         logger.debug("Reached end of stream")
@@ -971,26 +967,30 @@ def stream_chat_message_objects(
                 tool_name_to_tool_id[tool.name] = tool_id
 
         subq_citations = answer.citations_by_subquestion()
-        for pair in subq_citations:
-            level, level_question_nr = pair
-            info = info_by_subq[(level, level_question_nr)]
+        for subq_key in subq_citations:
+            info = info_by_subq[subq_key]
             logger.debug("Post-LLM answer processing")
             if info.reference_db_search_docs:
                 info.message_specific_citations = _translate_citations(
-                    citations_list=subq_citations[pair],
+                    citations_list=subq_citations[subq_key],
                     db_docs=info.reference_db_search_docs,
                 )
 
             # TODO: AllCitations should contain subq info?
             if not answer.is_cancelled():
-                yield AllCitations(citations=subq_citations[pair])
+                yield AllCitations(citations=subq_citations[subq_key])
 
         # Saving Gen AI answer and responding with message info
         info = (
-            info_by_subq[BASIC_KEY]
+            info_by_subq[SubQuestionKey(level=BASIC_KEY[0], question_num=BASIC_KEY[1])]
             if BASIC_KEY in info_by_subq
-            else info_by_subq[AGENT_SEARCH_INITIAL_KEY]
+            else info_by_subq[
+                SubQuestionKey(
+                    level=AGENT_SEARCH_INITIAL_KEY[0],
+                    question_num=AGENT_SEARCH_INITIAL_KEY[1],
+                )
+            ]
         )
         gen_ai_response_message = partial_response(
             message=answer.llm_answer,
@@ -1025,7 +1025,11 @@ def stream_chat_message_objects(
             agent_answers = answer.llm_answer_by_level()
             while next_level in agent_answers:
                 next_answer = agent_answers[next_level]
-                info = info_by_subq[(next_level, AGENT_SEARCH_INITIAL_KEY[1])]
+                info = info_by_subq[
+                    SubQuestionKey(
+                        level=next_level, question_num=AGENT_SEARCH_INITIAL_KEY[1]
+                    )
+                ]
                 next_answer_message = create_new_chat_message(
                     chat_session_id=chat_session_id,
                     parent_message=prev_message,
diff --git a/backend/onyx/configs/constants.py b/backend/onyx/configs/constants.py
index cfaf0f81d..80960a6f1 100644
--- a/backend/onyx/configs/constants.py
+++ b/backend/onyx/configs/constants.py
@@ -43,6 +43,8 @@ BASIC_KEY = (-1, -1)
 AGENT_SEARCH_INITIAL_KEY = (0, 0)
 CANCEL_CHECK_INTERVAL = 20
 DISPATCH_SEP_CHAR = "\n"
+FORMAT_DOCS_SEPARATOR = "\n\n"
+NUM_EXPLORATORY_DOCS = 15
 # Postgres connection constants for application_name
 POSTGRES_WEB_APP_NAME = "web"
 POSTGRES_INDEXER_APP_NAME = "indexer"
diff --git a/backend/onyx/db/chat.py b/backend/onyx/db/chat.py
index 23710334f..947a870a9 100644
--- a/backend/onyx/db/chat.py
+++ b/backend/onyx/db/chat.py
@@ -889,7 +889,7 @@ def translate_db_sub_questions_to_server_objects(
         sub_questions.append(
             SubQuestionDetail(
                 level=sub_question.level,
-                level_question_nr=sub_question.level_question_num,
+                level_question_num=sub_question.level_question_num,
                 question=sub_question.sub_question,
                 answer=sub_question.sub_answer,
                 sub_queries=sub_queries,
@@ -1012,7 +1012,7 @@ def log_agent_sub_question_results(
     now = datetime.now()
 
     for sub_question_answer_result in sub_question_answer_results:
-        level, level_question_nr = [
+        level, level_question_num = [
            int(x) for x in sub_question_answer_result.question_id.split("_")
        ]
        sub_question = sub_question_answer_result.question
@@ -1025,7 +1025,7 @@ def log_agent_sub_question_results(
             chat_session_id=chat_session_id,
             primary_question_id=primary_message_id,
             level=level,
-            level_question_num=level_question_nr,
+            level_question_num=level_question_num,
             sub_question=sub_question,
             sub_answer=sub_answer,
             sub_question_doc_results=sub_document_results,
         )
 
         db_session.add(sub_question_object)
         db_session.commit()
-        # db_session.flush()
 
         sub_question_id = sub_question_object.id
 
@@ -1047,7 +1046,6 @@ def log_agent_sub_question_results(
 
         db_session.add(sub_query_object)
         db_session.commit()
-        # db_session.flush()
 
         search_docs = chunks_or_sections_to_search_docs(sub_query.search_results)
         for doc in search_docs:
diff --git a/backend/onyx/server/query_and_chat/chat_backend.py b/backend/onyx/server/query_and_chat/chat_backend.py
index 355fe0be3..0333e471e 100644
--- a/backend/onyx/server/query_and_chat/chat_backend.py
+++ b/backend/onyx/server/query_and_chat/chat_backend.py
@@ -429,8 +429,6 @@ def handle_new_chat_message(
             ),
             is_connected=is_connected_func,
         ):
-            # with open('chat_packets.log', 'a') as log_file:
-            #     log_file.write(json.dumps(packet) + '\n')
             yield json.dumps(packet) if isinstance(packet, dict) else packet
 
     except Exception as e:
diff --git a/backend/onyx/server/query_and_chat/models.py b/backend/onyx/server/query_and_chat/models.py
index 24f374c7e..cdbdf354b 100644
--- a/backend/onyx/server/query_and_chat/models.py
+++ b/backend/onyx/server/query_and_chat/models.py
@@ -215,7 +215,7 @@ class SubQueryDetail(BaseModel):
 
 class SubQuestionDetail(BaseModel):
     level: int
-    level_question_nr: int
+    level_question_num: int
     question: str
     answer: str
     sub_queries: list[SubQueryDetail] | None = None
diff --git a/backend/onyx/tools/models.py b/backend/onyx/tools/models.py
index 23f80aee3..a8918b691 100644
--- a/backend/onyx/tools/models.py
+++ b/backend/onyx/tools/models.py
@@ -43,7 +43,7 @@ class ToolCallFinalResult(ToolCallKickoff):
     )
     # agentic additions; only need to set during agentic tool calls
     level: int | None = None
-    level_question_nr: int | None = None
+    level_question_num: int | None = None
 
 
 class DynamicSchemaInfo(BaseModel):
diff --git a/web/src/app/chat/ChatPage.tsx b/web/src/app/chat/ChatPage.tsx
index b5e72865e..99682400e 100644
--- a/web/src/app/chat/ChatPage.tsx
+++ b/web/src/app/chat/ChatPage.tsx
@@ -277,10 +277,10 @@ export function ChatPage({
           (assistant) => assistant.id === existingChatSessionAssistantId
         )
       : defaultAssistantId !== undefined
-      ? availableAssistants.find(
-          (assistant) => assistant.id === defaultAssistantId
-        )
-      : undefined
+        ? availableAssistants.find(
+            (assistant) => assistant.id === defaultAssistantId
+          )
+        : undefined
   );
 
   // Gather default temperature settings
   const search_param_temperature = searchParams.get(
@@ -290,12 +290,12 @@ export function ChatPage({
   const defaultTemperature = search_param_temperature
     ? parseFloat(search_param_temperature)
     : selectedAssistant?.tools.some(
-        (tool) =>
-          tool.in_code_tool_id === SEARCH_TOOL_ID ||
-          tool.in_code_tool_id === INTERNET_SEARCH_TOOL_ID
-      )
-    ? 0
-    : 0.7;
+          (tool) =>
+            tool.in_code_tool_id === SEARCH_TOOL_ID ||
+            tool.in_code_tool_id === INTERNET_SEARCH_TOOL_ID
+        )
+      ? 0
+      : 0.7;
 
   const setSelectedAssistantFromId = (assistantId: number) => {
     // NOTE: also intentionally look through available assistants here, so that
@@ -1234,8 +1234,8 @@ export function ChatPage({
     const currentAssistantId = alternativeAssistantOverride
       ? alternativeAssistantOverride.id
      : alternativeAssistant
-      ? alternativeAssistant.id
-      : liveAssistant.id;
+        ? alternativeAssistant.id
+        : liveAssistant.id;
 
     resetInputBar();
     let messageUpdates: Message[] | null = null;
@@ -1427,7 +1427,7 @@ export function ChatPage({
         // Continuously refine the sub_questions based on the packets that we receive
         if (
           Object.hasOwn(packet, "stop_reason") &&
-          Object.hasOwn(packet, "level_question_nr")
+          Object.hasOwn(packet, "level_question_num")
         ) {
           sub_questions = constructSubQuestions(
             sub_questions,
@@ -1471,8 +1471,8 @@ export function ChatPage({
           }
         } else if (
           Object.hasOwn(packet, "top_documents") &&
-          Object.hasOwn(packet, "level_question_nr") &&
-          (packet as DocumentsResponse).level_question_nr != undefined
+          Object.hasOwn(packet, "level_question_num") &&
+          (packet as DocumentsResponse).level_question_num != undefined
         ) {
           const documentsResponse = packet as DocumentsResponse;
           sub_questions = constructSubQuestions(
             sub_questions,
           );
 
           if (
-            documentsResponse.level_question_nr === 0 &&
+            documentsResponse.level_question_num === 0 &&
             documentsResponse.level == 0
           ) {
             documents = (packet as DocumentsResponse).top_documents;
           } else if (
-            documentsResponse.level_question_nr === 0 &&
+            documentsResponse.level_question_num === 0 &&
             documentsResponse.level == 1
           ) {
             agenticDocs = (packet as DocumentsResponse).top_documents;
diff --git a/web/src/app/chat/interfaces.ts b/web/src/app/chat/interfaces.ts
index 7d8115fa7..17c3a9989 100644
--- a/web/src/app/chat/interfaces.ts
+++ b/web/src/app/chat/interfaces.ts
@@ -158,7 +158,7 @@ export interface DocumentsResponse {
   top_documents: OnyxDocument[];
   rephrased_query: string | null;
   level?: number | null;
-  level_question_nr?: number | null;
+  level_question_num?: number | null;
 }
 
 export interface FileChatDisplay {
@@ -209,7 +209,7 @@ export interface PromptData {
 
 export interface BaseQuestionIdentifier {
   level: number;
-  level_question_nr: number;
+  level_question_num: number;
 }
 
 export interface SubQuestionDetail extends BaseQuestionIdentifier {
@@ -239,34 +239,34 @@ export const constructSubQuestions = (
   if (!newDetail) {
     return subQuestions;
   }
-  if (newDetail.level_question_nr == 0) {
+  if (newDetail.level_question_num == 0) {
    return subQuestions;
  }
 
  const updatedSubQuestions = [...subQuestions];
  // .filter(
-  //   (sq) => sq.level_question_nr !== 0
+  //   (sq) => sq.level_question_num !== 0
  // );
 
  if ("stop_reason" in newDetail) {
    console.log("STOP REASON");
    console.log(newDetail);
-    const { level, level_question_nr } = newDetail;
+    const { level, level_question_num } = newDetail;
    let subQuestion = updatedSubQuestions.find(
-      (sq) => sq.level === level && sq.level_question_nr === level_question_nr
+      (sq) => sq.level === level && sq.level_question_num === level_question_num
    );
    if (subQuestion) {
      subQuestion.is_complete = true;
    }
  } else if ("top_documents" in newDetail) {
-    const { level, level_question_nr, top_documents } = newDetail;
+    const { level, level_question_num, top_documents } = newDetail;
    let subQuestion = updatedSubQuestions.find(
-      (sq) => sq.level === level && sq.level_question_nr === level_question_nr
+      (sq) => sq.level === level && sq.level_question_num === level_question_num
    );
    if (!subQuestion) {
      subQuestion = {
        level: level ?? 0,
-        level_question_nr: level_question_nr ?? 0,
+        level_question_num: level_question_num ?? 0,
        question: "",
        answer: "",
        sub_queries: [],
@@ -277,16 +277,16 @@ export const constructSubQuestions = (
    }
  } else if ("answer_piece" in newDetail) {
    // Handle AgentAnswerPiece
-    const { level, level_question_nr, answer_piece } = newDetail;
+    const { level, level_question_num, answer_piece } = newDetail;
 
    // Find or create the relevant SubQuestionDetail
    let subQuestion = updatedSubQuestions.find(
-      (sq) => sq.level === level && sq.level_question_nr === level_question_nr
+      (sq) => sq.level === level && sq.level_question_num === level_question_num
    );
 
    if (!subQuestion) {
      subQuestion = {
        level,
-        level_question_nr,
+        level_question_num,
        question: "",
        answer: "",
        sub_queries: [],
@@ -299,17 +299,17 @@ export const constructSubQuestions = (
    subQuestion.answer += answer_piece;
  } else if ("sub_question" in newDetail) {
    // Handle SubQuestionPiece
-    const { level, level_question_nr, sub_question } = newDetail;
+    const { level, level_question_num, sub_question } = newDetail;
 
    // Find or create the relevant SubQuestionDetail
    let subQuestion = updatedSubQuestions.find(
-      (sq) => sq.level === level && sq.level_question_nr === level_question_nr
+      (sq) => sq.level === level && sq.level_question_num === level_question_num
    );
 
    if (!subQuestion) {
      subQuestion = {
        level,
-        level_question_nr,
+        level_question_num,
        question: "",
        answer: "",
        sub_queries: [],
@@ -322,18 +322,18 @@ export const constructSubQuestions = (
    subQuestion.question += sub_question;
  } else if ("sub_query" in newDetail) {
    // Handle SubQueryPiece
-    const { level, level_question_nr, query_id, sub_query } = newDetail;
+    const { level, level_question_num, query_id, sub_query } = newDetail;
 
    // Find the relevant SubQuestionDetail
    let subQuestion = updatedSubQuestions.find(
-      (sq) => sq.level === level && sq.level_question_nr === level_question_nr
+      (sq) => sq.level === level && sq.level_question_num === level_question_num
    );
 
    if (!subQuestion) {
      // If we receive a sub_query before its parent question, create a placeholder
      subQuestion = {
        level,
-        level_question_nr: level_question_nr,
+        level_question_num: level_question_num,
        question: "",
        answer: "",
        sub_queries: [],
diff --git a/web/src/app/chat/message/AgenticMessage.tsx b/web/src/app/chat/message/AgenticMessage.tsx
index 9c1233414..905d21aa6 100644
--- a/web/src/app/chat/message/AgenticMessage.tsx
+++ b/web/src/app/chat/message/AgenticMessage.tsx
@@ -227,7 +227,7 @@ export const AgenticMessage = ({
     (question: SubQuestionDetail) => {
       setCurrentlyOpenQuestion({
         level: question.level,
-        level_question_nr: question.level_question_nr,
+        level_question_num: question.level_question_num,
       });
       setTimeout(() => {
         console.log("closing question");
diff --git a/web/src/app/chat/message/StreamingMessages.ts b/web/src/app/chat/message/StreamingMessages.ts
index c8fd1b303..96f425046 100644
--- a/web/src/app/chat/message/StreamingMessages.ts
+++ b/web/src/app/chat/message/StreamingMessages.ts
@@ -93,7 +93,7 @@ export const useStreamingMessages = (
       if (!dynamicSubQuestionsRef.current[i]) {
         dynamicSubQuestionsRef.current[i] = {
           level: sq.level,
-          level_question_nr: sq.level_question_nr,
+          level_question_num: sq.level_question_num,
           question: "",
           answer: "",
           sub_queries: [],
@@ -270,11 +270,11 @@ export const useStreamingMessages = (
           // Check if this is the last subquestion at level 0
           if (
             sq.level === 0 &&
-            sq.level_question_nr ===
+            sq.level_question_num ===
               Math.max(
                 ...subQuestions
                   .filter((q) => q.level === 0)
-                  .map((q) => q.level_question_nr)
+                  .map((q) => q.level_question_num)
               )
           ) {
             console.log("ALLOW STREAMING");
diff --git a/web/src/app/chat/message/SubQuestionProgress.tsx b/web/src/app/chat/message/SubQuestionProgress.tsx
index 84f583bed..31925990b 100644
--- a/web/src/app/chat/message/SubQuestionProgress.tsx
+++ b/web/src/app/chat/message/SubQuestionProgress.tsx
@@ -30,7 +30,7 @@ const SubQuestionProgress: React.FC = ({
       {subQuestions.map((sq, index) => (
-          Level {sq.level}, Q{sq.level_question_nr}
+          Level {sq.level}, Q{sq.level_question_num}
diff --git a/web/src/app/chat/message/SubQuestionsDisplay.tsx b/web/src/app/chat/message/SubQuestionsDisplay.tsx
index 7768592a9..4db5ececb 100644
--- a/web/src/app/chat/message/SubQuestionsDisplay.tsx
+++ b/web/src/app/chat/message/SubQuestionsDisplay.tsx
@@ -634,16 +634,16 @@ const SubQuestionsDisplay: React.FC = ({
           }
           currentlyOpen={
             currentlyOpenQuestion?.level === subQuestion.level &&
-            currentlyOpenQuestion?.level_question_nr ===
-              subQuestion.level_question_nr
+            currentlyOpenQuestion?.level_question_num ===
+              subQuestion.level_question_num
           }
           currentlyClosed={
             currentlyOpenQuestion != null &&
             currentlyOpenQuestion != undefined &&
             !(
               currentlyOpenQuestion.level === subQuestion.level &&
-              currentlyOpenQuestion.level_question_nr ===
-                subQuestion.level_question_nr
+              currentlyOpenQuestion.level_question_num ===
+                subQuestion.level_question_num
             )
           }
           key={index}
diff --git a/web/src/lib/search/interfaces.ts b/web/src/lib/search/interfaces.ts
index b2f50c532..e6cf93a96 100644
--- a/web/src/lib/search/interfaces.ts
+++ b/web/src/lib/search/interfaces.ts
@@ -21,7 +21,7 @@ export interface ProSearchPacket {
   sub_query?: string;
   tool_response?: ToolResponse;
   level: number;
-  level_question_nr: number;
+  level_question_num: number;
 }
 
 export interface RefinedAnswerImprovement {
@@ -31,26 +31,26 @@ export interface RefinedAnswerImprovement {
 export interface AgentAnswerPiece {
   answer_piece: string;
   level: number;
-  level_question_nr: number;
+  level_question_num: number;
   answer_type: "agent_sub_answer" | "agent_level_answer";
 }
 
 export interface SubQuestionPiece {
   sub_question: string;
   level: number;
-  level_question_nr: number;
+  level_question_num: number;
 }
 
 export interface SubQueryPiece {
   sub_query: string;
   level: number;
-  level_question_nr: number;
+  level_question_num: number;
   query_id: number;
 }
 
 export interface SubQuestionSearchDoc {
   context_docs: OnyxDocument[];
-  level_question_nr: number;
+  level_question_num: number;
   level: number;
 }
 
@@ -60,7 +60,7 @@ export interface ToolResponse {
 }
 
 export interface ExtendedToolResponse extends ToolResponse {
   level: number;
-  level_question_nr: number;
+  level_question_num: number;
 }
 
 export interface AnswerPiecePacket {
@@ -75,7 +75,7 @@ export enum StreamStopReason {
 
 export interface StreamStopInfo {
   stop_reason: StreamStopReason;
   level?: number;
-  level_question_nr?: number;
+  level_question_num?: number;
 }
 
 export interface ErrorMessagePacket {