diff --git a/backend/onyx/agents/agent_search/deep_search/initial/generate_initial_answer/nodes/generate_initial_answer.py b/backend/onyx/agents/agent_search/deep_search/initial/generate_initial_answer/nodes/generate_initial_answer.py
index 345bd05d78d5..4803e88d9491 100644
--- a/backend/onyx/agents/agent_search/deep_search/initial/generate_initial_answer/nodes/generate_initial_answer.py
+++ b/backend/onyx/agents/agent_search/deep_search/initial/generate_initial_answer/nodes/generate_initial_answer.py
@@ -60,6 +60,7 @@ from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
 from onyx.chat.models import AgentAnswerPiece
 from onyx.chat.models import ExtendedToolResponse
 from onyx.chat.models import StreamingError
+from onyx.configs.agent_configs import AGENT_ANSWER_GENERATION_BY_FAST_LLM
 from onyx.configs.agent_configs import AGENT_MAX_ANSWER_CONTEXT_DOCS
 from onyx.configs.agent_configs import AGENT_MAX_STREAMED_DOCS_FOR_INITIAL_ANSWER
 from onyx.configs.agent_configs import AGENT_MIN_ORIG_QUESTION_DOCS
@@ -230,7 +231,11 @@ def generate_initial_answer(
     sub_questions = all_sub_questions
 
     # Replace the original assignment
-    model = graph_config.tooling.fast_llm
+    model = (
+        graph_config.tooling.fast_llm
+        if AGENT_ANSWER_GENERATION_BY_FAST_LLM
+        else graph_config.tooling.primary_llm
+    )
 
     doc_context = format_docs(answer_generation_documents.context_documents)
     doc_context = trim_prompt_piece(
diff --git a/backend/onyx/agents/agent_search/deep_search/main/nodes/generate_validate_refined_answer.py b/backend/onyx/agents/agent_search/deep_search/main/nodes/generate_validate_refined_answer.py
index bd954c26f597..263c7dcacdd0 100644
--- a/backend/onyx/agents/agent_search/deep_search/main/nodes/generate_validate_refined_answer.py
+++ b/backend/onyx/agents/agent_search/deep_search/main/nodes/generate_validate_refined_answer.py
@@ -66,6 +66,7 @@ from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
 from onyx.chat.models import AgentAnswerPiece
 from onyx.chat.models import ExtendedToolResponse
 from onyx.chat.models import StreamingError
+from onyx.configs.agent_configs import AGENT_ANSWER_GENERATION_BY_FAST_LLM
 from onyx.configs.agent_configs import AGENT_MAX_ANSWER_CONTEXT_DOCS
 from onyx.configs.agent_configs import AGENT_MAX_STREAMED_DOCS_FOR_REFINED_ANSWER
 from onyx.configs.agent_configs import AGENT_MIN_ORIG_QUESTION_DOCS
@@ -253,7 +254,12 @@ def generate_validate_refined_answer(
         else REFINED_ANSWER_PROMPT_WO_SUB_QUESTIONS
     )
 
-    model = graph_config.tooling.fast_llm
+    model = (
+        graph_config.tooling.fast_llm
+        if AGENT_ANSWER_GENERATION_BY_FAST_LLM
+        else graph_config.tooling.primary_llm
+    )
+
     relevant_docs_str = format_docs(answer_generation_documents.context_documents)
     relevant_docs_str = trim_prompt_piece(
         model.config,
@@ -383,8 +389,9 @@
         )
     ]
 
+    validation_model = graph_config.tooling.fast_llm
     try:
-        validation_response = model.invoke(
+        validation_response = validation_model.invoke(
             msg, timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_VALIDATION
         )
         refined_answer_quality = binary_string_test_after_answer_separator(
diff --git a/backend/onyx/configs/agent_configs.py b/backend/onyx/configs/agent_configs.py
index 523f87878869..0e36676a6554 100644
--- a/backend/onyx/configs/agent_configs.py
+++ b/backend/onyx/configs/agent_configs.py
@@ -47,6 +47,9 @@
 AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_GENERATION = 25  # in seconds
 AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_VALIDATION = 8  # in seconds
 AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_COMPARE_ANSWERS = 8  # in seconds
+AGENT_ANSWER_GENERATION_BY_FAST_LLM = (
+    os.environ.get("AGENT_ANSWER_GENERATION_BY_FAST_LLM", "").lower() == "true"
+)
 AGENT_RETRIEVAL_STATS = (
     not os.environ.get("AGENT_RETRIEVAL_STATS") == "False"
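
Reviewer note: the new flag is opt-in. `os.environ.get("AGENT_ANSWER_GENERATION_BY_FAST_LLM", "").lower() == "true"` evaluates to False when the variable is unset or set to anything other than a case-insensitive "true", so both initial and refined answer generation now default to the primary LLM, while refined-answer validation stays pinned to the fast LLM via the separate `validation_model` assignment. Below is a minimal, self-contained sketch of that selection pattern; `Tooling` and the model-name strings are hypothetical stand-ins for `graph_config.tooling` and its LLM objects, not the real Onyx types — only the env-var parsing and the conditional mirror the diff above.

    # Sketch of the env-flag gating added in this PR (stand-in types, not Onyx's).
    import os
    from dataclasses import dataclass


    @dataclass
    class Tooling:
        fast_llm: str = "fast-llm"        # stand-in for graph_config.tooling.fast_llm
        primary_llm: str = "primary-llm"  # stand-in for graph_config.tooling.primary_llm


    # Opt-in flag: False unless the env var is literally "true" (any casing),
    # making the primary LLM the default for answer generation.
    AGENT_ANSWER_GENERATION_BY_FAST_LLM = (
        os.environ.get("AGENT_ANSWER_GENERATION_BY_FAST_LLM", "").lower() == "true"
    )

    tooling = Tooling()
    model = (
        tooling.fast_llm
        if AGENT_ANSWER_GENERATION_BY_FAST_LLM
        else tooling.primary_llm
    )

    # Validation remains unconditionally on the fast LLM, matching the
    # validation_model assignment in generate_validate_refined_answer.
    validation_model = tooling.fast_llm

    print(model, validation_model)

Run with AGENT_ANSWER_GENERATION_BY_FAST_LLM=true this prints "fast-llm fast-llm"; with the variable unset it prints "primary-llm fast-llm".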