By default, use primary LLM for initial & refined answer (#4012)

* By default, use primary LLM for initial & refined answer

Use of a new env variable, AGENT_ANSWER_GENERATION_BY_FAST_LLM, to opt back into the fast LLM

* simplification
joachim-danswer authored 2025-02-16 15:20:07 -08:00, committed by GitHub
parent ec0e55fd39
commit 20d3efc86e
3 changed files with 18 additions and 3 deletions
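
The old behavior (answer generation by the fast LLM) remains available as an opt-in. A minimal sketch of enabling it, assuming the variable is set in the backend environment before onyx.configs.agent_configs is first imported:

import os

# Hypothetical opt-in: any value other than a case-insensitive "true"
# (including leaving the variable unset) keeps the new primary-LLM default.
os.environ["AGENT_ANSWER_GENERATION_BY_FAST_LLM"] = "true"

from onyx.configs.agent_configs import AGENT_ANSWER_GENERATION_BY_FAST_LLM

assert AGENT_ANSWER_GENERATION_BY_FAST_LLM is True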

File 1 of 3 (generate_initial_answer)

@@ -60,6 +60,7 @@ from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
 from onyx.chat.models import AgentAnswerPiece
 from onyx.chat.models import ExtendedToolResponse
 from onyx.chat.models import StreamingError
+from onyx.configs.agent_configs import AGENT_ANSWER_GENERATION_BY_FAST_LLM
 from onyx.configs.agent_configs import AGENT_MAX_ANSWER_CONTEXT_DOCS
 from onyx.configs.agent_configs import AGENT_MAX_STREAMED_DOCS_FOR_INITIAL_ANSWER
 from onyx.configs.agent_configs import AGENT_MIN_ORIG_QUESTION_DOCS
@@ -230,7 +231,11 @@ def generate_initial_answer(
     sub_questions = all_sub_questions  # Replace the original assignment

-    model = graph_config.tooling.fast_llm
+    model = (
+        graph_config.tooling.fast_llm
+        if AGENT_ANSWER_GENERATION_BY_FAST_LLM
+        else graph_config.tooling.primary_llm
+    )

     doc_context = format_docs(answer_generation_documents.context_documents)
     doc_context = trim_prompt_piece(

File 2 of 3 (generate_validate_refined_answer)

@@ -66,6 +66,7 @@ from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
 from onyx.chat.models import AgentAnswerPiece
 from onyx.chat.models import ExtendedToolResponse
 from onyx.chat.models import StreamingError
+from onyx.configs.agent_configs import AGENT_ANSWER_GENERATION_BY_FAST_LLM
 from onyx.configs.agent_configs import AGENT_MAX_ANSWER_CONTEXT_DOCS
 from onyx.configs.agent_configs import AGENT_MAX_STREAMED_DOCS_FOR_REFINED_ANSWER
 from onyx.configs.agent_configs import AGENT_MIN_ORIG_QUESTION_DOCS
@@ -253,7 +254,12 @@ def generate_validate_refined_answer(
         else REFINED_ANSWER_PROMPT_WO_SUB_QUESTIONS
     )

-    model = graph_config.tooling.fast_llm
+    model = (
+        graph_config.tooling.fast_llm
+        if AGENT_ANSWER_GENERATION_BY_FAST_LLM
+        else graph_config.tooling.primary_llm
+    )

     relevant_docs_str = format_docs(answer_generation_documents.context_documents)
     relevant_docs_str = trim_prompt_piece(
         model.config,
@@ -383,8 +389,9 @@ def generate_validate_refined_answer(
         )
     ]

+    validation_model = graph_config.tooling.fast_llm
     try:
-        validation_response = model.invoke(
+        validation_response = validation_model.invoke(
             msg, timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_VALIDATION
         )
         refined_answer_quality = binary_string_test_after_answer_separator(
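
Net effect in this file: answer generation follows the new flag, while refined-answer validation stays pinned to the fast LLM through the separate validation_model binding (validation previously reused model, which would now switch along with generation). A minimal sketch of the split, with graph_config.tooling stubbed out:

from dataclasses import dataclass

@dataclass
class Tooling:
    # Stand-ins for the real LLM handles on graph_config.tooling.
    primary_llm: str
    fast_llm: str

def select_models(tooling: Tooling, answer_by_fast_llm: bool) -> tuple[str, str]:
    # Generation is flag-dependent; validation always uses the fast LLM.
    generation = tooling.fast_llm if answer_by_fast_llm else tooling.primary_llm
    validation = tooling.fast_llm
    return generation, validation

tooling = Tooling(primary_llm="primary", fast_llm="fast")
assert select_models(tooling, answer_by_fast_llm=False) == ("primary", "fast")
assert select_models(tooling, answer_by_fast_llm=True) == ("fast", "fast")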

File 3 of 3 (onyx.configs.agent_configs)

@@ -47,6 +47,9 @@ AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_GENERATION = 25 # in seconds
 AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_VALIDATION = 8  # in seconds
 AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_COMPARE_ANSWERS = 8  # in seconds
+AGENT_ANSWER_GENERATION_BY_FAST_LLM = (
+    os.environ.get("AGENT_ANSWER_GENERATION_BY_FAST_LLM", "").lower() == "true"
+)
 AGENT_RETRIEVAL_STATS = (
     not os.environ.get("AGENT_RETRIEVAL_STATS") == "False"
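
Since the flag is parsed as os.environ.get(...).lower() == "true", only the literal string "true" (in any casing) enables the fast LLM; everything else, including an unset variable, keeps the new primary-LLM default. A small stdlib-only check of that parsing, with the environment lookup stubbed out:

def fast_llm_enabled(raw: str | None) -> bool:
    # Mirrors os.environ.get("AGENT_ANSWER_GENERATION_BY_FAST_LLM", "").lower() == "true"
    return (raw or "").lower() == "true"

assert fast_llm_enabled(None) is False     # unset -> primary LLM (the new default)
assert fast_llm_enabled("false") is False
assert fast_llm_enabled("TRUE") is True    # match is case-insensitive
assert fast_llm_enabled("1") is False      # "1" does not enable the fast LLM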