By default, use primary LLM for initial & refined answer (#4012)

* By default, use primary LLM for initial & refined answer: use of a new env variable
* simplification
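This change gates which LLM generates the initial and refined answers behind a new environment variable, defaulting to the primary LLM. A minimal standalone sketch of the resulting selection logic (the pick_answer_llm helper is illustrative, not part of the repo; the flag parsing matches the config diff below):

    import os

    # Unset, empty, or anything other than "true" (case-insensitive) leaves the
    # flag False, so the primary LLM is now the default for answer generation.
    AGENT_ANSWER_GENERATION_BY_FAST_LLM = (
        os.environ.get("AGENT_ANSWER_GENERATION_BY_FAST_LLM", "").lower() == "true"
    )

    def pick_answer_llm(fast_llm, primary_llm):
        # Illustrative helper mirroring the conditional this commit adds to
        # generate_initial_answer and generate_validate_refined_answer.
        return fast_llm if AGENT_ANSWER_GENERATION_BY_FAST_LLM else primary_llm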
@@ -60,6 +60,7 @@ from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
 from onyx.chat.models import AgentAnswerPiece
 from onyx.chat.models import ExtendedToolResponse
 from onyx.chat.models import StreamingError
+from onyx.configs.agent_configs import AGENT_ANSWER_GENERATION_BY_FAST_LLM
 from onyx.configs.agent_configs import AGENT_MAX_ANSWER_CONTEXT_DOCS
 from onyx.configs.agent_configs import AGENT_MAX_STREAMED_DOCS_FOR_INITIAL_ANSWER
 from onyx.configs.agent_configs import AGENT_MIN_ORIG_QUESTION_DOCS
@@ -230,7 +231,11 @@ def generate_initial_answer(
 
     sub_questions = all_sub_questions  # Replace the original assignment
 
-    model = graph_config.tooling.fast_llm
+    model = (
+        graph_config.tooling.fast_llm
+        if AGENT_ANSWER_GENERATION_BY_FAST_LLM
+        else graph_config.tooling.primary_llm
+    )
 
     doc_context = format_docs(answer_generation_documents.context_documents)
     doc_context = trim_prompt_piece(
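With the flag unset, model now resolves to graph_config.tooling.primary_llm, so initial answer generation uses the primary LLM by default; exporting AGENT_ANSWER_GENERATION_BY_FAST_LLM=true restores the previous fast-LLM behavior.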
@@ -66,6 +66,7 @@ from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
 from onyx.chat.models import AgentAnswerPiece
 from onyx.chat.models import ExtendedToolResponse
 from onyx.chat.models import StreamingError
+from onyx.configs.agent_configs import AGENT_ANSWER_GENERATION_BY_FAST_LLM
 from onyx.configs.agent_configs import AGENT_MAX_ANSWER_CONTEXT_DOCS
 from onyx.configs.agent_configs import AGENT_MAX_STREAMED_DOCS_FOR_REFINED_ANSWER
 from onyx.configs.agent_configs import AGENT_MIN_ORIG_QUESTION_DOCS
@@ -253,7 +254,12 @@ def generate_validate_refined_answer(
         else REFINED_ANSWER_PROMPT_WO_SUB_QUESTIONS
     )
 
-    model = graph_config.tooling.fast_llm
+    model = (
+        graph_config.tooling.fast_llm
+        if AGENT_ANSWER_GENERATION_BY_FAST_LLM
+        else graph_config.tooling.primary_llm
+    )
 
     relevant_docs_str = format_docs(answer_generation_documents.context_documents)
     relevant_docs_str = trim_prompt_piece(
         model.config,
@@ -383,8 +389,9 @@ def generate_validate_refined_answer(
         )
     ]
 
+    validation_model = graph_config.tooling.fast_llm
     try:
-        validation_response = model.invoke(
+        validation_response = validation_model.invoke(
             msg, timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_VALIDATION
         )
         refined_answer_quality = binary_string_test_after_answer_separator(
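Note that refined-answer validation stays on the fast LLM regardless of the flag: the new validation_model variable is pinned to graph_config.tooling.fast_llm, so the env variable only switches the model used for answer generation, while the quick validity check keeps the cheaper model.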
@@ -47,6 +47,9 @@ AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_GENERATION = 25  # in seconds
 AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_VALIDATION = 8  # in seconds
 AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_COMPARE_ANSWERS = 8  # in seconds
 
+AGENT_ANSWER_GENERATION_BY_FAST_LLM = (
+    os.environ.get("AGENT_ANSWER_GENERATION_BY_FAST_LLM", "").lower() == "true"
+)
 
 AGENT_RETRIEVAL_STATS = (
     not os.environ.get("AGENT_RETRIEVAL_STATS") == "False"
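The flag parses case-insensitively and defaults to off when unset. A quick standalone check of that parsing behavior (a sketch; no repo imports needed):

    import os

    # Only a value that lowercases to "true" enables the flag; unset, empty,
    # or any other value (including "1" or "yes") leaves it disabled.
    for value in ("true", "True", "TRUE", "false", "1", None):
        if value is None:
            os.environ.pop("AGENT_ANSWER_GENERATION_BY_FAST_LLM", None)
        else:
            os.environ["AGENT_ANSWER_GENERATION_BY_FAST_LLM"] = value
        flag = (
            os.environ.get("AGENT_ANSWER_GENERATION_BY_FAST_LLM", "").lower() == "true"
        )
        print(repr(value), "->", flag)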