From 5aa2de7a4089ce5b46fbc7180054173ee75903d7 Mon Sep 17 00:00:00 2001
From: Yuhong Sun
Date: Mon, 4 Dec 2023 15:40:10 -0800
Subject: [PATCH] Fix Weak Models Concurrency Issue (#811)

---
 backend/danswer/configs/app_configs.py        |  2 ++
 .../secondary_llm_flows/query_validation.py   | 14 +++++++++++++-
 backend/danswer/server/chat/search_backend.py |  1 +
 3 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/backend/danswer/configs/app_configs.py b/backend/danswer/configs/app_configs.py
index 0d952eafc..79e2e4962 100644
--- a/backend/danswer/configs/app_configs.py
+++ b/backend/danswer/configs/app_configs.py
@@ -199,6 +199,8 @@ DOC_TIME_DECAY = float(
     os.environ.get("DOC_TIME_DECAY") or 0.5  # Hits limit at 2 years by default
 )
 FAVOR_RECENT_DECAY_MULTIPLIER = 2
+# Currently this next one is not configurable via env
+DISABLE_LLM_QUERY_ANSWERABILITY = QA_PROMPT_OVERRIDE == "weak"
 DISABLE_LLM_FILTER_EXTRACTION = (
     os.environ.get("DISABLE_LLM_FILTER_EXTRACTION", "").lower() == "true"
 )
diff --git a/backend/danswer/secondary_llm_flows/query_validation.py b/backend/danswer/secondary_llm_flows/query_validation.py
index a34512724..5ad52cf0d 100644
--- a/backend/danswer/secondary_llm_flows/query_validation.py
+++ b/backend/danswer/secondary_llm_flows/query_validation.py
@@ -1,6 +1,7 @@
 import re
 from collections.abc import Iterator
 
+from danswer.configs.app_configs import DISABLE_LLM_QUERY_ANSWERABILITY
 from danswer.direct_qa.interfaces import DanswerAnswerPiece
 from danswer.direct_qa.interfaces import StreamingError
 from danswer.llm.factory import get_default_llm
@@ -52,7 +53,18 @@ def get_query_answerability(user_query: str) -> tuple[str, bool]:
     return reasoning, answerable
 
 
-def stream_query_answerability(user_query: str) -> Iterator[str]:
+def stream_query_answerability(
+    user_query: str, skip_check: bool = DISABLE_LLM_QUERY_ANSWERABILITY
+) -> Iterator[str]:
+    if skip_check:
+        yield get_json_line(
+            QueryValidationResponse(
+                reasoning="Query Answerability Eval feature is turned off",
+                answerable=True,
+            ).dict()
+        )
+        return
+
     messages = get_query_validation_messages(user_query)
     filled_llm_prompt = dict_based_prompt_to_langchain_prompt(messages)
     try:
diff --git a/backend/danswer/server/chat/search_backend.py b/backend/danswer/server/chat/search_backend.py
index b98cd89e8..ec3f54774 100644
--- a/backend/danswer/server/chat/search_backend.py
+++ b/backend/danswer/server/chat/search_backend.py
@@ -104,6 +104,7 @@ def query_validation(
 def stream_query_validation(
     new_message_request: NewMessageRequest, _: User = Depends(current_user)
 ) -> StreamingResponse:
+    # Note if weak model prompt is chosen, this check does not occur
     query = new_message_request.query
     return StreamingResponse(
         stream_query_answerability(query), media_type="application/json"
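
Below is a minimal, self-contained sketch of the short-circuit behavior this
patch adds to stream_query_answerability: when the weak-model prompt override
is active, the generator yields a single static JSON line and returns before
any LLM call is made, so the weak model never receives this validation request
concurrently with the main QA request. The get_json_line mimic and the plain
dict standing in for danswer's QueryValidationResponse model are illustrative
assumptions, not the actual danswer helpers.

    import json
    from collections.abc import Iterator


    def get_json_line(data: dict) -> str:
        # Hypothetical mimic of danswer's helper: streaming endpoints emit
        # one JSON object per newline-delimited line.
        return json.dumps(data) + "\n"


    def stream_query_answerability(
        user_query: str, skip_check: bool = True  # True when QA_PROMPT_OVERRIDE == "weak"
    ) -> Iterator[str]:
        if skip_check:
            # Short-circuit path added by this patch: one static payload,
            # no LLM call.
            yield get_json_line(
                {
                    "reasoning": "Query Answerability Eval feature is turned off",
                    "answerable": True,
                }
            )
            return
        raise NotImplementedError("real implementation streams LLM output here")


    for line in stream_query_answerability("why is the build failing?"):
        print(line, end="")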