Fix Weak Models Concurrency Issue (#811)

2025-10-10 13:15:18 +02:00 · 2023-12-04 15:40:10 -08:00
parent e0b87d9d4e
commit 5aa2de7a40
3 changed files with 16 additions and 1 deletions
--- a/backend/danswer/configs/app_configs.py
+++ b/backend/danswer/configs/app_configs.py
@@ -199,6 +199,8 @@ DOC_TIME_DECAY = float(
    os.environ.get("DOC_TIME_DECAY") or 0.5  # Hits limit at 2 years by default
 )
 FAVOR_RECENT_DECAY_MULTIPLIER = 2
 # NOTE: the next flag is derived from QA_PROMPT_OVERRIDE; it currently has no env variable of its own
 DISABLE_LLM_QUERY_ANSWERABILITY = QA_PROMPT_OVERRIDE == "weak"
 DISABLE_LLM_FILTER_EXTRACTION = (
    os.environ.get("DISABLE_LLM_FILTER_EXTRACTION", "").lower() == "true"
 )
--- a/backend/danswer/secondary_llm_flows/query_validation.py
+++ b/backend/danswer/secondary_llm_flows/query_validation.py
@@ -1,6 +1,7 @@
 import re
 from collections.abc import Iterator
 from danswer.configs.app_configs import DISABLE_LLM_QUERY_ANSWERABILITY
 from danswer.direct_qa.interfaces import DanswerAnswerPiece
 from danswer.direct_qa.interfaces import StreamingError
 from danswer.llm.factory import get_default_llm
@@ -52,7 +53,18 @@ def get_query_answerability(user_query: str) -> tuple[str, bool]:
    return reasoning, answerable
-def stream_query_answerability(user_query: str) -> Iterator[str]:
+def stream_query_answerability(
    user_query: str, skip_check: bool = DISABLE_LLM_QUERY_ANSWERABILITY
 ) -> Iterator[str]:
    if skip_check:
        yield get_json_line(
            QueryValidationResponse(
                reasoning="Query Answerability Eval feature is turned off",
                answerable=True,
            ).dict()
        )
        return
    messages = get_query_validation_messages(user_query)
    filled_llm_prompt = dict_based_prompt_to_langchain_prompt(messages)
    try:
--- a/backend/danswer/server/chat/search_backend.py
+++ b/backend/danswer/server/chat/search_backend.py
@@ -104,6 +104,7 @@ def query_validation(
 def stream_query_validation(
    new_message_request: NewMessageRequest, _: User = Depends(current_user)
 ) -> StreamingResponse:
    # Note: if the weak model prompt is chosen, the LLM answerability check is
    # skipped and the query is reported as answerable
    query = new_message_request.query
    return StreamingResponse(
        stream_query_answerability(query), media_type="application/json"