Mirror of https://github.com/danswer-ai/danswer.git (synced 2025-09-27 20:38:32 +02:00)
Remove Dead Code (#3234)
@@ -17,9 +17,6 @@ MAX_CHUNKS_FED_TO_CHAT = float(os.environ.get("MAX_CHUNKS_FED_TO_CHAT") or 10.0)
 # ~3k input, half for docs, half for chat history + prompts
 CHAT_TARGET_CHUNK_PERCENTAGE = 512 * 3 / 3072
 
-# For selecting a different LLM question-answering prompt format
-# Valid values: default, cot, weak
-QA_PROMPT_OVERRIDE = os.environ.get("QA_PROMPT_OVERRIDE") or None
 # 1 / (1 + DOC_TIME_DECAY * doc-age-in-years), set to 0 to have no decay
 # Capped in Vespa at 0.5
 DOC_TIME_DECAY = float(
@@ -27,8 +24,6 @@ DOC_TIME_DECAY = float(
 )
 BASE_RECENCY_DECAY = 0.5
 FAVOR_RECENT_DECAY_MULTIPLIER = 2.0
-# Currently this next one is not configurable via env
-DISABLE_LLM_QUERY_ANSWERABILITY = QA_PROMPT_OVERRIDE == "weak"
 # For the highest matching base size chunk, how many chunks above and below do we pull in by default
 # Note this is not in any of the deployment configs yet
 # Currently only applies to search flow not chat
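
As an aside on the constants touched above: the comments describe the document recency
multiplier as 1 / (1 + DOC_TIME_DECAY * doc-age-in-years), capped in Vespa at 0.5. A
minimal sketch of how those constants could combine, assuming the cap acts as a floor on
the multiplier and that FAVOR_RECENT_DECAY_MULTIPLIER simply scales the decay rate (the
function name and defaults below are illustrative, not code from this commit):

    # Illustrative sketch only; not code from this commit.
    def recency_multiplier(
        doc_age_years: float,
        doc_time_decay: float,
        favor_recent: bool = False,
        base_recency_decay: float = 0.5,       # BASE_RECENCY_DECAY
        favor_recent_multiplier: float = 2.0,  # FAVOR_RECENT_DECAY_MULTIPLIER
    ) -> float:
        decay = doc_time_decay * (favor_recent_multiplier if favor_recent else 1.0)
        # "Capped in Vespa at 0.5": assume the multiplier never drops below the base decay
        return max(base_recency_decay, 1.0 / (1.0 + decay * doc_age_years))
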
@@ -2,7 +2,6 @@ from langchain.schema.messages import HumanMessage
 
 from danswer.chat.models import LlmDoc
 from danswer.configs.chat_configs import LANGUAGE_HINT
-from danswer.configs.chat_configs import QA_PROMPT_OVERRIDE
 from danswer.context.search.models import InferenceChunk
 from danswer.db.search_settings import get_multilingual_expansion
 from danswer.llm.answering.models import PromptConfig
@@ -10,39 +9,10 @@ from danswer.llm.utils import message_to_prompt_and_imgs
 from danswer.prompts.direct_qa_prompts import CONTEXT_BLOCK
 from danswer.prompts.direct_qa_prompts import HISTORY_BLOCK
 from danswer.prompts.direct_qa_prompts import JSON_PROMPT
-from danswer.prompts.direct_qa_prompts import WEAK_LLM_PROMPT
 from danswer.prompts.prompt_utils import add_date_time_to_prompt
 from danswer.prompts.prompt_utils import build_complete_context_str
 
 
-def _build_weak_llm_quotes_prompt(
-    question: str,
-    context_docs: list[LlmDoc] | list[InferenceChunk],
-    history_str: str,
-    prompt: PromptConfig,
-) -> HumanMessage:
-    """Since Danswer supports a variety of LLMs, this less demanding prompt is provided
-    as an option to use with weaker LLMs such as small version, low float precision, quantized,
-    or distilled models. It only uses one context document and has very weak requirements of
-    output format.
-    """
-    context_block = ""
-    if context_docs:
-        context_block = CONTEXT_BLOCK.format(context_docs_str=context_docs[0].content)
-
-    prompt_str = WEAK_LLM_PROMPT.format(
-        system_prompt=prompt.system_prompt,
-        context_block=context_block,
-        task_prompt=prompt.task_prompt,
-        user_query=question,
-    )
-
-    if prompt.datetime_aware:
-        prompt_str = add_date_time_to_prompt(prompt_str=prompt_str)
-
-    return HumanMessage(content=prompt_str)
-
-
 def _build_strong_llm_quotes_prompt(
     question: str,
     context_docs: list[LlmDoc] | list[InferenceChunk],
@@ -81,15 +51,9 @@ def build_quotes_user_message(
     history_str: str,
     prompt: PromptConfig,
 ) -> HumanMessage:
-    prompt_builder = (
-        _build_weak_llm_quotes_prompt
-        if QA_PROMPT_OVERRIDE == "weak"
-        else _build_strong_llm_quotes_prompt
-    )
-
     query, _ = message_to_prompt_and_imgs(message)
 
-    return prompt_builder(
+    return _build_strong_llm_quotes_prompt(
         question=query,
         context_docs=context_docs,
         history_str=history_str,
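
Net effect of the two hunks above: build_quotes_user_message no longer consults
QA_PROMPT_OVERRIDE and always delegates to the strong, JSON-quoting prompt builder. A
hedged usage sketch of the surviving entry point (only the parameter names come from the
diff; every variable and value below is illustrative):

    # Sketch of the call path after this change; values are illustrative.
    user_msg = build_quotes_user_message(
        message=last_user_message,       # assumed: the user's chat message
        context_docs=retrieved_chunks,   # assumed: list[InferenceChunk] from retrieval
        history_str=chat_history_str,
        prompt=persona_prompt_config,    # assumed: a PromptConfig instance
    )
    # The result is now always a HumanMessage built by _build_strong_llm_quotes_prompt.
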
@@ -36,10 +36,6 @@ class PromptConfig(BaseModel):
     datetime_aware: bool = True
 
 
-class DocumentSetConfig(BaseModel):
-    id: int
-
-
 class ToolConfig(BaseModel):
     id: int
 
@@ -118,18 +118,6 @@ You should always get right to the point, and never use extraneous language.
 """
 
 
-# For weak LLM which only takes one chunk and cannot output json
-# Also not requiring quotes as it tends to not work
-WEAK_LLM_PROMPT = f"""
-{{system_prompt}}
-{{context_block}}
-{{task_prompt}}
-
-{QUESTION_PAT.upper()}
-{{user_query}}
-""".strip()
-
-
 # This is only for visualization for the users to specify their own prompts
 # The actual flow does not work like this
 PARAMATERIZED_PROMPT = f"""
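
For reference, the removed WEAK_LLM_PROMPT was a plain template with no JSON or quote
requirements, and the removed _build_weak_llm_quotes_prompt filled it with only the first
context document. A sketch of roughly what a rendered prompt looked like; the "QUERY:"
label stands in for QUESTION_PAT.upper() and the CONTEXT wording is an assumption, since
neither constant's value appears in this diff:

    # Sketch of the retired weak-LLM prompt rendering; labels and values are illustrative.
    weak_llm_prompt = (
        "{system_prompt}\n"
        "{context_block}\n"
        "{task_prompt}\n"
        "\n"
        "QUERY:\n"
        "{user_query}"
    )
    rendered = weak_llm_prompt.format(
        system_prompt="You are a helpful assistant.",
        context_block="CONTEXT:\n<content of the single top document>",
        task_prompt="Answer the user's question using the context.",
        user_query="How do I rotate an API key?",
    )
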
@@ -1,9 +1,9 @@
+# NOTE No longer used. This needs to be revisited later.
 import re
 from collections.abc import Iterator
 
 from danswer.chat.models import DanswerAnswerPiece
 from danswer.chat.models import StreamingError
-from danswer.configs.chat_configs import DISABLE_LLM_QUERY_ANSWERABILITY
 from danswer.llm.exceptions import GenAIDisabledException
 from danswer.llm.factory import get_default_llms
 from danswer.llm.utils import dict_based_prompt_to_langchain_prompt
@@ -46,7 +46,7 @@ def extract_answerability_bool(model_raw: str) -> bool:
 
 
 def get_query_answerability(
-    user_query: str, skip_check: bool = DISABLE_LLM_QUERY_ANSWERABILITY
+    user_query: str, skip_check: bool = False
 ) -> tuple[str, bool]:
     if skip_check:
         return "Query Answerability Evaluation feature is turned off", True
@@ -67,7 +67,7 @@ def get_query_answerability(
 
 
 def stream_query_answerability(
-    user_query: str, skip_check: bool = DISABLE_LLM_QUERY_ANSWERABILITY
+    user_query: str, skip_check: bool = False
 ) -> Iterator[str]:
     if skip_check:
         yield get_json_line(
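
With DISABLE_LLM_QUERY_ANSWERABILITY gone, skipping the answerability check is no longer
tied to the QA_PROMPT_OVERRIDE=weak environment setting; a caller that wants to skip it
now has to pass skip_check explicitly. A small sketch (the return value for the skip path
comes from the code above; the query text is illustrative, and the module is flagged as
no longer used anyway):

    # Sketch only; this module is marked "No longer used" in the diff above.
    reasoning, answerable = get_query_answerability(
        "What connectors does Danswer support?",
        skip_check=True,  # previously defaulted to True when QA_PROMPT_OVERRIDE == "weak"
    )
    # -> ("Query Answerability Evaluation feature is turned off", True)
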
@@ -29,10 +29,6 @@ class TagResponse(BaseModel):
     tags: list[SourceTag]
 
 
-class SimpleQueryRequest(BaseModel):
-    query: str
-
-
 class UpdateChatSessionThreadRequest(BaseModel):
     # If not specified, use Danswer default persona
     chat_session_id: UUID
@@ -217,6 +213,7 @@ class ChatSessionDetailResponse(BaseModel):
     current_alternate_model: str | None
 
 
+# This one is not used anymore
 class QueryValidationResponse(BaseModel):
     reasoning: str
     answerable: bool
@@ -34,15 +34,11 @@ from danswer.document_index.factory import get_default_document_index
 from danswer.document_index.vespa.index import VespaIndex
 from danswer.one_shot_answer.answer_question import stream_search_answer
 from danswer.one_shot_answer.models import DirectQARequest
-from danswer.secondary_llm_flows.query_validation import get_query_answerability
-from danswer.secondary_llm_flows.query_validation import stream_query_answerability
 from danswer.server.query_and_chat.models import AdminSearchRequest
 from danswer.server.query_and_chat.models import AdminSearchResponse
 from danswer.server.query_and_chat.models import ChatSessionDetails
 from danswer.server.query_and_chat.models import ChatSessionsResponse
-from danswer.server.query_and_chat.models import QueryValidationResponse
 from danswer.server.query_and_chat.models import SearchSessionDetailResponse
-from danswer.server.query_and_chat.models import SimpleQueryRequest
 from danswer.server.query_and_chat.models import SourceTag
 from danswer.server.query_and_chat.models import TagResponse
 from danswer.server.query_and_chat.token_limit import check_token_rate_limits
@@ -135,18 +131,6 @@ def get_tags(
     return TagResponse(tags=server_tags)
 
 
-@basic_router.post("/query-validation")
-def query_validation(
-    simple_query: SimpleQueryRequest, _: User = Depends(current_user)
-) -> QueryValidationResponse:
-    # Note if weak model prompt is chosen, this check does not occur and will simply return that
-    # the query is valid, this is because weaker models cannot really handle this task well.
-    # Additionally, some weak model servers cannot handle concurrent inferences.
-    logger.notice(f"Validating query: {simple_query.query}")
-    reasoning, answerable = get_query_answerability(simple_query.query)
-    return QueryValidationResponse(reasoning=reasoning, answerable=answerable)
-
-
 @basic_router.get("/user-searches")
 def get_user_search_sessions(
     user: User | None = Depends(current_user),
@@ -247,21 +231,6 @@ def get_search_session(
     return response
 
 
-# NOTE No longer used, after search/chat redesign.
-# No search responses are answered with a conversational generative AI response
-@basic_router.post("/stream-query-validation")
-def stream_query_validation(
-    simple_query: SimpleQueryRequest, _: User = Depends(current_user)
-) -> StreamingResponse:
-    # Note if weak model prompt is chosen, this check does not occur and will simply return that
-    # the query is valid, this is because weaker models cannot really handle this task well.
-    # Additionally, some weak model servers cannot handle concurrent inferences.
-    logger.notice(f"Validating query: {simple_query.query}")
-    return StreamingResponse(
-        stream_query_answerability(simple_query.query), media_type="application/json"
-    )
-
-
 @basic_router.post("/stream-answer-with-quote")
 def get_answer_with_quote(
     query_request: DirectQARequest,
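
Client-side, the two dead routes removed above simply disappear: anything still calling
POST /query-validation or POST /stream-query-validation will fail once this commit is
deployed. A hedged sketch of the request shape those routes used to accept (base URL,
port, and any route prefix are deployment-specific assumptions):

    # Sketch of the retired request; these routes no longer exist after this commit.
    import requests

    resp = requests.post(
        "http://localhost:8080/query-validation",      # assumed API server address
        json={"query": "Is our SOC 2 report public?"},  # SimpleQueryRequest carried a single "query" field
    )
    # Before this commit the handler returned a QueryValidationResponse:
    #   {"reasoning": "...", "answerable": true | false}
    # Afterwards the call fails because the route was removed.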