Mirror of https://github.com/danswer-ai/danswer.git (synced 2025-09-27 20:38:32 +02:00)
Remove Dead Code (#3234)
@@ -17,9 +17,6 @@ MAX_CHUNKS_FED_TO_CHAT = float(os.environ.get("MAX_CHUNKS_FED_TO_CHAT") or 10.0)
 # ~3k input, half for docs, half for chat history + prompts
 CHAT_TARGET_CHUNK_PERCENTAGE = 512 * 3 / 3072
 
-# For selecting a different LLM question-answering prompt format
-# Valid values: default, cot, weak
-QA_PROMPT_OVERRIDE = os.environ.get("QA_PROMPT_OVERRIDE") or None
 # 1 / (1 + DOC_TIME_DECAY * doc-age-in-years), set to 0 to have no decay
 # Capped in Vespa at 0.5
 DOC_TIME_DECAY = float(
@@ -27,8 +24,6 @@ DOC_TIME_DECAY = float(
 )
 BASE_RECENCY_DECAY = 0.5
 FAVOR_RECENT_DECAY_MULTIPLIER = 2.0
-# Currently this next one is not configurable via env
-DISABLE_LLM_QUERY_ANSWERABILITY = QA_PROMPT_OVERRIDE == "weak"
 # For the highest matching base size chunk, how many chunks above and below do we pull in by default
 # Note this is not in any of the deployment configs yet
 # Currently only applies to search flow not chat
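
As an aside on the constants touched above: the comments describe the document recency
multiplier as 1 / (1 + DOC_TIME_DECAY * doc-age-in-years), capped in Vespa at 0.5. A
minimal sketch of how those constants could combine, assuming the cap acts as a floor on
the multiplier and that FAVOR_RECENT_DECAY_MULTIPLIER simply scales the decay rate (the
function name and defaults below are illustrative, not code from this commit):

    # Illustrative sketch only; not code from this commit.
    def recency_multiplier(
        doc_age_years: float,
        doc_time_decay: float,
        favor_recent: bool = False,
        base_recency_decay: float = 0.5,       # BASE_RECENCY_DECAY
        favor_recent_multiplier: float = 2.0,  # FAVOR_RECENT_DECAY_MULTIPLIER
    ) -> float:
        decay = doc_time_decay * (favor_recent_multiplier if favor_recent else 1.0)
        # "Capped in Vespa at 0.5": assume the multiplier never drops below the base decay
        return max(base_recency_decay, 1.0 / (1.0 + decay * doc_age_years))
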
@@ -2,7 +2,6 @@ from langchain.schema.messages import HumanMessage
 
 from danswer.chat.models import LlmDoc
 from danswer.configs.chat_configs import LANGUAGE_HINT
-from danswer.configs.chat_configs import QA_PROMPT_OVERRIDE
 from danswer.context.search.models import InferenceChunk
 from danswer.db.search_settings import get_multilingual_expansion
 from danswer.llm.answering.models import PromptConfig
@@ -10,39 +9,10 @@ from danswer.llm.utils import message_to_prompt_and_imgs
 from danswer.prompts.direct_qa_prompts import CONTEXT_BLOCK
 from danswer.prompts.direct_qa_prompts import HISTORY_BLOCK
 from danswer.prompts.direct_qa_prompts import JSON_PROMPT
-from danswer.prompts.direct_qa_prompts import WEAK_LLM_PROMPT
 from danswer.prompts.prompt_utils import add_date_time_to_prompt
 from danswer.prompts.prompt_utils import build_complete_context_str
 
 
-def _build_weak_llm_quotes_prompt(
-    question: str,
-    context_docs: list[LlmDoc] | list[InferenceChunk],
-    history_str: str,
-    prompt: PromptConfig,
-) -> HumanMessage:
-    """Since Danswer supports a variety of LLMs, this less demanding prompt is provided
-    as an option to use with weaker LLMs such as small version, low float precision, quantized,
-    or distilled models. It only uses one context document and has very weak requirements of
-    output format.
-    """
-    context_block = ""
-    if context_docs:
-        context_block = CONTEXT_BLOCK.format(context_docs_str=context_docs[0].content)
-
-    prompt_str = WEAK_LLM_PROMPT.format(
-        system_prompt=prompt.system_prompt,
-        context_block=context_block,
-        task_prompt=prompt.task_prompt,
-        user_query=question,
-    )
-
-    if prompt.datetime_aware:
-        prompt_str = add_date_time_to_prompt(prompt_str=prompt_str)
-
-    return HumanMessage(content=prompt_str)
-
-
 def _build_strong_llm_quotes_prompt(
     question: str,
     context_docs: list[LlmDoc] | list[InferenceChunk],
@@ -81,15 +51,9 @@ def build_quotes_user_message(
     history_str: str,
     prompt: PromptConfig,
 ) -> HumanMessage:
-    prompt_builder = (
-        _build_weak_llm_quotes_prompt
-        if QA_PROMPT_OVERRIDE == "weak"
-        else _build_strong_llm_quotes_prompt
-    )
-
     query, _ = message_to_prompt_and_imgs(message)
 
-    return prompt_builder(
+    return _build_strong_llm_quotes_prompt(
         question=query,
         context_docs=context_docs,
         history_str=history_str,
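
Net effect of the two hunks above: build_quotes_user_message no longer consults
QA_PROMPT_OVERRIDE and always delegates to the strong, JSON-quoting prompt builder. A
hedged usage sketch of the surviving entry point (only the parameter names come from the
diff; every variable and value below is illustrative):

    # Sketch of the call path after this change; values are illustrative.
    user_msg = build_quotes_user_message(
        message=last_user_message,       # assumed: the user's chat message
        context_docs=retrieved_chunks,   # assumed: list[InferenceChunk] from retrieval
        history_str=chat_history_str,
        prompt=persona_prompt_config,    # assumed: a PromptConfig instance
    )
    # The result is now always a HumanMessage built by _build_strong_llm_quotes_prompt.
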
@@ -36,10 +36,6 @@ class PromptConfig(BaseModel):
     datetime_aware: bool = True
 
 
-class DocumentSetConfig(BaseModel):
-    id: int
-
-
 class ToolConfig(BaseModel):
     id: int
 
@@ -118,18 +118,6 @@ You should always get right to the point, and never use extraneous language.
 """
 
 
-# For weak LLM which only takes one chunk and cannot output json
-# Also not requiring quotes as it tends to not work
-WEAK_LLM_PROMPT = f"""
-{{system_prompt}}
-{{context_block}}
-{{task_prompt}}
-
-{QUESTION_PAT.upper()}
-{{user_query}}
-""".strip()
-
-
 # This is only for visualization for the users to specify their own prompts
 # The actual flow does not work like this
 PARAMATERIZED_PROMPT = f"""
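
For reference, the removed WEAK_LLM_PROMPT was a plain template with no JSON or quote
requirements, and the removed _build_weak_llm_quotes_prompt filled it with only the first
context document. A sketch of roughly what a rendered prompt looked like; the "QUERY:"
label stands in for QUESTION_PAT.upper() and the CONTEXT wording is an assumption, since
neither constant's value appears in this diff:

    # Sketch of the retired weak-LLM prompt rendering; labels and values are illustrative.
    weak_llm_prompt = (
        "{system_prompt}\n"
        "{context_block}\n"
        "{task_prompt}\n"
        "\n"
        "QUERY:\n"
        "{user_query}"
    )
    rendered = weak_llm_prompt.format(
        system_prompt="You are a helpful assistant.",
        context_block="CONTEXT:\n<content of the single top document>",
        task_prompt="Answer the user's question using the context.",
        user_query="How do I rotate an API key?",
    )
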
@@ -1,9 +1,9 @@
+# NOTE No longer used. This needs to be revisited later.
 import re
 from collections.abc import Iterator
 
 from danswer.chat.models import DanswerAnswerPiece
 from danswer.chat.models import StreamingError
-from danswer.configs.chat_configs import DISABLE_LLM_QUERY_ANSWERABILITY
 from danswer.llm.exceptions import GenAIDisabledException
 from danswer.llm.factory import get_default_llms
 from danswer.llm.utils import dict_based_prompt_to_langchain_prompt
@@ -46,7 +46,7 @@ def extract_answerability_bool(model_raw: str) -> bool:
 
 
 def get_query_answerability(
-    user_query: str, skip_check: bool = DISABLE_LLM_QUERY_ANSWERABILITY
+    user_query: str, skip_check: bool = False
 ) -> tuple[str, bool]:
     if skip_check:
         return "Query Answerability Evaluation feature is turned off", True
@@ -67,7 +67,7 @@ def get_query_answerability(
 
 
 def stream_query_answerability(
-    user_query: str, skip_check: bool = DISABLE_LLM_QUERY_ANSWERABILITY
+    user_query: str, skip_check: bool = False
 ) -> Iterator[str]:
     if skip_check:
         yield get_json_line(
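
With DISABLE_LLM_QUERY_ANSWERABILITY gone, skipping the answerability check is no longer
tied to the QA_PROMPT_OVERRIDE=weak environment setting; a caller that wants to skip it
now has to pass skip_check explicitly. A small sketch (the return value for the skip path
comes from the code above; the query text is illustrative, and the module is flagged as
no longer used anyway):

    # Sketch only; this module is marked "No longer used" in the diff above.
    reasoning, answerable = get_query_answerability(
        "What connectors does Danswer support?",
        skip_check=True,  # previously defaulted to True when QA_PROMPT_OVERRIDE == "weak"
    )
    # -> ("Query Answerability Evaluation feature is turned off", True)
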
@@ -29,10 +29,6 @@ class TagResponse(BaseModel):
     tags: list[SourceTag]
 
 
-class SimpleQueryRequest(BaseModel):
-    query: str
-
-
 class UpdateChatSessionThreadRequest(BaseModel):
     # If not specified, use Danswer default persona
     chat_session_id: UUID
@@ -217,6 +213,7 @@ class ChatSessionDetailResponse(BaseModel):
     current_alternate_model: str | None
 
 
+# This one is not used anymore
 class QueryValidationResponse(BaseModel):
     reasoning: str
     answerable: bool
@@ -34,15 +34,11 @@ from danswer.document_index.factory import get_default_document_index
 from danswer.document_index.vespa.index import VespaIndex
 from danswer.one_shot_answer.answer_question import stream_search_answer
 from danswer.one_shot_answer.models import DirectQARequest
-from danswer.secondary_llm_flows.query_validation import get_query_answerability
-from danswer.secondary_llm_flows.query_validation import stream_query_answerability
 from danswer.server.query_and_chat.models import AdminSearchRequest
 from danswer.server.query_and_chat.models import AdminSearchResponse
 from danswer.server.query_and_chat.models import ChatSessionDetails
 from danswer.server.query_and_chat.models import ChatSessionsResponse
-from danswer.server.query_and_chat.models import QueryValidationResponse
 from danswer.server.query_and_chat.models import SearchSessionDetailResponse
-from danswer.server.query_and_chat.models import SimpleQueryRequest
 from danswer.server.query_and_chat.models import SourceTag
 from danswer.server.query_and_chat.models import TagResponse
 from danswer.server.query_and_chat.token_limit import check_token_rate_limits
@@ -135,18 +131,6 @@ def get_tags(
     return TagResponse(tags=server_tags)
 
 
-@basic_router.post("/query-validation")
-def query_validation(
-    simple_query: SimpleQueryRequest, _: User = Depends(current_user)
-) -> QueryValidationResponse:
-    # Note if weak model prompt is chosen, this check does not occur and will simply return that
-    # the query is valid, this is because weaker models cannot really handle this task well.
-    # Additionally, some weak model servers cannot handle concurrent inferences.
-    logger.notice(f"Validating query: {simple_query.query}")
-    reasoning, answerable = get_query_answerability(simple_query.query)
-    return QueryValidationResponse(reasoning=reasoning, answerable=answerable)
-
-
 @basic_router.get("/user-searches")
 def get_user_search_sessions(
     user: User | None = Depends(current_user),
@@ -247,21 +231,6 @@ def get_search_session(
     return response
 
 
-# NOTE No longer used, after search/chat redesign.
-# No search responses are answered with a conversational generative AI response
-@basic_router.post("/stream-query-validation")
-def stream_query_validation(
-    simple_query: SimpleQueryRequest, _: User = Depends(current_user)
-) -> StreamingResponse:
-    # Note if weak model prompt is chosen, this check does not occur and will simply return that
-    # the query is valid, this is because weaker models cannot really handle this task well.
-    # Additionally, some weak model servers cannot handle concurrent inferences.
-    logger.notice(f"Validating query: {simple_query.query}")
-    return StreamingResponse(
-        stream_query_answerability(simple_query.query), media_type="application/json"
-    )
-
-
 @basic_router.post("/stream-answer-with-quote")
 def get_answer_with_quote(
     query_request: DirectQARequest,
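
Client-side, the two dead routes removed above simply disappear: anything still calling
POST /query-validation or POST /stream-query-validation will fail once this commit is
deployed. A hedged sketch of the request shape those routes used to accept (base URL,
port, and any route prefix are deployment-specific assumptions):

    # Sketch of the retired request; these routes no longer exist after this commit.
    import requests

    resp = requests.post(
        "http://localhost:8080/query-validation",      # assumed API server address
        json={"query": "Is our SOC 2 report public?"},  # SimpleQueryRequest carried a single "query" field
    )
    # Before this commit the handler returned a QueryValidationResponse:
    #   {"reasoning": "...", "answerable": true | false}
    # Afterwards the call fails because the route was removed.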