Remove Dead Code (#3234)

Yuhong Sun
2024-11-23 14:31:59 -08:00
committed by GitHub
parent 694e79f5e1
commit a3439605a5
7 changed files with 5 additions and 96 deletions

danswer/configs/chat_configs.py

@@ -17,9 +17,6 @@ MAX_CHUNKS_FED_TO_CHAT = float(os.environ.get("MAX_CHUNKS_FED_TO_CHAT") or 10.0)
 # ~3k input, half for docs, half for chat history + prompts
 CHAT_TARGET_CHUNK_PERCENTAGE = 512 * 3 / 3072
 
-# For selecting a different LLM question-answering prompt format
-# Valid values: default, cot, weak
-QA_PROMPT_OVERRIDE = os.environ.get("QA_PROMPT_OVERRIDE") or None
 # 1 / (1 + DOC_TIME_DECAY * doc-age-in-years), set to 0 to have no decay
 # Capped in Vespa at 0.5
 DOC_TIME_DECAY = float(
@@ -27,8 +24,6 @@ DOC_TIME_DECAY = float(
 )
 BASE_RECENCY_DECAY = 0.5
 FAVOR_RECENT_DECAY_MULTIPLIER = 2.0
-# Currently this next one is not configurable via env
-DISABLE_LLM_QUERY_ANSWERABILITY = QA_PROMPT_OVERRIDE == "weak"
 # For the highest matching base size chunk, how many chunks above and below do we pull in by default
 # Note this is not in any of the deployment configs yet
 # Currently only applies to search flow not chat
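
For orientation, the decay math the comments describe works out as below. This is a standalone sketch, not part of the diff: the helper name is mine, and the DOC_TIME_DECAY default (elided above) is assumed to be 0.5 for illustration.

DOC_TIME_DECAY = 0.5  # assumed default, the actual value is read from the env above
BASE_RECENCY_DECAY = 0.5
FAVOR_RECENT_DECAY_MULTIPLIER = 2.0

def recency_multiplier(doc_age_years: float, favor_recent: bool = False) -> float:
    # One plausible reading: favoring recent docs doubles the decay rate
    decay = DOC_TIME_DECAY * (FAVOR_RECENT_DECAY_MULTIPLIER if favor_recent else 1.0)
    # 1 / (1 + decay * age-in-years), floored at 0.5 per the "Capped in Vespa" comment
    return max(1.0 / (1.0 + decay * doc_age_years), BASE_RECENCY_DECAY)

print(recency_multiplier(0.0))  # 1.0: no penalty for a brand-new document
print(recency_multiplier(4.0))  # 0.5: old documents hit the floor

Relatedly, CHAT_TARGET_CHUNK_PERCENTAGE = 512 * 3 / 3072 evaluates to 0.5, matching the "half for docs" comment on the ~3k token input budget.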

danswer/llm/answering/prompts/quotes_prompt.py

@@ -2,7 +2,6 @@ from langchain.schema.messages import HumanMessage
 from danswer.chat.models import LlmDoc
 from danswer.configs.chat_configs import LANGUAGE_HINT
-from danswer.configs.chat_configs import QA_PROMPT_OVERRIDE
 from danswer.context.search.models import InferenceChunk
 from danswer.db.search_settings import get_multilingual_expansion
 from danswer.llm.answering.models import PromptConfig
@@ -10,39 +9,10 @@ from danswer.llm.utils import message_to_prompt_and_imgs
 from danswer.prompts.direct_qa_prompts import CONTEXT_BLOCK
 from danswer.prompts.direct_qa_prompts import HISTORY_BLOCK
 from danswer.prompts.direct_qa_prompts import JSON_PROMPT
-from danswer.prompts.direct_qa_prompts import WEAK_LLM_PROMPT
 from danswer.prompts.prompt_utils import add_date_time_to_prompt
 from danswer.prompts.prompt_utils import build_complete_context_str
-
-
-def _build_weak_llm_quotes_prompt(
-    question: str,
-    context_docs: list[LlmDoc] | list[InferenceChunk],
-    history_str: str,
-    prompt: PromptConfig,
-) -> HumanMessage:
-    """Since Danswer supports a variety of LLMs, this less demanding prompt is provided
-    as an option to use with weaker LLMs such as small version, low float precision, quantized,
-    or distilled models. It only uses one context document and has very weak requirements of
-    output format.
-    """
-    context_block = ""
-    if context_docs:
-        context_block = CONTEXT_BLOCK.format(context_docs_str=context_docs[0].content)
-
-    prompt_str = WEAK_LLM_PROMPT.format(
-        system_prompt=prompt.system_prompt,
-        context_block=context_block,
-        task_prompt=prompt.task_prompt,
-        user_query=question,
-    )
-
-    if prompt.datetime_aware:
-        prompt_str = add_date_time_to_prompt(prompt_str=prompt_str)
-
-    return HumanMessage(content=prompt_str)
-
-
 def _build_strong_llm_quotes_prompt(
     question: str,
     context_docs: list[LlmDoc] | list[InferenceChunk],
@@ -81,15 +51,9 @@ def build_quotes_user_message(
     history_str: str,
     prompt: PromptConfig,
 ) -> HumanMessage:
-    prompt_builder = (
-        _build_weak_llm_quotes_prompt
-        if QA_PROMPT_OVERRIDE == "weak"
-        else _build_strong_llm_quotes_prompt
-    )
-
     query, _ = message_to_prompt_and_imgs(message)
 
-    return prompt_builder(
+    return _build_strong_llm_quotes_prompt(
        question=query,
        context_docs=context_docs,
        history_str=history_str,
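
The deleted branch is the interesting part here: build_quotes_user_message used to switch between a single-document "weak" prompt and the full-context "strong" prompt based on QA_PROMPT_OVERRIDE. A minimal standalone sketch of the difference, using a stand-in template and invented data rather than the real CONTEXT_BLOCK:

CONTEXT_BLOCK = "CONTEXT:\n{context_docs_str}\n"  # stand-in, not the real template

def weak_context(context_docs: list[str]) -> str:
    # Weak path: only the first document, keeping the prompt small for weak LLMs
    return CONTEXT_BLOCK.format(context_docs_str=context_docs[0]) if context_docs else ""

def strong_context(context_docs: list[str]) -> str:
    # Strong path: all documents (the real code uses build_complete_context_str)
    return CONTEXT_BLOCK.format(context_docs_str="\n\n".join(context_docs))

docs = ["Doc A: reset passwords via settings.", "Doc B: SSO is configured by admins."]
print(weak_context(docs))    # only Doc A
print(strong_context(docs))  # Doc A and Doc B

With the override gone, every call takes the strong path.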

View File

@@ -36,10 +36,6 @@ class PromptConfig(BaseModel):
     datetime_aware: bool = True
 
 
 class DocumentSetConfig(BaseModel):
     id: int
 
 
 class ToolConfig(BaseModel):
     id: int

danswer/prompts/direct_qa_prompts.py

@@ -118,18 +118,6 @@ You should always get right to the point, and never use extraneous language.
 """
 
-# For weak LLM which only takes one chunk and cannot output json
-# Also not requiring quotes as it tends to not work
-WEAK_LLM_PROMPT = f"""
-{{system_prompt}}
-
-{{context_block}}
-
-{{task_prompt}}
-
-{QUESTION_PAT.upper()}
-{{user_query}}
-""".strip()
-
 # This is only for visualization for the users to specify their own prompts
 # The actual flow does not work like this
 PARAMATERIZED_PROMPT = f"""
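
For reference, here is roughly what the removed template produced when filled in. A standalone sketch: the fill-in values are invented, and QUESTION_PAT is assumed to be "Query:" (it is imported from the prompt constants, not shown in this diff).

QUESTION_PAT = "Query:"  # assumed value of the imported constant

WEAK_LLM_PROMPT = f"""
{{system_prompt}}

{{context_block}}

{{task_prompt}}

{QUESTION_PAT.upper()}
{{user_query}}
""".strip()

print(
    WEAK_LLM_PROMPT.format(
        system_prompt="You are a helpful assistant.",
        context_block="CONTEXT:\nDanswer supports many LLM providers.",
        task_prompt="Answer using only the context above.",
        user_query="Which providers are supported?",
    )
)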

danswer/secondary_llm_flows/query_validation.py

@@ -1,9 +1,9 @@
+# NOTE No longer used. This needs to be revisited later.
 import re
 from collections.abc import Iterator
 
 from danswer.chat.models import DanswerAnswerPiece
 from danswer.chat.models import StreamingError
-from danswer.configs.chat_configs import DISABLE_LLM_QUERY_ANSWERABILITY
 from danswer.llm.exceptions import GenAIDisabledException
 from danswer.llm.factory import get_default_llms
 from danswer.llm.utils import dict_based_prompt_to_langchain_prompt
@@ -46,7 +46,7 @@ def extract_answerability_bool(model_raw: str) -> bool:
 def get_query_answerability(
-    user_query: str, skip_check: bool = DISABLE_LLM_QUERY_ANSWERABILITY
+    user_query: str, skip_check: bool = False
 ) -> tuple[str, bool]:
     if skip_check:
         return "Query Answerability Evaluation feature is turned off", True
@@ -67,7 +67,7 @@ def get_query_answerability(
 def stream_query_answerability(
-    user_query: str, skip_check: bool = DISABLE_LLM_QUERY_ANSWERABILITY
+    user_query: str, skip_check: bool = False
 ) -> Iterator[str]:
     if skip_check:
         yield get_json_line(
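
Call shapes for the two entry points, per the signatures above. A sketch assuming the module is importable; the example query is invented:

from danswer.secondary_llm_flows.query_validation import (
    get_query_answerability,
    stream_query_answerability,
)

# Blocking variant: returns (reasoning, answerable); skip_check=True short-circuits
reasoning, answerable = get_query_answerability("How do I reset my password?")

# Streaming variant: yields JSON lines, suitable for a StreamingResponse body
for line in stream_query_answerability("How do I reset my password?"):
    print(line)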

danswer/server/query_and_chat/models.py

@@ -29,10 +29,6 @@ class TagResponse(BaseModel):
     tags: list[SourceTag]
 
 
-class SimpleQueryRequest(BaseModel):
-    query: str
-
-
 class UpdateChatSessionThreadRequest(BaseModel):
     # If not specified, use Danswer default persona
     chat_session_id: UUID
@@ -217,6 +213,7 @@ class ChatSessionDetailResponse(BaseModel):
     current_alternate_model: str | None
 
 
+# This one is not used anymore
 class QueryValidationResponse(BaseModel):
     reasoning: str
     answerable: bool
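
The kept-but-unused model serializes to a two-field payload. A quick sketch with illustrative values, assuming pydantic v2's model_dump_json:

from pydantic import BaseModel

class QueryValidationResponse(BaseModel):
    reasoning: str
    answerable: bool

resp = QueryValidationResponse(
    reasoning="The query asks about indexed documentation.",  # illustrative only
    answerable=True,
)
print(resp.model_dump_json())  # {"reasoning":"...","answerable":true}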

danswer/server/query_and_chat/query_backend.py

@@ -34,15 +34,11 @@ from danswer.document_index.factory import get_default_document_index
 from danswer.document_index.vespa.index import VespaIndex
 from danswer.one_shot_answer.answer_question import stream_search_answer
 from danswer.one_shot_answer.models import DirectQARequest
-from danswer.secondary_llm_flows.query_validation import get_query_answerability
-from danswer.secondary_llm_flows.query_validation import stream_query_answerability
 from danswer.server.query_and_chat.models import AdminSearchRequest
 from danswer.server.query_and_chat.models import AdminSearchResponse
 from danswer.server.query_and_chat.models import ChatSessionDetails
 from danswer.server.query_and_chat.models import ChatSessionsResponse
-from danswer.server.query_and_chat.models import QueryValidationResponse
 from danswer.server.query_and_chat.models import SearchSessionDetailResponse
-from danswer.server.query_and_chat.models import SimpleQueryRequest
 from danswer.server.query_and_chat.models import SourceTag
 from danswer.server.query_and_chat.models import TagResponse
 from danswer.server.query_and_chat.token_limit import check_token_rate_limits
@@ -135,18 +131,6 @@ def get_tags(
     return TagResponse(tags=server_tags)
 
 
-@basic_router.post("/query-validation")
-def query_validation(
-    simple_query: SimpleQueryRequest, _: User = Depends(current_user)
-) -> QueryValidationResponse:
-    # Note if weak model prompt is chosen, this check does not occur and will simply return that
-    # the query is valid, this is because weaker models cannot really handle this task well.
-    # Additionally, some weak model servers cannot handle concurrent inferences.
-    logger.notice(f"Validating query: {simple_query.query}")
-    reasoning, answerable = get_query_answerability(simple_query.query)
-    return QueryValidationResponse(reasoning=reasoning, answerable=answerable)
-
-
 @basic_router.get("/user-searches")
 def get_user_search_sessions(
     user: User | None = Depends(current_user),
@@ -247,21 +231,6 @@ def get_search_session(
     return response
 
 
-# NOTE No longer used, after search/chat redesign.
-# No search responses are answered with a conversational generative AI response
-@basic_router.post("/stream-query-validation")
-def stream_query_validation(
-    simple_query: SimpleQueryRequest, _: User = Depends(current_user)
-) -> StreamingResponse:
-    # Note if weak model prompt is chosen, this check does not occur and will simply return that
-    # the query is valid, this is because weaker models cannot really handle this task well.
-    # Additionally, some weak model servers cannot handle concurrent inferences.
-    logger.notice(f"Validating query: {simple_query.query}")
-    return StreamingResponse(
-        stream_query_answerability(simple_query.query), media_type="application/json"
-    )
-
-
 @basic_router.post("/stream-answer-with-quote")
 def get_answer_with_quote(
     query_request: DirectQARequest,
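
For context, the two deleted routes were exercised roughly like this. A hypothetical client sketch: the base URL and the absence of auth headers are assumptions, while the paths and the {"query": ...} body follow SimpleQueryRequest above.

import requests

BASE = "http://localhost:8080"  # hypothetical local API server

# Deleted: blocking validation, returned {"reasoning": ..., "answerable": ...}
resp = requests.post(f"{BASE}/query-validation", json={"query": "What is Danswer?"})
print(resp.json())

# Deleted: streaming variant, yielded JSON lines
with requests.post(
    f"{BASE}/stream-query-validation",
    json={"query": "What is Danswer?"},
    stream=True,
) as streaming:
    for line in streaming.iter_lines():
        print(line)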