From b337a521f840482683d59285441a37362c9423f1 Mon Sep 17 00:00:00 2001 From: Yuhong Sun Date: Sun, 17 Sep 2023 19:23:59 -0700 Subject: [PATCH] Slack Bot Interface Rework (#454) --- backend/danswer/bots/slack/blocks.py | 165 ++++++++++++++---- .../bots/slack/handlers/handle_feedback.py | 47 ++++- .../bots/slack/handlers/handle_message.py | 23 ++- backend/danswer/bots/slack/listener.py | 35 ++-- backend/danswer/bots/slack/utils.py | 71 ++++++-- backend/danswer/chunking/models.py | 12 ++ backend/danswer/configs/app_configs.py | 3 + backend/danswer/configs/constants.py | 1 + backend/danswer/db/feedback.py | 1 + backend/danswer/direct_qa/answer_question.py | 22 +-- backend/danswer/direct_qa/qa_block.py | 8 +- backend/danswer/direct_qa/qa_utils.py | 6 +- backend/danswer/search/semantic_search.py | 2 +- .../secondary_llm_flows/answer_validation.py | 85 +++++---- backend/danswer/server/models.py | 1 + backend/danswer/utils/text_processing.py | 4 + 16 files changed, 345 insertions(+), 141 deletions(-) diff --git a/backend/danswer/bots/slack/blocks.py b/backend/danswer/bots/slack/blocks.py index e2f3c39b6380..926c2289dd73 100644 --- a/backend/danswer/bots/slack/blocks.py +++ b/backend/danswer/bots/slack/blocks.py @@ -1,21 +1,31 @@ from slack_sdk.models.blocks import ActionsBlock from slack_sdk.models.blocks import Block from slack_sdk.models.blocks import ButtonElement +from slack_sdk.models.blocks import ConfirmObject +from slack_sdk.models.blocks import DividerBlock +from slack_sdk.models.blocks import HeaderBlock from slack_sdk.models.blocks import SectionBlock from danswer.bots.slack.constants import DISLIKE_BLOCK_ACTION_ID from danswer.bots.slack.constants import LIKE_BLOCK_ACTION_ID -from danswer.bots.slack.utils import build_block_id_from_query_event_id +from danswer.bots.slack.utils import build_feedback_block_id +from danswer.bots.slack.utils import translate_vespa_highlight_to_slack from danswer.configs.app_configs import DANSWER_BOT_NUM_DOCS_TO_DISPLAY +from 
danswer.configs.app_configs import ENABLE_SLACK_DOC_FEEDBACK from danswer.configs.constants import DocumentSource +from danswer.configs.constants import SearchFeedbackType from danswer.connectors.slack.utils import UserIdReplacer from danswer.direct_qa.interfaces import DanswerQuote from danswer.server.models import SearchDoc +from danswer.utils.text_processing import replace_whitespaces_w_space -def build_feedback_block(query_event_id: int) -> Block: +_MAX_BLURB_LEN = 75 + + +def build_qa_feedback_block(query_event_id: int) -> Block: return ActionsBlock( - block_id=build_block_id_from_query_event_id(query_event_id), + block_id=build_feedback_block_id(query_event_id), elements=[ ButtonElement( action_id=LIKE_BLOCK_ACTION_ID, @@ -31,11 +41,38 @@ def build_feedback_block(query_event_id: int) -> Block: ) -_MAX_BLURB_LEN = 75 +def build_doc_feedback_block( + query_event_id: int, + document_id: str, + document_rank: int, +) -> Block: + return ActionsBlock( + block_id=build_feedback_block_id(query_event_id, document_id, document_rank), + elements=[ + ButtonElement( + action_id=SearchFeedbackType.ENDORSE.value, + text="⬆", + style="primary", + confirm=ConfirmObject( + title="Endorse this Document", + text="This is a good source of information and should be shown more often!", + ), + ), + ButtonElement( + action_id=SearchFeedbackType.REJECT.value, + text="⬇", + style="danger", + confirm=ConfirmObject( + title="Reject this Document", + text="This is a bad source of information and should be shown less often.", + ), + ), + ], + ) def _build_custom_semantic_identifier( - semantic_identifier: str, blurb: str, source: str + semantic_identifier: str, match_str: str, source: str ) -> str: """ On slack, since we just show the semantic identifier rather than semantic + blurb, we need @@ -43,7 +80,9 @@ def _build_custom_semantic_identifier( """ if source == DocumentSource.SLACK.value: truncated_blurb = ( - f"{blurb[:_MAX_BLURB_LEN]}..." 
if len(blurb) > _MAX_BLURB_LEN else blurb + f"{match_str[:_MAX_BLURB_LEN]}..." + if len(match_str) > _MAX_BLURB_LEN + else match_str ) # NOTE: removing tags so that we don't accidentally tag users in Slack + # so that it can be used as part of a link @@ -61,37 +100,51 @@ def _build_custom_semantic_identifier( return semantic_identifier -def build_documents_block( +def build_documents_blocks( documents: list[SearchDoc], - already_displayed_doc_identifiers: list[str], + query_event_id: int, num_docs_to_display: int = DANSWER_BOT_NUM_DOCS_TO_DISPLAY, -) -> SectionBlock: - seen_docs_identifiers = set(already_displayed_doc_identifiers) - top_document_lines: list[str] = [] - for d in documents: + include_feedback: bool = ENABLE_SLACK_DOC_FEEDBACK, +) -> list[Block]: + seen_docs_identifiers = set() + section_blocks: list[Block] = [HeaderBlock(text="Reference Documents")] + included_docs = 0 + for rank, d in enumerate(documents): if d.document_id in seen_docs_identifiers: continue seen_docs_identifiers.add(d.document_id) - custom_semantic_identifier = _build_custom_semantic_identifier( - semantic_identifier=d.semantic_identifier, - blurb=d.blurb, - source=d.source_type, + used_chars = len(d.semantic_identifier) + 3 + match_str = translate_vespa_highlight_to_slack(d.match_highlights, used_chars) + + included_docs += 1 + + section_blocks.append( + SectionBlock( + fields=[ + f"<{d.link}|{d.semantic_identifier}>:\n>{match_str}", + ] + ), ) - top_document_lines.append(f"- <{d.link}|{custom_semantic_identifier}>") - if len(top_document_lines) >= num_docs_to_display: + if include_feedback: + section_blocks.append( + build_doc_feedback_block( + query_event_id=query_event_id, + document_id=d.document_id, + document_rank=rank, + ), + ) + + section_blocks.append(DividerBlock()) + + if included_docs >= num_docs_to_display: break - return SectionBlock( - fields=[ - "*Other potentially relevant docs:*", - *top_document_lines, - ] - ) + return section_blocks -def build_quotes_block( +def 
build_blurb_quotes_block( quotes: list[DanswerQuote], ) -> tuple[list[Block], list[str]]: quote_lines: list[str] = [] @@ -104,7 +157,7 @@ def build_quotes_block( doc_identifiers.append(doc_id) custom_semantic_identifier = _build_custom_semantic_identifier( semantic_identifier=doc_name, - blurb=quote.blurb, + match_str=quote.blurb, source=quote.source_type, ) quote_lines.append(f"- <{doc_link}|{custom_semantic_identifier}>") @@ -125,14 +178,58 @@ def build_quotes_block( ) +def build_quotes_block( + quotes: list[DanswerQuote], +) -> list[Block]: + quote_lines: list[str] = [] + doc_to_quotes: dict[str, list[str]] = {} + doc_to_link: dict[str, str] = {} + doc_to_sem_id: dict[str, str] = {} + for q in quotes: + quote = q.quote + doc_id = q.document_id + doc_link = q.link + doc_name = q.semantic_identifier + if doc_link and doc_name and doc_id and quote: + if doc_id not in doc_to_quotes: + doc_to_quotes[doc_id] = [quote] + doc_to_link[doc_id] = doc_link + doc_to_sem_id[doc_id] = doc_name + else: + doc_to_quotes[doc_id].append(quote) + + for doc_id, quote_strs in doc_to_quotes.items(): + quotes_str_clean = [ + replace_whitespaces_w_space(q_str).strip() for q_str in quote_strs + ] + longest_quotes = sorted(quotes_str_clean, key=len, reverse=True)[:5] + single_quote_str = "\n".join([f"```{q_str}```" for q_str in longest_quotes]) + link = doc_to_link[doc_id] + sem_id = doc_to_sem_id[doc_id] + quote_lines.append(f"<{link}|{sem_id}>\n{single_quote_str}") + + if not doc_to_quotes: + return [] + + return [ + SectionBlock( + fields=[ + "*Relevant Snippets:*", + *quote_lines, + ] + ) + ] + + def build_qa_response_blocks( query_event_id: int, answer: str | None, quotes: list[DanswerQuote] | None, - documents: list[SearchDoc], ) -> list[Block]: - doc_identifiers: list[str] = [] quotes_blocks: list[Block] = [] + + ai_answer_header = HeaderBlock(text="AI Answer") + if not answer: answer_block = SectionBlock( text="Sorry, I was unable to find an answer, but I did find some potentially 
relevant docs 🤓" @@ -140,7 +237,7 @@ def build_qa_response_blocks( else: answer_block = SectionBlock(text=answer) if quotes: - quotes_blocks, doc_identifiers = build_quotes_block(quotes) + quotes_blocks = build_quotes_block(quotes) # if no quotes OR `build_quotes_block()` did not give back any blocks if not quotes_blocks: @@ -150,9 +247,13 @@ def build_qa_response_blocks( ) ] - documents_block = build_documents_block(documents, doc_identifiers) + feedback_block = build_qa_feedback_block(query_event_id=query_event_id) return ( - [answer_block] + [ + ai_answer_header, + answer_block, + feedback_block, + ] + quotes_blocks - + [documents_block, build_feedback_block(query_event_id=query_event_id)] + + [DividerBlock()] ) diff --git a/backend/danswer/bots/slack/handlers/handle_feedback.py b/backend/danswer/bots/slack/handlers/handle_feedback.py index 917cfea9eb9c..e8799f6f4e02 100644 --- a/backend/danswer/bots/slack/handlers/handle_feedback.py +++ b/backend/danswer/bots/slack/handlers/handle_feedback.py @@ -1,27 +1,56 @@ from slack_sdk import WebClient from sqlalchemy.orm import Session +from danswer.bots.slack.constants import DISLIKE_BLOCK_ACTION_ID +from danswer.bots.slack.constants import LIKE_BLOCK_ACTION_ID +from danswer.bots.slack.utils import decompose_block_id from danswer.configs.constants import QAFeedbackType +from danswer.configs.constants import SearchFeedbackType from danswer.db.engine import get_sqlalchemy_engine +from danswer.db.feedback import create_doc_retrieval_feedback from danswer.db.feedback import update_query_event_feedback -def handle_qa_feedback( - query_id: int, - feedback_type: QAFeedbackType, +def handle_slack_feedback( + block_id: str, + feedback_type: str, client: WebClient, user_id_to_post_confirmation: str, channel_id_to_post_confirmation: str, thread_ts_to_post_confirmation: str, ) -> None: engine = get_sqlalchemy_engine() + + query_id, doc_id, doc_rank = decompose_block_id(block_id) + with Session(engine) as db_session: - 
update_query_event_feedback( - feedback=feedback_type, - query_id=query_id, - user_id=None, # no "user" for Slack bot for now - db_session=db_session, - ) + if feedback_type in [LIKE_BLOCK_ACTION_ID, DISLIKE_BLOCK_ACTION_ID]: + update_query_event_feedback( + feedback=QAFeedbackType.LIKE + if feedback_type == LIKE_BLOCK_ACTION_ID + else QAFeedbackType.DISLIKE, + query_id=query_id, + user_id=None, # no "user" for Slack bot for now + db_session=db_session, + ) + if feedback_type in [ + SearchFeedbackType.ENDORSE.value, + SearchFeedbackType.REJECT.value, + ]: + if doc_id is None or doc_rank is None: + raise ValueError("Missing information for Document Feedback") + + create_doc_retrieval_feedback( + qa_event_id=query_id, + document_id=doc_id, + document_rank=doc_rank, + user_id=None, + db_session=db_session, + clicked=False, # Not tracking this for Slack + feedback=SearchFeedbackType.ENDORSE + if feedback_type == SearchFeedbackType.ENDORSE.value + else SearchFeedbackType.REJECT, + ) # post message to slack confirming that feedback was received client.chat_postEphemeral( diff --git a/backend/danswer/bots/slack/handlers/handle_message.py b/backend/danswer/bots/slack/handlers/handle_message.py index e06f17b57459..da833aa6505d 100644 --- a/backend/danswer/bots/slack/handlers/handle_message.py +++ b/backend/danswer/bots/slack/handlers/handle_message.py @@ -4,6 +4,7 @@ from retry import retry from slack_sdk import WebClient from sqlalchemy.orm import Session +from danswer.bots.slack.blocks import build_documents_blocks from danswer.bots.slack.blocks import build_qa_response_blocks from danswer.bots.slack.utils import respond_in_thread from danswer.configs.app_configs import DANSWER_BOT_ANSWER_GENERATION_TIMEOUT @@ -37,6 +38,7 @@ def handle_message( def _get_answer(question: QuestionRequest) -> QAResponse: engine = get_sqlalchemy_engine() with Session(engine, expire_on_commit=False) as db_session: + # This also handles creating the query event in postgres answer = 
answer_qa_query( question=question, user=None, @@ -54,7 +56,7 @@ def handle_message( QuestionRequest( query=msg, collection=DOCUMENT_INDEX_NAME, - use_keyword=False, # always use semantic search when handling slack messages + use_keyword=False, # always use semantic search when handling Slack messages filters=None, offset=None, ) @@ -75,6 +77,14 @@ def handle_message( ) return + if answer.eval_res_valid is False: + logger.info( + "Answer was evaluated to be invalid, throwing it away without responding." + ) + if answer.answer: + logger.debug(answer.answer) + return + if not answer.top_ranked_docs: logger.error(f"Unable to answer question: '{msg}' - no documents found") # Optionally, respond in thread with the error message, Used primarily @@ -96,21 +106,26 @@ def handle_message( return # convert raw response into "nicely" formatted Slack message - blocks = build_qa_response_blocks( + answer_blocks = build_qa_response_blocks( query_event_id=answer.query_event_id, answer=answer.answer, - documents=answer.top_ranked_docs, quotes=answer.quotes, ) + + document_blocks = build_documents_blocks( + documents=answer.top_ranked_docs, query_event_id=answer.query_event_id + ) + try: respond_in_thread( client=client, channel=channel, - blocks=blocks, + blocks=answer_blocks + document_blocks, thread_ts=message_ts_to_respond_to, # don't unfurl, since otherwise we will have 5+ previews which makes the message very long unfurl=False, ) + except Exception: logger.exception( f"Unable to process message - could not respond in slack in {num_retries} attempts" diff --git a/backend/danswer/bots/slack/listener.py b/backend/danswer/bots/slack/listener.py index 803005e83c70..dd2e0a2fabdf 100644 --- a/backend/danswer/bots/slack/listener.py +++ b/backend/danswer/bots/slack/listener.py @@ -9,12 +9,9 @@ from slack_sdk.socket_mode import SocketModeClient from slack_sdk.socket_mode.request import SocketModeRequest from slack_sdk.socket_mode.response import SocketModeResponse -from 
danswer.bots.slack.constants import DISLIKE_BLOCK_ACTION_ID -from danswer.bots.slack.constants import LIKE_BLOCK_ACTION_ID -from danswer.bots.slack.handlers.handle_feedback import handle_qa_feedback +from danswer.bots.slack.handlers.handle_feedback import handle_slack_feedback from danswer.bots.slack.handlers.handle_message import handle_message -from danswer.bots.slack.utils import get_query_event_id_from_block_id -from danswer.configs.constants import QAFeedbackType +from danswer.bots.slack.utils import decompose_block_id from danswer.utils.logger import setup_logger logger = setup_logger() @@ -54,7 +51,7 @@ def _get_socket_client() -> SocketModeClient: def _process_slack_event(client: SocketModeClient, req: SocketModeRequest) -> None: - logger.info(f"Received request of type: '{req.type}', with paylod: '{req.payload}'") + logger.info(f"Received Slack request of type: '{req.type}'") if req.type == "events_api": # Acknowledge the request immediately response = SocketModeResponse(envelope_id=req.envelope_id) @@ -95,7 +92,7 @@ def _process_slack_event(client: SocketModeClient, req: SocketModeRequest) -> No message_ts = event.get("ts") thread_ts = event.get("thread_ts") - # pick the root of the thread (if a thread exists) + # Pick the root of the thread (if a thread exists) message_ts_to_respond_to = cast(str, thread_ts or message_ts) if thread_ts and message_ts != thread_ts: channel_specific_logger.info( @@ -122,7 +119,7 @@ def _process_slack_event(client: SocketModeClient, req: SocketModeRequest) -> No f"Successfully processed message with ts: '{message_ts}'" ) - # handle button clicks + # Handle button clicks if req.type == "interactive" and req.payload.get("type") == "block_actions": # Acknowledge the request immediately response = SocketModeResponse(envelope_id=req.envelope_id) @@ -134,31 +131,22 @@ def _process_slack_event(client: SocketModeClient, req: SocketModeRequest) -> No return action = cast(dict[str, Any], actions[0]) - action_id = 
action.get("action_id") - if action_id == LIKE_BLOCK_ACTION_ID: - feedback_type = QAFeedbackType.LIKE - elif action_id == DISLIKE_BLOCK_ACTION_ID: - feedback_type = QAFeedbackType.DISLIKE - else: - logger.error( - f"Unable to process block action - unknown action_id: '{action_id}'" - ) - return - + action_id = cast(str, action.get("action_id")) block_id = cast(str, action.get("block_id")) user_id = cast(str, req.payload["user"]["id"]) channel_id = cast(str, req.payload["container"]["channel_id"]) thread_ts = cast(str, req.payload["container"]["thread_ts"]) - query_event_id = get_query_event_id_from_block_id(block_id) - handle_qa_feedback( - query_id=query_event_id, - feedback_type=feedback_type, + + handle_slack_feedback( + block_id=block_id, + feedback_type=action_id, client=client.web_client, user_id_to_post_confirmation=user_id, channel_id_to_post_confirmation=channel_id, thread_ts_to_post_confirmation=thread_ts, ) + query_event_id, _, _ = decompose_block_id(block_id) logger.info(f"Successfully handled QA feedback for event: {query_event_id}") @@ -181,6 +169,7 @@ def process_slack_event(client: SocketModeClient, req: SocketModeRequest) -> Non if __name__ == "__main__": socket_client = _get_socket_client() socket_client.socket_mode_request_listeners.append(process_slack_event) # type: ignore + # Establish a WebSocket connection to the Socket Mode servers logger.info("Listening for messages from Slack...") socket_client.connect() diff --git a/backend/danswer/bots/slack/utils.py b/backend/danswer/bots/slack/utils.py index 5209e9ea665b..1fa1401883fe 100644 --- a/backend/danswer/bots/slack/utils.py +++ b/backend/danswer/bots/slack/utils.py @@ -1,5 +1,6 @@ import logging import random +import re import string from typing import cast @@ -9,9 +10,10 @@ from slack_sdk.models.blocks import Block from slack_sdk.models.metadata import Metadata from danswer.configs.app_configs import DANSWER_BOT_NUM_RETRIES +from danswer.configs.constants import ID_SEPARATOR from 
danswer.connectors.slack.utils import make_slack_api_rate_limited from danswer.utils.logger import setup_logger - +from danswer.utils.text_processing import replace_whitespaces_w_space logger = setup_logger() @@ -34,11 +36,6 @@ def respond_in_thread( if not text and not blocks: raise ValueError("One of `text` or `blocks` must be provided") - if text: - logger.debug(f"Trying to send message: {text}") - if blocks: - logger.debug(f"Trying to send blocks: {blocks}") - slack_call = make_slack_api_rate_limited(client.chat_postMessage) response = slack_call( channel=channel, @@ -53,9 +50,63 @@ def respond_in_thread( raise RuntimeError(f"Unable to post message: {response}") -def build_block_id_from_query_event_id(query_event_id: int) -> str: - return f"{''.join(random.choice(string.ascii_letters) for _ in range(5))}:{query_event_id}" +def build_feedback_block_id( + query_event_id: int, + document_id: str | None = None, + document_rank: int | None = None, +) -> str: + unique_prefix = "".join(random.choice(string.ascii_letters) for _ in range(10)) + if document_id is not None: + if not document_id or document_rank is None: + raise ValueError("Invalid document, missing information") + if ID_SEPARATOR in document_id: + raise ValueError( + "Separator pattern should not already exist in document id" + ) + block_id = ID_SEPARATOR.join( + [str(query_event_id), document_id, str(document_rank)] + ) + else: + block_id = str(query_event_id) + + return unique_prefix + ID_SEPARATOR + block_id -def get_query_event_id_from_block_id(block_id: str) -> int: - return int(block_id.split(":")[-1]) +def decompose_block_id(block_id: str) -> tuple[int, str | None, int | None]: + """Decompose into query_id, document_id, document_rank, see above function""" + try: + components = block_id.split(ID_SEPARATOR) + if len(components) != 2 and len(components) != 4: + raise ValueError("Block ID does not contain right number of elements") + + if len(components) == 2: + return int(components[-1]), None, None 
+ + return int(components[1]), components[2], int(components[3]) + + except Exception as e: + logger.error(e) + raise ValueError("Received invalid Feedback Block Identifier") + + +def translate_vespa_highlight_to_slack(match_strs: list[str], used_chars: int) -> str: + def _replace_highlight(s: str) -> str: + s = re.sub(r"</hi>(?=\S)", "", s) + s = re.sub(r"(?<=\S)<hi>", "", s) + s = s.replace("</hi>", "*").replace("<hi>", "*") + return s + + final_matches = [ + replace_whitespaces_w_space(_replace_highlight(match_str)).strip() + for match_str in match_strs + if match_str + ] + combined = "... ".join(final_matches) + + # Slack introduces "Show More" after 300 on desktop which is ugly + # But don't trim the message if there is still a highlight after 300 chars + remaining = 300 - used_chars + if len(combined) > remaining and "*" not in combined[remaining:]: + combined = combined[: remaining - 3] + "..." + + return combined diff --git a/backend/danswer/chunking/models.py b/backend/danswer/chunking/models.py index f9338a8eab29..faee6333c0b8 100644 --- a/backend/danswer/chunking/models.py +++ b/backend/danswer/chunking/models.py @@ -68,6 +68,18 @@ class InferenceChunk(BaseChunk): # ["the answer is 42", "he couldn't find an answer"] match_highlights: list[str] + def __repr__(self) -> str: + blurb_words = self.blurb.split() + short_blurb = "" + for word in blurb_words: + if not short_blurb: + short_blurb = word + continue + if len(short_blurb) > 25: + break + short_blurb += " " + word + return f"Inference Chunk: {self.document_id} - {short_blurb}..."
+ @classmethod def from_dict(cls, init_dict: dict[str, Any]) -> "InferenceChunk": init_kwargs = { diff --git a/backend/danswer/configs/app_configs.py b/backend/danswer/configs/app_configs.py index 2f0103041b7e..055c2367ea62 100644 --- a/backend/danswer/configs/app_configs.py +++ b/backend/danswer/configs/app_configs.py @@ -224,3 +224,6 @@ DANSWER_BOT_DISABLE_DOCS_ONLY_ANSWER = os.environ.get( ENABLE_DANSWERBOT_REFLEXION = ( os.environ.get("ENABLE_DANSWERBOT_REFLEXION", "").lower() == "true" ) +ENABLE_SLACK_DOC_FEEDBACK = ( + os.environ.get("ENABLE_SLACK_DOC_FEEDBACK", "").lower() == "true" +) diff --git a/backend/danswer/configs/constants.py b/backend/danswer/configs/constants.py index 5b641699f6ca..3dde92e5dfd2 100644 --- a/backend/danswer/configs/constants.py +++ b/backend/danswer/configs/constants.py @@ -23,6 +23,7 @@ PUBLIC_DOC_PAT = "PUBLIC" QUOTE = "quote" BOOST = "boost" SCORE = "score" +ID_SEPARATOR = ":;:" DEFAULT_BOOST = 0 # Prompt building constants: diff --git a/backend/danswer/db/feedback.py b/backend/danswer/db/feedback.py index 6b8f2efd01ed..06bac155360d 100644 --- a/backend/danswer/db/feedback.py +++ b/backend/danswer/db/feedback.py @@ -111,6 +111,7 @@ def create_doc_retrieval_feedback( clicked: bool = False, feedback: SearchFeedbackType | None = None, ) -> None: + """Creates a new Document feedback row and updates the boost value in Postgres and Vespa""" if not clicked and feedback is None: raise ValueError("No action taken, not valid feedback") diff --git a/backend/danswer/direct_qa/answer_question.py b/backend/danswer/direct_qa/answer_question.py index 63e450c8b6aa..f9b1c4d271c9 100644 --- a/backend/danswer/direct_qa/answer_question.py +++ b/backend/danswer/direct_qa/answer_question.py @@ -137,17 +137,17 @@ def answer_qa_query( if d_answer.answer is not None: valid = get_answer_validity(query, d_answer.answer) - if not valid: - return QAResponse( - answer=None, - quotes=None, - top_ranked_docs=chunks_to_search_docs(ranked_chunks), - 
lower_ranked_docs=chunks_to_search_docs(unranked_chunks), - predicted_flow=predicted_flow, - predicted_search=predicted_search, - error_msg=error_msg, - query_event_id=query_event_id, - ) + return QAResponse( + answer=d_answer.answer if d_answer else None, + quotes=quotes.quotes if quotes else None, + top_ranked_docs=chunks_to_search_docs(ranked_chunks), + lower_ranked_docs=chunks_to_search_docs(unranked_chunks), + predicted_flow=predicted_flow, + predicted_search=predicted_search, + eval_res_valid=True if valid else False, + error_msg=error_msg, + query_event_id=query_event_id, + ) return QAResponse( answer=d_answer.answer if d_answer else None, diff --git a/backend/danswer/direct_qa/qa_block.py b/backend/danswer/direct_qa/qa_block.py index 3cc2d5d62f77..68cb513e8d62 100644 --- a/backend/danswer/direct_qa/qa_block.py +++ b/backend/danswer/direct_qa/qa_block.py @@ -1,5 +1,6 @@ import abc import json +import re from collections.abc import Iterator from copy import copy @@ -157,12 +158,11 @@ class SingleMessageScratchpadHandler(QAHandler): model_clean = clean_up_code_blocks(model_output) - answer_start = model_clean.find('{"answer":') - # Only found thoughts, no final answer - if answer_start == -1: + match = re.search(r'{\s*"answer":', model_clean) + if not match: return DanswerAnswer(answer=None), DanswerQuotes(quotes=[]) - final_json = escape_newlines(model_clean[answer_start:]) + final_json = escape_newlines(model_clean[match.start() :]) return process_answer( final_json, context_chunks, is_json_prompt=self.is_json_output diff --git a/backend/danswer/direct_qa/qa_utils.py b/backend/danswer/direct_qa/qa_utils.py index edc539c97795..f1a52b4fa41a 100644 --- a/backend/danswer/direct_qa/qa_utils.py +++ b/backend/danswer/direct_qa/qa_utils.py @@ -88,10 +88,8 @@ def separate_answer_quotes( return extract_answer_quotes_json(model_raw_json) except ValueError: if is_json_prompt: - logger.error( - "Model did not output in json format as expected, " - "trying to parse it 
regardless" - ) + logger.error("Model did not output in json format as expected.") + raise return extract_answer_quotes_freeform(answer_raw) diff --git a/backend/danswer/search/semantic_search.py b/backend/danswer/search/semantic_search.py index 60aff8dea7e6..03d839402acc 100644 --- a/backend/danswer/search/semantic_search.py +++ b/backend/danswer/search/semantic_search.py @@ -107,7 +107,7 @@ def retrieve_ranked_documents( f"Semantic search returned no results with filters: {filters_log_msg}" ) return None, None - logger.info(top_chunks) + logger.debug(top_chunks) ranked_chunks = semantic_reranking(query, top_chunks[:num_rerank]) top_docs = [ diff --git a/backend/danswer/secondary_llm_flows/answer_validation.py b/backend/danswer/secondary_llm_flows/answer_validation.py index 345530b11d0b..31193ce39045 100644 --- a/backend/danswer/secondary_llm_flows/answer_validation.py +++ b/backend/danswer/secondary_llm_flows/answer_validation.py @@ -1,7 +1,7 @@ from danswer.configs.constants import ANSWER_PAT from danswer.configs.constants import CODE_BLOCK_PAT +from danswer.configs.constants import FINAL_ANSWER_PAT from danswer.configs.constants import GENERAL_SEP_PAT -from danswer.configs.constants import INVALID_PAT from danswer.configs.constants import QUESTION_PAT from danswer.configs.constants import THOUGHT_PAT from danswer.direct_qa.qa_block import dict_based_prompt_to_langchain_prompt @@ -12,56 +12,55 @@ from danswer.utils.timing import log_function_time logger = setup_logger() -def get_answer_validation_messages(query: str, answer: str) -> list[dict[str, str]]: - cot_block = ( - f"{THOUGHT_PAT} Use this as a scratchpad to write out in a step by step manner your reasoning " - f"about EACH criterion to ensure that your conclusion is correct.\n" - f"{INVALID_PAT} True or False" - ) - - q_a_block = f"{QUESTION_PAT} {query}\n\n" f"{ANSWER_PAT} {answer}" - - messages = [ - { - "role": "user", - "content": ( - f"{CODE_BLOCK_PAT.format(q_a_block).lstrip()}{GENERAL_SEP_PAT}\n" 
- "Determine if the answer is valid for the query.\n" - f"The answer is invalid if ANY of the following is true:\n" - "- Does not directly answer the user query.\n" - "- Answers a related but different question.\n" - '- Contains anything meaning "I don\'t know" or "information not found".\n\n' - f"You must use the following format:" - f"{CODE_BLOCK_PAT.format(cot_block)}" - f'Hint: Invalid must be exactly "True" or "False" (without the quotes)' - ), - }, - ] - - return messages - - -def extract_validity(model_output: str) -> bool: - if INVALID_PAT in model_output: - result = model_output.split(INVALID_PAT)[-1].strip() - if "true" in result.lower(): - return False - return True # If something is wrong, let's not toss away the answer - - @log_function_time() def get_answer_validity( query: str, answer: str, ) -> bool: - messages = get_answer_validation_messages(query, answer) + def _get_answer_validation_messages( + query: str, answer: str + ) -> list[dict[str, str]]: + cot_block = ( + f"{THOUGHT_PAT} Use this as a scratchpad to write out in a step by step manner your reasoning " + f"about EACH criterion to ensure that your conclusion is correct. " + f"Be brief when evaluating each condition.\n" + f"{FINAL_ANSWER_PAT} Valid or Invalid" + ) + + q_a_block = f"{QUESTION_PAT} {query}\n\n{ANSWER_PAT} {answer}" + + messages = [ + { + "role": "user", + "content": ( + f"{CODE_BLOCK_PAT.format(q_a_block).lstrip()}{GENERAL_SEP_PAT}\n" + "Determine if the answer is valid for the query.\n" + f"The answer is invalid if ANY of the following is true:\n" + "1. Does not directly answer the user query.\n" + "2. Answers a related but different question.\n" + "3. Query requires a subjective answer or an opinion.\n" + '4. 
Contains anything meaning "I don\'t know" or "information not found".\n\n' + f"You must use the following format:" + f"{CODE_BLOCK_PAT.format(cot_block)}" + f'Hint: Final Answer must be exactly "Valid" or "Invalid"' + ), + }, + ] + + return messages + + def _extract_validity(model_output: str) -> bool: + if FINAL_ANSWER_PAT in model_output: + result = model_output.split(FINAL_ANSWER_PAT)[-1].strip() + if "invalid" in result.lower(): + return False + return True # If something is wrong, let's not toss away the answer + + messages = _get_answer_validation_messages(query, answer) filled_llm_prompt = dict_based_prompt_to_langchain_prompt(messages) model_output = get_default_llm().invoke(filled_llm_prompt) logger.debug(model_output) - validity = extract_validity(model_output) - logger.info( - f'LLM Answer of "{answer}" was determined to be {"valid" if validity else "invalid"}.' - ) + validity = _extract_validity(model_output) return validity diff --git a/backend/danswer/server/models.py b/backend/danswer/server/models.py index 6cca836ff9e9..6c7ecbe78267 100644 --- a/backend/danswer/server/models.py +++ b/backend/danswer/server/models.py @@ -224,6 +224,7 @@ class QAResponse(SearchResponse): quotes: list[DanswerQuote] | None predicted_flow: QueryFlow predicted_search: SearchType + eval_res_valid: bool | None = None error_msg: str | None = None diff --git a/backend/danswer/utils/text_processing.py b/backend/danswer/utils/text_processing.py index c5e28ff2d55a..307785ba07bb 100644 --- a/backend/danswer/utils/text_processing.py +++ b/backend/danswer/utils/text_processing.py @@ -14,6 +14,10 @@ def escape_newlines(s: str) -> str: return re.sub(r"(?<!\\)\n", "\\\\n", s) +def replace_whitespaces_w_space(s: str) -> str: + return re.sub(r"\s", " ", s) + + def extract_embedded_json(s: str) -> dict: first_brace_index = s.find("{") last_brace_index = s.rfind("}")