Slack Bot Interface Rework (#454)

2025-10-04 12:58:42 +02:00 · 2023-09-17 19:23:59 -07:00
parent d7b7714d86
commit b337a521f8
16 changed files with 345 additions and 141 deletions
--- a/backend/danswer/bots/slack/blocks.py
+++ b/backend/danswer/bots/slack/blocks.py
@@ -1,21 +1,31 @@
 from slack_sdk.models.blocks import ActionsBlock
 from slack_sdk.models.blocks import Block
 from slack_sdk.models.blocks import ButtonElement
+from slack_sdk.models.blocks import ConfirmObject
+from slack_sdk.models.blocks import DividerBlock
+from slack_sdk.models.blocks import HeaderBlock
 from slack_sdk.models.blocks import SectionBlock

 from danswer.bots.slack.constants import DISLIKE_BLOCK_ACTION_ID
 from danswer.bots.slack.constants import LIKE_BLOCK_ACTION_ID
-from danswer.bots.slack.utils import build_block_id_from_query_event_id
+from danswer.bots.slack.utils import build_feedback_block_id
+from danswer.bots.slack.utils import translate_vespa_highlight_to_slack
 from danswer.configs.app_configs import DANSWER_BOT_NUM_DOCS_TO_DISPLAY
+from danswer.configs.app_configs import ENABLE_SLACK_DOC_FEEDBACK
 from danswer.configs.constants import DocumentSource
+from danswer.configs.constants import SearchFeedbackType
 from danswer.connectors.slack.utils import UserIdReplacer
 from danswer.direct_qa.interfaces import DanswerQuote
 from danswer.server.models import SearchDoc
+from danswer.utils.text_processing import replace_whitespaces_w_space


-def build_feedback_block(query_event_id: int) -> Block:
+_MAX_BLURB_LEN = 75
+
+
+def build_qa_feedback_block(query_event_id: int) -> Block:
    return ActionsBlock(
-        block_id=build_block_id_from_query_event_id(query_event_id),
+        block_id=build_feedback_block_id(query_event_id),
        elements=[
            ButtonElement(
                action_id=LIKE_BLOCK_ACTION_ID,
@@ -31,11 +41,38 @@ def build_feedback_block(query_event_id: int) -> Block:
    )


-_MAX_BLURB_LEN = 75
+def build_doc_feedback_block(
+    query_event_id: int,
+    document_id: str,
+    document_rank: int,
+) -> Block:
+    """Return an actions block with confirm-guarded endorse / reject buttons."""
+    # The block id encodes query event, document id and rank for the click handler
+    block_id = build_feedback_block_id(query_event_id, document_id, document_rank)
+    endorse_button = ButtonElement(
+        action_id=SearchFeedbackType.ENDORSE.value,
+        text="⬆",
+        style="primary",
+        confirm=ConfirmObject(
+            title="Endorse this Document",
+            text="This is a good source of information and should be shown more often!",
+        ),
+    )
+    reject_button = ButtonElement(
+        action_id=SearchFeedbackType.REJECT.value,
+        text="⬇",
+        style="danger",
+        confirm=ConfirmObject(
+            title="Reject this Document",
+            text="This is a bad source of information and should be shown less often.",
+        ),
+    )
+
+    return ActionsBlock(block_id=block_id, elements=[endorse_button, reject_button])


 def _build_custom_semantic_identifier(
-    semantic_identifier: str, blurb: str, source: str
+    semantic_identifier: str, match_str: str, source: str
 ) -> str:
    """
    On slack, since we just show the semantic identifier rather than semantic + blurb, we need
@@ -43,7 +80,9 @@ def _build_custom_semantic_identifier(
    """
    if source == DocumentSource.SLACK.value:
        truncated_blurb = (
-            f"{blurb[:_MAX_BLURB_LEN]}..." if len(blurb) > _MAX_BLURB_LEN else blurb
+            f"{match_str[:_MAX_BLURB_LEN]}..."
+            if len(match_str) > _MAX_BLURB_LEN
+            else match_str
        )
        # NOTE: removing tags so that we don't accidentally tag users in Slack +
        # so that it can be used as part of a <link|text> link
@@ -61,37 +100,51 @@ def _build_custom_semantic_identifier(
    return semantic_identifier


-def build_documents_block(
+def build_documents_blocks(
    documents: list[SearchDoc],
-    already_displayed_doc_identifiers: list[str],
+    query_event_id: int,
    num_docs_to_display: int = DANSWER_BOT_NUM_DOCS_TO_DISPLAY,
-) -> SectionBlock:
-    seen_docs_identifiers = set(already_displayed_doc_identifiers)
-    top_document_lines: list[str] = []
-    for d in documents:
+    include_feedback: bool = ENABLE_SLACK_DOC_FEEDBACK,
+) -> list[Block]:
+    seen_docs_identifiers = set()
+    section_blocks: list[Block] = [HeaderBlock(text="Reference Documents")]
+    included_docs = 0
+    for rank, d in enumerate(documents):
        if d.document_id in seen_docs_identifiers:
            continue
        seen_docs_identifiers.add(d.document_id)

-        custom_semantic_identifier = _build_custom_semantic_identifier(
-            semantic_identifier=d.semantic_identifier,
-            blurb=d.blurb,
-            source=d.source_type,
+        used_chars = len(d.semantic_identifier) + 3
+        match_str = translate_vespa_highlight_to_slack(d.match_highlights, used_chars)
+
+        included_docs += 1
+
+        section_blocks.append(
+            SectionBlock(
+                fields=[
+                    f"<{d.link}|{d.semantic_identifier}>:\n>{match_str}",
+                ]
+            ),
        )

-        top_document_lines.append(f"- <{d.link}|{custom_semantic_identifier}>")
-        if len(top_document_lines) >= num_docs_to_display:
+        if include_feedback:
+            section_blocks.append(
+                build_doc_feedback_block(
+                    query_event_id=query_event_id,
+                    document_id=d.document_id,
+                    document_rank=rank,
+                ),
+            )
+
+        section_blocks.append(DividerBlock())
+
+        if included_docs >= num_docs_to_display:
            break

-    return SectionBlock(
-        fields=[
-            "*Other potentially relevant docs:*",
-            *top_document_lines,
-        ]
-    )
+    return section_blocks


-def build_quotes_block(
+def build_blurb_quotes_block(
    quotes: list[DanswerQuote],
 ) -> tuple[list[Block], list[str]]:
    quote_lines: list[str] = []
@@ -104,7 +157,7 @@ def build_quotes_block(
            doc_identifiers.append(doc_id)
            custom_semantic_identifier = _build_custom_semantic_identifier(
                semantic_identifier=doc_name,
-                blurb=quote.blurb,
+                match_str=quote.blurb,
                source=quote.source_type,
            )
            quote_lines.append(f"- <{doc_link}|{custom_semantic_identifier}>")
@@ -125,14 +178,58 @@ def build_quotes_block(
    )


+def build_quotes_block(
+    quotes: list[DanswerQuote],
+) -> list[Block]:
+    """Group quotes per document and render them as one "Relevant Snippets" section.
+
+    Quotes missing a link, name, document id or text are skipped. Each document
+    shows its (up to) five longest quotes as code blocks under a link to the document;
+    the link and name come from the first usable quote seen for that document.
+    Returns an empty list when no quote is usable.
+    """
+    quotes_by_doc: dict[str, list[str]] = {}
+    doc_headers: dict[str, str] = {}
+    # Bucket usable quotes by document, remembering each document's link header once
+    for q in quotes:
+        if not (q.link and q.semantic_identifier and q.document_id and q.quote):
+            continue
+        if q.document_id not in quotes_by_doc:
+            doc_headers[q.document_id] = f"<{q.link}|{q.semantic_identifier}>"
+        quotes_by_doc.setdefault(q.document_id, []).append(q.quote)
+
+    if not quotes_by_doc:
+        return []
+
+    quote_lines: list[str] = []
+    for doc_id, raw_quotes in quotes_by_doc.items():
+        cleaned = [
+            replace_whitespaces_w_space(raw).strip() for raw in raw_quotes
+        ]
+        # The sort is stable, so equally long quotes keep their input order
+        cleaned.sort(key=len, reverse=True)
+        snippets = "\n".join(f"```{text}```" for text in cleaned[:5])
+        quote_lines.append(f"{doc_headers[doc_id]}\n{snippets}")
+
+    return [
+        SectionBlock(
+            fields=[
+                "*Relevant Snippets:*",
+                *quote_lines,
+            ]
+        )
+    ]
+
+
 def build_qa_response_blocks(
    query_event_id: int,
    answer: str | None,
    quotes: list[DanswerQuote] | None,
-    documents: list[SearchDoc],
 ) -> list[Block]:
-    doc_identifiers: list[str] = []
    quotes_blocks: list[Block] = []
+
+    ai_answer_header = HeaderBlock(text="AI Answer")
+
    if not answer:
        answer_block = SectionBlock(
            text="Sorry, I was unable to find an answer, but I did find some potentially relevant docs 🤓"
@@ -140,7 +237,7 @@ def build_qa_response_blocks(
    else:
        answer_block = SectionBlock(text=answer)
        if quotes:
-            quotes_blocks, doc_identifiers = build_quotes_block(quotes)
+            quotes_blocks = build_quotes_block(quotes)

        # if no quotes OR `build_quotes_block()` did not give back any blocks
        if not quotes_blocks:
@@ -150,9 +247,13 @@ def build_qa_response_blocks(
                )
            ]

-    documents_block = build_documents_block(documents, doc_identifiers)
+    feedback_block = build_qa_feedback_block(query_event_id=query_event_id)
    return (
-        [answer_block]
+        [
+            ai_answer_header,
+            answer_block,
+            feedback_block,
+        ]
        + quotes_blocks
-        + [documents_block, build_feedback_block(query_event_id=query_event_id)]
+        + [DividerBlock()]
    )
--- a/backend/danswer/bots/slack/handlers/handle_feedback.py
+++ b/backend/danswer/bots/slack/handlers/handle_feedback.py
@@ -1,27 +1,56 @@
 from slack_sdk import WebClient
 from sqlalchemy.orm import Session

+from danswer.bots.slack.constants import DISLIKE_BLOCK_ACTION_ID
+from danswer.bots.slack.constants import LIKE_BLOCK_ACTION_ID
+from danswer.bots.slack.utils import decompose_block_id
 from danswer.configs.constants import QAFeedbackType
+from danswer.configs.constants import SearchFeedbackType
 from danswer.db.engine import get_sqlalchemy_engine
+from danswer.db.feedback import create_doc_retrieval_feedback
 from danswer.db.feedback import update_query_event_feedback


-def handle_qa_feedback(
-    query_id: int,
-    feedback_type: QAFeedbackType,
+def handle_slack_feedback(
+    block_id: str,
+    feedback_type: str,
    client: WebClient,
    user_id_to_post_confirmation: str,
    channel_id_to_post_confirmation: str,
    thread_ts_to_post_confirmation: str,
 ) -> None:
    engine = get_sqlalchemy_engine()
+
+    query_id, doc_id, doc_rank = decompose_block_id(block_id)
+
    with Session(engine) as db_session:
-        update_query_event_feedback(
-            feedback=feedback_type,
-            query_id=query_id,
-            user_id=None,  # no "user" for Slack bot for now
-            db_session=db_session,
-        )
+        if feedback_type in [LIKE_BLOCK_ACTION_ID, DISLIKE_BLOCK_ACTION_ID]:
+            update_query_event_feedback(
+                feedback=QAFeedbackType.LIKE
+                if feedback_type == LIKE_BLOCK_ACTION_ID
+                else QAFeedbackType.DISLIKE,
+                query_id=query_id,
+                user_id=None,  # no "user" for Slack bot for now
+                db_session=db_session,
+            )
+        if feedback_type in [
+            SearchFeedbackType.ENDORSE.value,
+            SearchFeedbackType.REJECT.value,
+        ]:
+            if doc_id is None or doc_rank is None:
+                raise ValueError("Missing information for Document Feedback")
+
+            create_doc_retrieval_feedback(
+                qa_event_id=query_id,
+                document_id=doc_id,
+                document_rank=doc_rank,
+                user_id=None,
+                db_session=db_session,
+                clicked=False,  # Not tracking this for Slack
+                feedback=SearchFeedbackType.ENDORSE
+                if feedback_type == SearchFeedbackType.ENDORSE.value
+                else SearchFeedbackType.REJECT,
+            )

    # post message to slack confirming that feedback was received
    client.chat_postEphemeral(
--- a/backend/danswer/bots/slack/handlers/handle_message.py
+++ b/backend/danswer/bots/slack/handlers/handle_message.py
@@ -4,6 +4,7 @@ from retry import retry
 from slack_sdk import WebClient
 from sqlalchemy.orm import Session

+from danswer.bots.slack.blocks import build_documents_blocks
 from danswer.bots.slack.blocks import build_qa_response_blocks
 from danswer.bots.slack.utils import respond_in_thread
 from danswer.configs.app_configs import DANSWER_BOT_ANSWER_GENERATION_TIMEOUT
@@ -37,6 +38,7 @@ def handle_message(
    def _get_answer(question: QuestionRequest) -> QAResponse:
        engine = get_sqlalchemy_engine()
        with Session(engine, expire_on_commit=False) as db_session:
+            # This also handles creating the query event in postgres
            answer = answer_qa_query(
                question=question,
                user=None,
@@ -54,7 +56,7 @@ def handle_message(
            QuestionRequest(
                query=msg,
                collection=DOCUMENT_INDEX_NAME,
-                use_keyword=False,  # always use semantic search when handling slack messages
+                use_keyword=False,  # always use semantic search when handling Slack messages
                filters=None,
                offset=None,
            )
@@ -75,6 +77,14 @@ def handle_message(
            )
        return

+    if answer.eval_res_valid is False:
+        logger.info(
+            "Answer was evaluated to be invalid, throwing it away without responding."
+        )
+        if answer.answer:
+            logger.debug(answer.answer)
+        return
+
    if not answer.top_ranked_docs:
        logger.error(f"Unable to answer question: '{msg}' - no documents found")
        # Optionally, respond in thread with the error message, Used primarily
@@ -96,21 +106,26 @@ def handle_message(
        return

    # convert raw response into "nicely" formatted Slack message
-    blocks = build_qa_response_blocks(
+    answer_blocks = build_qa_response_blocks(
        query_event_id=answer.query_event_id,
        answer=answer.answer,
-        documents=answer.top_ranked_docs,
        quotes=answer.quotes,
    )
+
+    document_blocks = build_documents_blocks(
+        documents=answer.top_ranked_docs, query_event_id=answer.query_event_id
+    )
+
    try:
        respond_in_thread(
            client=client,
            channel=channel,
-            blocks=blocks,
+            blocks=answer_blocks + document_blocks,
            thread_ts=message_ts_to_respond_to,
            # don't unfurl, since otherwise we will have 5+ previews which makes the message very long
            unfurl=False,
        )
+
    except Exception:
        logger.exception(
            f"Unable to process message - could not respond in slack in {num_retries} attempts"
--- a/backend/danswer/bots/slack/listener.py
+++ b/backend/danswer/bots/slack/listener.py
@@ -9,12 +9,9 @@ from slack_sdk.socket_mode import SocketModeClient
 from slack_sdk.socket_mode.request import SocketModeRequest
 from slack_sdk.socket_mode.response import SocketModeResponse

-from danswer.bots.slack.constants import DISLIKE_BLOCK_ACTION_ID
-from danswer.bots.slack.constants import LIKE_BLOCK_ACTION_ID
-from danswer.bots.slack.handlers.handle_feedback import handle_qa_feedback
+from danswer.bots.slack.handlers.handle_feedback import handle_slack_feedback
 from danswer.bots.slack.handlers.handle_message import handle_message
-from danswer.bots.slack.utils import get_query_event_id_from_block_id
-from danswer.configs.constants import QAFeedbackType
+from danswer.bots.slack.utils import decompose_block_id
 from danswer.utils.logger import setup_logger

 logger = setup_logger()
@@ -54,7 +51,7 @@ def _get_socket_client() -> SocketModeClient:


 def _process_slack_event(client: SocketModeClient, req: SocketModeRequest) -> None:
-    logger.info(f"Received request of type: '{req.type}', with paylod: '{req.payload}'")
+    logger.info(f"Received Slack request of type: '{req.type}'")
    if req.type == "events_api":
        # Acknowledge the request immediately
        response = SocketModeResponse(envelope_id=req.envelope_id)
@@ -95,7 +92,7 @@ def _process_slack_event(client: SocketModeClient, req: SocketModeRequest) -> No

        message_ts = event.get("ts")
        thread_ts = event.get("thread_ts")
-        # pick the root of the thread (if a thread exists)
+        # Pick the root of the thread (if a thread exists)
        message_ts_to_respond_to = cast(str, thread_ts or message_ts)
        if thread_ts and message_ts != thread_ts:
            channel_specific_logger.info(
@@ -122,7 +119,7 @@ def _process_slack_event(client: SocketModeClient, req: SocketModeRequest) -> No
            f"Successfully processed message with ts: '{message_ts}'"
        )

-    # handle button clicks
+    # Handle button clicks
    if req.type == "interactive" and req.payload.get("type") == "block_actions":
        # Acknowledge the request immediately
        response = SocketModeResponse(envelope_id=req.envelope_id)
@@ -134,31 +131,22 @@ def _process_slack_event(client: SocketModeClient, req: SocketModeRequest) -> No
            return

        action = cast(dict[str, Any], actions[0])
-        action_id = action.get("action_id")
-        if action_id == LIKE_BLOCK_ACTION_ID:
-            feedback_type = QAFeedbackType.LIKE
-        elif action_id == DISLIKE_BLOCK_ACTION_ID:
-            feedback_type = QAFeedbackType.DISLIKE
-        else:
-            logger.error(
-                f"Unable to process block action - unknown action_id: '{action_id}'"
-            )
-            return
-
+        action_id = cast(str, action.get("action_id"))
        block_id = cast(str, action.get("block_id"))
        user_id = cast(str, req.payload["user"]["id"])
        channel_id = cast(str, req.payload["container"]["channel_id"])
        thread_ts = cast(str, req.payload["container"]["thread_ts"])
-        query_event_id = get_query_event_id_from_block_id(block_id)
-        handle_qa_feedback(
-            query_id=query_event_id,
-            feedback_type=feedback_type,
+
+        handle_slack_feedback(
+            block_id=block_id,
+            feedback_type=action_id,
            client=client.web_client,
            user_id_to_post_confirmation=user_id,
            channel_id_to_post_confirmation=channel_id,
            thread_ts_to_post_confirmation=thread_ts,
        )

+        query_event_id, _, _ = decompose_block_id(block_id)
        logger.info(f"Successfully handled QA feedback for event: {query_event_id}")


@@ -181,6 +169,7 @@ def process_slack_event(client: SocketModeClient, req: SocketModeRequest) -> Non
 if __name__ == "__main__":
    socket_client = _get_socket_client()
    socket_client.socket_mode_request_listeners.append(process_slack_event)  # type: ignore
+
    # Establish a WebSocket connection to the Socket Mode servers
    logger.info("Listening for messages from Slack...")
    socket_client.connect()
--- a/backend/danswer/bots/slack/utils.py
+++ b/backend/danswer/bots/slack/utils.py
@@ -1,5 +1,6 @@
 import logging
 import random
+import re
 import string
 from typing import cast

@@ -9,9 +10,10 @@ from slack_sdk.models.blocks import Block
 from slack_sdk.models.metadata import Metadata

 from danswer.configs.app_configs import DANSWER_BOT_NUM_RETRIES
+from danswer.configs.constants import ID_SEPARATOR
 from danswer.connectors.slack.utils import make_slack_api_rate_limited
 from danswer.utils.logger import setup_logger
-
+from danswer.utils.text_processing import replace_whitespaces_w_space

 logger = setup_logger()

@@ -34,11 +36,6 @@ def respond_in_thread(
    if not text and not blocks:
        raise ValueError("One of `text` or `blocks` must be provided")

-    if text:
-        logger.debug(f"Trying to send message: {text}")
-    if blocks:
-        logger.debug(f"Trying to send blocks: {blocks}")
-
    slack_call = make_slack_api_rate_limited(client.chat_postMessage)
    response = slack_call(
        channel=channel,
@@ -53,9 +50,63 @@ def respond_in_thread(
        raise RuntimeError(f"Unable to post message: {response}")


-def build_block_id_from_query_event_id(query_event_id: int) -> str:
-    return f"{''.join(random.choice(string.ascii_letters) for _ in range(5))}:{query_event_id}"
+def build_feedback_block_id(
+    query_event_id: int,
+    document_id: str | None = None,
+    document_rank: int | None = None,
+) -> str:
+    """Build `<random prefix><SEP><query id>[<SEP><document id><SEP><rank>]`.
+
+    Raises ValueError if the document id is empty, unranked or contains ID_SEPARATOR.
+    """
+    prefix = "".join(random.choice(string.ascii_letters) for _ in range(10))
+    parts = [prefix, str(query_event_id)]
+    if document_id is not None:
+        if document_rank is None or not document_id:
+            raise ValueError("Invalid document, missing information")
+        if ID_SEPARATOR in document_id:
+            raise ValueError(
+                "Separator pattern should not already exist in document id"
+            )
+        parts += [document_id, str(document_rank)]
+    return ID_SEPARATOR.join(parts)


-def get_query_event_id_from_block_id(block_id: str) -> int:
-    return int(block_id.split(":")[-1])
+def decompose_block_id(block_id: str) -> tuple[int, str | None, int | None]:
+    """Split a block id into (query_id, document_id, document_rank).
+
+    Document fields are None for 2-part ids; raises ValueError if malformed.
+    """
+    try:
+        components = block_id.split(ID_SEPARATOR)
+        if len(components) == 2:
+            return int(components[1]), None, None
+        if len(components) == 4:
+            return int(components[1]), components[2], int(components[3])
+        raise ValueError("Block ID does not contain right number of elements")
+    except Exception as e:
+        logger.error(e)
+        raise ValueError("Received invalid Feedback Block Identifier") from e
+
+
+def translate_vespa_highlight_to_slack(match_strs: list[str], used_chars: int) -> str:
+    """Join Vespa `<hi>`-tagged snippets into one Slack string with `*bold*` marks.
+
+    `used_chars` is how much of Slack's ~300 char "Show More" limit is already spent;
+    the text is cut to the remainder unless that would cut off a later highlight.
+    """
+    def _replace_highlight(s: str) -> str:
+        s = re.sub(r"</hi>(?=\S)", "", s)  # drop tags glued to neighbouring text
+        s = re.sub(r"(?<=\S)<hi>", "", s)
+        return s.replace("</hi>", "*").replace("<hi>", "*")
+
+    combined = "... ".join(
+        replace_whitespaces_w_space(_replace_highlight(match_str)).strip()
+        for match_str in match_strs
+        if match_str
+    )
+    # Clamp: a negative budget would slice from the END and barely trim the text
+    remaining = max(300 - used_chars, 0)
+    if len(combined) > remaining and "*" not in combined[remaining:]:
+        combined = combined[: max(remaining - 3, 0)] + "..."
+    return combined
--- a/backend/danswer/chunking/models.py
+++ b/backend/danswer/chunking/models.py
@@ -68,6 +68,18 @@ class InferenceChunk(BaseChunk):
    # ["<hi>the</hi> <hi>answer</hi> is 42", "he couldn't find an <hi>answer</hi>"]
    match_highlights: list[str]

+    def __repr__(self) -> str:
+        """Show the document id plus the first few words of the blurb."""
+        words = self.blurb.split()
+        preview = words[0] if words else ""
+        for word in words[1:]:
+            # Whole words only: stop once the preview has grown past 25 chars
+            if len(preview) > 25:
+                break
+            preview = f"{preview} {word}"
+
+        return f"Inference Chunk: {self.document_id} - {preview}..."
+
    @classmethod
    def from_dict(cls, init_dict: dict[str, Any]) -> "InferenceChunk":
        init_kwargs = {
--- a/backend/danswer/configs/app_configs.py
+++ b/backend/danswer/configs/app_configs.py
@@ -224,3 +224,6 @@ DANSWER_BOT_DISABLE_DOCS_ONLY_ANSWER = os.environ.get(
 ENABLE_DANSWERBOT_REFLEXION = (
    os.environ.get("ENABLE_DANSWERBOT_REFLEXION", "").lower() == "true"
 )
+ENABLE_SLACK_DOC_FEEDBACK = (  # on only when the env var is "true" (any letter case)
+    os.environ.get("ENABLE_SLACK_DOC_FEEDBACK", "").lower() == "true"
+)
--- a/backend/danswer/configs/constants.py
+++ b/backend/danswer/configs/constants.py
@@ -23,6 +23,7 @@ PUBLIC_DOC_PAT = "PUBLIC"
 QUOTE = "quote"
 BOOST = "boost"
 SCORE = "score"
+ID_SEPARATOR = ":;:"
 DEFAULT_BOOST = 0

 # Prompt building constants:
--- a/backend/danswer/db/feedback.py
+++ b/backend/danswer/db/feedback.py
@@ -111,6 +111,7 @@ def create_doc_retrieval_feedback(
    clicked: bool = False,
    feedback: SearchFeedbackType | None = None,
 ) -> None:
+    """Creates a new Document feedback row and updates the boost value in Postgres and Vespa"""
    if not clicked and feedback is None:
        raise ValueError("No action taken, not valid feedback")

--- a/backend/danswer/direct_qa/answer_question.py
+++ b/backend/danswer/direct_qa/answer_question.py
@@ -137,17 +137,17 @@ def answer_qa_query(
        if d_answer.answer is not None:
            valid = get_answer_validity(query, d_answer.answer)

-        if not valid:
-            return QAResponse(
-                answer=None,
-                quotes=None,
-                top_ranked_docs=chunks_to_search_docs(ranked_chunks),
-                lower_ranked_docs=chunks_to_search_docs(unranked_chunks),
-                predicted_flow=predicted_flow,
-                predicted_search=predicted_search,
-                error_msg=error_msg,
-                query_event_id=query_event_id,
-            )
+        return QAResponse(
+            answer=d_answer.answer if d_answer else None,
+            quotes=quotes.quotes if quotes else None,
+            top_ranked_docs=chunks_to_search_docs(ranked_chunks),
+            lower_ranked_docs=chunks_to_search_docs(unranked_chunks),
+            predicted_flow=predicted_flow,
+            predicted_search=predicted_search,
+            eval_res_valid=True if valid else False,
+            error_msg=error_msg,
+            query_event_id=query_event_id,
+        )

    return QAResponse(
        answer=d_answer.answer if d_answer else None,
--- a/backend/danswer/direct_qa/qa_block.py
+++ b/backend/danswer/direct_qa/qa_block.py
@@ -1,5 +1,6 @@
 import abc
 import json
+import re
 from collections.abc import Iterator
 from copy import copy

@@ -157,12 +158,11 @@ class SingleMessageScratchpadHandler(QAHandler):

        model_clean = clean_up_code_blocks(model_output)

-        answer_start = model_clean.find('{"answer":')
-        # Only found thoughts, no final answer
-        if answer_start == -1:
+        match = re.search(r'{\s*"answer":', model_clean)
+        if not match:
            return DanswerAnswer(answer=None), DanswerQuotes(quotes=[])

-        final_json = escape_newlines(model_clean[answer_start:])
+        final_json = escape_newlines(model_clean[match.start() :])

        return process_answer(
            final_json, context_chunks, is_json_prompt=self.is_json_output
--- a/backend/danswer/direct_qa/qa_utils.py
+++ b/backend/danswer/direct_qa/qa_utils.py
@@ -88,10 +88,8 @@ def separate_answer_quotes(
        return extract_answer_quotes_json(model_raw_json)
    except ValueError:
        if is_json_prompt:
-            logger.error(
-                "Model did not output in json format as expected, "
-                "trying to parse it regardless"
-            )
+            logger.error("Model did not output in json format as expected.")
+            raise
        return extract_answer_quotes_freeform(answer_raw)


--- a/backend/danswer/search/semantic_search.py
+++ b/backend/danswer/search/semantic_search.py
@@ -107,7 +107,7 @@ def retrieve_ranked_documents(
            f"Semantic search returned no results with filters: {filters_log_msg}"
        )
        return None, None
-    logger.info(top_chunks)
+    logger.debug(top_chunks)
    ranked_chunks = semantic_reranking(query, top_chunks[:num_rerank])

    top_docs = [
--- a/backend/danswer/secondary_llm_flows/answer_validation.py
+++ b/backend/danswer/secondary_llm_flows/answer_validation.py
@@ -1,7 +1,7 @@
 from danswer.configs.constants import ANSWER_PAT
 from danswer.configs.constants import CODE_BLOCK_PAT
+from danswer.configs.constants import FINAL_ANSWER_PAT
 from danswer.configs.constants import GENERAL_SEP_PAT
-from danswer.configs.constants import INVALID_PAT
 from danswer.configs.constants import QUESTION_PAT
 from danswer.configs.constants import THOUGHT_PAT
 from danswer.direct_qa.qa_block import dict_based_prompt_to_langchain_prompt
@@ -12,56 +12,55 @@ from danswer.utils.timing import log_function_time
 logger = setup_logger()


-def get_answer_validation_messages(query: str, answer: str) -> list[dict[str, str]]:
-    cot_block = (
-        f"{THOUGHT_PAT} Use this as a scratchpad to write out in a step by step manner your reasoning "
-        f"about EACH criterion to ensure that your conclusion is correct.\n"
-        f"{INVALID_PAT} True or False"
-    )
-
-    q_a_block = f"{QUESTION_PAT} {query}\n\n" f"{ANSWER_PAT} {answer}"
-
-    messages = [
-        {
-            "role": "user",
-            "content": (
-                f"{CODE_BLOCK_PAT.format(q_a_block).lstrip()}{GENERAL_SEP_PAT}\n"
-                "Determine if the answer is valid for the query.\n"
-                f"The answer is invalid if ANY of the following is true:\n"
-                "- Does not directly answer the user query.\n"
-                "- Answers a related but different question.\n"
-                '- Contains anything meaning "I don\'t know" or "information not found".\n\n'
-                f"You must use the following format:"
-                f"{CODE_BLOCK_PAT.format(cot_block)}"
-                f'Hint: Invalid must be exactly "True" or "False" (without the quotes)'
-            ),
-        },
-    ]
-
-    return messages
-
-
-def extract_validity(model_output: str) -> bool:
-    if INVALID_PAT in model_output:
-        result = model_output.split(INVALID_PAT)[-1].strip()
-        if "true" in result.lower():
-            return False
-    return True  # If something is wrong, let's not toss away the answer
-
-
@log_function_time()
 def get_answer_validity(
    query: str,
    answer: str,
 ) -> bool:
-    messages = get_answer_validation_messages(query, answer)
+    def _get_answer_validation_messages(
+        query: str, answer: str
+    ) -> list[dict[str, str]]:
+        cot_block = (
+            f"{THOUGHT_PAT} Use this as a scratchpad to write out in a step by step manner your reasoning "
+            f"about EACH criterion to ensure that your conclusion is correct. "
+            f"Be brief when evaluating each condition.\n"
+            f"{FINAL_ANSWER_PAT} Valid or Invalid"
+        )
+
+        q_a_block = f"{QUESTION_PAT} {query}\n\n{ANSWER_PAT} {answer}"
+
+        messages = [
+            {
+                "role": "user",
+                "content": (
+                    f"{CODE_BLOCK_PAT.format(q_a_block).lstrip()}{GENERAL_SEP_PAT}\n"
+                    "Determine if the answer is valid for the query.\n"
+                    f"The answer is invalid if ANY of the following is true:\n"
+                    "1. Does not directly answer the user query.\n"
+                    "2. Answers a related but different question.\n"
+                    "3. Query requires a subjective answer or an opinion.\n"
+                    '4. Contains anything meaning "I don\'t know" or "information not found".\n\n'
+                    f"You must use the following format:"
+                    f"{CODE_BLOCK_PAT.format(cot_block)}"
+                    f'Hint: Final Answer must be exactly "Valid" or "Invalid"'
+                ),
+            },
+        ]
+
+        return messages
+
+    def _extract_validity(model_output: str) -> bool:
+        if FINAL_ANSWER_PAT in model_output:
+            result = model_output.split(FINAL_ANSWER_PAT)[-1].strip()
+            if "invalid" in result.lower():
+                return False
+        return True  # If something is wrong, let's not toss away the answer
+
+    messages = _get_answer_validation_messages(query, answer)
    filled_llm_prompt = dict_based_prompt_to_langchain_prompt(messages)
    model_output = get_default_llm().invoke(filled_llm_prompt)
    logger.debug(model_output)

-    validity = extract_validity(model_output)
-    logger.info(
-        f'LLM Answer of "{answer}" was determined to be {"valid" if validity else "invalid"}.'
-    )
+    validity = _extract_validity(model_output)

    return validity
--- a/backend/danswer/server/models.py
+++ b/backend/danswer/server/models.py
@@ -224,6 +224,7 @@ class QAResponse(SearchResponse):
    quotes: list[DanswerQuote] | None
    predicted_flow: QueryFlow
    predicted_search: SearchType
+    eval_res_valid: bool | None = None
    error_msg: str | None = None


--- a/backend/danswer/utils/text_processing.py
+++ b/backend/danswer/utils/text_processing.py
@@ -14,6 +14,10 @@ def escape_newlines(s: str) -> str:
    return re.sub(r"(?<!\\)\n", "\\\\n", s)


+def replace_whitespaces_w_space(s: str) -> str:
+    return re.sub(r"\s", " ", s)  # one space per whitespace char; runs are not collapsed
+
+
 def extract_embedded_json(s: str) -> dict:
    first_brace_index = s.find("{")
    last_brace_index = s.rfind("}")