mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-09-19 12:03:54 +02:00
Slack Bot Interface Rework (#454)
This commit is contained in:
@@ -1,21 +1,31 @@
|
||||
from slack_sdk.models.blocks import ActionsBlock
|
||||
from slack_sdk.models.blocks import Block
|
||||
from slack_sdk.models.blocks import ButtonElement
|
||||
from slack_sdk.models.blocks import ConfirmObject
|
||||
from slack_sdk.models.blocks import DividerBlock
|
||||
from slack_sdk.models.blocks import HeaderBlock
|
||||
from slack_sdk.models.blocks import SectionBlock
|
||||
|
||||
from danswer.bots.slack.constants import DISLIKE_BLOCK_ACTION_ID
|
||||
from danswer.bots.slack.constants import LIKE_BLOCK_ACTION_ID
|
||||
from danswer.bots.slack.utils import build_block_id_from_query_event_id
|
||||
from danswer.bots.slack.utils import build_feedback_block_id
|
||||
from danswer.bots.slack.utils import translate_vespa_highlight_to_slack
|
||||
from danswer.configs.app_configs import DANSWER_BOT_NUM_DOCS_TO_DISPLAY
|
||||
from danswer.configs.app_configs import ENABLE_SLACK_DOC_FEEDBACK
|
||||
from danswer.configs.constants import DocumentSource
|
||||
from danswer.configs.constants import SearchFeedbackType
|
||||
from danswer.connectors.slack.utils import UserIdReplacer
|
||||
from danswer.direct_qa.interfaces import DanswerQuote
|
||||
from danswer.server.models import SearchDoc
|
||||
from danswer.utils.text_processing import replace_whitespaces_w_space
|
||||
|
||||
|
||||
def build_feedback_block(query_event_id: int) -> Block:
|
||||
_MAX_BLURB_LEN = 75
|
||||
|
||||
|
||||
def build_qa_feedback_block(query_event_id: int) -> Block:
|
||||
return ActionsBlock(
|
||||
block_id=build_block_id_from_query_event_id(query_event_id),
|
||||
block_id=build_feedback_block_id(query_event_id),
|
||||
elements=[
|
||||
ButtonElement(
|
||||
action_id=LIKE_BLOCK_ACTION_ID,
|
||||
@@ -31,11 +41,38 @@ def build_feedback_block(query_event_id: int) -> Block:
|
||||
)
|
||||
|
||||
|
||||
_MAX_BLURB_LEN = 75
|
||||
def build_doc_feedback_block(
    query_event_id: int,
    document_id: str,
    document_rank: int,
) -> Block:
    """Build the endorse / reject button row for a single retrieved document.

    The block id is produced by `build_feedback_block_id` from the query event
    id, the document id and the document's rank. Each button's action id is the
    value of the matching `SearchFeedbackType` member, and each button carries
    a confirmation dialog.
    """
    # Computed first: building the id draws from `random`.
    feedback_block_id = build_feedback_block_id(
        query_event_id, document_id, document_rank
    )

    endorse_confirmation = ConfirmObject(
        title="Endorse this Document",
        text="This is a good source of information and should be shown more often!",
    )
    endorse_button = ButtonElement(
        action_id=SearchFeedbackType.ENDORSE.value,
        text="⬆",
        style="primary",
        confirm=endorse_confirmation,
    )

    reject_confirmation = ConfirmObject(
        title="Reject this Document",
        text="This is a bad source of information and should be shown less often.",
    )
    reject_button = ButtonElement(
        action_id=SearchFeedbackType.REJECT.value,
        text="⬇",
        style="danger",
        confirm=reject_confirmation,
    )

    return ActionsBlock(
        block_id=feedback_block_id,
        elements=[endorse_button, reject_button],
    )
|
||||
|
||||
|
||||
def _build_custom_semantic_identifier(
|
||||
semantic_identifier: str, blurb: str, source: str
|
||||
semantic_identifier: str, match_str: str, source: str
|
||||
) -> str:
|
||||
"""
|
||||
On slack, since we just show the semantic identifier rather than semantic + blurb, we need
|
||||
@@ -43,7 +80,9 @@ def _build_custom_semantic_identifier(
|
||||
"""
|
||||
if source == DocumentSource.SLACK.value:
|
||||
truncated_blurb = (
|
||||
f"{blurb[:_MAX_BLURB_LEN]}..." if len(blurb) > _MAX_BLURB_LEN else blurb
|
||||
f"{match_str[:_MAX_BLURB_LEN]}..."
|
||||
if len(match_str) > _MAX_BLURB_LEN
|
||||
else match_str
|
||||
)
|
||||
# NOTE: removing tags so that we don't accidentally tag users in Slack +
|
||||
# so that it can be used as part of a <link|text> link
|
||||
@@ -61,37 +100,51 @@ def _build_custom_semantic_identifier(
|
||||
return semantic_identifier
|
||||
|
||||
|
||||
def build_documents_block(
|
||||
def build_documents_blocks(
|
||||
documents: list[SearchDoc],
|
||||
already_displayed_doc_identifiers: list[str],
|
||||
query_event_id: int,
|
||||
num_docs_to_display: int = DANSWER_BOT_NUM_DOCS_TO_DISPLAY,
|
||||
) -> SectionBlock:
|
||||
seen_docs_identifiers = set(already_displayed_doc_identifiers)
|
||||
top_document_lines: list[str] = []
|
||||
for d in documents:
|
||||
include_feedback: bool = ENABLE_SLACK_DOC_FEEDBACK,
|
||||
) -> list[Block]:
|
||||
seen_docs_identifiers = set()
|
||||
section_blocks: list[Block] = [HeaderBlock(text="Reference Documents")]
|
||||
included_docs = 0
|
||||
for rank, d in enumerate(documents):
|
||||
if d.document_id in seen_docs_identifiers:
|
||||
continue
|
||||
seen_docs_identifiers.add(d.document_id)
|
||||
|
||||
custom_semantic_identifier = _build_custom_semantic_identifier(
|
||||
semantic_identifier=d.semantic_identifier,
|
||||
blurb=d.blurb,
|
||||
source=d.source_type,
|
||||
used_chars = len(d.semantic_identifier) + 3
|
||||
match_str = translate_vespa_highlight_to_slack(d.match_highlights, used_chars)
|
||||
|
||||
included_docs += 1
|
||||
|
||||
section_blocks.append(
|
||||
SectionBlock(
|
||||
fields=[
|
||||
f"<{d.link}|{d.semantic_identifier}>:\n>{match_str}",
|
||||
]
|
||||
),
|
||||
)
|
||||
|
||||
top_document_lines.append(f"- <{d.link}|{custom_semantic_identifier}>")
|
||||
if len(top_document_lines) >= num_docs_to_display:
|
||||
if include_feedback:
|
||||
section_blocks.append(
|
||||
build_doc_feedback_block(
|
||||
query_event_id=query_event_id,
|
||||
document_id=d.document_id,
|
||||
document_rank=rank,
|
||||
),
|
||||
)
|
||||
|
||||
section_blocks.append(DividerBlock())
|
||||
|
||||
if included_docs >= num_docs_to_display:
|
||||
break
|
||||
|
||||
return SectionBlock(
|
||||
fields=[
|
||||
"*Other potentially relevant docs:*",
|
||||
*top_document_lines,
|
||||
]
|
||||
)
|
||||
return section_blocks
|
||||
|
||||
|
||||
def build_quotes_block(
|
||||
def build_blurb_quotes_block(
|
||||
quotes: list[DanswerQuote],
|
||||
) -> tuple[list[Block], list[str]]:
|
||||
quote_lines: list[str] = []
|
||||
@@ -104,7 +157,7 @@ def build_quotes_block(
|
||||
doc_identifiers.append(doc_id)
|
||||
custom_semantic_identifier = _build_custom_semantic_identifier(
|
||||
semantic_identifier=doc_name,
|
||||
blurb=quote.blurb,
|
||||
match_str=quote.blurb,
|
||||
source=quote.source_type,
|
||||
)
|
||||
quote_lines.append(f"- <{doc_link}|{custom_semantic_identifier}>")
|
||||
@@ -125,14 +178,58 @@ def build_quotes_block(
|
||||
)
|
||||
|
||||
|
||||
def build_quotes_block(
    quotes: list[DanswerQuote],
) -> list[Block]:
    """Render quotes as a single "Relevant Snippets" section, grouped by document.

    Quotes missing any of link, semantic identifier, document id or quote text
    are ignored. For each document, the link and name of the first quote seen
    are used, whitespace in every quote is normalised, and at most the five
    longest quotes are shown, each wrapped in a triple-backtick code fence.

    Returns an empty list when no usable quote exists, otherwise a one-element
    list holding the section block.
    """
    quotes_by_doc: dict[str, list[str]] = {}
    link_by_doc: dict[str, str] = {}
    name_by_doc: dict[str, str] = {}

    for entry in quotes:
        text = entry.quote
        doc_id = entry.document_id
        link = entry.link
        name = entry.semantic_identifier
        if not (link and name and doc_id and text):
            continue

        if doc_id in quotes_by_doc:
            quotes_by_doc[doc_id].append(text)
            continue

        # First quote for this document: remember how to link to it.
        quotes_by_doc[doc_id] = [text]
        link_by_doc[doc_id] = link
        name_by_doc[doc_id] = name

    if not quotes_by_doc:
        return []

    snippet_fields: list[str] = []
    for doc_id, raw_quotes in quotes_by_doc.items():
        cleaned = [replace_whitespaces_w_space(raw).strip() for raw in raw_quotes]
        cleaned.sort(key=len, reverse=True)
        fenced = "\n".join(f"```{snippet}```" for snippet in cleaned[:5])
        snippet_fields.append(
            f"<{link_by_doc[doc_id]}|{name_by_doc[doc_id]}>\n{fenced}"
        )

    return [SectionBlock(fields=["*Relevant Snippets:*", *snippet_fields])]
|
||||
|
||||
|
||||
def build_qa_response_blocks(
|
||||
query_event_id: int,
|
||||
answer: str | None,
|
||||
quotes: list[DanswerQuote] | None,
|
||||
documents: list[SearchDoc],
|
||||
) -> list[Block]:
|
||||
doc_identifiers: list[str] = []
|
||||
quotes_blocks: list[Block] = []
|
||||
|
||||
ai_answer_header = HeaderBlock(text="AI Answer")
|
||||
|
||||
if not answer:
|
||||
answer_block = SectionBlock(
|
||||
text="Sorry, I was unable to find an answer, but I did find some potentially relevant docs 🤓"
|
||||
@@ -140,7 +237,7 @@ def build_qa_response_blocks(
|
||||
else:
|
||||
answer_block = SectionBlock(text=answer)
|
||||
if quotes:
|
||||
quotes_blocks, doc_identifiers = build_quotes_block(quotes)
|
||||
quotes_blocks = build_quotes_block(quotes)
|
||||
|
||||
# if no quotes OR `build_quotes_block()` did not give back any blocks
|
||||
if not quotes_blocks:
|
||||
@@ -150,9 +247,13 @@ def build_qa_response_blocks(
|
||||
)
|
||||
]
|
||||
|
||||
documents_block = build_documents_block(documents, doc_identifiers)
|
||||
feedback_block = build_qa_feedback_block(query_event_id=query_event_id)
|
||||
return (
|
||||
[answer_block]
|
||||
[
|
||||
ai_answer_header,
|
||||
answer_block,
|
||||
feedback_block,
|
||||
]
|
||||
+ quotes_blocks
|
||||
+ [documents_block, build_feedback_block(query_event_id=query_event_id)]
|
||||
+ [DividerBlock()]
|
||||
)
|
||||
|
@@ -1,27 +1,56 @@
|
||||
from slack_sdk import WebClient
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from danswer.bots.slack.constants import DISLIKE_BLOCK_ACTION_ID
|
||||
from danswer.bots.slack.constants import LIKE_BLOCK_ACTION_ID
|
||||
from danswer.bots.slack.utils import decompose_block_id
|
||||
from danswer.configs.constants import QAFeedbackType
|
||||
from danswer.configs.constants import SearchFeedbackType
|
||||
from danswer.db.engine import get_sqlalchemy_engine
|
||||
from danswer.db.feedback import create_doc_retrieval_feedback
|
||||
from danswer.db.feedback import update_query_event_feedback
|
||||
|
||||
|
||||
def handle_qa_feedback(
|
||||
query_id: int,
|
||||
feedback_type: QAFeedbackType,
|
||||
def handle_slack_feedback(
|
||||
block_id: str,
|
||||
feedback_type: str,
|
||||
client: WebClient,
|
||||
user_id_to_post_confirmation: str,
|
||||
channel_id_to_post_confirmation: str,
|
||||
thread_ts_to_post_confirmation: str,
|
||||
) -> None:
|
||||
engine = get_sqlalchemy_engine()
|
||||
|
||||
query_id, doc_id, doc_rank = decompose_block_id(block_id)
|
||||
|
||||
with Session(engine) as db_session:
|
||||
update_query_event_feedback(
|
||||
feedback=feedback_type,
|
||||
query_id=query_id,
|
||||
user_id=None, # no "user" for Slack bot for now
|
||||
db_session=db_session,
|
||||
)
|
||||
if feedback_type in [LIKE_BLOCK_ACTION_ID, DISLIKE_BLOCK_ACTION_ID]:
|
||||
update_query_event_feedback(
|
||||
feedback=QAFeedbackType.LIKE
|
||||
if feedback_type == LIKE_BLOCK_ACTION_ID
|
||||
else QAFeedbackType.DISLIKE,
|
||||
query_id=query_id,
|
||||
user_id=None, # no "user" for Slack bot for now
|
||||
db_session=db_session,
|
||||
)
|
||||
if feedback_type in [
|
||||
SearchFeedbackType.ENDORSE.value,
|
||||
SearchFeedbackType.REJECT.value,
|
||||
]:
|
||||
if doc_id is None or doc_rank is None:
|
||||
raise ValueError("Missing information for Document Feedback")
|
||||
|
||||
create_doc_retrieval_feedback(
|
||||
qa_event_id=query_id,
|
||||
document_id=doc_id,
|
||||
document_rank=doc_rank,
|
||||
user_id=None,
|
||||
db_session=db_session,
|
||||
clicked=False, # Not tracking this for Slack
|
||||
feedback=SearchFeedbackType.ENDORSE
|
||||
if feedback_type == SearchFeedbackType.ENDORSE.value
|
||||
else SearchFeedbackType.REJECT,
|
||||
)
|
||||
|
||||
# post message to slack confirming that feedback was received
|
||||
client.chat_postEphemeral(
|
||||
|
@@ -4,6 +4,7 @@ from retry import retry
|
||||
from slack_sdk import WebClient
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from danswer.bots.slack.blocks import build_documents_blocks
|
||||
from danswer.bots.slack.blocks import build_qa_response_blocks
|
||||
from danswer.bots.slack.utils import respond_in_thread
|
||||
from danswer.configs.app_configs import DANSWER_BOT_ANSWER_GENERATION_TIMEOUT
|
||||
@@ -37,6 +38,7 @@ def handle_message(
|
||||
def _get_answer(question: QuestionRequest) -> QAResponse:
|
||||
engine = get_sqlalchemy_engine()
|
||||
with Session(engine, expire_on_commit=False) as db_session:
|
||||
# This also handles creating the query event in postgres
|
||||
answer = answer_qa_query(
|
||||
question=question,
|
||||
user=None,
|
||||
@@ -54,7 +56,7 @@ def handle_message(
|
||||
QuestionRequest(
|
||||
query=msg,
|
||||
collection=DOCUMENT_INDEX_NAME,
|
||||
use_keyword=False, # always use semantic search when handling slack messages
|
||||
use_keyword=False, # always use semantic search when handling Slack messages
|
||||
filters=None,
|
||||
offset=None,
|
||||
)
|
||||
@@ -75,6 +77,14 @@ def handle_message(
|
||||
)
|
||||
return
|
||||
|
||||
if answer.eval_res_valid is False:
|
||||
logger.info(
|
||||
"Answer was evaluated to be invalid, throwing it away without responding."
|
||||
)
|
||||
if answer.answer:
|
||||
logger.debug(answer.answer)
|
||||
return
|
||||
|
||||
if not answer.top_ranked_docs:
|
||||
logger.error(f"Unable to answer question: '{msg}' - no documents found")
|
||||
# Optionally, respond in thread with the error message, Used primarily
|
||||
@@ -96,21 +106,26 @@ def handle_message(
|
||||
return
|
||||
|
||||
# convert raw response into "nicely" formatted Slack message
|
||||
blocks = build_qa_response_blocks(
|
||||
answer_blocks = build_qa_response_blocks(
|
||||
query_event_id=answer.query_event_id,
|
||||
answer=answer.answer,
|
||||
documents=answer.top_ranked_docs,
|
||||
quotes=answer.quotes,
|
||||
)
|
||||
|
||||
document_blocks = build_documents_blocks(
|
||||
documents=answer.top_ranked_docs, query_event_id=answer.query_event_id
|
||||
)
|
||||
|
||||
try:
|
||||
respond_in_thread(
|
||||
client=client,
|
||||
channel=channel,
|
||||
blocks=blocks,
|
||||
blocks=answer_blocks + document_blocks,
|
||||
thread_ts=message_ts_to_respond_to,
|
||||
# don't unfurl, since otherwise we will have 5+ previews which makes the message very long
|
||||
unfurl=False,
|
||||
)
|
||||
|
||||
except Exception:
|
||||
logger.exception(
|
||||
f"Unable to process message - could not respond in slack in {num_retries} attempts"
|
||||
|
@@ -9,12 +9,9 @@ from slack_sdk.socket_mode import SocketModeClient
|
||||
from slack_sdk.socket_mode.request import SocketModeRequest
|
||||
from slack_sdk.socket_mode.response import SocketModeResponse
|
||||
|
||||
from danswer.bots.slack.constants import DISLIKE_BLOCK_ACTION_ID
|
||||
from danswer.bots.slack.constants import LIKE_BLOCK_ACTION_ID
|
||||
from danswer.bots.slack.handlers.handle_feedback import handle_qa_feedback
|
||||
from danswer.bots.slack.handlers.handle_feedback import handle_slack_feedback
|
||||
from danswer.bots.slack.handlers.handle_message import handle_message
|
||||
from danswer.bots.slack.utils import get_query_event_id_from_block_id
|
||||
from danswer.configs.constants import QAFeedbackType
|
||||
from danswer.bots.slack.utils import decompose_block_id
|
||||
from danswer.utils.logger import setup_logger
|
||||
|
||||
logger = setup_logger()
|
||||
@@ -54,7 +51,7 @@ def _get_socket_client() -> SocketModeClient:
|
||||
|
||||
|
||||
def _process_slack_event(client: SocketModeClient, req: SocketModeRequest) -> None:
|
||||
logger.info(f"Received request of type: '{req.type}', with paylod: '{req.payload}'")
|
||||
logger.info(f"Received Slack request of type: '{req.type}'")
|
||||
if req.type == "events_api":
|
||||
# Acknowledge the request immediately
|
||||
response = SocketModeResponse(envelope_id=req.envelope_id)
|
||||
@@ -95,7 +92,7 @@ def _process_slack_event(client: SocketModeClient, req: SocketModeRequest) -> No
|
||||
|
||||
message_ts = event.get("ts")
|
||||
thread_ts = event.get("thread_ts")
|
||||
# pick the root of the thread (if a thread exists)
|
||||
# Pick the root of the thread (if a thread exists)
|
||||
message_ts_to_respond_to = cast(str, thread_ts or message_ts)
|
||||
if thread_ts and message_ts != thread_ts:
|
||||
channel_specific_logger.info(
|
||||
@@ -122,7 +119,7 @@ def _process_slack_event(client: SocketModeClient, req: SocketModeRequest) -> No
|
||||
f"Successfully processed message with ts: '{message_ts}'"
|
||||
)
|
||||
|
||||
# handle button clicks
|
||||
# Handle button clicks
|
||||
if req.type == "interactive" and req.payload.get("type") == "block_actions":
|
||||
# Acknowledge the request immediately
|
||||
response = SocketModeResponse(envelope_id=req.envelope_id)
|
||||
@@ -134,31 +131,22 @@ def _process_slack_event(client: SocketModeClient, req: SocketModeRequest) -> No
|
||||
return
|
||||
|
||||
action = cast(dict[str, Any], actions[0])
|
||||
action_id = action.get("action_id")
|
||||
if action_id == LIKE_BLOCK_ACTION_ID:
|
||||
feedback_type = QAFeedbackType.LIKE
|
||||
elif action_id == DISLIKE_BLOCK_ACTION_ID:
|
||||
feedback_type = QAFeedbackType.DISLIKE
|
||||
else:
|
||||
logger.error(
|
||||
f"Unable to process block action - unknown action_id: '{action_id}'"
|
||||
)
|
||||
return
|
||||
|
||||
action_id = cast(str, action.get("action_id"))
|
||||
block_id = cast(str, action.get("block_id"))
|
||||
user_id = cast(str, req.payload["user"]["id"])
|
||||
channel_id = cast(str, req.payload["container"]["channel_id"])
|
||||
thread_ts = cast(str, req.payload["container"]["thread_ts"])
|
||||
query_event_id = get_query_event_id_from_block_id(block_id)
|
||||
handle_qa_feedback(
|
||||
query_id=query_event_id,
|
||||
feedback_type=feedback_type,
|
||||
|
||||
handle_slack_feedback(
|
||||
block_id=block_id,
|
||||
feedback_type=action_id,
|
||||
client=client.web_client,
|
||||
user_id_to_post_confirmation=user_id,
|
||||
channel_id_to_post_confirmation=channel_id,
|
||||
thread_ts_to_post_confirmation=thread_ts,
|
||||
)
|
||||
|
||||
query_event_id, _, _ = decompose_block_id(block_id)
|
||||
logger.info(f"Successfully handled QA feedback for event: {query_event_id}")
|
||||
|
||||
|
||||
@@ -181,6 +169,7 @@ def process_slack_event(client: SocketModeClient, req: SocketModeRequest) -> Non
|
||||
if __name__ == "__main__":
|
||||
socket_client = _get_socket_client()
|
||||
socket_client.socket_mode_request_listeners.append(process_slack_event) # type: ignore
|
||||
|
||||
# Establish a WebSocket connection to the Socket Mode servers
|
||||
logger.info("Listening for messages from Slack...")
|
||||
socket_client.connect()
|
||||
|
@@ -1,5 +1,6 @@
|
||||
import logging
|
||||
import random
|
||||
import re
|
||||
import string
|
||||
from typing import cast
|
||||
|
||||
@@ -9,9 +10,10 @@ from slack_sdk.models.blocks import Block
|
||||
from slack_sdk.models.metadata import Metadata
|
||||
|
||||
from danswer.configs.app_configs import DANSWER_BOT_NUM_RETRIES
|
||||
from danswer.configs.constants import ID_SEPARATOR
|
||||
from danswer.connectors.slack.utils import make_slack_api_rate_limited
|
||||
from danswer.utils.logger import setup_logger
|
||||
|
||||
from danswer.utils.text_processing import replace_whitespaces_w_space
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
@@ -34,11 +36,6 @@ def respond_in_thread(
|
||||
if not text and not blocks:
|
||||
raise ValueError("One of `text` or `blocks` must be provided")
|
||||
|
||||
if text:
|
||||
logger.debug(f"Trying to send message: {text}")
|
||||
if blocks:
|
||||
logger.debug(f"Trying to send blocks: {blocks}")
|
||||
|
||||
slack_call = make_slack_api_rate_limited(client.chat_postMessage)
|
||||
response = slack_call(
|
||||
channel=channel,
|
||||
@@ -53,9 +50,63 @@ def respond_in_thread(
|
||||
raise RuntimeError(f"Unable to post message: {response}")
|
||||
|
||||
|
||||
def build_block_id_from_query_event_id(query_event_id: int) -> str:
|
||||
return f"{''.join(random.choice(string.ascii_letters) for _ in range(5))}:{query_event_id}"
|
||||
def build_feedback_block_id(
    query_event_id: int,
    document_id: str | None = None,
    document_rank: int | None = None,
) -> str:
    """Build a block id that encodes what a feedback button refers to.

    The id is `ID_SEPARATOR`-joined and starts with ten random ASCII letters.
    Without a document id the result is `<prefix><sep><query_event_id>`;
    with one it is
    `<prefix><sep><query_event_id><sep><document_id><sep><document_rank>`.

    Raises:
        ValueError: if `document_id` is given but empty, if `document_rank` is
            missing alongside a document id, or if the document id already
            contains `ID_SEPARATOR`.
    """
    # Drawn before validation, exactly like the original ordering.
    random_prefix = "".join(random.choice(string.ascii_letters) for _ in range(10))

    id_parts = [random_prefix, str(query_event_id)]

    if document_id is not None:
        if not document_id or document_rank is None:
            raise ValueError("Invalid document, missing information")
        if ID_SEPARATOR in document_id:
            raise ValueError(
                "Separator pattern should not already exist in document id"
            )
        id_parts.extend([document_id, str(document_rank)])

    return ID_SEPARATOR.join(id_parts)
|
||||
|
||||
|
||||
def get_query_event_id_from_block_id(block_id: str) -> int:
|
||||
return int(block_id.split(":")[-1])
|
||||
def decompose_block_id(block_id: str) -> tuple[int, str | None, int | None]:
    """Split a feedback block id into (query_id, document_id, document_rank).

    This is the inverse of `build_feedback_block_id`. A two-part id
    (`prefix`, `query_id`) yields `(query_id, None, None)`; a four-part id
    (`prefix`, `query_id`, `document_id`, `document_rank`) yields all three.

    Raises:
        ValueError: if the id cannot be split into two or four parts or a
            numeric part is not an integer. Any underlying error is logged
            and attached as the cause.
    """
    try:
        components = block_id.split(ID_SEPARATOR)
        if len(components) not in (2, 4):
            raise ValueError("Block ID does not contain right number of elements")

        # components[0] is the random prefix; it carries no information.
        if len(components) == 2:
            return int(components[-1]), None, None

        return int(components[1]), components[2], int(components[3])

    except Exception as e:
        logger.error(e)
        # Chain explicitly so the traceback shows the real parsing failure.
        raise ValueError("Received invalid Feedback Block Identifier") from e
|
||||
|
||||
|
||||
def translate_vespa_highlight_to_slack(match_strs: list[str], used_chars: int) -> str:
    """Convert `<hi>`-tagged highlight snippets into one Slack-formatted string.

    Args:
        match_strs: highlight snippets containing `<hi>`/`</hi>` tags; empty
            entries are skipped.
        used_chars: characters of the 300-character display budget already
            used by text shown alongside the snippet.

    Returns:
        The snippets with highlight tags turned into `*` markers, whitespace
        normalised, joined by "... ". If the result is longer than the
        remaining budget and has no `*` beyond it, it is cut to the budget and
        ends in "...". When the budget is already exhausted this leaves just
        "...".
    """

    def _replace_highlight(s: str) -> str:
        # Drop a closing tag directly followed by a non-whitespace character
        # and an opening tag directly preceded by one, so `*` only lands next
        # to whitespace or the string edge.
        # NOTE(review): presumably because Slack only renders *bold* at word
        # boundaries - confirm.
        s = re.sub(r"</hi>(?=\S)", "", s)
        s = re.sub(r"(?<=\S)<hi>", "", s)
        s = s.replace("</hi>", "*").replace("<hi>", "*")
        return s

    final_matches = [
        replace_whitespaces_w_space(_replace_highlight(match_str)).strip()
        for match_str in match_strs
        if match_str
    ]
    combined = "... ".join(final_matches)

    # Slack introduces "Show More" after 300 on desktop which is ugly
    # But don't trim the message if there is still a highlight after 300 chars
    # Clamp at zero: a negative budget would make the slices below count from
    # the end of the string instead of truncating it.
    remaining = max(300 - used_chars, 0)
    if len(combined) > remaining and "*" not in combined[remaining:]:
        combined = combined[: max(remaining - 3, 0)] + "..."

    return combined
|
||||
|
@@ -68,6 +68,18 @@ class InferenceChunk(BaseChunk):
|
||||
# ["<hi>the</hi> <hi>answer</hi> is 42", "he couldn't find an <hi>answer</hi>"]
|
||||
match_highlights: list[str]
|
||||
|
||||
def __repr__(self) -> str:
    """Return a short label: the document id plus the first few blurb words.

    Whole words are appended until the preview is longer than 25 characters;
    the label always ends in "...".
    """
    words = self.blurb.split()
    preview = words[0] if words else ""
    for next_word in words[1:]:
        if len(preview) > 25:
            break
        preview = preview + " " + next_word
    return f"Inference Chunk: {self.document_id} - {preview}..."
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, init_dict: dict[str, Any]) -> "InferenceChunk":
|
||||
init_kwargs = {
|
||||
|
@@ -224,3 +224,6 @@ DANSWER_BOT_DISABLE_DOCS_ONLY_ANSWER = os.environ.get(
|
||||
ENABLE_DANSWERBOT_REFLEXION = (
|
||||
os.environ.get("ENABLE_DANSWERBOT_REFLEXION", "").lower() == "true"
|
||||
)
|
||||
ENABLE_SLACK_DOC_FEEDBACK = (
|
||||
os.environ.get("ENABLE_SLACK_DOC_FEEDBACK", "").lower() == "true"
|
||||
)
|
||||
|
@@ -23,6 +23,7 @@ PUBLIC_DOC_PAT = "PUBLIC"
|
||||
QUOTE = "quote"
|
||||
BOOST = "boost"
|
||||
SCORE = "score"
|
||||
ID_SEPARATOR = ":;:"
|
||||
DEFAULT_BOOST = 0
|
||||
|
||||
# Prompt building constants:
|
||||
|
@@ -111,6 +111,7 @@ def create_doc_retrieval_feedback(
|
||||
clicked: bool = False,
|
||||
feedback: SearchFeedbackType | None = None,
|
||||
) -> None:
|
||||
"""Creates a new Document feedback row and updates the boost value in Postgres and Vespa"""
|
||||
if not clicked and feedback is None:
|
||||
raise ValueError("No action taken, not valid feedback")
|
||||
|
||||
|
@@ -137,17 +137,17 @@ def answer_qa_query(
|
||||
if d_answer.answer is not None:
|
||||
valid = get_answer_validity(query, d_answer.answer)
|
||||
|
||||
if not valid:
|
||||
return QAResponse(
|
||||
answer=None,
|
||||
quotes=None,
|
||||
top_ranked_docs=chunks_to_search_docs(ranked_chunks),
|
||||
lower_ranked_docs=chunks_to_search_docs(unranked_chunks),
|
||||
predicted_flow=predicted_flow,
|
||||
predicted_search=predicted_search,
|
||||
error_msg=error_msg,
|
||||
query_event_id=query_event_id,
|
||||
)
|
||||
return QAResponse(
|
||||
answer=d_answer.answer if d_answer else None,
|
||||
quotes=quotes.quotes if quotes else None,
|
||||
top_ranked_docs=chunks_to_search_docs(ranked_chunks),
|
||||
lower_ranked_docs=chunks_to_search_docs(unranked_chunks),
|
||||
predicted_flow=predicted_flow,
|
||||
predicted_search=predicted_search,
|
||||
eval_res_valid=True if valid else False,
|
||||
error_msg=error_msg,
|
||||
query_event_id=query_event_id,
|
||||
)
|
||||
|
||||
return QAResponse(
|
||||
answer=d_answer.answer if d_answer else None,
|
||||
|
@@ -1,5 +1,6 @@
|
||||
import abc
|
||||
import json
|
||||
import re
|
||||
from collections.abc import Iterator
|
||||
from copy import copy
|
||||
|
||||
@@ -157,12 +158,11 @@ class SingleMessageScratchpadHandler(QAHandler):
|
||||
|
||||
model_clean = clean_up_code_blocks(model_output)
|
||||
|
||||
answer_start = model_clean.find('{"answer":')
|
||||
# Only found thoughts, no final answer
|
||||
if answer_start == -1:
|
||||
match = re.search(r'{\s*"answer":', model_clean)
|
||||
if not match:
|
||||
return DanswerAnswer(answer=None), DanswerQuotes(quotes=[])
|
||||
|
||||
final_json = escape_newlines(model_clean[answer_start:])
|
||||
final_json = escape_newlines(model_clean[match.start() :])
|
||||
|
||||
return process_answer(
|
||||
final_json, context_chunks, is_json_prompt=self.is_json_output
|
||||
|
@@ -88,10 +88,8 @@ def separate_answer_quotes(
|
||||
return extract_answer_quotes_json(model_raw_json)
|
||||
except ValueError:
|
||||
if is_json_prompt:
|
||||
logger.error(
|
||||
"Model did not output in json format as expected, "
|
||||
"trying to parse it regardless"
|
||||
)
|
||||
logger.error("Model did not output in json format as expected.")
|
||||
raise
|
||||
return extract_answer_quotes_freeform(answer_raw)
|
||||
|
||||
|
||||
|
@@ -107,7 +107,7 @@ def retrieve_ranked_documents(
|
||||
f"Semantic search returned no results with filters: {filters_log_msg}"
|
||||
)
|
||||
return None, None
|
||||
logger.info(top_chunks)
|
||||
logger.debug(top_chunks)
|
||||
ranked_chunks = semantic_reranking(query, top_chunks[:num_rerank])
|
||||
|
||||
top_docs = [
|
||||
|
@@ -1,7 +1,7 @@
|
||||
from danswer.configs.constants import ANSWER_PAT
|
||||
from danswer.configs.constants import CODE_BLOCK_PAT
|
||||
from danswer.configs.constants import FINAL_ANSWER_PAT
|
||||
from danswer.configs.constants import GENERAL_SEP_PAT
|
||||
from danswer.configs.constants import INVALID_PAT
|
||||
from danswer.configs.constants import QUESTION_PAT
|
||||
from danswer.configs.constants import THOUGHT_PAT
|
||||
from danswer.direct_qa.qa_block import dict_based_prompt_to_langchain_prompt
|
||||
@@ -12,56 +12,55 @@ from danswer.utils.timing import log_function_time
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
def get_answer_validation_messages(query: str, answer: str) -> list[dict[str, str]]:
|
||||
cot_block = (
|
||||
f"{THOUGHT_PAT} Use this as a scratchpad to write out in a step by step manner your reasoning "
|
||||
f"about EACH criterion to ensure that your conclusion is correct.\n"
|
||||
f"{INVALID_PAT} True or False"
|
||||
)
|
||||
|
||||
q_a_block = f"{QUESTION_PAT} {query}\n\n" f"{ANSWER_PAT} {answer}"
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "user",
|
||||
"content": (
|
||||
f"{CODE_BLOCK_PAT.format(q_a_block).lstrip()}{GENERAL_SEP_PAT}\n"
|
||||
"Determine if the answer is valid for the query.\n"
|
||||
f"The answer is invalid if ANY of the following is true:\n"
|
||||
"- Does not directly answer the user query.\n"
|
||||
"- Answers a related but different question.\n"
|
||||
'- Contains anything meaning "I don\'t know" or "information not found".\n\n'
|
||||
f"You must use the following format:"
|
||||
f"{CODE_BLOCK_PAT.format(cot_block)}"
|
||||
f'Hint: Invalid must be exactly "True" or "False" (without the quotes)'
|
||||
),
|
||||
},
|
||||
]
|
||||
|
||||
return messages
|
||||
|
||||
|
||||
def extract_validity(model_output: str) -> bool:
|
||||
if INVALID_PAT in model_output:
|
||||
result = model_output.split(INVALID_PAT)[-1].strip()
|
||||
if "true" in result.lower():
|
||||
return False
|
||||
return True # If something is wrong, let's not toss away the answer
|
||||
|
||||
|
||||
@log_function_time()
|
||||
def get_answer_validity(
|
||||
query: str,
|
||||
answer: str,
|
||||
) -> bool:
|
||||
messages = get_answer_validation_messages(query, answer)
|
||||
def _get_answer_validation_messages(
|
||||
query: str, answer: str
|
||||
) -> list[dict[str, str]]:
|
||||
cot_block = (
|
||||
f"{THOUGHT_PAT} Use this as a scratchpad to write out in a step by step manner your reasoning "
|
||||
f"about EACH criterion to ensure that your conclusion is correct. "
|
||||
f"Be brief when evaluating each condition.\n"
|
||||
f"{FINAL_ANSWER_PAT} Valid or Invalid"
|
||||
)
|
||||
|
||||
q_a_block = f"{QUESTION_PAT} {query}\n\n{ANSWER_PAT} {answer}"
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "user",
|
||||
"content": (
|
||||
f"{CODE_BLOCK_PAT.format(q_a_block).lstrip()}{GENERAL_SEP_PAT}\n"
|
||||
"Determine if the answer is valid for the query.\n"
|
||||
f"The answer is invalid if ANY of the following is true:\n"
|
||||
"1. Does not directly answer the user query.\n"
|
||||
"2. Answers a related but different question.\n"
|
||||
"3. Query requires a subjective answer or an opinion.\n"
|
||||
'4. Contains anything meaning "I don\'t know" or "information not found".\n\n'
|
||||
f"You must use the following format:"
|
||||
f"{CODE_BLOCK_PAT.format(cot_block)}"
|
||||
f'Hint: Final Answer must be exactly "Valid" or "Invalid"'
|
||||
),
|
||||
},
|
||||
]
|
||||
|
||||
return messages
|
||||
|
||||
def _extract_validity(model_output: str) -> bool:
|
||||
if FINAL_ANSWER_PAT in model_output:
|
||||
result = model_output.split(FINAL_ANSWER_PAT)[-1].strip()
|
||||
if "invalid" in result.lower():
|
||||
return False
|
||||
return True # If something is wrong, let's not toss away the answer
|
||||
|
||||
messages = _get_answer_validation_messages(query, answer)
|
||||
filled_llm_prompt = dict_based_prompt_to_langchain_prompt(messages)
|
||||
model_output = get_default_llm().invoke(filled_llm_prompt)
|
||||
logger.debug(model_output)
|
||||
|
||||
validity = extract_validity(model_output)
|
||||
logger.info(
|
||||
f'LLM Answer of "{answer}" was determined to be {"valid" if validity else "invalid"}.'
|
||||
)
|
||||
validity = _extract_validity(model_output)
|
||||
|
||||
return validity
|
||||
|
@@ -224,6 +224,7 @@ class QAResponse(SearchResponse):
|
||||
quotes: list[DanswerQuote] | None
|
||||
predicted_flow: QueryFlow
|
||||
predicted_search: SearchType
|
||||
eval_res_valid: bool | None = None
|
||||
error_msg: str | None = None
|
||||
|
||||
|
||||
|
@@ -14,6 +14,10 @@ def escape_newlines(s: str) -> str:
|
||||
return re.sub(r"(?<!\\)\n", "\\\\n", s)
|
||||
|
||||
|
||||
def replace_whitespaces_w_space(s: str) -> str:
    """Return `s` with each whitespace character replaced by a plain space.

    Runs are not collapsed: every matched character becomes exactly one space,
    so the length of the string is unchanged.
    """
    whitespace_char = re.compile(r"\s")
    return whitespace_char.sub(" ", s)
|
||||
|
||||
|
||||
def extract_embedded_json(s: str) -> dict:
|
||||
first_brace_index = s.find("{")
|
||||
last_brace_index = s.rfind("}")
|
||||
|
Reference in New Issue
Block a user