mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-09-20 13:05:49 +02:00
Adjust slack bot (#144)
* Add handling for cases where an answer is not found
* Make danswer bot slightly more configurable
* Don't respond to messages in thread + add better formatting for slack messages
This commit is contained in:
@@ -136,3 +136,12 @@ DYNAMIC_CONFIG_STORE = os.environ.get(
|
|||||||
"DYNAMIC_CONFIG_STORE", "FileSystemBackedDynamicConfigStore"
|
"DYNAMIC_CONFIG_STORE", "FileSystemBackedDynamicConfigStore"
|
||||||
)
|
)
|
||||||
DYNAMIC_CONFIG_DIR_PATH = os.environ.get("DYNAMIC_CONFIG_DIR_PATH", "/home/storage")
|
DYNAMIC_CONFIG_DIR_PATH = os.environ.get("DYNAMIC_CONFIG_DIR_PATH", "/home/storage")
|
||||||
|
|
||||||
|
|
||||||
|
#####
|
||||||
|
# Danswer Slack Bot Configs
|
||||||
|
#####
|
||||||
|
DANSWER_BOT_NUM_DOCS_TO_DISPLAY = int(
|
||||||
|
os.environ.get("DANSWER_BOT_NUM_DOCS_TO_DISPLAY", "5")
|
||||||
|
)
|
||||||
|
DANSWER_BOT_NUM_RETRIES = int(os.environ.get("DANSWER_BOT_NUM_RETRIES", "5"))
|
||||||
|
@@ -19,7 +19,9 @@ from danswer.utils.logging import setup_logger
|
|||||||
logger = setup_logger()
|
logger = setup_logger()
|
||||||
|
|
||||||
|
|
||||||
def answer_question(question: QuestionRequest, user: User | None) -> QAResponse:
|
def answer_question(
|
||||||
|
question: QuestionRequest, user: User | None, qa_model_timeout: int = QA_TIMEOUT
|
||||||
|
) -> QAResponse:
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
|
|
||||||
query = question.query
|
query = question.query
|
||||||
@@ -53,7 +55,7 @@ def answer_question(question: QuestionRequest, user: User | None) -> QAResponse:
|
|||||||
predicted_search=predicted_search,
|
predicted_search=predicted_search,
|
||||||
)
|
)
|
||||||
|
|
||||||
qa_model = get_default_backend_qa_model(timeout=QA_TIMEOUT)
|
qa_model = get_default_backend_qa_model(timeout=qa_model_timeout)
|
||||||
chunk_offset = offset_count * NUM_GENERATIVE_AI_INPUT_DOCS
|
chunk_offset = offset_count * NUM_GENERATIVE_AI_INPUT_DOCS
|
||||||
if chunk_offset >= len(ranked_chunks):
|
if chunk_offset >= len(ranked_chunks):
|
||||||
raise ValueError("Chunks offset too large, should not retry this many times")
|
raise ValueError("Chunks offset too large, should not retry this many times")
|
||||||
|
@@ -177,8 +177,11 @@ def process_answer(
|
|||||||
answer_raw: str, chunks: list[InferenceChunk]
|
answer_raw: str, chunks: list[InferenceChunk]
|
||||||
) -> tuple[str | None, dict[str, dict[str, str | int | None]] | None]:
|
) -> tuple[str | None, dict[str, dict[str, str | int | None]] | None]:
|
||||||
answer, quote_strings = separate_answer_quotes(answer_raw)
|
answer, quote_strings = separate_answer_quotes(answer_raw)
|
||||||
if not answer or not quote_strings:
|
if answer == UNCERTAINTY_PAT or not answer:
|
||||||
return None, None
|
return None, None
|
||||||
|
|
||||||
|
if not quote_strings:
|
||||||
|
return answer, None
|
||||||
quotes_dict = match_quotes_to_docs(quote_strings, chunks)
|
quotes_dict = match_quotes_to_docs(quote_strings, chunks)
|
||||||
return answer, quotes_dict
|
return answer, quotes_dict
|
||||||
|
|
||||||
|
@@ -1,12 +1,16 @@
|
|||||||
import os
|
import os
|
||||||
|
|
||||||
|
from danswer.configs.app_configs import DANSWER_BOT_NUM_DOCS_TO_DISPLAY
|
||||||
|
from danswer.configs.app_configs import DANSWER_BOT_NUM_RETRIES
|
||||||
from danswer.configs.app_configs import QDRANT_DEFAULT_COLLECTION
|
from danswer.configs.app_configs import QDRANT_DEFAULT_COLLECTION
|
||||||
|
from danswer.configs.constants import DocumentSource
|
||||||
from danswer.connectors.slack.utils import make_slack_api_rate_limited
|
from danswer.connectors.slack.utils import make_slack_api_rate_limited
|
||||||
from danswer.direct_qa.answer_question import answer_question
|
from danswer.direct_qa.answer_question import answer_question
|
||||||
from danswer.server.models import QAResponse
|
from danswer.server.models import QAResponse
|
||||||
from danswer.server.models import QuestionRequest
|
from danswer.server.models import QuestionRequest
|
||||||
from danswer.server.models import SearchDoc
|
from danswer.server.models import SearchDoc
|
||||||
from danswer.utils.logging import setup_logger
|
from danswer.utils.logging import setup_logger
|
||||||
|
from retry import retry
|
||||||
from slack_sdk import WebClient
|
from slack_sdk import WebClient
|
||||||
from slack_sdk.socket_mode import SocketModeClient
|
from slack_sdk.socket_mode import SocketModeClient
|
||||||
from slack_sdk.socket_mode.request import SocketModeRequest
|
from slack_sdk.socket_mode.request import SocketModeRequest
|
||||||
@@ -14,9 +18,6 @@ from slack_sdk.socket_mode.response import SocketModeResponse
|
|||||||
|
|
||||||
logger = setup_logger()
|
logger = setup_logger()
|
||||||
|
|
||||||
_NUM_RETRIES = 3
|
|
||||||
_NUM_DOCS_TO_DISPLAY = 5
|
|
||||||
|
|
||||||
|
|
||||||
def _get_socket_client() -> SocketModeClient:
|
def _get_socket_client() -> SocketModeClient:
|
||||||
# For more info on how to set this up, checkout the docs:
|
# For more info on how to set this up, checkout the docs:
|
||||||
@@ -34,6 +35,28 @@ def _get_socket_client() -> SocketModeClient:
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
_MAX_BLURB_LEN = 25
|
||||||
|
|
||||||
|
|
||||||
|
def _build_custom_semantic_identifier(
|
||||||
|
semantic_identifier: str, blurb: str, source: str
|
||||||
|
) -> str:
|
||||||
|
"""
|
||||||
|
On slack, since we just show the semantic identifier rather than semantic + blurb, we need
|
||||||
|
to do some custom formatting to make sure the semantic identifier is unique and meaningful.
|
||||||
|
"""
|
||||||
|
if source == DocumentSource.SLACK.value:
|
||||||
|
truncated_blurb = (
|
||||||
|
f"{blurb[:_MAX_BLURB_LEN]}..." if len(blurb) > _MAX_BLURB_LEN else blurb
|
||||||
|
)
|
||||||
|
if truncated_blurb:
|
||||||
|
return f"#{semantic_identifier}: {truncated_blurb}"
|
||||||
|
else:
|
||||||
|
return f"#{semantic_identifier}"
|
||||||
|
|
||||||
|
return semantic_identifier
|
||||||
|
|
||||||
|
|
||||||
def _process_quotes(
|
def _process_quotes(
|
||||||
quotes: dict[str, dict[str, str | int | None]] | None
|
quotes: dict[str, dict[str, str | int | None]] | None
|
||||||
) -> tuple[str | None, list[str]]:
|
) -> tuple[str | None, list[str]]:
|
||||||
@@ -43,11 +66,17 @@ def _process_quotes(
|
|||||||
quote_lines: list[str] = []
|
quote_lines: list[str] = []
|
||||||
doc_identifiers: list[str] = []
|
doc_identifiers: list[str] = []
|
||||||
for quote_dict in quotes.values():
|
for quote_dict in quotes.values():
|
||||||
|
doc_id = str(quote_dict.get("document_id", ""))
|
||||||
doc_link = quote_dict.get("link")
|
doc_link = quote_dict.get("link")
|
||||||
doc_name = quote_dict.get("semantic_identifier")
|
doc_name = quote_dict.get("semantic_identifier")
|
||||||
if doc_link and doc_name and doc_name not in doc_identifiers:
|
if doc_link and doc_name and doc_id and doc_id not in doc_identifiers:
|
||||||
doc_identifiers.append(str(doc_name))
|
doc_identifiers.append(str(doc_id))
|
||||||
quote_lines.append(f"- <{doc_link}|{doc_name}>")
|
custom_semantic_identifier = _build_custom_semantic_identifier(
|
||||||
|
semantic_identifier=doc_name,
|
||||||
|
blurb=quote_dict.get("blurb", ""),
|
||||||
|
source=quote_dict.get("source_type", ""),
|
||||||
|
)
|
||||||
|
quote_lines.append(f"- <{doc_link}|{custom_semantic_identifier}>")
|
||||||
|
|
||||||
if not quote_lines:
|
if not quote_lines:
|
||||||
return None, []
|
return None, []
|
||||||
@@ -56,25 +85,30 @@ def _process_quotes(
|
|||||||
|
|
||||||
|
|
||||||
def _process_documents(
|
def _process_documents(
|
||||||
documents: list[SearchDoc] | None, already_displayed_doc_identifiers: list[str]
|
documents: list[SearchDoc] | None,
|
||||||
|
already_displayed_doc_identifiers: list[str],
|
||||||
|
num_docs_to_display: int = DANSWER_BOT_NUM_DOCS_TO_DISPLAY,
|
||||||
) -> str | None:
|
) -> str | None:
|
||||||
if not documents:
|
if not documents:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
seen_docs_identifiers = set(already_displayed_doc_identifiers)
|
seen_docs_identifiers = set(already_displayed_doc_identifiers)
|
||||||
top_documents: list[SearchDoc] = []
|
top_document_lines: list[str] = []
|
||||||
for d in documents:
|
for d in documents:
|
||||||
if d.semantic_identifier in seen_docs_identifiers:
|
if d.document_id in seen_docs_identifiers:
|
||||||
continue
|
continue
|
||||||
seen_docs_identifiers.add(d.semantic_identifier)
|
seen_docs_identifiers.add(d.document_id)
|
||||||
top_documents.append(d)
|
|
||||||
if len(top_documents) >= _NUM_DOCS_TO_DISPLAY:
|
custom_semantic_identifier = _build_custom_semantic_identifier(
|
||||||
|
semantic_identifier=d.semantic_identifier,
|
||||||
|
blurb=d.blurb,
|
||||||
|
source=d.source_type,
|
||||||
|
)
|
||||||
|
top_document_lines.append(f"- <{d.link}|{custom_semantic_identifier}>")
|
||||||
|
if len(top_document_lines) >= num_docs_to_display:
|
||||||
break
|
break
|
||||||
|
|
||||||
top_documents_str = "\n".join(
|
return "\n".join(top_document_lines)
|
||||||
[f"- <{d.link}|{d.semantic_identifier}>" for d in top_documents]
|
|
||||||
)
|
|
||||||
return "*Other potentially relevant documents:*\n" + top_documents_str
|
|
||||||
|
|
||||||
|
|
||||||
def process_slack_event(client: SocketModeClient, req: SocketModeRequest) -> None:
|
def process_slack_event(client: SocketModeClient, req: SocketModeRequest) -> None:
|
||||||
@@ -100,8 +134,13 @@ def process_slack_event(client: SocketModeClient, req: SocketModeRequest) -> Non
|
|||||||
logger.info("Ignoring message from bot")
|
logger.info("Ignoring message from bot")
|
||||||
return
|
return
|
||||||
|
|
||||||
|
message_ts = req.payload.get("event", {}).get("ts")
|
||||||
|
thread_ts = req.payload.get("event", {}).get("thread_ts")
|
||||||
|
if thread_ts and message_ts != thread_ts:
|
||||||
|
logger.info("Skipping message since it is not the root of a thread")
|
||||||
|
return
|
||||||
|
|
||||||
msg = req.payload.get("event", {}).get("text")
|
msg = req.payload.get("event", {}).get("text")
|
||||||
thread_ts = req.payload.get("event", {}).get("ts")
|
|
||||||
if not msg:
|
if not msg:
|
||||||
logger.error("Unable to process empty message")
|
logger.error("Unable to process empty message")
|
||||||
return
|
return
|
||||||
@@ -109,54 +148,52 @@ def process_slack_event(client: SocketModeClient, req: SocketModeRequest) -> Non
|
|||||||
# TODO: message should be enqueued and processed elsewhere,
|
# TODO: message should be enqueued and processed elsewhere,
|
||||||
# but doing it here for now for simplicity
|
# but doing it here for now for simplicity
|
||||||
|
|
||||||
def _get_answer(question: QuestionRequest) -> QAResponse | None:
|
@retry(tries=DANSWER_BOT_NUM_RETRIES, delay=0.25, backoff=2, logger=logger)
|
||||||
try:
|
def _get_answer(question: QuestionRequest) -> QAResponse:
|
||||||
answer = answer_question(question=question, user=None)
|
answer = answer_question(question=question, user=None)
|
||||||
if not answer.error_msg:
|
if not answer.error_msg:
|
||||||
return answer
|
return answer
|
||||||
else:
|
else:
|
||||||
raise RuntimeError(answer.error_msg)
|
raise RuntimeError(answer.error_msg)
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Unable to process message: {e}")
|
|
||||||
return None
|
|
||||||
|
|
||||||
answer = None
|
answer = None
|
||||||
for _ in range(_NUM_RETRIES):
|
try:
|
||||||
answer = _get_answer(
|
answer = _get_answer(
|
||||||
QuestionRequest(
|
QuestionRequest(
|
||||||
query=req.payload.get("event", {}).get("text"),
|
query=req.payload.get("event", {}).get("text"),
|
||||||
collection=QDRANT_DEFAULT_COLLECTION,
|
collection=QDRANT_DEFAULT_COLLECTION,
|
||||||
use_keyword=False,
|
use_keyword=None,
|
||||||
filters=None,
|
filters=None,
|
||||||
offset=None,
|
offset=None,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
if answer:
|
except Exception:
|
||||||
break
|
logger.exception(
|
||||||
|
f"Unable to process message - did not successfully answer in {DANSWER_BOT_NUM_RETRIES} attempts"
|
||||||
if not answer:
|
|
||||||
logger.error(
|
|
||||||
f"Unable to process message - did not successfully answer in {_NUM_RETRIES} attempts"
|
|
||||||
)
|
)
|
||||||
return
|
return
|
||||||
|
|
||||||
if not answer.answer:
|
|
||||||
logger.error(f"Unable to process message - no answer found")
|
|
||||||
return
|
|
||||||
|
|
||||||
# convert raw response into "nicely" formatted Slack message
|
# convert raw response into "nicely" formatted Slack message
|
||||||
quote_str, doc_identifiers = _process_quotes(answer.quotes)
|
quote_str, doc_identifiers = _process_quotes(answer.quotes)
|
||||||
top_documents_str = _process_documents(answer.top_ranked_docs, doc_identifiers)
|
top_documents_str = _process_documents(answer.top_ranked_docs, doc_identifiers)
|
||||||
if quote_str:
|
|
||||||
text = f"{answer.answer}\n\n*Sources:*\n{quote_str}\n\n{top_documents_str}"
|
|
||||||
else:
|
|
||||||
text = f"{answer.answer}\n\n*Warning*: no sources were quoted for this answer, so it may be unreliable 😔\n\n{top_documents_str}"
|
|
||||||
|
|
||||||
|
if not answer.answer:
|
||||||
|
text = f"Sorry, I was unable to find an answer, but I did find some potentially relevant docs 🤓\n\n{top_documents_str}"
|
||||||
|
else:
|
||||||
|
top_documents_str_with_header = (
|
||||||
|
f"*Other potentially relevant docs:*\n{top_documents_str}"
|
||||||
|
)
|
||||||
|
if quote_str:
|
||||||
|
text = f"{answer.answer}\n\n*Sources:*\n{quote_str}\n\n{top_documents_str_with_header}"
|
||||||
|
else:
|
||||||
|
text = f"{answer.answer}\n\n*Warning*: no sources were quoted for this answer, so it may be unreliable 😔\n\n{top_documents_str_with_header}"
|
||||||
|
|
||||||
|
@retry(tries=DANSWER_BOT_NUM_RETRIES, delay=0.25, backoff=2, logger=logger)
|
||||||
def _respond_in_thread(
|
def _respond_in_thread(
|
||||||
channel: str,
|
channel: str,
|
||||||
text: str,
|
text: str,
|
||||||
thread_ts: str,
|
thread_ts: str,
|
||||||
) -> str | None:
|
) -> None:
|
||||||
slack_call = make_slack_api_rate_limited(client.web_client.chat_postMessage)
|
slack_call = make_slack_api_rate_limited(client.web_client.chat_postMessage)
|
||||||
response = slack_call(
|
response = slack_call(
|
||||||
channel=channel,
|
channel=channel,
|
||||||
@@ -164,25 +201,18 @@ def process_slack_event(client: SocketModeClient, req: SocketModeRequest) -> Non
|
|||||||
thread_ts=thread_ts,
|
thread_ts=thread_ts,
|
||||||
)
|
)
|
||||||
if not response.get("ok"):
|
if not response.get("ok"):
|
||||||
return f"Unable to post message: {response}"
|
raise RuntimeError(f"Unable to post message: {response}")
|
||||||
return None
|
|
||||||
|
|
||||||
successfully_answered = False
|
try:
|
||||||
for _ in range(_NUM_RETRIES):
|
_respond_in_thread(
|
||||||
error_msg = _respond_in_thread(
|
|
||||||
channel=req.payload.get("event", {}).get("channel"),
|
channel=req.payload.get("event", {}).get("channel"),
|
||||||
text=text,
|
text=text,
|
||||||
thread_ts=thread_ts,
|
thread_ts=thread_ts
|
||||||
|
or message_ts, # pick the root of the thread (if a thread exists)
|
||||||
)
|
)
|
||||||
if error_msg:
|
except Exception:
|
||||||
logger.error(error_msg)
|
logger.exception(
|
||||||
else:
|
f"Unable to process message - could not respond in slack in {DANSWER_BOT_NUM_RETRIES} attempts"
|
||||||
successfully_answered = True
|
|
||||||
break
|
|
||||||
|
|
||||||
if not successfully_answered:
|
|
||||||
logger.error(
|
|
||||||
f"Unable to process message - could not respond in slack in {_NUM_RETRIES} attempts"
|
|
||||||
)
|
)
|
||||||
return
|
return
|
||||||
|
|
||||||
|
@@ -27,6 +27,7 @@ def chunks_to_search_docs(chunks: list[InferenceChunk] | None) -> list[SearchDoc
|
|||||||
search_docs = (
|
search_docs = (
|
||||||
[
|
[
|
||||||
SearchDoc(
|
SearchDoc(
|
||||||
|
document_id=chunk.document_id,
|
||||||
semantic_identifier=chunk.semantic_identifier,
|
semantic_identifier=chunk.semantic_identifier,
|
||||||
link=chunk.source_links.get(0) if chunk.source_links else None,
|
link=chunk.source_links.get(0) if chunk.source_links else None,
|
||||||
blurb=chunk.blurb,
|
blurb=chunk.blurb,
|
||||||
|
@@ -80,6 +80,7 @@ class UserRoleResponse(BaseModel):
|
|||||||
|
|
||||||
|
|
||||||
class SearchDoc(BaseModel):
|
class SearchDoc(BaseModel):
|
||||||
|
document_id: str
|
||||||
semantic_identifier: str
|
semantic_identifier: str
|
||||||
link: str | None
|
link: str | None
|
||||||
blurb: str
|
blurb: str
|
||||||
|
@@ -27,6 +27,7 @@ pytest-playwright==0.3.2
|
|||||||
python-multipart==0.0.6
|
python-multipart==0.0.6
|
||||||
qdrant-client==1.2.0
|
qdrant-client==1.2.0
|
||||||
requests==2.31.0
|
requests==2.31.0
|
||||||
|
retry==0.9.2
|
||||||
rfc3986==1.5.0
|
rfc3986==1.5.0
|
||||||
sentence-transformers==2.2.2
|
sentence-transformers==2.2.2
|
||||||
slack-sdk==3.20.2
|
slack-sdk==3.20.2
|
||||||
|
@@ -9,4 +9,5 @@ types-psycopg2==2.9.21.10
|
|||||||
types-python-dateutil==2.8.19.13
|
types-python-dateutil==2.8.19.13
|
||||||
types-regex==2023.3.23.1
|
types-regex==2023.3.23.1
|
||||||
types-requests==2.28.11.17
|
types-requests==2.28.11.17
|
||||||
|
types-retry==0.9.9.3
|
||||||
types-urllib3==1.26.25.11
|
types-urllib3==1.26.25.11
|
||||||
|
Reference in New Issue
Block a user