DanswerBot Update (#2697)

2025-04-11 13:32:16 +02:00 · 2024-10-06 14:27:31 -07:00 · 2024-10-06 14:27:31 -07:00 · 83bc7d4656
commit 83bc7d4656
parent 3206bb27ce
5 changed files with 69 additions and 37 deletions
--- a/backend/danswer/danswerbot/slack/listener.py
+++ b/backend/danswer/danswerbot/slack/listener.py
@ -131,9 +131,8 @@ def prefilter_requests(req: SocketModeRequest, client: SocketModeClient) -> bool
            )
            return False

+        bot_tag_id = get_danswer_bot_app_id(client.web_client)
        if event_type == "message":
-            bot_tag_id = get_danswer_bot_app_id(client.web_client)
-
            is_dm = event.get("channel_type") == "im"
            is_tagged = bot_tag_id and bot_tag_id in msg
            is_danswer_bot_msg = bot_tag_id and bot_tag_id in event.get("user", "")
@ -159,8 +158,10 @@ def prefilter_requests(req: SocketModeRequest, client: SocketModeClient) -> bool
                slack_bot_config = get_slack_bot_config_for_channel(
                    channel_name=channel_name, db_session=db_session
                )
-            if not slack_bot_config or not slack_bot_config.channel_config.get(
-                "respond_to_bots"
+            # If DanswerBot is not specifically tagged and the channel is not set to respond to bots, ignore the message
+            if (not bot_tag_id or bot_tag_id not in msg) and (
+                not slack_bot_config
+                or not slack_bot_config.channel_config.get("respond_to_bots")
            ):
                channel_specific_logger.info("Ignoring message from bot")
                return False
@ -447,8 +448,9 @@ def process_slack_event(client: SocketModeClient, req: SocketModeRequest) -> Non
                return view_routing(req, client)
        elif req.type == "events_api" or req.type == "slash_commands":
            return process_message(req, client)
-    except Exception:
-        logger.exception("Failed to process slack event")
+    except Exception as e:
+        logger.exception(f"Failed to process slack event. Error: {e}")
+        logger.error(f"Slack request payload: {req.payload}")


 def _get_socket_client(slack_bot_tokens: SlackBotTokens) -> SocketModeClient:
--- a/backend/danswer/danswerbot/slack/utils.py
+++ b/backend/danswer/danswerbot/slack/utils.py
@ -430,35 +430,58 @@ def read_slack_thread(
    replies = cast(dict, response.data).get("messages", [])
    for reply in replies:
        if "user" in reply and "bot_id" not in reply:
-            message = remove_danswer_bot_tag(reply["text"], client=client)
-            user_sem_id = fetch_user_semantic_id_from_id(reply["user"], client)
+            message = reply["text"]
+            user_sem_id = (
+                fetch_user_semantic_id_from_id(reply.get("user"), client)
+                or "Unknown User"
+            )
            message_type = MessageType.USER
        else:
            self_app_id = get_danswer_bot_app_id(client)

-            # Only include bot messages from Danswer, other bots are not taken in as context
-            if self_app_id != reply.get("user"):
-                continue
+            if reply.get("user") == self_app_id:
+                # DanswerBot response
+                message_type = MessageType.ASSISTANT
+                user_sem_id = "Assistant"

-            blocks = reply["blocks"]
-            if len(blocks) <= 1:
-                continue
-
-            # For the old flow, the useful block is the second one after the header block that says AI Answer
-            if reply["blocks"][0]["text"]["text"] == "AI Answer":
-                message = reply["blocks"][1]["text"]["text"]
-            else:
-                # for the new flow, the answer is the first block
-                message = reply["blocks"][0]["text"]["text"]
-
-            if message.startswith("_Filters"):
-                if len(blocks) <= 2:
+                # DanswerBot responses have both text and blocks
+                # The useful content is in the blocks, specifically the first block unless there are
+                # auto-detected filters
+                blocks = reply.get("blocks")
+                if not blocks:
+                    logger.warning(f"DanswerBot response has no blocks: {reply}")
                    continue
-                message = reply["blocks"][2]["text"]["text"]

-            user_sem_id = "Assistant"
-            message_type = MessageType.ASSISTANT
+                message = blocks[0].get("text", {}).get("text")

+                # If auto-detected filters are on, use the second block for the actual answer
+                # The first block is the auto-detected filters
+                if message.startswith("_Filters"):
+                    if len(blocks) < 2:
+                        logger.warning(f"Only filter blocks found: {reply}")
+                        continue
+                    # This is the DanswerBot answer format; if there is a change to how we respond,
+                    # this will need to be updated to get the correct "answer" portion
+                    message = reply["blocks"][1].get("text", {}).get("text")
+            else:
+                # Other bots are not counted as the LLM response, which only comes from Danswer
+                message_type = MessageType.USER
+                bot_user_name = fetch_user_semantic_id_from_id(
+                    reply.get("user"), client
+                )
+                user_sem_id = bot_user_name or "Unknown" + " Bot"
+
+                # For other bots, just use the text as we have no way of knowing what the
+                # useful portion is
+                message = reply.get("text")
+                if not message:
+                    message = blocks[0].get("text", {}).get("text")
+
+            if not message:
+                logger.warning("Skipping Slack thread message, no text found")
+                continue
+
+        message = remove_danswer_bot_tag(message, client=client)
        thread_messages.append(
            ThreadMessage(message=message, sender=user_sem_id, role=message_type)
        )
--- a/backend/danswer/llm/answering/prompts/citations_prompt.py
+++ b/backend/danswer/llm/answering/prompts/citations_prompt.py
@ -18,6 +18,7 @@ from danswer.prompts.chat_prompts import REQUIRE_CITATION_STATEMENT
 from danswer.prompts.constants import DEFAULT_IGNORE_STATEMENT
 from danswer.prompts.direct_qa_prompts import CITATIONS_PROMPT
 from danswer.prompts.direct_qa_prompts import CITATIONS_PROMPT_FOR_TOOL_CALLING
+from danswer.prompts.direct_qa_prompts import HISTORY_BLOCK
 from danswer.prompts.prompt_utils import add_date_time_to_prompt
 from danswer.prompts.prompt_utils import build_complete_context_str
 from danswer.prompts.prompt_utils import build_task_prompt_reminders
@ -143,6 +144,12 @@ def build_citations_user_message(
        prompt=prompt_config, use_language_hint=bool(multilingual_expansion)
    )

+    history_block = (
+        HISTORY_BLOCK.format(history_str=history_message) + "\n"
+        if history_message
+        else ""
+    )
+
    if context_docs:
        context_docs_str = build_complete_context_str(context_docs)
        optional_ignore = "" if all_doc_useful else DEFAULT_IGNORE_STATEMENT
@ -152,14 +159,14 @@ def build_citations_user_message(
            context_docs_str=context_docs_str,
            task_prompt=task_prompt_with_reminder,
            user_query=question,
-            history_block=history_message,
+            history_block=history_block,
        )
    else:
        # if no context docs provided, assume we're in the tool calling flow
        user_prompt = CITATIONS_PROMPT_FOR_TOOL_CALLING.format(
            task_prompt=task_prompt_with_reminder,
            user_query=question,
-            history_block=history_message,
+            history_block=history_block,
        )

    user_prompt = user_prompt.strip()
--- a/backend/danswer/prompts/chat_prompts.py
+++ b/backend/danswer/prompts/chat_prompts.py
@ -110,8 +110,8 @@ Respond "{SKIP_SEARCH}" if:
 and additional information or details would provide little or no value.
 - The query is some task that does not require additional information to handle.

-{GENERAL_SEP_PAT}
 Conversation History:
+{GENERAL_SEP_PAT}
 {{chat_history}}
 {GENERAL_SEP_PAT}

@ -135,8 +135,8 @@ If there is a clear change in topic, disregard the previous messages.
 Strip out any information that is not relevant for the retrieval task.
 If the follow up message is an error or code snippet, repeat the same input back EXACTLY.

-{GENERAL_SEP_PAT}
 Chat History:
+{GENERAL_SEP_PAT}
 {{chat_history}}
 {GENERAL_SEP_PAT}

@ -152,8 +152,8 @@ If a broad query might yield too many results, make it detailed.
 If there is a clear change in topic, ensure the query reflects the new topic accurately.
 Strip out any information that is not relevant for the internet search.

-{GENERAL_SEP_PAT}
 Chat History:
+{GENERAL_SEP_PAT}
 {{chat_history}}
 {GENERAL_SEP_PAT}

@ -210,6 +210,7 @@ IMPORTANT: TRY NOT TO USE MORE THAN 5 WORDS, MAKE IT AS CONCISE AS POSSIBLE.
 Focus the name on the important keywords to convey the topic of the conversation.

 Chat History:
+{GENERAL_SEP_PAT}
 {{chat_history}}
 {GENERAL_SEP_PAT}

--- a/backend/danswer/prompts/direct_qa_prompts.py
+++ b/backend/danswer/prompts/direct_qa_prompts.py
@ -72,7 +72,8 @@ EMPTY_SAMPLE_JSON = {
 JSON_PROMPT = f"""
 {{system_prompt}}
 {REQUIRE_JSON}
-{{context_block}}{{history_block}}{{task_prompt}}
+{{context_block}}{{history_block}}
+{{task_prompt}}

 SAMPLE RESPONSE:
 ```
@ -91,6 +92,7 @@ SAMPLE RESPONSE:
 # "conversation history" block
 CITATIONS_PROMPT = f"""
 Refer to the following context documents when responding to me.{DEFAULT_IGNORE_STATEMENT}
+
 CONTEXT:
 {GENERAL_SEP_PAT}
 {{context_docs_str}}
@ -109,10 +111,7 @@ CITATIONS_PROMPT_FOR_TOOL_CALLING = f"""
 Refer to the provided context documents when responding to me.{DEFAULT_IGNORE_STATEMENT} \
 You should always get right to the point, and never use extraneous language.

-CHAT HISTORY:
-{{history_block}}
-
-{{task_prompt}}
+{{history_block}}{{task_prompt}}

 {QUESTION_PAT.upper()}
 {{user_query}}