Replace IDs with names in Slack connector

2025-09-27 12:29:41 +02:00 · 2023-07-07 18:03:55 -07:00
parent 79013ac9fd
commit 3494d6a13a
3 changed files with 73 additions and 7 deletions
--- a/backend/danswer/connectors/slack/connector.py
+++ b/backend/danswer/connectors/slack/connector.py
@@ -17,6 +17,7 @@ from danswer.connectors.models import Section
 from danswer.connectors.slack.utils import get_message_link
 from danswer.connectors.slack.utils import make_slack_api_call_paginated
 from danswer.connectors.slack.utils import make_slack_api_rate_limited
 from danswer.connectors.slack.utils import UserIdReplacer
 from danswer.utils.logging import setup_logger
 from slack_sdk import WebClient
 from slack_sdk.web import SlackResponse
@@ -83,7 +84,12 @@ def get_thread(client: WebClient, channel_id: str, thread_id: str) -> ThreadType
    return threads
-def thread_to_doc(workspace: str, channel: ChannelType, thread: ThreadType) -> Document:
+def thread_to_doc(
    workspace: str,
    channel: ChannelType,
    thread: ThreadType,
    user_id_replacer: UserIdReplacer,
 ) -> Document:
    channel_id = channel["id"]
    return Document(
        id=f"{channel_id}__{thread[0]['ts']}",
@@ -92,7 +98,7 @@ def thread_to_doc(workspace: str, channel: ChannelType, thread: ThreadType) -> D
                link=get_message_link(
                    event=m, workspace=workspace, channel_id=channel_id
                ),
-                text=cast(str, m["text"]),
+                text=user_id_replacer.replace_user_ids_with_names(cast(str, m["text"])),
            )
            for m in thread
        ],
@@ -131,6 +137,8 @@ def get_all_docs(
    msg_filter_func: Callable[[MessageType], bool] = _default_msg_filter,
 ) -> Generator[Document, None, None]:
    """Get all documents in the workspace, channel by channel"""
    user_id_replacer = UserIdReplacer(client=client)
    channels = get_channels(client)
    for channel in channels:
@@ -156,7 +164,10 @@ def get_all_docs(
                if filtered_thread:
                    channel_docs += 1
                    yield thread_to_doc(
-                        workspace=workspace, channel=channel, thread=filtered_thread
+                        workspace=workspace,
                        channel=channel,
                        thread=filtered_thread,
                        user_id_replacer=user_id_replacer,
                    )
        logger.info(
--- a/backend/danswer/connectors/slack/utils.py
+++ b/backend/danswer/connectors/slack/utils.py
@@ -1,11 +1,16 @@
 import re
 import time
 from collections.abc import Callable
 from typing import Any
 from typing import cast
 from danswer.utils.logging import setup_logger
 from slack_sdk import WebClient
 from slack_sdk.errors import SlackApiError
 from slack_sdk.web import SlackResponse
 logger = setup_logger()
 # number of messages we request per page when fetching paginated slack messages
 _SLACK_LIMIT = 900
@@ -72,3 +77,53 @@ def make_slack_api_rate_limited(
        raise Exception(f"Max retries ({max_retries}) exceeded")
    return rate_limited_call
 class UserIdReplacer:
    """Utility class to replace user IDs with usernames in a message.
    Handles caching, so the same request is not made multiple times
    for the same user ID"""
    def __init__(self, client: WebClient) -> None:
        self._client = client
        self._user_id_to_name_map: dict[str, str] = {}
    def _get_slack_user_name(self, user_id: str) -> str:
        if user_id not in self._user_id_to_name_map:
            try:
                response = make_slack_api_rate_limited(self._client.users_info)(
                    user=user_id
                )
                # prefer display name if set, since that is what is shown in Slack
                self._user_id_to_name_map[user_id] = (
                    response["user"]["profile"]["display_name"]
                    or response["user"]["profile"]["real_name"]
                )
            except SlackApiError as e:
                logger.exception(
                    f"Error fetching data for user {user_id}: {e.response['error']}"
                )
                raise
        return self._user_id_to_name_map[user_id]
    def replace_user_ids_with_names(self, message: str) -> str:
        # Find user IDs in the message
        user_ids = re.findall("<@(.*?)>", message)
        # Iterate over each user ID found
        for user_id in user_ids:
            try:
                if user_id in self._user_id_to_name_map:
                    user_name = self._user_id_to_name_map[user_id]
                else:
                    user_name = self._get_slack_user_name(user_id)
                # Replace the user ID with the username in the message
                message = message.replace(f"<@{user_id}>", f"@{user_name}")
            except Exception:
                logger.exception(
                    f"Unable to replace user ID with username for user_id '{user_id}"
                )
        return message
--- a/backend/danswer/listeners/slack_listener.py
+++ b/backend/danswer/listeners/slack_listener.py
@@ -68,13 +68,13 @@ def _process_quotes(
    for quote_dict in quotes.values():
        doc_id = str(quote_dict.get("document_id", ""))
        doc_link = quote_dict.get("link")
-        doc_name = quote_dict.get("semantic_identifier")
+        doc_name = str(quote_dict.get("semantic_identifier", ""))
        if doc_link and doc_name and doc_id and doc_id not in doc_identifiers:
-            doc_identifiers.append(str(doc_id))
+            doc_identifiers.append(doc_id)
            custom_semantic_identifier = _build_custom_semantic_identifier(
                semantic_identifier=doc_name,
-                blurb=quote_dict.get("blurb", ""),
+                blurb=str(quote_dict.get("blurb", "")),
-                source=quote_dict.get("source_type", ""),
+                source=str(quote_dict.get("source_type", "")),
            )
            quote_lines.append(f"- <{doc_link}|{custom_semantic_identifier}>")