mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-09-27 12:29:41 +02:00
Replace IDs with names in Slack connector
This commit is contained in:
@@ -17,6 +17,7 @@ from danswer.connectors.models import Section
|
|||||||
from danswer.connectors.slack.utils import get_message_link
|
from danswer.connectors.slack.utils import get_message_link
|
||||||
from danswer.connectors.slack.utils import make_slack_api_call_paginated
|
from danswer.connectors.slack.utils import make_slack_api_call_paginated
|
||||||
from danswer.connectors.slack.utils import make_slack_api_rate_limited
|
from danswer.connectors.slack.utils import make_slack_api_rate_limited
|
||||||
|
from danswer.connectors.slack.utils import UserIdReplacer
|
||||||
from danswer.utils.logging import setup_logger
|
from danswer.utils.logging import setup_logger
|
||||||
from slack_sdk import WebClient
|
from slack_sdk import WebClient
|
||||||
from slack_sdk.web import SlackResponse
|
from slack_sdk.web import SlackResponse
|
||||||
@@ -83,7 +84,12 @@ def get_thread(client: WebClient, channel_id: str, thread_id: str) -> ThreadType
|
|||||||
return threads
|
return threads
|
||||||
|
|
||||||
|
|
||||||
def thread_to_doc(workspace: str, channel: ChannelType, thread: ThreadType) -> Document:
|
def thread_to_doc(
|
||||||
|
workspace: str,
|
||||||
|
channel: ChannelType,
|
||||||
|
thread: ThreadType,
|
||||||
|
user_id_replacer: UserIdReplacer,
|
||||||
|
) -> Document:
|
||||||
channel_id = channel["id"]
|
channel_id = channel["id"]
|
||||||
return Document(
|
return Document(
|
||||||
id=f"{channel_id}__{thread[0]['ts']}",
|
id=f"{channel_id}__{thread[0]['ts']}",
|
||||||
@@ -92,7 +98,7 @@ def thread_to_doc(workspace: str, channel: ChannelType, thread: ThreadType) -> D
|
|||||||
link=get_message_link(
|
link=get_message_link(
|
||||||
event=m, workspace=workspace, channel_id=channel_id
|
event=m, workspace=workspace, channel_id=channel_id
|
||||||
),
|
),
|
||||||
text=cast(str, m["text"]),
|
text=user_id_replacer.replace_user_ids_with_names(cast(str, m["text"])),
|
||||||
)
|
)
|
||||||
for m in thread
|
for m in thread
|
||||||
],
|
],
|
||||||
@@ -131,6 +137,8 @@ def get_all_docs(
|
|||||||
msg_filter_func: Callable[[MessageType], bool] = _default_msg_filter,
|
msg_filter_func: Callable[[MessageType], bool] = _default_msg_filter,
|
||||||
) -> Generator[Document, None, None]:
|
) -> Generator[Document, None, None]:
|
||||||
"""Get all documents in the workspace, channel by channel"""
|
"""Get all documents in the workspace, channel by channel"""
|
||||||
|
user_id_replacer = UserIdReplacer(client=client)
|
||||||
|
|
||||||
channels = get_channels(client)
|
channels = get_channels(client)
|
||||||
|
|
||||||
for channel in channels:
|
for channel in channels:
|
||||||
@@ -156,7 +164,10 @@ def get_all_docs(
|
|||||||
if filtered_thread:
|
if filtered_thread:
|
||||||
channel_docs += 1
|
channel_docs += 1
|
||||||
yield thread_to_doc(
|
yield thread_to_doc(
|
||||||
workspace=workspace, channel=channel, thread=filtered_thread
|
workspace=workspace,
|
||||||
|
channel=channel,
|
||||||
|
thread=filtered_thread,
|
||||||
|
user_id_replacer=user_id_replacer,
|
||||||
)
|
)
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
|
@@ -1,11 +1,16 @@
|
|||||||
|
import re
|
||||||
import time
|
import time
|
||||||
from collections.abc import Callable
|
from collections.abc import Callable
|
||||||
from typing import Any
|
from typing import Any
|
||||||
from typing import cast
|
from typing import cast
|
||||||
|
|
||||||
|
from danswer.utils.logging import setup_logger
|
||||||
|
from slack_sdk import WebClient
|
||||||
from slack_sdk.errors import SlackApiError
|
from slack_sdk.errors import SlackApiError
|
||||||
from slack_sdk.web import SlackResponse
|
from slack_sdk.web import SlackResponse
|
||||||
|
|
||||||
|
logger = setup_logger()
|
||||||
|
|
||||||
# number of messages we request per page when fetching paginated slack messages
|
# number of messages we request per page when fetching paginated slack messages
|
||||||
_SLACK_LIMIT = 900
|
_SLACK_LIMIT = 900
|
||||||
|
|
||||||
@@ -72,3 +77,53 @@ def make_slack_api_rate_limited(
|
|||||||
raise Exception(f"Max retries ({max_retries}) exceeded")
|
raise Exception(f"Max retries ({max_retries}) exceeded")
|
||||||
|
|
||||||
return rate_limited_call
|
return rate_limited_call
|
||||||
|
|
||||||
|
|
||||||
|
class UserIdReplacer:
|
||||||
|
"""Utility class to replace user IDs with usernames in a message.
|
||||||
|
Handles caching, so the same request is not made multiple times
|
||||||
|
for the same user ID"""
|
||||||
|
|
||||||
|
def __init__(self, client: WebClient) -> None:
|
||||||
|
self._client = client
|
||||||
|
self._user_id_to_name_map: dict[str, str] = {}
|
||||||
|
|
||||||
|
def _get_slack_user_name(self, user_id: str) -> str:
|
||||||
|
if user_id not in self._user_id_to_name_map:
|
||||||
|
try:
|
||||||
|
response = make_slack_api_rate_limited(self._client.users_info)(
|
||||||
|
user=user_id
|
||||||
|
)
|
||||||
|
# prefer display name if set, since that is what is shown in Slack
|
||||||
|
self._user_id_to_name_map[user_id] = (
|
||||||
|
response["user"]["profile"]["display_name"]
|
||||||
|
or response["user"]["profile"]["real_name"]
|
||||||
|
)
|
||||||
|
except SlackApiError as e:
|
||||||
|
logger.exception(
|
||||||
|
f"Error fetching data for user {user_id}: {e.response['error']}"
|
||||||
|
)
|
||||||
|
raise
|
||||||
|
|
||||||
|
return self._user_id_to_name_map[user_id]
|
||||||
|
|
||||||
|
def replace_user_ids_with_names(self, message: str) -> str:
|
||||||
|
# Find user IDs in the message
|
||||||
|
user_ids = re.findall("<@(.*?)>", message)
|
||||||
|
|
||||||
|
# Iterate over each user ID found
|
||||||
|
for user_id in user_ids:
|
||||||
|
try:
|
||||||
|
if user_id in self._user_id_to_name_map:
|
||||||
|
user_name = self._user_id_to_name_map[user_id]
|
||||||
|
else:
|
||||||
|
user_name = self._get_slack_user_name(user_id)
|
||||||
|
|
||||||
|
# Replace the user ID with the username in the message
|
||||||
|
message = message.replace(f"<@{user_id}>", f"@{user_name}")
|
||||||
|
except Exception:
|
||||||
|
logger.exception(
|
||||||
|
f"Unable to replace user ID with username for user_id '{user_id}"
|
||||||
|
)
|
||||||
|
|
||||||
|
return message
|
||||||
|
@@ -68,13 +68,13 @@ def _process_quotes(
|
|||||||
for quote_dict in quotes.values():
|
for quote_dict in quotes.values():
|
||||||
doc_id = str(quote_dict.get("document_id", ""))
|
doc_id = str(quote_dict.get("document_id", ""))
|
||||||
doc_link = quote_dict.get("link")
|
doc_link = quote_dict.get("link")
|
||||||
doc_name = quote_dict.get("semantic_identifier")
|
doc_name = str(quote_dict.get("semantic_identifier", ""))
|
||||||
if doc_link and doc_name and doc_id and doc_id not in doc_identifiers:
|
if doc_link and doc_name and doc_id and doc_id not in doc_identifiers:
|
||||||
doc_identifiers.append(str(doc_id))
|
doc_identifiers.append(doc_id)
|
||||||
custom_semantic_identifier = _build_custom_semantic_identifier(
|
custom_semantic_identifier = _build_custom_semantic_identifier(
|
||||||
semantic_identifier=doc_name,
|
semantic_identifier=doc_name,
|
||||||
blurb=quote_dict.get("blurb", ""),
|
blurb=str(quote_dict.get("blurb", "")),
|
||||||
source=quote_dict.get("source_type", ""),
|
source=str(quote_dict.get("source_type", "")),
|
||||||
)
|
)
|
||||||
quote_lines.append(f"- <{doc_link}|{custom_semantic_identifier}>")
|
quote_lines.append(f"- <{doc_link}|{custom_semantic_identifier}>")
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user