remove slack workspace (#3394)

* remove slack workspace * update client tokens * fix up * clean up docs * fix up tests
2025-06-21 13:30:59 +02:00 · 2024-12-11 17:01:43 -08:00 · 2024-12-11 17:01:43 -08:00 · 0770a587f1
commit 0770a587f1
parent 748b79b0ef
7 changed files with 16 additions and 175 deletions
--- a/backend/danswer/connectors/factory.py
+++ b/backend/danswer/connectors/factory.py
@ -41,7 +41,6 @@ from danswer.connectors.salesforce.connector import SalesforceConnector
 from danswer.connectors.sharepoint.connector import SharepointConnector
 from danswer.connectors.slab.connector import SlabConnector
 from danswer.connectors.slack.connector import SlackPollConnector
 from danswer.connectors.slack.load_connector import SlackLoadConnector
 from danswer.connectors.teams.connector import TeamsConnector
 from danswer.connectors.web.connector import WebConnector
 from danswer.connectors.wikipedia.connector import WikipediaConnector
@ -64,7 +63,6 @@ def identify_connector_class(
        DocumentSource.WEB: WebConnector,
        DocumentSource.FILE: LocalFileConnector,
        DocumentSource.SLACK: {
            InputType.LOAD_STATE: SlackLoadConnector,
            InputType.POLL: SlackPollConnector,
            InputType.SLIM_RETRIEVAL: SlackPollConnector,
        },
--- a/backend/danswer/connectors/slack/connector.py
+++ b/backend/danswer/connectors/slack/connector.py
@ -134,7 +134,6 @@ def get_latest_message_time(thread: ThreadType) -> datetime:
 def thread_to_doc(
    workspace: str,
    channel: ChannelType,
    thread: ThreadType,
    slack_cleaner: SlackTextCleaner,
@ -179,9 +178,7 @@ def thread_to_doc(
        id=f"{channel_id}__{thread[0]['ts']}",
        sections=[
            Section(
-                link=get_message_link(
+                link=get_message_link(event=m, client=client, channel_id=channel_id),
                    event=m, workspace=workspace, channel_id=channel_id
                ),
                text=slack_cleaner.index_clean(cast(str, m["text"])),
            )
            for m in thread
@ -265,7 +262,6 @@ def filter_channels(
 def _get_all_docs(
    client: WebClient,
    workspace: str,
    channels: list[str] | None = None,
    channel_name_regex_enabled: bool = False,
    oldest: str | None = None,
@ -312,7 +308,6 @@ def _get_all_docs(
                if filtered_thread:
                    channel_docs += 1
                    yield thread_to_doc(
                        workspace=workspace,
                        channel=channel,
                        thread=filtered_thread,
                        slack_cleaner=slack_cleaner,
@ -375,14 +370,12 @@ def _get_all_doc_ids(
 class SlackPollConnector(PollConnector, SlimConnector):
    def __init__(
        self,
        workspace: str,
        channels: list[str] | None = None,
        # if specified, will treat the specified channel strings as
        # regexes, and will only index channels that fully match the regexes
        channel_regex_enabled: bool = False,
        batch_size: int = INDEX_BATCH_SIZE,
    ) -> None:
        self.workspace = workspace
        self.channels = channels
        self.channel_regex_enabled = channel_regex_enabled
        self.batch_size = batch_size
@ -416,7 +409,6 @@ class SlackPollConnector(PollConnector, SlimConnector):
        documents: list[Document] = []
        for document in _get_all_docs(
            client=self.client,
            workspace=self.workspace,
            channels=self.channels,
            channel_name_regex_enabled=self.channel_regex_enabled,
            # NOTE: need to impute to `None` instead of using 0.0, since Slack will
@ -440,7 +432,6 @@ if __name__ == "__main__":
    slack_channel = os.environ.get("SLACK_CHANNEL")
    connector = SlackPollConnector(
        workspace=os.environ["SLACK_WORKSPACE"],
        channels=[slack_channel] if slack_channel else None,
    )
    connector.load_credentials({"slack_bot_token": os.environ["SLACK_BOT_TOKEN"]})
--- a/backend/danswer/connectors/slack/load_connector.py
+++ b/backend/danswer/connectors/slack/load_connector.py
@ -1,140 +0,0 @@
 import json
 import os
 from datetime import datetime
 from datetime import timezone
 from pathlib import Path
 from typing import Any
 from typing import cast
 from danswer.configs.app_configs import INDEX_BATCH_SIZE
 from danswer.configs.constants import DocumentSource
 from danswer.connectors.interfaces import GenerateDocumentsOutput
 from danswer.connectors.interfaces import LoadConnector
 from danswer.connectors.models import Document
 from danswer.connectors.models import Section
 from danswer.connectors.slack.connector import filter_channels
 from danswer.connectors.slack.utils import get_message_link
 from danswer.utils.logger import setup_logger
 logger = setup_logger()
 def get_event_time(event: dict[str, Any]) -> datetime | None:
    ts = event.get("ts")
    if not ts:
        return None
    return datetime.fromtimestamp(float(ts), tz=timezone.utc)
 class SlackLoadConnector(LoadConnector):
    """Load connector that builds documents from a Slack export on disk.

    Reads ``channels.json`` and the per-channel directories located under
    ``export_path_str`` and yields batches of ``Document`` objects.
    """

    # WARNING: DEPRECATED, DO NOT USE
    def __init__(
        self,
        workspace: str,
        export_path_str: str,
        channels: list[str] | None = None,
        # if specified, will treat the specified channel strings as
        # regexes, and will only index channels that fully match the regexes
        channel_regex_enabled: bool = False,
        batch_size: int = INDEX_BATCH_SIZE,
    ) -> None:
        """Store the connector configuration; no I/O happens here.

        Args:
            workspace: Workspace value forwarded to ``get_message_link``.
            export_path_str: Directory containing ``channels.json`` and one
                sub-directory per channel.
            channels: Optional channel filter forwarded to ``filter_channels``.
            channel_regex_enabled: Forwarded to ``filter_channels`` together
                with ``channels``.
            batch_size: Number of documents accumulated before a batch is
                yielded by ``load_from_state``.
        """
        self.workspace = workspace
        self.channels = channels
        self.channel_regex_enabled = channel_regex_enabled
        self.export_path_str = export_path_str
        self.batch_size = batch_size

    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
        """Accept (and ignore) credentials; warn if any were supplied.

        Always returns ``None``.
        """
        if credentials:
            logger.warning("Unexpected credentials provided for Slack Load Connector")
        return None

    @staticmethod
    def _process_batch_event(
        slack_event: dict[str, Any],
        channel: dict[str, Any],
        matching_doc: Document | None,
        workspace: str,
    ) -> Document | None:
        """Turn one exported Slack event into a new or extended ``Document``.

        Args:
            slack_event: A single event read from an export file.
            channel: Channel entry from ``channels.json``; its ``id`` and
                ``name`` keys are read.
            matching_doc: Document already built for the event's thread, if
                one is present in the current batch.
            workspace: Workspace value forwarded to ``get_message_link``.

        Returns:
            ``None`` for events that are not messages or are ``channel_join``
            messages. Otherwise a ``Document``: a copy of ``matching_doc``
            with one extra section when it is given, else a fresh
            single-section document whose id is the event's ``ts``.
        """
        if (
            slack_event["type"] != "message"
            or slack_event.get("subtype") == "channel_join"
        ):
            return None
        # The section is identical whether it starts a document or extends one.
        section = Section(
            link=get_message_link(
                event=slack_event,
                workspace=workspace,
                channel_id=channel["id"],
            ),
            text=slack_event["text"],
        )
        updated_at = get_event_time(slack_event)
        if matching_doc:
            return Document(
                id=matching_doc.id,
                sections=matching_doc.sections + [section],
                source=matching_doc.source,
                semantic_identifier=matching_doc.semantic_identifier,
                title="",  # slack docs don't really have a "title"
                doc_updated_at=updated_at,
                metadata=matching_doc.metadata,
            )
        return Document(
            id=slack_event["ts"],
            sections=[section],
            source=DocumentSource.SLACK,
            semantic_identifier=channel["name"],
            title="",  # slack docs don't really have a "title"
            doc_updated_at=updated_at,
            metadata={},
        )

    def load_from_state(self) -> GenerateDocumentsOutput:
        """Yield batches of documents built from the export directory.

        Each batch holds at most ``batch_size`` documents and every document
        is yielded exactly once. Thread replies are merged into their parent
        document only while that parent is still in the batch being
        accumulated; a reply whose parent was already flushed becomes its own
        document.
        """
        export_path = Path(self.export_path_str)
        with open(export_path / "channels.json", encoding="utf-8") as f:
            all_channels = json.load(f)
        filtered_channels = filter_channels(
            all_channels, self.channels, self.channel_regex_enabled
        )
        document_batch: dict[str, Document] = {}
        for channel_info in filtered_channels:
            channel_dir_path = export_path / cast(str, channel_info["name"])
            # os.listdir order is arbitrary; sort so runs are deterministic.
            # NOTE(review): presumably export files are named by date, making
            # lexical order chronological (parents before replies) - confirm.
            for file_name in sorted(os.listdir(channel_dir_path)):
                with open(channel_dir_path / file_name, encoding="utf-8") as f:
                    events = cast(list[dict[str, Any]], json.load(f))
                for slack_event in events:
                    doc = self._process_batch_event(
                        slack_event=slack_event,
                        channel=channel_info,
                        matching_doc=document_batch.get(
                            slack_event.get("thread_ts", "")
                        ),
                        workspace=self.workspace,
                    )
                    if not doc:
                        continue
                    document_batch[doc.id] = doc
                    if len(document_batch) >= self.batch_size:
                        yield list(document_batch.values())
                        # Start a fresh batch; without this every later
                        # document re-yields the whole accumulated batch.
                        document_batch = {}
        # Flush whatever is left over; skip the yield when nothing remains.
        if document_batch:
            yield list(document_batch.values())
--- a/backend/danswer/connectors/slack/utils.py
+++ b/backend/danswer/connectors/slack/utils.py
@ -2,6 +2,7 @@ import re
 import time
 from collections.abc import Callable
 from collections.abc import Generator
 from functools import lru_cache
 from functools import wraps
 from typing import Any
 from typing import cast
@ -21,19 +22,21 @@ basic_retry_wrapper = retry_builder()
 _SLACK_LIMIT = 900
@lru_cache()
 def get_base_url(token: str) -> str:
    """Retrieve and cache the base URL of the Slack workspace based on the client token."""
    client = WebClient(token=token)
    return client.auth_test()["url"]
 def get_message_link(
-    event: dict[str, Any], workspace: str, channel_id: str | None = None
+    event: dict[str, Any], client: WebClient, channel_id: str | None = None
 ) -> str:
-    channel_id = channel_id or cast(
+    channel_id = channel_id or event["channel"]
-        str, event["channel"]
+    message_ts = event["ts"]
-    )  # channel must either be present in the event or passed in
+    response = client.chat_getPermalink(channel=channel_id, message_ts=message_ts)
-    message_ts = cast(str, event["ts"])
+    permalink = response["permalink"]
-    message_ts_without_dot = message_ts.replace(".", "")
+    return permalink
    thread_ts = cast(str | None, event.get("thread_ts"))
    return (
        f"https://{workspace}.slack.com/archives/{channel_id}/p{message_ts_without_dot}"
        + (f"?thread_ts={thread_ts}" if thread_ts else "")
    )
 def _make_slack_api_call_logged(
--- a/backend/tests/integration/connector_job_tests/slack/test_permission_sync.py
+++ b/backend/tests/integration/connector_job_tests/slack/test_permission_sync.py
@ -67,7 +67,6 @@ def test_slack_permission_sync(
        input_type=InputType.POLL,
        source=DocumentSource.SLACK,
        connector_specific_config={
            "workspace": "onyx-test-workspace",
            "channels": [public_channel["name"], private_channel["name"]],
        },
        access_type=AccessType.SYNC,
@ -281,7 +280,6 @@ def test_slack_group_permission_sync(
        input_type=InputType.POLL,
        source=DocumentSource.SLACK,
        connector_specific_config={
            "workspace": "onyx-test-workspace",
            "channels": [private_channel["name"]],
        },
        access_type=AccessType.SYNC,
--- a/backend/tests/integration/connector_job_tests/slack/test_prune.py
+++ b/backend/tests/integration/connector_job_tests/slack/test_prune.py
@ -61,7 +61,6 @@ def test_slack_prune(
        input_type=InputType.POLL,
        source=DocumentSource.SLACK,
        connector_specific_config={
            "workspace": "onyx-test-workspace",
            "channels": [public_channel["name"], private_channel["name"]],
        },
        access_type=AccessType.PUBLIC,
--- a/web/src/lib/connectors/connectors.tsx
+++ b/web/src/lib/connectors/connectors.tsx
@ -546,15 +546,7 @@ Hint: Use the singular form of the object name (e.g., 'Opportunity' instead of '
  },
  slack: {
    description: "Configure Slack connector",
-    values: [
+    values: [],
      {
        type: "text",
        query: "Enter the Slack workspace:",
        label: "Workspace",
        name: "workspace",
        optional: false,
      },
    ],
    advanced_values: [
      {
        type: "list",