diff --git a/backend/danswer/connectors/factory.py b/backend/danswer/connectors/factory.py index 87d1539d3d..241d5ed81c 100644 --- a/backend/danswer/connectors/factory.py +++ b/backend/danswer/connectors/factory.py @@ -41,7 +41,6 @@ from danswer.connectors.salesforce.connector import SalesforceConnector from danswer.connectors.sharepoint.connector import SharepointConnector from danswer.connectors.slab.connector import SlabConnector from danswer.connectors.slack.connector import SlackPollConnector -from danswer.connectors.slack.load_connector import SlackLoadConnector from danswer.connectors.teams.connector import TeamsConnector from danswer.connectors.web.connector import WebConnector from danswer.connectors.wikipedia.connector import WikipediaConnector @@ -64,7 +63,6 @@ def identify_connector_class( DocumentSource.WEB: WebConnector, DocumentSource.FILE: LocalFileConnector, DocumentSource.SLACK: { - InputType.LOAD_STATE: SlackLoadConnector, InputType.POLL: SlackPollConnector, InputType.SLIM_RETRIEVAL: SlackPollConnector, }, diff --git a/backend/danswer/connectors/slack/connector.py b/backend/danswer/connectors/slack/connector.py index b550e42d21..9135be7775 100644 --- a/backend/danswer/connectors/slack/connector.py +++ b/backend/danswer/connectors/slack/connector.py @@ -134,7 +134,6 @@ def get_latest_message_time(thread: ThreadType) -> datetime: def thread_to_doc( - workspace: str, channel: ChannelType, thread: ThreadType, slack_cleaner: SlackTextCleaner, @@ -179,9 +178,7 @@ def thread_to_doc( id=f"{channel_id}__{thread[0]['ts']}", sections=[ Section( - link=get_message_link( - event=m, workspace=workspace, channel_id=channel_id - ), + link=get_message_link(event=m, client=client, channel_id=channel_id), text=slack_cleaner.index_clean(cast(str, m["text"])), ) for m in thread @@ -265,7 +262,6 @@ def filter_channels( def _get_all_docs( client: WebClient, - workspace: str, channels: list[str] | None = None, channel_name_regex_enabled: bool = False, oldest: str | None = None, @@ -312,7 +308,6 @@ def _get_all_docs( if filtered_thread: channel_docs += 1 yield thread_to_doc( - workspace=workspace, channel=channel, thread=filtered_thread, slack_cleaner=slack_cleaner, @@ -375,14 +370,12 @@ def _get_all_doc_ids( class SlackPollConnector(PollConnector, SlimConnector): def __init__( self, - workspace: str, channels: list[str] | None = None, # if specified, will treat the specified channel strings as # regexes, and will only index channels that fully match the regexes channel_regex_enabled: bool = False, batch_size: int = INDEX_BATCH_SIZE, ) -> None: - self.workspace = workspace self.channels = channels self.channel_regex_enabled = channel_regex_enabled self.batch_size = batch_size @@ -416,7 +409,6 @@ class SlackPollConnector(PollConnector, SlimConnector): documents: list[Document] = [] for document in _get_all_docs( client=self.client, - workspace=self.workspace, channels=self.channels, channel_name_regex_enabled=self.channel_regex_enabled, # NOTE: need to impute to `None` instead of using 0.0, since Slack will @@ -440,7 +432,6 @@ if __name__ == "__main__": slack_channel = os.environ.get("SLACK_CHANNEL") connector = SlackPollConnector( - workspace=os.environ["SLACK_WORKSPACE"], channels=[slack_channel] if slack_channel else None, ) connector.load_credentials({"slack_bot_token": os.environ["SLACK_BOT_TOKEN"]}) diff --git a/backend/danswer/connectors/slack/load_connector.py b/backend/danswer/connectors/slack/load_connector.py deleted file mode 100644 index 7350ac6284..0000000000 --- a/backend/danswer/connectors/slack/load_connector.py +++ /dev/null @@ -1,140 +0,0 @@ -import json -import os -from datetime import datetime -from datetime import timezone -from pathlib import Path -from typing import Any -from typing import cast - -from danswer.configs.app_configs import INDEX_BATCH_SIZE -from danswer.configs.constants import DocumentSource -from danswer.connectors.interfaces import GenerateDocumentsOutput -from danswer.connectors.interfaces import LoadConnector -from danswer.connectors.models import Document -from danswer.connectors.models import Section -from danswer.connectors.slack.connector import filter_channels -from danswer.connectors.slack.utils import get_message_link -from danswer.utils.logger import setup_logger - - -logger = setup_logger() - - -def get_event_time(event: dict[str, Any]) -> datetime | None: - ts = event.get("ts") - if not ts: - return None - return datetime.fromtimestamp(float(ts), tz=timezone.utc) - - -class SlackLoadConnector(LoadConnector): - # WARNING: DEPRECATED, DO NOT USE - def __init__( - self, - workspace: str, - export_path_str: str, - channels: list[str] | None = None, - # if specified, will treat the specified channel strings as - # regexes, and will only index channels that fully match the regexes - channel_regex_enabled: bool = False, - batch_size: int = INDEX_BATCH_SIZE, - ) -> None: - self.workspace = workspace - self.channels = channels - self.channel_regex_enabled = channel_regex_enabled - self.export_path_str = export_path_str - self.batch_size = batch_size - - def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None: - if credentials: - logger.warning("Unexpected credentials provided for Slack Load Connector") - return None - - @staticmethod - def _process_batch_event( - slack_event: dict[str, Any], - channel: dict[str, Any], - matching_doc: Document | None, - workspace: str, - ) -> Document | None: - if ( - slack_event["type"] == "message" - and slack_event.get("subtype") != "channel_join" - ): - if matching_doc: - return Document( - id=matching_doc.id, - sections=matching_doc.sections - + [ - Section( - link=get_message_link( - event=slack_event, - workspace=workspace, - channel_id=channel["id"], - ), - text=slack_event["text"], - ) - ], - source=matching_doc.source, - semantic_identifier=matching_doc.semantic_identifier, - title="", # slack docs don't really have a "title" - doc_updated_at=get_event_time(slack_event), - metadata=matching_doc.metadata, - ) - - return Document( - id=slack_event["ts"], - sections=[ - Section( - link=get_message_link( - event=slack_event, - workspace=workspace, - channel_id=channel["id"], - ), - text=slack_event["text"], - ) - ], - source=DocumentSource.SLACK, - semantic_identifier=channel["name"], - title="", # slack docs don't really have a "title" - doc_updated_at=get_event_time(slack_event), - metadata={}, - ) - - return None - - def load_from_state(self) -> GenerateDocumentsOutput: - export_path = Path(self.export_path_str) - - with open(export_path / "channels.json") as f: - all_channels = json.load(f) - - filtered_channels = filter_channels( - all_channels, self.channels, self.channel_regex_enabled - ) - - document_batch: dict[str, Document] = {} - for channel_info in filtered_channels: - channel_dir_path = export_path / cast(str, channel_info["name"]) - channel_file_paths = [ - channel_dir_path / file_name - for file_name in os.listdir(channel_dir_path) - ] - for path in channel_file_paths: - with open(path) as f: - events = cast(list[dict[str, Any]], json.load(f)) - for slack_event in events: - doc = self._process_batch_event( - slack_event=slack_event, - channel=channel_info, - matching_doc=document_batch.get( - slack_event.get("thread_ts", "") - ), - workspace=self.workspace, - ) - if doc: - document_batch[doc.id] = doc - if len(document_batch) >= self.batch_size: - yield list(document_batch.values()) - - yield list(document_batch.values()) diff --git a/backend/danswer/connectors/slack/utils.py b/backend/danswer/connectors/slack/utils.py index 78bc42a092..62ac749c16 100644 --- a/backend/danswer/connectors/slack/utils.py +++ b/backend/danswer/connectors/slack/utils.py @@ -2,6 +2,7 @@ import re import time from collections.abc import Callable from collections.abc import Generator +from functools import lru_cache from functools import wraps from typing import Any from typing import cast @@ -21,19 +22,21 @@ basic_retry_wrapper = retry_builder() _SLACK_LIMIT = 900 +@lru_cache() +def get_base_url(token: str) -> str: + """Retrieve and cache the base URL of the Slack workspace based on the client token.""" + client = WebClient(token=token) + return client.auth_test()["url"] + + def get_message_link( - event: dict[str, Any], workspace: str, channel_id: str | None = None + event: dict[str, Any], client: WebClient, channel_id: str | None = None ) -> str: - channel_id = channel_id or cast( - str, event["channel"] - ) # channel must either be present in the event or passed in - message_ts = cast(str, event["ts"]) - message_ts_without_dot = message_ts.replace(".", "") - thread_ts = cast(str | None, event.get("thread_ts")) - return ( - f"https://{workspace}.slack.com/archives/{channel_id}/p{message_ts_without_dot}" - + (f"?thread_ts={thread_ts}" if thread_ts else "") - ) + channel_id = channel_id or event["channel"] + message_ts = event["ts"] + response = client.chat_getPermalink(channel=channel_id, message_ts=message_ts) + permalink = response["permalink"] + return permalink def _make_slack_api_call_logged( diff --git a/backend/tests/integration/connector_job_tests/slack/test_permission_sync.py b/backend/tests/integration/connector_job_tests/slack/test_permission_sync.py index 74763045a0..6396d7ca05 100644 --- a/backend/tests/integration/connector_job_tests/slack/test_permission_sync.py +++ b/backend/tests/integration/connector_job_tests/slack/test_permission_sync.py @@ -67,7 +67,6 @@ def test_slack_permission_sync( input_type=InputType.POLL, source=DocumentSource.SLACK, connector_specific_config={ - "workspace": "onyx-test-workspace", "channels": [public_channel["name"], private_channel["name"]], }, access_type=AccessType.SYNC, @@ -281,7 +280,6 @@ def test_slack_group_permission_sync( input_type=InputType.POLL, source=DocumentSource.SLACK, connector_specific_config={ - "workspace": "onyx-test-workspace", "channels": [private_channel["name"]], }, access_type=AccessType.SYNC, diff --git a/backend/tests/integration/connector_job_tests/slack/test_prune.py b/backend/tests/integration/connector_job_tests/slack/test_prune.py index b2decb6584..774cf39e2e 100644 --- a/backend/tests/integration/connector_job_tests/slack/test_prune.py +++ b/backend/tests/integration/connector_job_tests/slack/test_prune.py @@ -61,7 +61,6 @@ def test_slack_prune( input_type=InputType.POLL, source=DocumentSource.SLACK, connector_specific_config={ - "workspace": "onyx-test-workspace", "channels": [public_channel["name"], private_channel["name"]], }, access_type=AccessType.PUBLIC, diff --git a/web/src/lib/connectors/connectors.tsx b/web/src/lib/connectors/connectors.tsx index 81330bedf6..3fa1fda821 100644 --- a/web/src/lib/connectors/connectors.tsx +++ b/web/src/lib/connectors/connectors.tsx @@ -546,15 +546,7 @@ Hint: Use the singular form of the object name (e.g., 'Opportunity' instead of ' }, slack: { description: "Configure Slack connector", - values: [ - { - type: "text", - query: "Enter the Slack workspace:", - label: "Workspace", - name: "workspace", - optional: false, - }, - ], + values: [], advanced_values: [ { type: "list",