mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-06-21 05:20:55 +02:00
remove slack workspace (#3394)
* remove slack workspace
* update client tokens
* fix up
* clean up docs
* fix up tests
This commit is contained in:
parent
748b79b0ef
commit
0770a587f1
@ -41,7 +41,6 @@ from danswer.connectors.salesforce.connector import SalesforceConnector
|
||||
from danswer.connectors.sharepoint.connector import SharepointConnector
|
||||
from danswer.connectors.slab.connector import SlabConnector
|
||||
from danswer.connectors.slack.connector import SlackPollConnector
|
||||
from danswer.connectors.slack.load_connector import SlackLoadConnector
|
||||
from danswer.connectors.teams.connector import TeamsConnector
|
||||
from danswer.connectors.web.connector import WebConnector
|
||||
from danswer.connectors.wikipedia.connector import WikipediaConnector
|
||||
@ -64,7 +63,6 @@ def identify_connector_class(
|
||||
DocumentSource.WEB: WebConnector,
|
||||
DocumentSource.FILE: LocalFileConnector,
|
||||
DocumentSource.SLACK: {
|
||||
InputType.LOAD_STATE: SlackLoadConnector,
|
||||
InputType.POLL: SlackPollConnector,
|
||||
InputType.SLIM_RETRIEVAL: SlackPollConnector,
|
||||
},
|
||||
|
@ -134,7 +134,6 @@ def get_latest_message_time(thread: ThreadType) -> datetime:
|
||||
|
||||
|
||||
def thread_to_doc(
|
||||
workspace: str,
|
||||
channel: ChannelType,
|
||||
thread: ThreadType,
|
||||
slack_cleaner: SlackTextCleaner,
|
||||
@ -179,9 +178,7 @@ def thread_to_doc(
|
||||
id=f"{channel_id}__{thread[0]['ts']}",
|
||||
sections=[
|
||||
Section(
|
||||
link=get_message_link(
|
||||
event=m, workspace=workspace, channel_id=channel_id
|
||||
),
|
||||
link=get_message_link(event=m, client=client, channel_id=channel_id),
|
||||
text=slack_cleaner.index_clean(cast(str, m["text"])),
|
||||
)
|
||||
for m in thread
|
||||
@ -265,7 +262,6 @@ def filter_channels(
|
||||
|
||||
def _get_all_docs(
|
||||
client: WebClient,
|
||||
workspace: str,
|
||||
channels: list[str] | None = None,
|
||||
channel_name_regex_enabled: bool = False,
|
||||
oldest: str | None = None,
|
||||
@ -312,7 +308,6 @@ def _get_all_docs(
|
||||
if filtered_thread:
|
||||
channel_docs += 1
|
||||
yield thread_to_doc(
|
||||
workspace=workspace,
|
||||
channel=channel,
|
||||
thread=filtered_thread,
|
||||
slack_cleaner=slack_cleaner,
|
||||
@ -375,14 +370,12 @@ def _get_all_doc_ids(
|
||||
class SlackPollConnector(PollConnector, SlimConnector):
|
||||
def __init__(
|
||||
self,
|
||||
workspace: str,
|
||||
channels: list[str] | None = None,
|
||||
# if specified, will treat the specified channel strings as
|
||||
# regexes, and will only index channels that fully match the regexes
|
||||
channel_regex_enabled: bool = False,
|
||||
batch_size: int = INDEX_BATCH_SIZE,
|
||||
) -> None:
|
||||
self.workspace = workspace
|
||||
self.channels = channels
|
||||
self.channel_regex_enabled = channel_regex_enabled
|
||||
self.batch_size = batch_size
|
||||
@ -416,7 +409,6 @@ class SlackPollConnector(PollConnector, SlimConnector):
|
||||
documents: list[Document] = []
|
||||
for document in _get_all_docs(
|
||||
client=self.client,
|
||||
workspace=self.workspace,
|
||||
channels=self.channels,
|
||||
channel_name_regex_enabled=self.channel_regex_enabled,
|
||||
# NOTE: need to impute to `None` instead of using 0.0, since Slack will
|
||||
@ -440,7 +432,6 @@ if __name__ == "__main__":
|
||||
|
||||
slack_channel = os.environ.get("SLACK_CHANNEL")
|
||||
connector = SlackPollConnector(
|
||||
workspace=os.environ["SLACK_WORKSPACE"],
|
||||
channels=[slack_channel] if slack_channel else None,
|
||||
)
|
||||
connector.load_credentials({"slack_bot_token": os.environ["SLACK_BOT_TOKEN"]})
|
||||
|
@ -1,140 +0,0 @@
|
||||
import json
|
||||
import os
|
||||
from datetime import datetime
|
||||
from datetime import timezone
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
from typing import cast
|
||||
|
||||
from danswer.configs.app_configs import INDEX_BATCH_SIZE
|
||||
from danswer.configs.constants import DocumentSource
|
||||
from danswer.connectors.interfaces import GenerateDocumentsOutput
|
||||
from danswer.connectors.interfaces import LoadConnector
|
||||
from danswer.connectors.models import Document
|
||||
from danswer.connectors.models import Section
|
||||
from danswer.connectors.slack.connector import filter_channels
|
||||
from danswer.connectors.slack.utils import get_message_link
|
||||
from danswer.utils.logger import setup_logger
|
||||
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
def get_event_time(event: dict[str, Any]) -> datetime | None:
|
||||
ts = event.get("ts")
|
||||
if not ts:
|
||||
return None
|
||||
return datetime.fromtimestamp(float(ts), tz=timezone.utc)
|
||||
|
||||
|
||||
class SlackLoadConnector(LoadConnector):
    """Build documents from a Slack export directory on local disk.

    WARNING: DEPRECATED, DO NOT USE

    ``load_from_state`` reads ``channels.json`` from the export directory and
    then every file inside the sub-directory named after each selected channel;
    each of those files is parsed as a JSON list of Slack event dicts.
    """

    def __init__(
        self,
        workspace: str,
        export_path_str: str,
        channels: list[str] | None = None,
        # if specified, will treat the specified channel strings as
        # regexes, and will only index channels that fully match the regexes
        channel_regex_enabled: bool = False,
        batch_size: int = INDEX_BATCH_SIZE,
    ) -> None:
        """Store the connector configuration; no I/O happens here.

        Args:
            workspace: Workspace name forwarded to ``get_message_link``.
            export_path_str: Path of the Slack export directory.
            channels: Channel names (or regexes) to index; ``None`` is passed
                through to ``filter_channels`` unchanged.
            channel_regex_enabled: Forwarded to ``filter_channels``.
            batch_size: Maximum number of documents per yielded batch.
        """
        self.workspace = workspace
        self.channels = channels
        self.channel_regex_enabled = channel_regex_enabled
        self.export_path_str = export_path_str
        self.batch_size = batch_size

    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
        """Accept (and ignore) credentials; an export on disk needs none.

        Logs a warning when a non-empty credentials dict is supplied and
        always returns ``None``.
        """
        if credentials:
            logger.warning("Unexpected credentials provided for Slack Load Connector")
        return None

    @staticmethod
    def _process_batch_event(
        slack_event: dict[str, Any],
        channel: dict[str, Any],
        matching_doc: Document | None,
        workspace: str,
    ) -> Document | None:
        """Turn one Slack event into a new or extended ``Document``.

        Args:
            slack_event: Raw event dict; must contain ``type`` and, for
                messages, ``ts`` and ``text``.
            channel: Channel dict; ``id`` and ``name`` are read.
            matching_doc: Document already built for this event's thread, if
                any. When given, the event is appended to it as a new section.
            workspace: Workspace name used to build the message link.

        Returns:
            ``None`` for non-message events and ``channel_join`` messages;
            otherwise a ``Document`` (a copy of ``matching_doc`` with one more
            section, or a fresh single-section document keyed by ``ts``).
        """
        is_indexable_message = (
            slack_event["type"] == "message"
            and slack_event.get("subtype") != "channel_join"
        )
        if not is_indexable_message:
            return None

        # Both branches below add exactly this section, so build it once.
        section = Section(
            link=get_message_link(
                event=slack_event,
                workspace=workspace,
                channel_id=channel["id"],
            ),
            text=slack_event["text"],
        )

        if matching_doc:
            return Document(
                id=matching_doc.id,
                sections=matching_doc.sections + [section],
                source=matching_doc.source,
                semantic_identifier=matching_doc.semantic_identifier,
                title="",  # slack docs don't really have a "title"
                doc_updated_at=get_event_time(slack_event),
                metadata=matching_doc.metadata,
            )

        return Document(
            id=slack_event["ts"],
            sections=[section],
            source=DocumentSource.SLACK,
            semantic_identifier=channel["name"],
            title="",  # slack docs don't really have a "title"
            doc_updated_at=get_event_time(slack_event),
            metadata={},
        )

    def load_from_state(self) -> GenerateDocumentsOutput:
        """Read the whole export and yield its documents in disjoint batches.

        All events are folded into documents first (a reply is merged into the
        document whose id equals its ``thread_ts``), and only then are the
        documents yielded in chunks of at most ``batch_size``. Previously the
        accumulator was yielded repeatedly without ever being reset, so every
        document was emitted many times once ``batch_size`` was reached.

        Yields:
            Lists of ``Document`` objects, each document appearing exactly
            once. Nothing is yielded when the export contains no documents.
        """
        export_path = Path(self.export_path_str)

        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(export_path / "channels.json", encoding="utf-8") as f:
            all_channels = json.load(f)

        filtered_channels = filter_channels(
            all_channels, self.channels, self.channel_regex_enabled
        )

        # Keyed by document id so thread replies can find their parent document.
        documents_by_id: dict[str, Document] = {}
        for channel_info in filtered_channels:
            channel_dir_path = export_path / cast(str, channel_info["name"])
            for file_name in os.listdir(channel_dir_path):
                with open(channel_dir_path / file_name, encoding="utf-8") as f:
                    events = cast(list[dict[str, Any]], json.load(f))

                for slack_event in events:
                    doc = self._process_batch_event(
                        slack_event=slack_event,
                        channel=channel_info,
                        matching_doc=documents_by_id.get(
                            slack_event.get("thread_ts", "")
                        ),
                        workspace=self.workspace,
                    )
                    if doc:
                        documents_by_id[doc.id] = doc

        all_documents = list(documents_by_id.values())
        # Guard against a non-positive batch size, which range() would reject.
        chunk_size = max(self.batch_size, 1)
        for start in range(0, len(all_documents), chunk_size):
            yield all_documents[start : start + chunk_size]
|
@ -2,6 +2,7 @@ import re
|
||||
import time
|
||||
from collections.abc import Callable
|
||||
from collections.abc import Generator
|
||||
from functools import lru_cache
|
||||
from functools import wraps
|
||||
from typing import Any
|
||||
from typing import cast
|
||||
@ -21,19 +22,21 @@ basic_retry_wrapper = retry_builder()
|
||||
_SLACK_LIMIT = 900
|
||||
|
||||
|
||||
@lru_cache()
def get_base_url(token: str) -> str:
    """Return the ``url`` field of Slack's ``auth_test`` response for ``token``.

    Results are memoized per token by ``lru_cache``, so repeated calls with
    the same token reuse the first response instead of calling Slack again.
    """
    auth_info = WebClient(token=token).auth_test()
    return auth_info["url"]
|
||||
|
||||
|
||||
def get_message_link(
    event: dict[str, Any], client: WebClient, channel_id: str | None = None
) -> str:
    """Return the permalink Slack reports for the message described by ``event``.

    Args:
        event: Slack event dict; ``ts`` is required, and ``channel`` is
            required when ``channel_id`` is not supplied.
        client: Slack client used to call ``chat_getPermalink``.
        channel_id: Channel containing the message. Falls back to
            ``event["channel"]`` when omitted or empty.

    Returns:
        The ``permalink`` value of the ``chat_getPermalink`` response.

    Raises:
        KeyError: If ``ts`` is missing, or if no channel is available from
            either ``channel_id`` or the event.
    """
    # channel must either be present in the event or passed in
    channel_id = channel_id or event["channel"]
    message_ts = event["ts"]
    response = client.chat_getPermalink(channel=channel_id, message_ts=message_ts)
    return response["permalink"]
|
||||
|
||||
|
||||
def _make_slack_api_call_logged(
|
||||
|
@ -67,7 +67,6 @@ def test_slack_permission_sync(
|
||||
input_type=InputType.POLL,
|
||||
source=DocumentSource.SLACK,
|
||||
connector_specific_config={
|
||||
"workspace": "onyx-test-workspace",
|
||||
"channels": [public_channel["name"], private_channel["name"]],
|
||||
},
|
||||
access_type=AccessType.SYNC,
|
||||
@ -281,7 +280,6 @@ def test_slack_group_permission_sync(
|
||||
input_type=InputType.POLL,
|
||||
source=DocumentSource.SLACK,
|
||||
connector_specific_config={
|
||||
"workspace": "onyx-test-workspace",
|
||||
"channels": [private_channel["name"]],
|
||||
},
|
||||
access_type=AccessType.SYNC,
|
||||
|
@ -61,7 +61,6 @@ def test_slack_prune(
|
||||
input_type=InputType.POLL,
|
||||
source=DocumentSource.SLACK,
|
||||
connector_specific_config={
|
||||
"workspace": "onyx-test-workspace",
|
||||
"channels": [public_channel["name"], private_channel["name"]],
|
||||
},
|
||||
access_type=AccessType.PUBLIC,
|
||||
|
@ -546,15 +546,7 @@ Hint: Use the singular form of the object name (e.g., 'Opportunity' instead of '
|
||||
},
|
||||
slack: {
|
||||
description: "Configure Slack connector",
|
||||
values: [
|
||||
{
|
||||
type: "text",
|
||||
query: "Enter the Slack workspace:",
|
||||
label: "Workspace",
|
||||
name: "workspace",
|
||||
optional: false,
|
||||
},
|
||||
],
|
||||
values: [],
|
||||
advanced_values: [
|
||||
{
|
||||
type: "list",
|
||||
|
Loading…
x
Reference in New Issue
Block a user