mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-06-03 03:31:09 +02:00
Add regex support for Slack channels
This commit is contained in:
parent
d75ca0542a
commit
b076c3d1ea
@ -1,4 +1,5 @@
|
||||
import json
|
||||
import re
|
||||
from collections.abc import Callable
|
||||
from collections.abc import Generator
|
||||
from datetime import datetime
|
||||
@ -204,11 +205,23 @@ def _default_msg_filter(message: MessageType) -> bool:
|
||||
|
||||
|
||||
def _filter_channels(
|
||||
all_channels: list[dict[str, Any]], channels_to_connect: list[str] | None
|
||||
all_channels: list[dict[str, Any]],
|
||||
channels_to_connect: list[str] | None,
|
||||
regex_enabled: bool,
|
||||
) -> list[dict[str, Any]]:
|
||||
if not channels_to_connect:
|
||||
return all_channels
|
||||
|
||||
if regex_enabled:
|
||||
return [
|
||||
channel
|
||||
for channel in all_channels
|
||||
if any(
|
||||
re.fullmatch(channel_to_connect, channel["name"])
|
||||
for channel_to_connect in channels_to_connect
|
||||
)
|
||||
]
|
||||
|
||||
# validate that all channels in `channels_to_connect` are valid
|
||||
# fail loudly in the case of an invalid channel so that the user
|
||||
# knows that one of the channels they've specified is typo'd or private
|
||||
@ -229,6 +242,7 @@ def get_all_docs(
|
||||
client: WebClient,
|
||||
workspace: str,
|
||||
channels: list[str] | None = None,
|
||||
channel_name_regex_enabled: bool = False,
|
||||
oldest: str | None = None,
|
||||
latest: str | None = None,
|
||||
msg_filter_func: Callable[[MessageType], bool] = _default_msg_filter,
|
||||
@ -237,7 +251,9 @@ def get_all_docs(
|
||||
slack_cleaner = SlackTextCleaner(client=client)
|
||||
|
||||
all_channels = get_channels(client)
|
||||
filtered_channels = _filter_channels(all_channels, channels)
|
||||
filtered_channels = _filter_channels(
|
||||
all_channels, channels, channel_name_regex_enabled
|
||||
)
|
||||
|
||||
for channel in filtered_channels:
|
||||
channel_docs = 0
|
||||
@ -285,13 +301,14 @@ class SlackLoadConnector(LoadConnector):
|
||||
workspace: str,
|
||||
export_path_str: str,
|
||||
channels: list[str] | None = None,
|
||||
# if specified, will only include channels that match at least one of these
|
||||
# regexes OR are in `channels`
|
||||
channel_regexes: list[str] | None = None,
|
||||
# if specified, will treat the specified channel strings as
|
||||
# regexes, and will only index channels that fully match the regexes
|
||||
channel_regex_enabled: bool = False,
|
||||
batch_size: int = INDEX_BATCH_SIZE,
|
||||
) -> None:
|
||||
self.workspace = workspace
|
||||
self.channels = channels
|
||||
self.channel_regex_enabled = channel_regex_enabled
|
||||
self.export_path_str = export_path_str
|
||||
self.batch_size = batch_size
|
||||
|
||||
@ -359,7 +376,9 @@ class SlackLoadConnector(LoadConnector):
|
||||
with open(export_path / "channels.json") as f:
|
||||
all_channels = json.load(f)
|
||||
|
||||
filtered_channels = _filter_channels(all_channels, self.channels)
|
||||
filtered_channels = _filter_channels(
|
||||
all_channels, self.channels, self.channel_regex_enabled
|
||||
)
|
||||
|
||||
document_batch: dict[str, Document] = {}
|
||||
for channel_info in filtered_channels:
|
||||
@ -393,10 +412,14 @@ class SlackPollConnector(PollConnector):
|
||||
self,
|
||||
workspace: str,
|
||||
channels: list[str] | None = None,
|
||||
# if specified, will treat the specified channel strings as
|
||||
# regexes, and will only index channels that fully match the regexes
|
||||
channel_regex_enabled: bool = False,
|
||||
batch_size: int = INDEX_BATCH_SIZE,
|
||||
) -> None:
|
||||
self.workspace = workspace
|
||||
self.channels = channels
|
||||
self.channel_regex_enabled = channel_regex_enabled
|
||||
self.batch_size = batch_size
|
||||
self.client: WebClient | None = None
|
||||
|
||||
@ -416,6 +439,7 @@ class SlackPollConnector(PollConnector):
|
||||
client=self.client,
|
||||
workspace=self.workspace,
|
||||
channels=self.channels,
|
||||
channel_name_regex_enabled=self.channel_regex_enabled,
|
||||
# NOTE: need to impute to `None` instead of using 0.0, since Slack will
|
||||
# throw an error if we use 0.0 on an account without infinite data
|
||||
# retention
|
||||
|
@ -17,6 +17,8 @@ import { CredentialForm } from "@/components/admin/connectors/CredentialForm";
|
||||
import {
|
||||
TextFormField,
|
||||
TextArrayFieldBuilder,
|
||||
BooleanFormField,
|
||||
TextArrayField,
|
||||
} from "@/components/admin/connectors/Field";
|
||||
import { ConnectorsTable } from "@/components/admin/connectors/table/ConnectorsTable";
|
||||
import { ConnectorForm } from "@/components/admin/connectors/ConnectorForm";
|
||||
@ -207,15 +209,38 @@ const MainSection = () => {
|
||||
<TextFormField name="workspace" label="Workspace" />
|
||||
</>
|
||||
}
|
||||
formBodyBuilder={TextArrayFieldBuilder({
|
||||
name: "channels",
|
||||
label: "Channels:",
|
||||
subtext:
|
||||
"Specify 0 or more channels to index. For example, specifying the channel " +
|
||||
"'support' will cause us to only index all content " +
|
||||
"within the '#support' channel. " +
|
||||
"If no channels are specified, all channels in your workspace will be indexed.",
|
||||
})}
|
||||
formBodyBuilder={(values) => {
|
||||
return (
|
||||
<>
|
||||
<Divider />
|
||||
{TextArrayFieldBuilder({
|
||||
name: "channels",
|
||||
label: "Channels:",
|
||||
subtext: `
|
||||
Specify 0 or more channels to index. For example, specifying the channel
|
||||
"support" will cause us to only index all content within the "#support" channel.
|
||||
If no channels are specified, all channels in your workspace will be indexed.`,
|
||||
})(values)}
|
||||
<BooleanFormField
|
||||
name="channel_regex_enabled"
|
||||
label="Regex Enabled?"
|
||||
subtext={
|
||||
<div>
|
||||
If enabled, we will treat the "channels"
|
||||
specified above as regular expressions. A channel's
|
||||
messages will be pulled in by the connector if the name
|
||||
of the channel fully matches any of the specified
|
||||
regular expressions.
|
||||
<br />
|
||||
For example, specifying <i>.*-support.*</i> as a
|
||||
"channel" will cause the connector to include
|
||||
any channels with "-support" in the name.
|
||||
</div>
|
||||
}
|
||||
/>
|
||||
</>
|
||||
);
|
||||
}}
|
||||
validationSchema={Yup.object().shape({
|
||||
workspace: Yup.string().required(
|
||||
"Please enter the workspace to index"
|
||||
@ -223,10 +248,12 @@ const MainSection = () => {
|
||||
channels: Yup.array()
|
||||
.of(Yup.string().required("Channel names must be strings"))
|
||||
.required(),
|
||||
channel_regex_enabled: Yup.boolean().required(),
|
||||
})}
|
||||
initialValues={{
|
||||
workspace: "",
|
||||
channels: [],
|
||||
channel_regex_enabled: false,
|
||||
}}
|
||||
refreshFreq={10 * 60} // 10 minutes
|
||||
credentialId={slackCredential.id}
|
||||
|
@ -83,6 +83,9 @@ export const ConnectorTitle = ({
|
||||
typedConnector.connector_specific_config.channels.join(", ")
|
||||
);
|
||||
}
|
||||
if (typedConnector.connector_specific_config.channel_regex_enabled) {
|
||||
additionalMetadata.set("Channel Regex Enabled", "True");
|
||||
}
|
||||
} else if (connector.source === "zulip") {
|
||||
const typedConnector = connector as Connector<ZulipConfig>;
|
||||
additionalMetadata.set(
|
||||
|
@ -114,6 +114,7 @@ export interface ProductboardConfig {}
|
||||
export interface SlackConfig {
|
||||
workspace: string;
|
||||
channels?: string[];
|
||||
channel_regex_enabled?: boolean;
|
||||
}
|
||||
|
||||
export interface SlabConfig {
|
||||
|
Loading…
x
Reference in New Issue
Block a user