Add regex support for Slack channels

This commit is contained in:
Weves 2024-01-29 19:54:42 -08:00 committed by Chris Weaver
parent d75ca0542a
commit b076c3d1ea
4 changed files with 70 additions and 15 deletions

View File

@ -1,4 +1,5 @@
import json
import re
from collections.abc import Callable
from collections.abc import Generator
from datetime import datetime
@ -204,11 +205,23 @@ def _default_msg_filter(message: MessageType) -> bool:
def _filter_channels(
all_channels: list[dict[str, Any]], channels_to_connect: list[str] | None
all_channels: list[dict[str, Any]],
channels_to_connect: list[str] | None,
regex_enabled: bool,
) -> list[dict[str, Any]]:
if not channels_to_connect:
return all_channels
if regex_enabled:
return [
channel
for channel in all_channels
if any(
re.fullmatch(channel_to_connect, channel["name"])
for channel_to_connect in channels_to_connect
)
]
# validate that all channels in `channels_to_connect` are valid
# fail loudly in the case of an invalid channel so that the user
# knows that one of the channels they've specified is typo'd or private
@ -229,6 +242,7 @@ def get_all_docs(
client: WebClient,
workspace: str,
channels: list[str] | None = None,
channel_name_regex_enabled: bool = False,
oldest: str | None = None,
latest: str | None = None,
msg_filter_func: Callable[[MessageType], bool] = _default_msg_filter,
@ -237,7 +251,9 @@ def get_all_docs(
slack_cleaner = SlackTextCleaner(client=client)
all_channels = get_channels(client)
filtered_channels = _filter_channels(all_channels, channels)
filtered_channels = _filter_channels(
all_channels, channels, channel_name_regex_enabled
)
for channel in filtered_channels:
channel_docs = 0
@ -285,13 +301,14 @@ class SlackLoadConnector(LoadConnector):
workspace: str,
export_path_str: str,
channels: list[str] | None = None,
# if specified, will only include channels that match at least one of these
# regexes OR are in `channels`
channel_regexes: list[str] | None = None,
# if True, the strings in `channels` are treated as regexes, and only
# channels whose name fully matches at least one of them are indexed
channel_regex_enabled: bool = False,
batch_size: int = INDEX_BATCH_SIZE,
) -> None:
self.workspace = workspace
self.channels = channels
self.channel_regex_enabled = channel_regex_enabled
self.export_path_str = export_path_str
self.batch_size = batch_size
@ -359,7 +376,9 @@ class SlackLoadConnector(LoadConnector):
with open(export_path / "channels.json") as f:
all_channels = json.load(f)
filtered_channels = _filter_channels(all_channels, self.channels)
filtered_channels = _filter_channels(
all_channels, self.channels, self.channel_regex_enabled
)
document_batch: dict[str, Document] = {}
for channel_info in filtered_channels:
@ -393,10 +412,14 @@ class SlackPollConnector(PollConnector):
self,
workspace: str,
channels: list[str] | None = None,
# if True, the strings in `channels` are treated as regexes, and only
# channels whose name fully matches at least one of them are indexed
channel_regex_enabled: bool = False,
batch_size: int = INDEX_BATCH_SIZE,
) -> None:
self.workspace = workspace
self.channels = channels
self.channel_regex_enabled = channel_regex_enabled
self.batch_size = batch_size
self.client: WebClient | None = None
@ -416,6 +439,7 @@ class SlackPollConnector(PollConnector):
client=self.client,
workspace=self.workspace,
channels=self.channels,
channel_name_regex_enabled=self.channel_regex_enabled,
# NOTE: need to impute to `None` instead of using 0.0, since Slack will
# throw an error if we use 0.0 on an account without infinite data
# retention

View File

@ -17,6 +17,8 @@ import { CredentialForm } from "@/components/admin/connectors/CredentialForm";
import {
TextFormField,
TextArrayFieldBuilder,
BooleanFormField,
TextArrayField,
} from "@/components/admin/connectors/Field";
import { ConnectorsTable } from "@/components/admin/connectors/table/ConnectorsTable";
import { ConnectorForm } from "@/components/admin/connectors/ConnectorForm";
@ -207,15 +209,38 @@ const MainSection = () => {
<TextFormField name="workspace" label="Workspace" />
</>
}
formBodyBuilder={TextArrayFieldBuilder({
name: "channels",
label: "Channels:",
subtext:
"Specify 0 or more channels to index. For example, specifying the channel " +
"'support' will cause us to only index all content " +
"within the '#support' channel. " +
"If no channels are specified, all channels in your workspace will be indexed.",
})}
formBodyBuilder={(values) => {
return (
<>
<Divider />
{TextArrayFieldBuilder({
name: "channels",
label: "Channels:",
subtext: `
Specify 0 or more channels to index. For example, specifying the channel
"support" will cause us to only index all content within the "#support" channel.
If no channels are specified, all channels in your workspace will be indexed.`,
})(values)}
<BooleanFormField
name="channel_regex_enabled"
label="Regex Enabled?"
subtext={
<div>
If enabled, we will treat the &quot;channels&quot;
specified above as regular expressions. A channel&apos;s
messages will be pulled in by the connector if the name
of the channel fully matches any of the specified
regular expressions.
<br />
For example, specifying <i>.*-support.*</i> as a
&quot;channel&quot; will cause the connector to include
any channels with &quot;-support&quot; in the name.
</div>
}
/>
</>
);
}}
validationSchema={Yup.object().shape({
workspace: Yup.string().required(
"Please enter the workspace to index"
@ -223,10 +248,12 @@ const MainSection = () => {
channels: Yup.array()
.of(Yup.string().required("Channel names must be strings"))
.required(),
channel_regex_enabled: Yup.boolean().required(),
})}
initialValues={{
workspace: "",
channels: [],
channel_regex_enabled: false,
}}
refreshFreq={10 * 60} // 10 minutes
credentialId={slackCredential.id}

View File

@ -83,6 +83,9 @@ export const ConnectorTitle = ({
typedConnector.connector_specific_config.channels.join(", ")
);
}
if (typedConnector.connector_specific_config.channel_regex_enabled) {
additionalMetadata.set("Channel Regex Enabled", "True");
}
} else if (connector.source === "zulip") {
const typedConnector = connector as Connector<ZulipConfig>;
additionalMetadata.set(

View File

@ -114,6 +114,7 @@ export interface ProductboardConfig {}
// Configuration fields for a Slack connector.
// NOTE(review): presumably the shape of `connector_specific_config` for
// connectors whose source is Slack — confirm against the Connector<T> usage.
export interface SlackConfig {
// The Slack workspace to index.
workspace: string;
// Channels to index. Per the admin form: if no channels are specified,
// all channels in the workspace are indexed.
channels?: string[];
// When true, each entry in `channels` is treated as a regular expression,
// and a channel is included if its name fully matches any one of them.
channel_regex_enabled?: boolean;
}
export interface SlabConfig {