Add cql support for confluence connector (#2679)

* Added CQL support for Confluence

* changed string substitutions for CQL

* final cleanup

* updated string fixes

* remove print statements

* Update description
This commit is contained in:
hagen-danswer 2024-10-10 12:16:56 -07:00 committed by GitHub
parent 101b010c5c
commit 1f4fe42f4b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 339 additions and 198 deletions

View File

@ -1,5 +1,6 @@
import io
import os
import re
from collections.abc import Callable
from collections.abc import Collection
from datetime import datetime
@ -56,8 +57,101 @@ NO_PARENT_OR_NO_PERMISSIONS_ERROR_STR = (
)
class DanswerConfluence(Confluence):
    """
    This is a custom Confluence class that overrides the default Confluence class to add a custom CQL method.
    This is necessary because the default Confluence class does not properly support cql expansions.
    """

    def __init__(self, url: str, *args: Any, **kwargs: Any) -> None:
        super().__init__(url, *args, **kwargs)

    def danswer_cql(
        self,
        cql: str,
        expand: str | None = None,
        start: int = 0,
        limit: int = 500,
        include_archived_spaces: bool = False,
    ) -> list[dict[str, Any]]:
        """Run a CQL search against ``rest/api/content/search``.

        Args:
            cql: The CQL query text. It is percent-encoded before being placed
                in the URL, so characters such as ``&``, ``#`` and ``+`` inside
                the query cannot be mistaken for URL syntax.
            expand: Optional comma-separated list of properties to expand.
            start: Offset of the first result to return.
            limit: Maximum number of results to return.
            include_archived_spaces: When true, ``includeArchivedSpaces=true``
                is added to the request.

        Returns:
            The ``results`` list of the response, or an empty list when the
            response has no ``results`` key.

        Raises:
            Whatever ``self.get`` raises; errors are propagated unchanged.
        """
        # Imported locally so this class does not depend on a module-level
        # import that may not be present in the file.
        from urllib.parse import quote

        # Performs the query expansion and start/limit url additions
        url_suffix = f"rest/api/content/search?cql={quote(cql, safe='')}"
        if expand:
            # ',' and '.' are kept literal so existing expand strings such as
            # "body.storage.value,version" are sent exactly as before.
            url_suffix += f"&expand={quote(expand, safe=',.')}"
        url_suffix += f"&start={start}&limit={limit}"
        if include_archived_spaces:
            url_suffix += "&includeArchivedSpaces=true"

        response = self.get(url_suffix)
        return response.get("results", [])
def _parse_cql_timestamp(raw_value: str) -> datetime:
    """Parse a ``YYYY-MM-DD`` or ``YYYY-MM-DD HH:MM`` literal as a UTC datetime."""
    # A colon is only present when the literal carries a time component.
    # Checking for it (instead of a literal space) also handles literals whose
    # date and time are separated by a tab or by several spaces, which the
    # extraction regex accepts via ``\s+``.
    timestamp_format = "%Y-%m-%d %H:%M" if ":" in raw_value else "%Y-%m-%d"
    return datetime.strptime(raw_value, timestamp_format).replace(
        tzinfo=timezone.utc
    )  # Make offset-aware


def _replace_cql_time_filter(
    cql_query: str, start_time: datetime, end_time: datetime
) -> str:
    """
    This function replaces the lastmodified filter in the CQL query with the start and end times.
    This selects the more restrictive time range.

    The first ``lastmodified >=`` and ``lastmodified <=`` filters found in the
    query are read and combined with ``start_time`` / ``end_time`` (latest
    start, earliest end). Every ``AND lastmodified ...`` / ``AND updated ...``
    filter with a literal date is then stripped from the query; strict
    comparisons and ``updated`` filters are stripped without influencing the
    resulting range. Finally the combined range is appended as two
    ``lastmodified`` filters with minute precision.

    Dates found in the query are interpreted as UTC.

    Limitation: the new filters are appended at the end of the query, so a
    query that ends with an ``ORDER BY`` clause is not supported.

    Args:
        cql_query: The CQL query to rewrite.
        start_time: Lower bound of the time window.
        end_time: Upper bound of the time window.

    Returns:
        The rewritten CQL query.

    Raises:
        ValueError: If a date literal matched in the query is not a valid date.
    """
    date_literal = r'["\']?(\d{4}-\d{2}-\d{2}(?:\s+\d{2}:\d{2})?)["\']?'

    # Extract existing lastmodified >= and <= filters
    existing_start_match = re.search(
        r"lastmodified\s*>=\s*" + date_literal,
        cql_query,
        flags=re.IGNORECASE,
    )
    existing_end_match = re.search(
        r"lastmodified\s*<=\s*" + date_literal,
        cql_query,
        flags=re.IGNORECASE,
    )

    # Remove all existing lastmodified and updated filters
    cql_query = re.sub(
        r'\s*AND\s+(lastmodified|updated)\s*[<>=]+\s*["\']?[\d-]+(?:\s+[\d:]+)?["\']?',
        "",
        cql_query,
        flags=re.IGNORECASE,
    )

    # Determine the start time to use (the later of the two candidates)
    start_time_to_use = start_time.astimezone(timezone.utc)
    if existing_start_match:
        start_time_to_use = max(
            start_time_to_use, _parse_cql_timestamp(existing_start_match.group(1))
        )

    # Determine the end time to use (the earlier of the two candidates)
    end_time_to_use = end_time.astimezone(timezone.utc)
    if existing_end_match:
        end_time_to_use = min(
            end_time_to_use, _parse_cql_timestamp(existing_end_match.group(1))
        )

    base_query = cql_query.strip()
    # When the query contains an OR, wrap it in parentheses so the appended
    # filters constrain the whole query rather than only its last operand,
    # regardless of how AND/OR precedence is resolved.
    if re.search(r"\bor\b", base_query, flags=re.IGNORECASE):
        base_query = f"({base_query})"

    # Add new time filters
    time_filters = [
        f"lastmodified >= '{start_time_to_use.strftime('%Y-%m-%d %H:%M')}'",
        f"lastmodified <= '{end_time_to_use.strftime('%Y-%m-%d %H:%M')}'",
    ]
    # An empty base query must not produce a dangling leading "and".
    clauses = ([base_query] if base_query else []) + time_filters
    return " and ".join(clauses)
@lru_cache()
def _get_user(user_id: str, confluence_client: Confluence) -> str:
def _get_user(user_id: str, confluence_client: DanswerConfluence) -> str:
"""Get Confluence Display Name based on the account-id or userkey value
Args:
@ -81,7 +175,7 @@ def _get_user(user_id: str, confluence_client: Confluence) -> str:
return user_not_found
def parse_html_page(text: str, confluence_client: Confluence) -> str:
def parse_html_page(text: str, confluence_client: DanswerConfluence) -> str:
"""Parse a Confluence html page and replace the 'user Id' by the real
User Display Name
@ -112,7 +206,7 @@ def parse_html_page(text: str, confluence_client: Confluence) -> str:
def _comment_dfs(
comments_str: str,
comment_pages: Collection[dict[str, Any]],
confluence_client: Confluence,
confluence_client: DanswerConfluence,
) -> str:
get_page_child_by_type = make_confluence_call_handle_rate_limit(
confluence_client.get_page_child_by_type
@ -159,7 +253,7 @@ class RecursiveIndexer:
def __init__(
self,
batch_size: int,
confluence_client: Confluence,
confluence_client: DanswerConfluence,
index_recursively: bool,
origin_page_id: str,
) -> None:
@ -285,8 +379,8 @@ class ConfluenceConnector(LoadConnector, PollConnector):
def __init__(
self,
wiki_base: str,
space: str,
is_cloud: bool,
space: str = "",
page_id: str = "",
index_recursively: bool = True,
batch_size: int = INDEX_BATCH_SIZE,
@ -295,35 +389,44 @@ class ConfluenceConnector(LoadConnector, PollConnector):
# skip it. This is generally used to avoid indexing extra sensitive
# pages.
labels_to_skip: list[str] = CONFLUENCE_CONNECTOR_LABELS_TO_SKIP,
cql_query: str | None = None,
) -> None:
self.batch_size = batch_size
self.continue_on_failure = continue_on_failure
self.labels_to_skip = set(labels_to_skip)
self.recursive_indexer: RecursiveIndexer | None = None
self.index_recursively = index_recursively
self.index_recursively = False if cql_query else index_recursively
# Remove trailing slash from wiki_base if present
self.wiki_base = wiki_base.rstrip("/")
self.space = space
self.page_id = page_id
self.page_id = "" if cql_query else page_id
self.space_level_scan = bool(not self.page_id)
self.is_cloud = is_cloud
self.space_level_scan = False
self.confluence_client: Confluence | None = None
self.confluence_client: DanswerConfluence | None = None
if self.page_id is None or self.page_id == "":
self.space_level_scan = True
# if a cql_query is provided, we will use it to fetch the pages
# if no cql_query is provided, we will use the space to fetch the pages
# if no space is provided, we will default to fetching all pages, regardless of space
if cql_query:
self.cql_query = cql_query
elif self.space:
self.cql_query = f"type=page and space={self.space}"
else:
self.cql_query = "type=page"
logger.info(
f"wiki_base: {self.wiki_base}, space: {self.space}, page_id: {self.page_id},"
+ f" space_level_scan: {self.space_level_scan}, index_recursively: {self.index_recursively}"
+ f" space_level_scan: {self.space_level_scan}, index_recursively: {self.index_recursively},"
+ f" cql_query: {self.cql_query}"
)
def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
username = credentials["confluence_username"]
access_token = credentials["confluence_access_token"]
self.confluence_client = Confluence(
self.confluence_client = DanswerConfluence(
url=self.wiki_base,
# passing in username causes issues for Confluence data center
username=username if self.is_cloud else None,
@ -334,26 +437,33 @@ class ConfluenceConnector(LoadConnector, PollConnector):
def _fetch_pages(
self,
confluence_client: Confluence,
start_ind: int,
) -> list[dict[str, Any]]:
def _fetch_space(start_ind: int, batch_size: int) -> list[dict[str, Any]]:
get_all_pages_from_space = make_confluence_call_handle_rate_limit(
confluence_client.get_all_pages_from_space
if self.confluence_client is None:
raise ConnectorMissingCredentialError("Confluence")
get_all_pages = make_confluence_call_handle_rate_limit(
self.confluence_client.danswer_cql
)
include_archived_spaces = (
CONFLUENCE_CONNECTOR_INDEX_ARCHIVED_PAGES
if not self.is_cloud
else False
)
try:
return get_all_pages_from_space(
self.space,
return get_all_pages(
cql=self.cql_query,
start=start_ind,
limit=batch_size,
status=(
None if CONFLUENCE_CONNECTOR_INDEX_ARCHIVED_PAGES else "current"
),
expand="body.storage.value,version",
include_archived_spaces=include_archived_spaces,
)
except Exception:
logger.warning(
f"Batch failed with space {self.space} at offset {start_ind} "
f"Batch failed with cql {self.cql_query} at offset {start_ind} "
f"with size {batch_size}, processing pages individually..."
)
@ -363,27 +473,23 @@ class ConfluenceConnector(LoadConnector, PollConnector):
# Could be that one of the pages here failed due to this bug:
# https://jira.atlassian.com/browse/CONFCLOUD-76433
view_pages.extend(
get_all_pages_from_space(
self.space,
get_all_pages(
cql=self.cql_query,
start=start_ind + i,
limit=1,
status=(
None
if CONFLUENCE_CONNECTOR_INDEX_ARCHIVED_PAGES
else "current"
),
expand="body.storage.value,version",
include_archived_spaces=include_archived_spaces,
)
)
except HTTPError as e:
logger.warning(
f"Page failed with space {self.space} at offset {start_ind + i}, "
f"Page failed with cql {self.cql_query} at offset {start_ind + i}, "
f"trying alternative expand option: {e}"
)
# Use view instead, which captures most info but is less complete
view_pages.extend(
get_all_pages_from_space(
self.space,
get_all_pages(
cql=self.cql_query,
start=start_ind + i,
limit=1,
expand="body.view.value,version",
@ -393,6 +499,9 @@ class ConfluenceConnector(LoadConnector, PollConnector):
return view_pages
def _fetch_page(start_ind: int, batch_size: int) -> list[dict[str, Any]]:
if self.confluence_client is None:
raise ConnectorMissingCredentialError("Confluence")
if self.recursive_indexer is None:
self.recursive_indexer = RecursiveIndexer(
origin_page_id=self.page_id,
@ -421,7 +530,7 @@ class ConfluenceConnector(LoadConnector, PollConnector):
raise e
# error checking phase, only reachable if `self.continue_on_failure=True`
for i in range(self.batch_size):
for _ in range(self.batch_size):
try:
pages = (
_fetch_space(start_ind, self.batch_size)
@ -437,7 +546,9 @@ class ConfluenceConnector(LoadConnector, PollConnector):
return pages
def _fetch_comments(self, confluence_client: Confluence, page_id: str) -> str:
def _fetch_comments(
self, confluence_client: DanswerConfluence, page_id: str
) -> str:
get_page_child_by_type = make_confluence_call_handle_rate_limit(
confluence_client.get_page_child_by_type
)
@ -463,7 +574,9 @@ class ConfluenceConnector(LoadConnector, PollConnector):
)
return ""
def _fetch_labels(self, confluence_client: Confluence, page_id: str) -> list[str]:
def _fetch_labels(
self, confluence_client: DanswerConfluence, page_id: str
) -> list[str]:
get_page_labels = make_confluence_call_handle_rate_limit(
confluence_client.get_page_labels
)
@ -577,22 +690,20 @@ class ConfluenceConnector(LoadConnector, PollConnector):
return "\n".join(files_attachment_content), unused_attachments
def _get_doc_batch(
self, start_ind: int, time_filter: Callable[[datetime], bool] | None = None
self, start_ind: int
) -> tuple[list[Document], list[dict[str, Any]], int]:
if self.confluence_client is None:
raise ConnectorMissingCredentialError("Confluence")
doc_batch: list[Document] = []
unused_attachments: list[dict[str, Any]] = []
if self.confluence_client is None:
raise ConnectorMissingCredentialError("Confluence")
batch = self._fetch_pages(self.confluence_client, start_ind)
batch = self._fetch_pages(start_ind)
for page in batch:
last_modified = _datetime_from_string(page["version"]["when"])
author = cast(str | None, page["version"].get("by", {}).get("email"))
if time_filter and not time_filter(last_modified):
continue
page_id = page["id"]
if self.labels_to_skip or not CONFLUENCE_CONNECTOR_SKIP_LABEL_INDEXING:
@ -715,17 +826,12 @@ class ConfluenceConnector(LoadConnector, PollConnector):
return doc_batch, end_ind - start_ind
def load_from_state(self) -> GenerateDocumentsOutput:
unused_attachments = []
if self.confluence_client is None:
raise ConnectorMissingCredentialError("Confluence")
unused_attachments: list[dict[str, Any]] = []
start_ind = 0
while True:
doc_batch, unused_attachments_batch, num_pages = self._get_doc_batch(
start_ind
)
unused_attachments.extend(unused_attachments_batch)
doc_batch, unused_attachments, num_pages = self._get_doc_batch(start_ind)
unused_attachments.extend(unused_attachments)
start_ind += num_pages
if doc_batch:
yield doc_batch
@ -748,7 +854,7 @@ class ConfluenceConnector(LoadConnector, PollConnector):
def poll_source(
self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
) -> GenerateDocumentsOutput:
unused_attachments = []
unused_attachments: list[dict[str, Any]] = []
if self.confluence_client is None:
raise ConnectorMissingCredentialError("Confluence")
@ -756,12 +862,12 @@ class ConfluenceConnector(LoadConnector, PollConnector):
start_time = datetime.fromtimestamp(start, tz=timezone.utc)
end_time = datetime.fromtimestamp(end, tz=timezone.utc)
self.cql_query = _replace_cql_time_filter(self.cql_query, start_time, end_time)
start_ind = 0
while True:
doc_batch, unused_attachments_batch, num_pages = self._get_doc_batch(
start_ind, time_filter=lambda t: start_time <= t <= end_time
)
unused_attachments.extend(unused_attachments_batch)
doc_batch, unused_attachments, num_pages = self._get_doc_batch(start_ind)
unused_attachments.extend(unused_attachments)
start_ind += num_pages
if doc_batch:

View File

@ -1,6 +1,6 @@
"use client";
import { errorHandlingFetcher } from "@/lib/fetcher";
import { FetchError, errorHandlingFetcher } from "@/lib/fetcher";
import useSWR, { mutate } from "swr";
import { HealthCheckBanner } from "@/components/health/healthcheck";
@ -209,7 +209,15 @@ export default function AddConnector({
return (
<Formik
initialValues={createConnectorInitialValues(connector)}
initialValues={{
...createConnectorInitialValues(connector),
...Object.fromEntries(
connectorConfigs[connector].advanced_values.map((field) => [
field.name,
field.default || "",
])
),
}}
validationSchema={createConnectorValidationSchema(connector)}
onSubmit={async (values) => {
const {

View File

@ -1,74 +1,24 @@
import CredentialSubText from "@/components/credentials/CredentialFields";
import { TrashIcon } from "@/components/icons/icons";
import React from "react";
import { ListOption } from "@/lib/connectors/connectors";
import { Field, FieldArray, useField } from "formik";
import { FaPlus } from "react-icons/fa";
import { TextArrayField } from "@/components/admin/connectors/Field";
import { useFormikContext } from "formik";
export default function ListInput({
field,
onUpdate,
}: {
interface ListInputProps {
field: ListOption;
onUpdate?: (values: string[]) => void;
}) {
const [fieldProps, , helpers] = useField(field.name);
}
const ListInput: React.FC<ListInputProps> = ({ field }) => {
const { values } = useFormikContext();
return (
<FieldArray name={field.name}>
{({ push, remove }) => (
<div>
<label
htmlFor={field.name}
className="block text-sm font-medium text-text-700 mb-1"
>
{field.label}
{field.optional && (
<span className="text-text-500 ml-1">(optional)</span>
)}
</label>
{field.description && (
<CredentialSubText>{field.description}</CredentialSubText>
)}
{fieldProps.value.map((value: string, index: number) => (
<div key={index} className="w-full flex mb-4">
<Field
name={`${field.name}.${index}`}
className="w-full bg-input text-sm p-2 border border-border-medium rounded-md focus:ring-2 focus:ring-indigo-500 focus:border-indigo-500 mr-2"
/>
<button
className="p-2 my-auto bg-input flex-none rounded-md bg-red-500 text-white hover:bg-red-600 focus:outline-none focus:ring-2 focus:ring-red-500 focus:ring-opacity-50"
type="button"
onClick={() => {
remove(index);
if (onUpdate) {
const newValue = fieldProps.value.filter(
(_: any, i: number) => i !== index
);
onUpdate(newValue);
}
}}
>
<TrashIcon className="text-white my-auto" />
</button>
</div>
))}
<button
type="button"
onClick={() => {
push("");
if (onUpdate) {
onUpdate([...fieldProps.value, ""]);
}
}}
className="mt-2 p-2 bg-rose-500 text-xs text-white rounded-md hover:bg-rose-600 focus:outline-none focus:ring-2 focus:ring-rose-500 focus:ring-opacity-50 flex items-center"
>
<FaPlus className="mr-2" />
Add {field.label}
</button>
</div>
)}
</FieldArray>
<TextArrayField
name={field.name}
label={field.label}
values={Array.isArray(values) ? values : []}
subtext={field.description}
placeholder={`Enter ${field.label.toLowerCase()}`}
/>
);
}
};
export default ListInput;

View File

@ -1,4 +1,4 @@
import React, { Dispatch, FC, SetStateAction } from "react";
import React, { Dispatch, FC, SetStateAction, useState } from "react";
import CredentialSubText, {
AdminBooleanFormField,
} from "@/components/credentials/CredentialFields";
@ -9,6 +9,7 @@ import NumberInput from "./ConnectorInput/NumberInput";
import { TextFormField } from "@/components/admin/connectors/Field";
import ListInput from "./ConnectorInput/ListInput";
import FileInput from "./ConnectorInput/FileInput";
import { AdvancedOptionsToggle } from "@/components/AdvancedOptionsToggle";
export interface DynamicConnectionFormProps {
config: ConnectionConfiguration;
@ -23,6 +24,61 @@ const DynamicConnectionForm: FC<DynamicConnectionFormProps> = ({
setSelectedFiles,
values,
}) => {
const [showAdvancedOptions, setShowAdvancedOptions] = useState(false);
// Renders the form control for a single connector configuration field.
// The control is chosen from `field.type`: "file", "zip", "list", "select",
// "number" and "checkbox" each map to a dedicated input component; any other
// type falls through to a plain TextFormField with `type={field.type}`.
// The wrapping <div> is keyed by `field.name` so the result can be used
// directly as the callback of an array `.map(...)`.
const renderField = (field: any) => (
<div key={field.name}>
{field.type === "file" ? (
<FileUpload
name={field.name}
selectedFiles={selectedFiles}
setSelectedFiles={setSelectedFiles}
/>
) : field.type === "zip" ? (
<FileInput
name={field.name}
label={field.label}
optional={field.optional}
description={field.description}
selectedFiles={selectedFiles}
setSelectedFiles={setSelectedFiles}
/>
) : field.type === "list" ? (
<ListInput field={field} />
) : field.type === "select" ? (
<SelectInput
name={field.name}
optional={field.optional}
description={field.description}
options={field.options || []}
label={field.label}
/>
) : field.type === "number" ? (
<NumberInput
label={field.label}
optional={field.optional}
description={field.description}
name={field.name}
/>
) : field.type === "checkbox" ? (
<AdminBooleanFormField
checked={values[field.name]}
subtext={field.description}
name={field.name}
label={field.label}
/>
) : (
<TextFormField
subtext={field.description}
optional={field.optional}
type={field.type}
label={field.label}
name={field.name}
/>
)}
</div>
);
return (
<>
<h2 className="text-2xl font-bold text-text-800">{config.description}</h2>
@ -38,62 +94,17 @@ const DynamicConnectionForm: FC<DynamicConnectionFormProps> = ({
name={"name"}
/>
{config.values.map((field) => {
if (!field.hidden) {
return (
<div key={field.name}>
{field.type == "file" ? (
<FileUpload
name={field.name}
selectedFiles={selectedFiles}
setSelectedFiles={setSelectedFiles}
/>
) : field.type == "zip" ? (
<FileInput
name={field.name}
label={field.label}
optional={field.optional}
description={field.description}
selectedFiles={selectedFiles}
setSelectedFiles={setSelectedFiles}
/>
) : field.type === "list" ? (
<ListInput field={field} />
) : field.type === "select" ? (
<SelectInput
name={field.name}
optional={field.optional}
description={field.description}
options={field.options || []}
label={field.label}
/>
) : field.type === "number" ? (
<NumberInput
label={field.label}
optional={field.optional}
description={field.description}
name={field.name}
/>
) : field.type === "checkbox" ? (
<AdminBooleanFormField
checked={values[field.name]}
subtext={field.description}
name={field.name}
label={field.label}
/>
) : (
<TextFormField
subtext={field.description}
optional={field.optional}
type={field.type}
label={field.label}
name={field.name}
/>
)}
</div>
);
}
})}
{config.values.map((field) => !field.hidden && renderField(field))}
{config.advanced_values.length > 0 && (
<>
<AdvancedOptionsToggle
showAdvancedOptions={showAdvancedOptions}
setShowAdvancedOptions={setShowAdvancedOptions}
/>
{showAdvancedOptions && config.advanced_values.map(renderField)}
</>
)}
</>
);
};

View File

@ -86,6 +86,15 @@ export interface ConnectionConfiguration {
| FileOption
| ZipOption
)[];
advanced_values: (
| BooleanOption
| ListOption
| TextOption
| NumberOption
| SelectOption
| FileOption
| ZipOption
)[];
overrideDefaultFreq?: number;
}
@ -116,6 +125,17 @@ export const connectorConfigs: Record<
],
},
],
advanced_values: [
{
type: "number",
query: "Enter the maximum depth to crawl:",
label: "Max Depth",
name: "max_depth",
optional: true,
description:
"The maximum depth to crawl from the base URL. Default is 2.",
},
],
overrideDefaultFreq: 60 * 60 * 24,
},
github: {
@ -152,6 +172,7 @@ export const connectorConfigs: Record<
optional: true,
},
],
advanced_values: [],
},
gitlab: {
description: "Configure GitLab connector",
@ -187,6 +208,7 @@ export const connectorConfigs: Record<
hidden: true,
},
],
advanced_values: [],
},
google_drive: {
description: "Configure Google Drive connector",
@ -223,22 +245,21 @@ export const connectorConfigs: Record<
default: false,
},
],
advanced_values: [],
},
gmail: {
description: "Configure Gmail connector",
values: [],
advanced_values: [],
},
bookstack: {
description: "Configure Bookstack connector",
values: [],
advanced_values: [],
},
confluence: {
description: "Configure Confluence connector",
subtext: `Specify the base URL of your Confluence instance, the space name, and optionally a specific page ID to index. If no page ID is provided, the entire space will be indexed.
For example, entering "https://your-company.atlassian.net/wiki" as the Wiki Base URL, "KB" as the Space, and "164331" as the Page ID will index the specific page at https:///your-company.atlassian.net/wiki/spaces/KB/pages/164331/Page. If you leave the Page ID empty, it will index the entire KB space.
Selecting the "Index Recursively" checkbox will index the specified page and all of its children.`,
subtext: `Specify the base URL of your Confluence instance, the space name, and optionally a specific page ID to index. If no page ID is provided, the entire space will be indexed. If no space is specified, all available Confluence spaces will be indexed.`,
values: [
{
type: "text",
@ -254,9 +275,22 @@ Selecting the "Index Recursively" checkbox will index the specified page and all
query: "Enter the space:",
label: "Space",
name: "space",
optional: false,
description: "The Confluence space name to index (e.g. `KB`)",
optional: true,
description:
"The Confluence space name to index (e.g. `KB`). If no space is specified, all available Confluence spaces will be indexed.",
},
{
type: "checkbox",
query: "Is this a Confluence Cloud instance?",
label: "Is Cloud",
name: "is_cloud",
optional: false,
default: true,
description:
"Check if this is a Confluence Cloud instance, uncheck for Confluence Server/Data Center",
},
],
advanced_values: [
{
type: "text",
query: "Enter the page ID (optional):",
@ -276,14 +310,13 @@ Selecting the "Index Recursively" checkbox will index the specified page and all
optional: false,
},
{
type: "checkbox",
query: "Is this a Confluence Cloud instance?",
label: "Is Cloud",
name: "is_cloud",
optional: false,
default: true,
type: "text",
query: "Enter the CQL query (optional):",
label: "CQL Query",
name: "cql_query",
optional: true,
description:
"Check if this is a Confluence Cloud instance, uncheck for Confluence Server/Data Center",
"IMPORTANT: This will overwrite all other selected connector settings (besides Wiki Base URL). We currently only support CQL queries that return objects of type 'page'. This means all CQL queries must contain 'type=page' as the only type filter. We will still get all attachments and comments for the pages returned by the CQL query. Any 'lastmodified' filters will be overwritten. See https://developer.atlassian.com/server/confluence/advanced-searching-using-cql/ for more details.",
},
],
},
@ -308,6 +341,7 @@ Selecting the "Index Recursively" checkbox will index the specified page and all
optional: true,
},
],
advanced_values: [],
},
salesforce: {
description: "Configure Salesforce connector",
@ -323,6 +357,7 @@ Selecting the "Index Recursively" checkbox will index the specified page and all
Hint: Use the singular form of the object name (e.g., 'Opportunity' instead of 'Opportunities').`,
},
],
advanced_values: [],
},
sharepoint: {
description: "Configure SharePoint connector",
@ -339,6 +374,7 @@ Hint: Use the singular form of the object name (e.g., 'Opportunity' instead of '
`,
},
],
advanced_values: [],
},
teams: {
description: "Configure Teams connector",
@ -352,6 +388,7 @@ Hint: Use the singular form of the object name (e.g., 'Opportunity' instead of '
description: `Specify 0 or more Teams to index. For example, specifying the Team 'Support' for the 'danswerai' Org will cause us to only index messages sent in channels belonging to the 'Support' Team. If no Teams are specified, all Teams in your organization will be indexed.`,
},
],
advanced_values: [],
},
discourse: {
description: "Configure Discourse connector",
@ -371,6 +408,7 @@ Hint: Use the singular form of the object name (e.g., 'Opportunity' instead of '
optional: true,
},
],
advanced_values: [],
},
axero: {
description: "Configure Axero connector",
@ -385,11 +423,13 @@ Hint: Use the singular form of the object name (e.g., 'Opportunity' instead of '
"Specify zero or more Spaces to index (by the Space IDs). If no Space IDs are specified, all Spaces will be indexed.",
},
],
advanced_values: [],
overrideDefaultFreq: 60 * 60 * 24,
},
productboard: {
description: "Configure Productboard connector",
values: [],
advanced_values: [],
},
slack: {
description: "Configure Slack connector",
@ -401,6 +441,8 @@ Hint: Use the singular form of the object name (e.g., 'Opportunity' instead of '
name: "workspace",
optional: false,
},
],
advanced_values: [
{
type: "list",
query: "Enter channels to include:",
@ -434,10 +476,12 @@ For example, specifying .*-support.* as a "channel" will cause the connector to
description: `Specify the base URL for your Slab team. This will look something like: https://danswer.slab.com/`,
},
],
advanced_values: [],
},
guru: {
description: "Configure Guru connector",
values: [],
advanced_values: [],
},
gong: {
description: "Configure Gong connector",
@ -452,6 +496,7 @@ For example, specifying .*-support.* as a "channel" will cause the connector to
"Specify 0 or more workspaces to index. Provide the workspace ID or the EXACT workspace name from Gong. If no workspaces are specified, transcripts from all workspaces will be indexed.",
},
],
advanced_values: [],
},
loopio: {
description: "Configure Loopio connector",
@ -466,6 +511,7 @@ For example, specifying .*-support.* as a "channel" will cause the connector to
optional: true,
},
],
advanced_values: [],
overrideDefaultFreq: 60 * 60 * 24,
},
file: {
@ -479,6 +525,7 @@ For example, specifying .*-support.* as a "channel" will cause the connector to
optional: false,
},
],
advanced_values: [],
},
zulip: {
description: "Configure Zulip connector",
@ -498,6 +545,7 @@ For example, specifying .*-support.* as a "channel" will cause the connector to
optional: false,
},
],
advanced_values: [],
},
notion: {
description: "Configure Notion connector",
@ -512,14 +560,17 @@ For example, specifying .*-support.* as a "channel" will cause the connector to
"If specified, will only index the specified page + all of its child pages. If left blank, will index all pages the integration has been given access to.",
},
],
advanced_values: [],
},
requesttracker: {
description: "Configure HubSpot connector",
values: [],
advanced_values: [],
},
hubspot: {
description: "Configure HubSpot connector",
values: [],
advanced_values: [],
},
document360: {
description: "Configure Document360 connector",
@ -541,6 +592,7 @@ For example, specifying .*-support.* as a "channel" will cause the connector to
"Specify 0 or more categories to index. For instance, specifying the category 'Help' will cause us to only index all content within the 'Help' category. If no categories are specified, all categories in your workspace will be indexed.",
},
],
advanced_values: [],
},
clickup: {
description: "Configure ClickUp connector",
@ -576,6 +628,7 @@ For example, specifying .*-support.* as a "channel" will cause the connector to
optional: false,
},
],
advanced_values: [],
},
google_sites: {
description: "Configure Google Sites connector",
@ -597,6 +650,7 @@ For example, specifying .*-support.* as a "channel" will cause the connector to
optional: false,
},
],
advanced_values: [],
},
zendesk: {
description: "Configure Zendesk connector",
@ -614,14 +668,17 @@ For example, specifying .*-support.* as a "channel" will cause the connector to
default: "articles",
},
],
advanced_values: [],
},
linear: {
description: "Configure Linear connector",
description: "Configure Dropbox connector",
values: [],
advanced_values: [],
},
dropbox: {
description: "Configure Dropbox connector",
values: [],
advanced_values: [],
},
s3: {
description: "Configure S3 connector",
@ -649,6 +706,7 @@ For example, specifying .*-support.* as a "channel" will cause the connector to
hidden: true,
},
],
advanced_values: [],
overrideDefaultFreq: 60 * 60 * 24,
},
r2: {
@ -677,6 +735,7 @@ For example, specifying .*-support.* as a "channel" will cause the connector to
hidden: true,
},
],
advanced_values: [],
overrideDefaultFreq: 60 * 60 * 24,
},
google_cloud_storage: {
@ -706,6 +765,7 @@ For example, specifying .*-support.* as a "channel" will cause the connector to
hidden: true,
},
],
advanced_values: [],
overrideDefaultFreq: 60 * 60 * 24,
},
oci_storage: {
@ -734,6 +794,7 @@ For example, specifying .*-support.* as a "channel" will cause the connector to
hidden: true,
},
],
advanced_values: [],
},
wikipedia: {
description: "Configure Wikipedia connector",
@ -773,6 +834,7 @@ For example, specifying .*-support.* as a "channel" will cause the connector to
optional: false,
},
],
advanced_values: [],
},
xenforo: {
description: "Configure Xenforo connector",
@ -787,6 +849,7 @@ For example, specifying .*-support.* as a "channel" will cause the connector to
"The XenForo v2.2 forum URL to index. Can be board or thread.",
},
],
advanced_values: [],
},
asana: {
description: "Configure Asana connector",
@ -819,6 +882,7 @@ For example, specifying .*-support.* as a "channel" will cause the connector to
"ID of a team to use for accessing team-visible tasks. This allows indexing of team-visible tasks in addition to public tasks. Leave empty if you don't want to use this feature.",
},
],
advanced_values: [],
},
mediawiki: {
description: "Configure MediaWiki connector",
@ -866,6 +930,7 @@ For example, specifying .*-support.* as a "channel" will cause the connector to
optional: true,
},
],
advanced_values: [],
},
};
export function createConnectorInitialValues(
@ -987,10 +1052,11 @@ export interface BookstackConfig {}
export interface ConfluenceConfig {
wiki_base: string;
space: string;
space?: string;
page_id?: string;
is_cloud?: boolean;
index_recursively?: boolean;
cql_query?: string;
}
export interface JiraConfig {