Add cql support for confluence connector (#2679)

* Added CQL support for Confluence

* changed string substitutions for CQL

* final cleanup

* updated string fixes

* remove print statements

* Update description
This commit is contained in:
hagen-danswer 2024-10-10 12:16:56 -07:00 committed by GitHub
parent 101b010c5c
commit 1f4fe42f4b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 339 additions and 198 deletions

View File

@ -1,5 +1,6 @@
import io import io
import os import os
import re
from collections.abc import Callable from collections.abc import Callable
from collections.abc import Collection from collections.abc import Collection
from datetime import datetime from datetime import datetime
@ -56,8 +57,101 @@ NO_PARENT_OR_NO_PERMISSIONS_ERROR_STR = (
) )
class DanswerConfluence(Confluence):
    """Confluence client with a custom CQL search method.

    This subclass exists because the default Confluence class does not
    properly support cql expansions; `danswer_cql` builds the search URL
    itself so that `expand`, paging and archived-space options are always
    sent.
    """

    def __init__(self, url: str, *args: Any, **kwargs: Any) -> None:
        super().__init__(url, *args, **kwargs)

    def danswer_cql(
        self,
        cql: str,
        expand: str | None = None,
        start: int = 0,
        limit: int = 500,
        include_archived_spaces: bool = False,
    ) -> list[dict[str, Any]]:
        """Run a CQL content search and return the raw `results` list.

        Args:
            cql: The CQL expression to search with (unencoded).
            expand: Optional comma-separated list of properties to expand.
            start: Offset of the first result to return.
            limit: Maximum number of results to return.
            include_archived_spaces: When True, `includeArchivedSpaces=true`
                is added to the request.

        Returns:
            The value of the `results` key of the response, or an empty list
            if the key is absent.

        Raises:
            Whatever `self.get` raises; errors are propagated unchanged.
        """
        # Imported locally so this block does not depend on a file-level import.
        from urllib.parse import quote

        # Percent-encode the user-supplied pieces: characters such as '&',
        # '#', '+' or '%' inside the CQL would otherwise be interpreted as
        # query-string syntax and silently truncate or alter the search.
        url_suffix = f"rest/api/content/search?cql={quote(cql, safe='')}"
        if expand:
            # '.' and ',' are the separators of the expand syntax and are
            # safe to leave readable in a query string.
            url_suffix += f"&expand={quote(expand, safe='.,')}"
        url_suffix += f"&start={start}&limit={limit}"
        if include_archived_spaces:
            url_suffix += "&includeArchivedSpaces=true"

        response = self.get(url_suffix)
        return response.get("results", [])
def _parse_cql_datetime(value: str) -> datetime:
    """Parse a CQL date literal (`-` or `/` separated, optional HH:MM) as UTC."""
    # Normalise the separator and any run of whitespace (including tabs) so a
    # single strptime format per shape is enough.
    normalized = " ".join(value.replace("/", "-").split())
    fmt = "%Y-%m-%d %H:%M" if ":" in normalized else "%Y-%m-%d"
    # The literal carries no zone information; it is treated as UTC so it can
    # be compared with the offset-aware poll window.
    return datetime.strptime(normalized, fmt).replace(tzinfo=timezone.utc)


def _replace_cql_time_filter(
    cql_query: str, start_time: datetime, end_time: datetime
) -> str:
    """Replace the lastmodified filters of a CQL query with a time window.

    Any existing `lastmodified >=` / `lastmodified <=` bound found in the
    query is combined with the given window so that the more restrictive
    bound wins. All `AND lastmodified ...` / `AND updated ...` clauses with a
    date literal are then removed and a fresh pair of `lastmodified` bounds is
    appended.

    Args:
        cql_query: The CQL query to rewrite.
        start_time: Lower bound of the window.
        end_time: Upper bound of the window.

    Returns:
        The rewritten query, stripped of surrounding whitespace.
    """
    # Both documented CQL date shapes: yyyy-MM-dd and yyyy/MM/dd, each with an
    # optional HH:mm part.
    date_literal = r"\d{4}[-/]\d{2}[-/]\d{2}(?:\s+\d{2}:\d{2})?"

    # Extract existing lastmodified >= and <= filters
    existing_start_match = re.search(
        r'lastmodified\s*>=\s*["\']?(' + date_literal + r')["\']?',
        cql_query,
        flags=re.IGNORECASE,
    )
    existing_end_match = re.search(
        r'lastmodified\s*<=\s*["\']?(' + date_literal + r')["\']?',
        cql_query,
        flags=re.IGNORECASE,
    )

    # Remove all existing lastmodified and updated filters. The trailing
    # lookahead forces the match to cover the whole literal: without it a
    # value such as "2024/06/01" was only partially removed, leaving a
    # dangling fragment that corrupted the query.
    cql_query = re.sub(
        r"\s*AND\s+(?:lastmodified|updated)\s*[<>=]+\s*"
        r'["\']?[\d/-]+(?:\s+[\d:]+)?["\']?(?=[\s)]|$)',
        "",
        cql_query,
        flags=re.IGNORECASE,
    )

    # Determine the start time to use (the later of the two lower bounds)
    start_time_to_use = start_time.astimezone(timezone.utc)
    if existing_start_match:
        start_time_to_use = max(
            start_time_to_use, _parse_cql_datetime(existing_start_match.group(1))
        )

    # Determine the end time to use (the earlier of the two upper bounds)
    end_time_to_use = end_time.astimezone(timezone.utc)
    if existing_end_match:
        end_time_to_use = min(
            end_time_to_use, _parse_cql_datetime(existing_end_match.group(1))
        )

    # Add new time filters
    cql_query += (
        f" and lastmodified >= '{start_time_to_use.strftime('%Y-%m-%d %H:%M')}'"
    )
    cql_query += f" and lastmodified <= '{end_time_to_use.strftime('%Y-%m-%d %H:%M')}'"

    return cql_query.strip()
@lru_cache() @lru_cache()
def _get_user(user_id: str, confluence_client: Confluence) -> str: def _get_user(user_id: str, confluence_client: DanswerConfluence) -> str:
"""Get Confluence Display Name based on the account-id or userkey value """Get Confluence Display Name based on the account-id or userkey value
Args: Args:
@ -81,7 +175,7 @@ def _get_user(user_id: str, confluence_client: Confluence) -> str:
return user_not_found return user_not_found
def parse_html_page(text: str, confluence_client: Confluence) -> str: def parse_html_page(text: str, confluence_client: DanswerConfluence) -> str:
"""Parse a Confluence html page and replace the 'user Id' by the real """Parse a Confluence html page and replace the 'user Id' by the real
User Display Name User Display Name
@ -112,7 +206,7 @@ def parse_html_page(text: str, confluence_client: Confluence) -> str:
def _comment_dfs( def _comment_dfs(
comments_str: str, comments_str: str,
comment_pages: Collection[dict[str, Any]], comment_pages: Collection[dict[str, Any]],
confluence_client: Confluence, confluence_client: DanswerConfluence,
) -> str: ) -> str:
get_page_child_by_type = make_confluence_call_handle_rate_limit( get_page_child_by_type = make_confluence_call_handle_rate_limit(
confluence_client.get_page_child_by_type confluence_client.get_page_child_by_type
@ -159,7 +253,7 @@ class RecursiveIndexer:
def __init__( def __init__(
self, self,
batch_size: int, batch_size: int,
confluence_client: Confluence, confluence_client: DanswerConfluence,
index_recursively: bool, index_recursively: bool,
origin_page_id: str, origin_page_id: str,
) -> None: ) -> None:
@ -285,8 +379,8 @@ class ConfluenceConnector(LoadConnector, PollConnector):
def __init__( def __init__(
self, self,
wiki_base: str, wiki_base: str,
space: str,
is_cloud: bool, is_cloud: bool,
space: str = "",
page_id: str = "", page_id: str = "",
index_recursively: bool = True, index_recursively: bool = True,
batch_size: int = INDEX_BATCH_SIZE, batch_size: int = INDEX_BATCH_SIZE,
@ -295,35 +389,44 @@ class ConfluenceConnector(LoadConnector, PollConnector):
# skip it. This is generally used to avoid indexing extra sensitive # skip it. This is generally used to avoid indexing extra sensitive
# pages. # pages.
labels_to_skip: list[str] = CONFLUENCE_CONNECTOR_LABELS_TO_SKIP, labels_to_skip: list[str] = CONFLUENCE_CONNECTOR_LABELS_TO_SKIP,
cql_query: str | None = None,
) -> None: ) -> None:
self.batch_size = batch_size self.batch_size = batch_size
self.continue_on_failure = continue_on_failure self.continue_on_failure = continue_on_failure
self.labels_to_skip = set(labels_to_skip) self.labels_to_skip = set(labels_to_skip)
self.recursive_indexer: RecursiveIndexer | None = None self.recursive_indexer: RecursiveIndexer | None = None
self.index_recursively = index_recursively self.index_recursively = False if cql_query else index_recursively
# Remove trailing slash from wiki_base if present # Remove trailing slash from wiki_base if present
self.wiki_base = wiki_base.rstrip("/") self.wiki_base = wiki_base.rstrip("/")
self.space = space self.space = space
self.page_id = page_id self.page_id = "" if cql_query else page_id
self.space_level_scan = bool(not self.page_id)
self.is_cloud = is_cloud self.is_cloud = is_cloud
self.space_level_scan = False self.confluence_client: DanswerConfluence | None = None
self.confluence_client: Confluence | None = None
if self.page_id is None or self.page_id == "": # if a cql_query is provided, we will use it to fetch the pages
self.space_level_scan = True # if no cql_query is provided, we will use the space to fetch the pages
# if no space is provided, we will default to fetching all pages, regardless of space
if cql_query:
self.cql_query = cql_query
elif self.space:
self.cql_query = f"type=page and space={self.space}"
else:
self.cql_query = "type=page"
logger.info( logger.info(
f"wiki_base: {self.wiki_base}, space: {self.space}, page_id: {self.page_id}," f"wiki_base: {self.wiki_base}, space: {self.space}, page_id: {self.page_id},"
+ f" space_level_scan: {self.space_level_scan}, index_recursively: {self.index_recursively}" + f" space_level_scan: {self.space_level_scan}, index_recursively: {self.index_recursively},"
+ f" cql_query: {self.cql_query}"
) )
def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None: def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
username = credentials["confluence_username"] username = credentials["confluence_username"]
access_token = credentials["confluence_access_token"] access_token = credentials["confluence_access_token"]
self.confluence_client = Confluence( self.confluence_client = DanswerConfluence(
url=self.wiki_base, url=self.wiki_base,
# passing in username causes issues for Confluence data center # passing in username causes issues for Confluence data center
username=username if self.is_cloud else None, username=username if self.is_cloud else None,
@ -334,26 +437,33 @@ class ConfluenceConnector(LoadConnector, PollConnector):
def _fetch_pages( def _fetch_pages(
self, self,
confluence_client: Confluence,
start_ind: int, start_ind: int,
) -> list[dict[str, Any]]: ) -> list[dict[str, Any]]:
def _fetch_space(start_ind: int, batch_size: int) -> list[dict[str, Any]]: def _fetch_space(start_ind: int, batch_size: int) -> list[dict[str, Any]]:
get_all_pages_from_space = make_confluence_call_handle_rate_limit( if self.confluence_client is None:
confluence_client.get_all_pages_from_space raise ConnectorMissingCredentialError("Confluence")
get_all_pages = make_confluence_call_handle_rate_limit(
self.confluence_client.danswer_cql
) )
include_archived_spaces = (
CONFLUENCE_CONNECTOR_INDEX_ARCHIVED_PAGES
if not self.is_cloud
else False
)
try: try:
return get_all_pages_from_space( return get_all_pages(
self.space, cql=self.cql_query,
start=start_ind, start=start_ind,
limit=batch_size, limit=batch_size,
status=(
None if CONFLUENCE_CONNECTOR_INDEX_ARCHIVED_PAGES else "current"
),
expand="body.storage.value,version", expand="body.storage.value,version",
include_archived_spaces=include_archived_spaces,
) )
except Exception: except Exception:
logger.warning( logger.warning(
f"Batch failed with space {self.space} at offset {start_ind} " f"Batch failed with cql {self.cql_query} at offset {start_ind} "
f"with size {batch_size}, processing pages individually..." f"with size {batch_size}, processing pages individually..."
) )
@ -363,27 +473,23 @@ class ConfluenceConnector(LoadConnector, PollConnector):
# Could be that one of the pages here failed due to this bug: # Could be that one of the pages here failed due to this bug:
# https://jira.atlassian.com/browse/CONFCLOUD-76433 # https://jira.atlassian.com/browse/CONFCLOUD-76433
view_pages.extend( view_pages.extend(
get_all_pages_from_space( get_all_pages(
self.space, cql=self.cql_query,
start=start_ind + i, start=start_ind + i,
limit=1, limit=1,
status=(
None
if CONFLUENCE_CONNECTOR_INDEX_ARCHIVED_PAGES
else "current"
),
expand="body.storage.value,version", expand="body.storage.value,version",
include_archived_spaces=include_archived_spaces,
) )
) )
except HTTPError as e: except HTTPError as e:
logger.warning( logger.warning(
f"Page failed with space {self.space} at offset {start_ind + i}, " f"Page failed with cql {self.cql_query} at offset {start_ind + i}, "
f"trying alternative expand option: {e}" f"trying alternative expand option: {e}"
) )
# Use view instead, which captures most info but is less complete # Use view instead, which captures most info but is less complete
view_pages.extend( view_pages.extend(
get_all_pages_from_space( get_all_pages(
self.space, cql=self.cql_query,
start=start_ind + i, start=start_ind + i,
limit=1, limit=1,
expand="body.view.value,version", expand="body.view.value,version",
@ -393,6 +499,9 @@ class ConfluenceConnector(LoadConnector, PollConnector):
return view_pages return view_pages
def _fetch_page(start_ind: int, batch_size: int) -> list[dict[str, Any]]: def _fetch_page(start_ind: int, batch_size: int) -> list[dict[str, Any]]:
if self.confluence_client is None:
raise ConnectorMissingCredentialError("Confluence")
if self.recursive_indexer is None: if self.recursive_indexer is None:
self.recursive_indexer = RecursiveIndexer( self.recursive_indexer = RecursiveIndexer(
origin_page_id=self.page_id, origin_page_id=self.page_id,
@ -421,7 +530,7 @@ class ConfluenceConnector(LoadConnector, PollConnector):
raise e raise e
# error checking phase, only reachable if `self.continue_on_failure=True` # error checking phase, only reachable if `self.continue_on_failure=True`
for i in range(self.batch_size): for _ in range(self.batch_size):
try: try:
pages = ( pages = (
_fetch_space(start_ind, self.batch_size) _fetch_space(start_ind, self.batch_size)
@ -437,7 +546,9 @@ class ConfluenceConnector(LoadConnector, PollConnector):
return pages return pages
def _fetch_comments(self, confluence_client: Confluence, page_id: str) -> str: def _fetch_comments(
self, confluence_client: DanswerConfluence, page_id: str
) -> str:
get_page_child_by_type = make_confluence_call_handle_rate_limit( get_page_child_by_type = make_confluence_call_handle_rate_limit(
confluence_client.get_page_child_by_type confluence_client.get_page_child_by_type
) )
@ -463,7 +574,9 @@ class ConfluenceConnector(LoadConnector, PollConnector):
) )
return "" return ""
def _fetch_labels(self, confluence_client: Confluence, page_id: str) -> list[str]: def _fetch_labels(
self, confluence_client: DanswerConfluence, page_id: str
) -> list[str]:
get_page_labels = make_confluence_call_handle_rate_limit( get_page_labels = make_confluence_call_handle_rate_limit(
confluence_client.get_page_labels confluence_client.get_page_labels
) )
@ -577,22 +690,20 @@ class ConfluenceConnector(LoadConnector, PollConnector):
return "\n".join(files_attachment_content), unused_attachments return "\n".join(files_attachment_content), unused_attachments
def _get_doc_batch( def _get_doc_batch(
self, start_ind: int, time_filter: Callable[[datetime], bool] | None = None self, start_ind: int
) -> tuple[list[Document], list[dict[str, Any]], int]: ) -> tuple[list[Document], list[dict[str, Any]], int]:
if self.confluence_client is None:
raise ConnectorMissingCredentialError("Confluence")
doc_batch: list[Document] = [] doc_batch: list[Document] = []
unused_attachments: list[dict[str, Any]] = [] unused_attachments: list[dict[str, Any]] = []
if self.confluence_client is None: batch = self._fetch_pages(start_ind)
raise ConnectorMissingCredentialError("Confluence")
batch = self._fetch_pages(self.confluence_client, start_ind)
for page in batch: for page in batch:
last_modified = _datetime_from_string(page["version"]["when"]) last_modified = _datetime_from_string(page["version"]["when"])
author = cast(str | None, page["version"].get("by", {}).get("email")) author = cast(str | None, page["version"].get("by", {}).get("email"))
if time_filter and not time_filter(last_modified):
continue
page_id = page["id"] page_id = page["id"]
if self.labels_to_skip or not CONFLUENCE_CONNECTOR_SKIP_LABEL_INDEXING: if self.labels_to_skip or not CONFLUENCE_CONNECTOR_SKIP_LABEL_INDEXING:
@ -715,17 +826,12 @@ class ConfluenceConnector(LoadConnector, PollConnector):
return doc_batch, end_ind - start_ind return doc_batch, end_ind - start_ind
def load_from_state(self) -> GenerateDocumentsOutput: def load_from_state(self) -> GenerateDocumentsOutput:
unused_attachments = [] unused_attachments: list[dict[str, Any]] = []
if self.confluence_client is None:
raise ConnectorMissingCredentialError("Confluence")
start_ind = 0 start_ind = 0
while True: while True:
doc_batch, unused_attachments_batch, num_pages = self._get_doc_batch( doc_batch, unused_attachments, num_pages = self._get_doc_batch(start_ind)
start_ind unused_attachments.extend(unused_attachments)
)
unused_attachments.extend(unused_attachments_batch)
start_ind += num_pages start_ind += num_pages
if doc_batch: if doc_batch:
yield doc_batch yield doc_batch
@ -748,7 +854,7 @@ class ConfluenceConnector(LoadConnector, PollConnector):
def poll_source( def poll_source(
self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
) -> GenerateDocumentsOutput: ) -> GenerateDocumentsOutput:
unused_attachments = [] unused_attachments: list[dict[str, Any]] = []
if self.confluence_client is None: if self.confluence_client is None:
raise ConnectorMissingCredentialError("Confluence") raise ConnectorMissingCredentialError("Confluence")
@ -756,12 +862,12 @@ class ConfluenceConnector(LoadConnector, PollConnector):
start_time = datetime.fromtimestamp(start, tz=timezone.utc) start_time = datetime.fromtimestamp(start, tz=timezone.utc)
end_time = datetime.fromtimestamp(end, tz=timezone.utc) end_time = datetime.fromtimestamp(end, tz=timezone.utc)
self.cql_query = _replace_cql_time_filter(self.cql_query, start_time, end_time)
start_ind = 0 start_ind = 0
while True: while True:
doc_batch, unused_attachments_batch, num_pages = self._get_doc_batch( doc_batch, unused_attachments, num_pages = self._get_doc_batch(start_ind)
start_ind, time_filter=lambda t: start_time <= t <= end_time unused_attachments.extend(unused_attachments)
)
unused_attachments.extend(unused_attachments_batch)
start_ind += num_pages start_ind += num_pages
if doc_batch: if doc_batch:

View File

@ -1,6 +1,6 @@
"use client"; "use client";
import { errorHandlingFetcher } from "@/lib/fetcher"; import { FetchError, errorHandlingFetcher } from "@/lib/fetcher";
import useSWR, { mutate } from "swr"; import useSWR, { mutate } from "swr";
import { HealthCheckBanner } from "@/components/health/healthcheck"; import { HealthCheckBanner } from "@/components/health/healthcheck";
@ -209,7 +209,15 @@ export default function AddConnector({
return ( return (
<Formik <Formik
initialValues={createConnectorInitialValues(connector)} initialValues={{
...createConnectorInitialValues(connector),
...Object.fromEntries(
connectorConfigs[connector].advanced_values.map((field) => [
field.name,
field.default || "",
])
),
}}
validationSchema={createConnectorValidationSchema(connector)} validationSchema={createConnectorValidationSchema(connector)}
onSubmit={async (values) => { onSubmit={async (values) => {
const { const {

View File

@ -1,74 +1,24 @@
import CredentialSubText from "@/components/credentials/CredentialFields"; import React from "react";
import { TrashIcon } from "@/components/icons/icons";
import { ListOption } from "@/lib/connectors/connectors"; import { ListOption } from "@/lib/connectors/connectors";
import { Field, FieldArray, useField } from "formik"; import { TextArrayField } from "@/components/admin/connectors/Field";
import { FaPlus } from "react-icons/fa"; import { useFormikContext } from "formik";
export default function ListInput({ interface ListInputProps {
field,
onUpdate,
}: {
field: ListOption; field: ListOption;
onUpdate?: (values: string[]) => void; }
}) {
const [fieldProps, , helpers] = useField(field.name); const ListInput: React.FC<ListInputProps> = ({ field }) => {
const { values } = useFormikContext();
return ( return (
<FieldArray name={field.name}> <TextArrayField
{({ push, remove }) => ( name={field.name}
<div> label={field.label}
<label values={Array.isArray(values) ? values : []}
htmlFor={field.name} subtext={field.description}
className="block text-sm font-medium text-text-700 mb-1" placeholder={`Enter ${field.label.toLowerCase()}`}
> />
{field.label}
{field.optional && (
<span className="text-text-500 ml-1">(optional)</span>
)}
</label>
{field.description && (
<CredentialSubText>{field.description}</CredentialSubText>
)}
{fieldProps.value.map((value: string, index: number) => (
<div key={index} className="w-full flex mb-4">
<Field
name={`${field.name}.${index}`}
className="w-full bg-input text-sm p-2 border border-border-medium rounded-md focus:ring-2 focus:ring-indigo-500 focus:border-indigo-500 mr-2"
/>
<button
className="p-2 my-auto bg-input flex-none rounded-md bg-red-500 text-white hover:bg-red-600 focus:outline-none focus:ring-2 focus:ring-red-500 focus:ring-opacity-50"
type="button"
onClick={() => {
remove(index);
if (onUpdate) {
const newValue = fieldProps.value.filter(
(_: any, i: number) => i !== index
);
onUpdate(newValue);
}
}}
>
<TrashIcon className="text-white my-auto" />
</button>
</div>
))}
<button
type="button"
onClick={() => {
push("");
if (onUpdate) {
onUpdate([...fieldProps.value, ""]);
}
}}
className="mt-2 p-2 bg-rose-500 text-xs text-white rounded-md hover:bg-rose-600 focus:outline-none focus:ring-2 focus:ring-rose-500 focus:ring-opacity-50 flex items-center"
>
<FaPlus className="mr-2" />
Add {field.label}
</button>
</div>
)}
</FieldArray>
); );
} };
export default ListInput;

View File

@ -1,4 +1,4 @@
import React, { Dispatch, FC, SetStateAction } from "react"; import React, { Dispatch, FC, SetStateAction, useState } from "react";
import CredentialSubText, { import CredentialSubText, {
AdminBooleanFormField, AdminBooleanFormField,
} from "@/components/credentials/CredentialFields"; } from "@/components/credentials/CredentialFields";
@ -9,6 +9,7 @@ import NumberInput from "./ConnectorInput/NumberInput";
import { TextFormField } from "@/components/admin/connectors/Field"; import { TextFormField } from "@/components/admin/connectors/Field";
import ListInput from "./ConnectorInput/ListInput"; import ListInput from "./ConnectorInput/ListInput";
import FileInput from "./ConnectorInput/FileInput"; import FileInput from "./ConnectorInput/FileInput";
import { AdvancedOptionsToggle } from "@/components/AdvancedOptionsToggle";
export interface DynamicConnectionFormProps { export interface DynamicConnectionFormProps {
config: ConnectionConfiguration; config: ConnectionConfiguration;
@ -23,6 +24,61 @@ const DynamicConnectionForm: FC<DynamicConnectionFormProps> = ({
setSelectedFiles, setSelectedFiles,
values, values,
}) => { }) => {
const [showAdvancedOptions, setShowAdvancedOptions] = useState(false);
const renderField = (field: any) => (
<div key={field.name}>
{field.type === "file" ? (
<FileUpload
name={field.name}
selectedFiles={selectedFiles}
setSelectedFiles={setSelectedFiles}
/>
) : field.type === "zip" ? (
<FileInput
name={field.name}
label={field.label}
optional={field.optional}
description={field.description}
selectedFiles={selectedFiles}
setSelectedFiles={setSelectedFiles}
/>
) : field.type === "list" ? (
<ListInput field={field} />
) : field.type === "select" ? (
<SelectInput
name={field.name}
optional={field.optional}
description={field.description}
options={field.options || []}
label={field.label}
/>
) : field.type === "number" ? (
<NumberInput
label={field.label}
optional={field.optional}
description={field.description}
name={field.name}
/>
) : field.type === "checkbox" ? (
<AdminBooleanFormField
checked={values[field.name]}
subtext={field.description}
name={field.name}
label={field.label}
/>
) : (
<TextFormField
subtext={field.description}
optional={field.optional}
type={field.type}
label={field.label}
name={field.name}
/>
)}
</div>
);
return ( return (
<> <>
<h2 className="text-2xl font-bold text-text-800">{config.description}</h2> <h2 className="text-2xl font-bold text-text-800">{config.description}</h2>
@ -38,62 +94,17 @@ const DynamicConnectionForm: FC<DynamicConnectionFormProps> = ({
name={"name"} name={"name"}
/> />
{config.values.map((field) => { {config.values.map((field) => !field.hidden && renderField(field))}
if (!field.hidden) {
return ( {config.advanced_values.length > 0 && (
<div key={field.name}> <>
{field.type == "file" ? ( <AdvancedOptionsToggle
<FileUpload showAdvancedOptions={showAdvancedOptions}
name={field.name} setShowAdvancedOptions={setShowAdvancedOptions}
selectedFiles={selectedFiles} />
setSelectedFiles={setSelectedFiles} {showAdvancedOptions && config.advanced_values.map(renderField)}
/> </>
) : field.type == "zip" ? ( )}
<FileInput
name={field.name}
label={field.label}
optional={field.optional}
description={field.description}
selectedFiles={selectedFiles}
setSelectedFiles={setSelectedFiles}
/>
) : field.type === "list" ? (
<ListInput field={field} />
) : field.type === "select" ? (
<SelectInput
name={field.name}
optional={field.optional}
description={field.description}
options={field.options || []}
label={field.label}
/>
) : field.type === "number" ? (
<NumberInput
label={field.label}
optional={field.optional}
description={field.description}
name={field.name}
/>
) : field.type === "checkbox" ? (
<AdminBooleanFormField
checked={values[field.name]}
subtext={field.description}
name={field.name}
label={field.label}
/>
) : (
<TextFormField
subtext={field.description}
optional={field.optional}
type={field.type}
label={field.label}
name={field.name}
/>
)}
</div>
);
}
})}
</> </>
); );
}; };

View File

@ -86,6 +86,15 @@ export interface ConnectionConfiguration {
| FileOption | FileOption
| ZipOption | ZipOption
)[]; )[];
advanced_values: (
| BooleanOption
| ListOption
| TextOption
| NumberOption
| SelectOption
| FileOption
| ZipOption
)[];
overrideDefaultFreq?: number; overrideDefaultFreq?: number;
} }
@ -116,6 +125,17 @@ export const connectorConfigs: Record<
], ],
}, },
], ],
advanced_values: [
{
type: "number",
query: "Enter the maximum depth to crawl:",
label: "Max Depth",
name: "max_depth",
optional: true,
description:
"The maximum depth to crawl from the base URL. Default is 2.",
},
],
overrideDefaultFreq: 60 * 60 * 24, overrideDefaultFreq: 60 * 60 * 24,
}, },
github: { github: {
@ -152,6 +172,7 @@ export const connectorConfigs: Record<
optional: true, optional: true,
}, },
], ],
advanced_values: [],
}, },
gitlab: { gitlab: {
description: "Configure GitLab connector", description: "Configure GitLab connector",
@ -187,6 +208,7 @@ export const connectorConfigs: Record<
hidden: true, hidden: true,
}, },
], ],
advanced_values: [],
}, },
google_drive: { google_drive: {
description: "Configure Google Drive connector", description: "Configure Google Drive connector",
@ -223,22 +245,21 @@ export const connectorConfigs: Record<
default: false, default: false,
}, },
], ],
advanced_values: [],
}, },
gmail: { gmail: {
description: "Configure Gmail connector", description: "Configure Gmail connector",
values: [], values: [],
advanced_values: [],
}, },
bookstack: { bookstack: {
description: "Configure Bookstack connector", description: "Configure Bookstack connector",
values: [], values: [],
advanced_values: [],
}, },
confluence: { confluence: {
description: "Configure Confluence connector", description: "Configure Confluence connector",
subtext: `Specify the base URL of your Confluence instance, the space name, and optionally a specific page ID to index. If no page ID is provided, the entire space will be indexed. subtext: `Specify the base URL of your Confluence instance, the space name, and optionally a specific page ID to index. If no page ID is provided, the entire space will be indexed. If no space is specified, all available Confluence spaces will be indexed.`,
For example, entering "https://your-company.atlassian.net/wiki" as the Wiki Base URL, "KB" as the Space, and "164331" as the Page ID will index the specific page at https:///your-company.atlassian.net/wiki/spaces/KB/pages/164331/Page. If you leave the Page ID empty, it will index the entire KB space.
Selecting the "Index Recursively" checkbox will index the specified page and all of its children.`,
values: [ values: [
{ {
type: "text", type: "text",
@ -254,9 +275,22 @@ Selecting the "Index Recursively" checkbox will index the specified page and all
query: "Enter the space:", query: "Enter the space:",
label: "Space", label: "Space",
name: "space", name: "space",
optional: false, optional: true,
description: "The Confluence space name to index (e.g. `KB`)", description:
"The Confluence space name to index (e.g. `KB`). If no space is specified, all available Confluence spaces will be indexed.",
}, },
{
type: "checkbox",
query: "Is this a Confluence Cloud instance?",
label: "Is Cloud",
name: "is_cloud",
optional: false,
default: true,
description:
"Check if this is a Confluence Cloud instance, uncheck for Confluence Server/Data Center",
},
],
advanced_values: [
{ {
type: "text", type: "text",
query: "Enter the page ID (optional):", query: "Enter the page ID (optional):",
@ -276,14 +310,13 @@ Selecting the "Index Recursively" checkbox will index the specified page and all
optional: false, optional: false,
}, },
{ {
type: "checkbox", type: "text",
query: "Is this a Confluence Cloud instance?", query: "Enter the CQL query (optional):",
label: "Is Cloud", label: "CQL Query",
name: "is_cloud", name: "cql_query",
optional: false, optional: true,
default: true,
description: description:
"Check if this is a Confluence Cloud instance, uncheck for Confluence Server/Data Center", "IMPORTANT: This will overwrite all other selected connector settings (besides Wiki Base URL). We currently only support CQL queries that return objects of type 'page'. This means all CQL queries must contain 'type=page' as the only type filter. We will still get all attachments and comments for the pages returned by the CQL query. Any 'lastmodified' filters will be overwritten. See https://developer.atlassian.com/server/confluence/advanced-searching-using-cql/ for more details.",
}, },
], ],
}, },
@ -308,6 +341,7 @@ Selecting the "Index Recursively" checkbox will index the specified page and all
optional: true, optional: true,
}, },
], ],
advanced_values: [],
}, },
salesforce: { salesforce: {
description: "Configure Salesforce connector", description: "Configure Salesforce connector",
@ -323,6 +357,7 @@ Selecting the "Index Recursively" checkbox will index the specified page and all
Hint: Use the singular form of the object name (e.g., 'Opportunity' instead of 'Opportunities').`, Hint: Use the singular form of the object name (e.g., 'Opportunity' instead of 'Opportunities').`,
}, },
], ],
advanced_values: [],
}, },
sharepoint: { sharepoint: {
description: "Configure SharePoint connector", description: "Configure SharePoint connector",
@ -339,6 +374,7 @@ Hint: Use the singular form of the object name (e.g., 'Opportunity' instead of '
`, `,
}, },
], ],
advanced_values: [],
}, },
teams: { teams: {
description: "Configure Teams connector", description: "Configure Teams connector",
@ -352,6 +388,7 @@ Hint: Use the singular form of the object name (e.g., 'Opportunity' instead of '
description: `Specify 0 or more Teams to index. For example, specifying the Team 'Support' for the 'danswerai' Org will cause us to only index messages sent in channels belonging to the 'Support' Team. If no Teams are specified, all Teams in your organization will be indexed.`, description: `Specify 0 or more Teams to index. For example, specifying the Team 'Support' for the 'danswerai' Org will cause us to only index messages sent in channels belonging to the 'Support' Team. If no Teams are specified, all Teams in your organization will be indexed.`,
}, },
], ],
advanced_values: [],
}, },
discourse: { discourse: {
description: "Configure Discourse connector", description: "Configure Discourse connector",
@ -371,6 +408,7 @@ Hint: Use the singular form of the object name (e.g., 'Opportunity' instead of '
optional: true, optional: true,
}, },
], ],
advanced_values: [],
}, },
axero: { axero: {
description: "Configure Axero connector", description: "Configure Axero connector",
@ -385,11 +423,13 @@ Hint: Use the singular form of the object name (e.g., 'Opportunity' instead of '
"Specify zero or more Spaces to index (by the Space IDs). If no Space IDs are specified, all Spaces will be indexed.", "Specify zero or more Spaces to index (by the Space IDs). If no Space IDs are specified, all Spaces will be indexed.",
}, },
], ],
advanced_values: [],
overrideDefaultFreq: 60 * 60 * 24, overrideDefaultFreq: 60 * 60 * 24,
}, },
productboard: { productboard: {
description: "Configure Productboard connector", description: "Configure Productboard connector",
values: [], values: [],
advanced_values: [],
}, },
slack: { slack: {
description: "Configure Slack connector", description: "Configure Slack connector",
@ -401,6 +441,8 @@ Hint: Use the singular form of the object name (e.g., 'Opportunity' instead of '
name: "workspace", name: "workspace",
optional: false, optional: false,
}, },
],
advanced_values: [
{ {
type: "list", type: "list",
query: "Enter channels to include:", query: "Enter channels to include:",
@ -434,10 +476,12 @@ For example, specifying .*-support.* as a "channel" will cause the connector to
description: `Specify the base URL for your Slab team. This will look something like: https://danswer.slab.com/`, description: `Specify the base URL for your Slab team. This will look something like: https://danswer.slab.com/`,
}, },
], ],
advanced_values: [],
}, },
guru: { guru: {
description: "Configure Guru connector", description: "Configure Guru connector",
values: [], values: [],
advanced_values: [],
}, },
gong: { gong: {
description: "Configure Gong connector", description: "Configure Gong connector",
@ -452,6 +496,7 @@ For example, specifying .*-support.* as a "channel" will cause the connector to
"Specify 0 or more workspaces to index. Provide the workspace ID or the EXACT workspace name from Gong. If no workspaces are specified, transcripts from all workspaces will be indexed.", "Specify 0 or more workspaces to index. Provide the workspace ID or the EXACT workspace name from Gong. If no workspaces are specified, transcripts from all workspaces will be indexed.",
}, },
], ],
advanced_values: [],
}, },
loopio: { loopio: {
description: "Configure Loopio connector", description: "Configure Loopio connector",
@ -466,6 +511,7 @@ For example, specifying .*-support.* as a "channel" will cause the connector to
optional: true, optional: true,
}, },
], ],
advanced_values: [],
overrideDefaultFreq: 60 * 60 * 24, overrideDefaultFreq: 60 * 60 * 24,
}, },
file: { file: {
@ -479,6 +525,7 @@ For example, specifying .*-support.* as a "channel" will cause the connector to
optional: false, optional: false,
}, },
], ],
advanced_values: [],
}, },
zulip: { zulip: {
description: "Configure Zulip connector", description: "Configure Zulip connector",
@ -498,6 +545,7 @@ For example, specifying .*-support.* as a "channel" will cause the connector to
optional: false, optional: false,
}, },
], ],
advanced_values: [],
}, },
notion: { notion: {
description: "Configure Notion connector", description: "Configure Notion connector",
@ -512,14 +560,17 @@ For example, specifying .*-support.* as a "channel" will cause the connector to
"If specified, will only index the specified page + all of its child pages. If left blank, will index all pages the integration has been given access to.", "If specified, will only index the specified page + all of its child pages. If left blank, will index all pages the integration has been given access to.",
}, },
], ],
advanced_values: [],
}, },
requesttracker: { requesttracker: {
description: "Configure HubSpot connector", description: "Configure Request Tracker connector",
values: [], values: [],
advanced_values: [],
}, },
hubspot: { hubspot: {
description: "Configure HubSpot connector", description: "Configure HubSpot connector",
values: [], values: [],
advanced_values: [],
}, },
document360: { document360: {
description: "Configure Document360 connector", description: "Configure Document360 connector",
@ -541,6 +592,7 @@ For example, specifying .*-support.* as a "channel" will cause the connector to
"Specify 0 or more categories to index. For instance, specifying the category 'Help' will cause us to only index all content within the 'Help' category. If no categories are specified, all categories in your workspace will be indexed.", "Specify 0 or more categories to index. For instance, specifying the category 'Help' will cause us to only index all content within the 'Help' category. If no categories are specified, all categories in your workspace will be indexed.",
}, },
], ],
advanced_values: [],
}, },
clickup: { clickup: {
description: "Configure ClickUp connector", description: "Configure ClickUp connector",
@ -576,6 +628,7 @@ For example, specifying .*-support.* as a "channel" will cause the connector to
optional: false, optional: false,
}, },
], ],
advanced_values: [],
}, },
google_sites: { google_sites: {
description: "Configure Google Sites connector", description: "Configure Google Sites connector",
@ -597,6 +650,7 @@ For example, specifying .*-support.* as a "channel" will cause the connector to
optional: false, optional: false,
}, },
], ],
advanced_values: [],
}, },
zendesk: { zendesk: {
description: "Configure Zendesk connector", description: "Configure Zendesk connector",
@ -614,14 +668,17 @@ For example, specifying .*-support.* as a "channel" will cause the connector to
default: "articles", default: "articles",
}, },
], ],
advanced_values: [],
}, },
linear: { linear: {
description: "Configure Linear connector", description: "Configure Linear connector",
values: [], values: [],
advanced_values: [],
}, },
dropbox: { dropbox: {
description: "Configure Dropbox connector", description: "Configure Dropbox connector",
values: [], values: [],
advanced_values: [],
}, },
s3: { s3: {
description: "Configure S3 connector", description: "Configure S3 connector",
@ -649,6 +706,7 @@ For example, specifying .*-support.* as a "channel" will cause the connector to
hidden: true, hidden: true,
}, },
], ],
advanced_values: [],
overrideDefaultFreq: 60 * 60 * 24, overrideDefaultFreq: 60 * 60 * 24,
}, },
r2: { r2: {
@ -677,6 +735,7 @@ For example, specifying .*-support.* as a "channel" will cause the connector to
hidden: true, hidden: true,
}, },
], ],
advanced_values: [],
overrideDefaultFreq: 60 * 60 * 24, overrideDefaultFreq: 60 * 60 * 24,
}, },
google_cloud_storage: { google_cloud_storage: {
@ -706,6 +765,7 @@ For example, specifying .*-support.* as a "channel" will cause the connector to
hidden: true, hidden: true,
}, },
], ],
advanced_values: [],
overrideDefaultFreq: 60 * 60 * 24, overrideDefaultFreq: 60 * 60 * 24,
}, },
oci_storage: { oci_storage: {
@ -734,6 +794,7 @@ For example, specifying .*-support.* as a "channel" will cause the connector to
hidden: true, hidden: true,
}, },
], ],
advanced_values: [],
}, },
wikipedia: { wikipedia: {
description: "Configure Wikipedia connector", description: "Configure Wikipedia connector",
@ -773,6 +834,7 @@ For example, specifying .*-support.* as a "channel" will cause the connector to
optional: false, optional: false,
}, },
], ],
advanced_values: [],
}, },
xenforo: { xenforo: {
description: "Configure Xenforo connector", description: "Configure Xenforo connector",
@ -787,6 +849,7 @@ For example, specifying .*-support.* as a "channel" will cause the connector to
"The XenForo v2.2 forum URL to index. Can be board or thread.", "The XenForo v2.2 forum URL to index. Can be board or thread.",
}, },
], ],
advanced_values: [],
}, },
asana: { asana: {
description: "Configure Asana connector", description: "Configure Asana connector",
@ -819,6 +882,7 @@ For example, specifying .*-support.* as a "channel" will cause the connector to
"ID of a team to use for accessing team-visible tasks. This allows indexing of team-visible tasks in addition to public tasks. Leave empty if you don't want to use this feature.", "ID of a team to use for accessing team-visible tasks. This allows indexing of team-visible tasks in addition to public tasks. Leave empty if you don't want to use this feature.",
}, },
], ],
advanced_values: [],
}, },
mediawiki: { mediawiki: {
description: "Configure MediaWiki connector", description: "Configure MediaWiki connector",
@ -866,6 +930,7 @@ For example, specifying .*-support.* as a "channel" will cause the connector to
optional: true, optional: true,
}, },
], ],
advanced_values: [],
}, },
}; };
export function createConnectorInitialValues( export function createConnectorInitialValues(
@ -987,10 +1052,11 @@ export interface BookstackConfig {}
export interface ConfluenceConfig { export interface ConfluenceConfig {
wiki_base: string; wiki_base: string;
space: string; space?: string;
page_id?: string; page_id?: string;
is_cloud?: boolean; is_cloud?: boolean;
index_recursively?: boolean; index_recursively?: boolean;
cql_query?: string;
} }
export interface JiraConfig { export interface JiraConfig {