From bfde5fd809894ef96c859167af0bf887503d6b92 Mon Sep 17 00:00:00 2001 From: Dan Brown Date: Thu, 6 Jul 2023 10:50:27 +0100 Subject: [PATCH] Got basic bookstack connector setup UI/backend working --- backend/danswer/configs/constants.py | 1 + .../danswer/connectors/bookstack/__init__.py | 0 .../danswer/connectors/bookstack/connector.py | 118 +++++++++ backend/danswer/connectors/factory.py | 2 + .../app/admin/connectors/bookstack/page.tsx | 243 ++++++++++++++++++ web/src/app/admin/layout.tsx | 10 + web/src/components/icons/icons.tsx | 8 + web/src/components/source.tsx | 7 + web/src/lib/types.ts | 10 + 9 files changed, 399 insertions(+) create mode 100644 backend/danswer/connectors/bookstack/__init__.py create mode 100644 backend/danswer/connectors/bookstack/connector.py create mode 100644 web/src/app/admin/connectors/bookstack/page.tsx diff --git a/backend/danswer/configs/constants.py b/backend/danswer/configs/constants.py index cb73f8146..9ede56eed 100644 --- a/backend/danswer/configs/constants.py +++ b/backend/danswer/configs/constants.py @@ -21,6 +21,7 @@ class DocumentSource(str, Enum): WEB = "web" GOOGLE_DRIVE = "google_drive" GITHUB = "github" + BOOKSTACK = "bookstack" CONFLUENCE = "confluence" SLAB = "slab" JIRA = "jira" diff --git a/backend/danswer/connectors/bookstack/__init__.py b/backend/danswer/connectors/bookstack/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/backend/danswer/connectors/bookstack/connector.py b/backend/danswer/connectors/bookstack/connector.py new file mode 100644 index 000000000..9a6dee65f --- /dev/null +++ b/backend/danswer/connectors/bookstack/connector.py @@ -0,0 +1,118 @@ +from collections.abc import Callable +from collections.abc import Generator +from datetime import datetime +from datetime import timezone +from typing import Any +from urllib.parse import urlparse + +from atlassian import Confluence # type:ignore +from bs4 import BeautifulSoup +from danswer.configs.app_configs import INDEX_BATCH_SIZE 
+from danswer.configs.constants import DocumentSource
+from danswer.configs.constants import HTML_SEPARATOR
+from danswer.connectors.interfaces import GenerateDocumentsOutput
+from danswer.connectors.interfaces import LoadConnector
+from danswer.connectors.interfaces import PollConnector
+from danswer.connectors.interfaces import SecondsSinceUnixEpoch
+from danswer.connectors.models import Document
+from danswer.connectors.models import Section
+
+
+class BookstackClientNotSetUpError(PermissionError):
+    """Raised when the connector is used before an API client is available."""
+
+    def __init__(self) -> None:
+        super().__init__(
+            "BookStack Client is not set up, was load_credentials called?"
+        )
+
+
+class BookstackConnector(LoadConnector, PollConnector):
+    """Connector that turns fetched wiki pages into batches of ``Document``.
+
+    NOTE(review): the fetching code still drives a Confluence-style client
+    through ``self.confluence_client`` and reads ``self.space``,
+    ``self.wiki_base`` and ``_comment_dfs``, none of which this module
+    defines. Because ``load_credentials`` does not build a client yet, every
+    fetch currently raises ``BookstackClientNotSetUpError``.
+    """
+
+    def __init__(
+        self,
+        batch_size: int = INDEX_BATCH_SIZE,
+    ) -> None:
+        """Store the batch size and start without an API client.
+
+        Args:
+            batch_size: Maximum number of pages requested per client call.
+        """
+        self.batch_size = batch_size
+        # Start with no client so the "not set up" guards below raise
+        # BookstackClientNotSetUpError rather than an AttributeError.
+        self.confluence_client: Confluence | None = None
+
+    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
+        """Read the BookStack API credentials supplied for this connector.
+
+        Args:
+            credentials: Mapping that must contain ``bookstack_base_url``,
+                ``bookstack_api_token_id`` and ``bookstack_api_token_secret``.
+
+        Returns:
+            Always ``None``.
+
+        Raises:
+            KeyError: If any of the three expected keys is missing.
+        """
+        # TODO: build the BookStack API client from these values. For now the
+        # lookups only verify that each expected key is present.
+        base_url = credentials["bookstack_base_url"]
+        api_token_id = credentials["bookstack_api_token_id"]
+        api_token_secret = credentials["bookstack_api_token_secret"]
+        return None
+
+    def _get_doc_batch(
+        self, start_ind: int, time_filter: Callable[[datetime], bool] | None = None
+    ) -> tuple[list[Document], int]:
+        """Fetch one batch of pages and convert the matching ones to Documents.
+
+        Args:
+            start_ind: Offset passed to the client as ``start``.
+            time_filter: Optional predicate on a page's last-modified time;
+                pages for which it returns a falsy value are skipped.
+
+        Returns:
+            ``(documents, fetched)`` where ``fetched`` is the number of pages
+            the client returned before filtering. Callers use it to advance
+            the offset and to detect the final batch.
+
+        Raises:
+            BookstackClientNotSetUpError: If no client has been configured.
+        """
+        doc_batch: list[Document] = []
+
+        if self.confluence_client is None:
+            raise BookstackClientNotSetUpError()
+
+        # NOTE(review): self.space is never assigned in this module.
+        batch = self.confluence_client.get_all_pages_from_space(
+            self.space,
+            start=start_ind,
+            limit=self.batch_size,
+            expand="body.storage.value,version",
+        )
+
+        for page in batch:
+            last_modified_str = page["version"]["when"]
+            last_modified = datetime.fromisoformat(last_modified_str)
+
+            if time_filter is None or time_filter(last_modified):
+                page_html = page["body"]["storage"]["value"]
+                soup = BeautifulSoup(page_html, "html.parser")
+                # Indexed text is the title followed by the page body as text.
+                page_text = page.get("title", "") + "\n" + soup.get_text(HTML_SEPARATOR)
+                comment_pages = self.confluence_client.get_page_child_by_type(
+                    page["id"],
+                    type="comment",
+                    start=None,
+                    limit=None,
+                    expand="body.storage.value",
+                )
+                # NOTE(review): _comment_dfs is neither defined nor imported
+                # in this module.
+                comments_text = _comment_dfs("", comment_pages, self.confluence_client)
+                page_text += comments_text
+
+                # NOTE(review): self.wiki_base is never assigned in this module.
+                page_url = self.wiki_base + page["_links"]["webui"]
+
+                doc_batch.append(
+                    Document(
+                        id=page_url,
+                        sections=[Section(link=page_url, text=page_text)],
+                        # Tag with this connector's own source so documents
+                        # are attributed to BookStack, not Confluence.
+                        source=DocumentSource.BOOKSTACK,
+                        semantic_identifier=page["title"],
+                        metadata={},
+                    )
+                )
+        return doc_batch, len(batch)
+
+    def load_from_state(self) -> GenerateDocumentsOutput:
+        """Yield all pages as batches of Documents, without time filtering.
+
+        Yields:
+            Non-empty lists of ``Document`` objects, one list per fetched
+            batch.
+
+        Raises:
+            BookstackClientNotSetUpError: If no client has been configured.
+        """
+        if self.confluence_client is None:
+            raise BookstackClientNotSetUpError()
+
+        start_ind = 0
+        while True:
+            doc_batch, num_pages = self._get_doc_batch(start_ind)
+            start_ind += num_pages
+            if doc_batch:
+                yield doc_batch
+
+            # A short batch means there is nothing left to fetch.
+            if num_pages < self.batch_size:
+                break
+
+    def poll_source(
+        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
+    ) -> GenerateDocumentsOutput:
+        """Yield batches of Documents last modified within ``[start, end]``.
+
+        Args:
+            start: Inclusive lower bound, in seconds since the Unix epoch.
+            end: Inclusive upper bound, in seconds since the Unix epoch.
+
+        Yields:
+            Non-empty lists of ``Document`` objects whose pages pass the time
+            filter.
+
+        Raises:
+            BookstackClientNotSetUpError: If no client has been configured.
+        """
+        if self.confluence_client is None:
+            raise BookstackClientNotSetUpError()
+
+        # NOTE(review): these bounds are timezone-aware (UTC); the comparison
+        # presumes the page timestamps are aware as well - confirm.
+        start_time = datetime.fromtimestamp(start, tz=timezone.utc)
+        end_time = datetime.fromtimestamp(end, tz=timezone.utc)
+
+        start_ind = 0
+        while True:
+            doc_batch, num_pages = self._get_doc_batch(
+                start_ind, time_filter=lambda t: start_time <= t <= end_time
+            )
+            start_ind += num_pages
+            if doc_batch:
+                yield doc_batch
+
+            # A short batch means there is nothing left to fetch.
+            if num_pages < self.batch_size:
+                break
diff --git a/backend/danswer/connectors/factory.py b/backend/danswer/connectors/factory.py index abacb0174..0cce12848 100644 --- a/backend/danswer/connectors/factory.py +++ b/backend/danswer/connectors/factory.py @@ -2,6 +2,7 @@ from typing import Any from typing import Type from danswer.configs.constants import DocumentSource +from danswer.connectors.bookstack.connector import BookstackConnector from danswer.connectors.confluence.connector import ConfluenceConnector from danswer.connectors.danswer_jira.connector import JiraConnector from danswer.connectors.file.connector import LocalFileConnector @@ -37,6 +38,7 @@ def identify_connector_class( }, DocumentSource.GITHUB: GithubConnector, DocumentSource.GOOGLE_DRIVE: 
GoogleDriveConnector, + DocumentSource.BOOKSTACK: BookstackConnector, DocumentSource.CONFLUENCE: ConfluenceConnector, DocumentSource.JIRA: JiraConnector, DocumentSource.SLAB: SlabConnector, diff --git a/web/src/app/admin/connectors/bookstack/page.tsx b/web/src/app/admin/connectors/bookstack/page.tsx new file mode 100644 index 000000000..2c38bfe6a --- /dev/null +++ b/web/src/app/admin/connectors/bookstack/page.tsx @@ -0,0 +1,243 @@ +"use client"; + +import * as Yup from "yup"; +import { BookstackIcon, TrashIcon } from "@/components/icons/icons"; +import { TextFormField } from "@/components/admin/connectors/Field"; +import { HealthCheckBanner } from "@/components/health/healthcheck"; +import { CredentialForm } from "@/components/admin/connectors/CredentialForm"; +import { + BookstackCredentialJson, + BookstackConfig, + Credential, + ConnectorIndexingStatus, ConfluenceConfig, +} from "@/lib/types"; +import useSWR, { useSWRConfig } from "swr"; +import { fetcher } from "@/lib/fetcher"; +import { LoadingAnimation } from "@/components/Loading"; +import { deleteCredential, linkCredential } from "@/lib/credential"; +import { ConnectorForm } from "@/components/admin/connectors/ConnectorForm"; +import { ConnectorsTable } from "@/components/admin/connectors/table/ConnectorsTable"; +import { usePopup } from "@/components/admin/connectors/Popup"; + +const Main = () => { + const { popup, setPopup } = usePopup(); + + const { mutate } = useSWRConfig(); + const { + data: connectorIndexingStatuses, + isLoading: isConnectorIndexingStatusesLoading, + error: isConnectorIndexingStatusesError, + } = useSWR[]>( + "/api/manage/admin/connector/indexing-status", + fetcher + ); + const { + data: credentialsData, + isLoading: isCredentialsLoading, + error: isCredentialsError, + } = useSWR[]>( + "/api/manage/credential", + fetcher + ); + + if ( + (!connectorIndexingStatuses && isConnectorIndexingStatusesLoading) || + (!credentialsData && isCredentialsLoading) + ) { + return ; + } + + if 
(isConnectorIndexingStatusesError || !connectorIndexingStatuses) { + return
Failed to load connectors
; + } + + if (isCredentialsError || !credentialsData) { + return
Failed to load credentials
; + } + + const bookstackConnectorIndexingStatuses = connectorIndexingStatuses.filter( + (connectorIndexingStatus) => + connectorIndexingStatus.connector.source === "bookstack" + ); + const bookstackCredential = credentialsData.filter( + (credential) => credential.credential_json?.bookstack_api_token_id + )[0]; + + return ( + <> + {popup} +

+ Step 1: Provide your access token +

+ + {bookstackCredential ? ( + <> +
+

Existing API Token:

+

+ {bookstackCredential.credential_json?.bookstack_api_token_id} +

+ +
+ + ) : ( + <> +

+ To get started you'll need API token details for your BookStack instance. + You can get these by editing your (or another) user account in BookStack + and creating a token via the "API Tokens" section at the bottom. + Your user account will require to be assigned a BookStack role which + has the "Access system API" system permission assigned. +

+
+ + formBody={ + <> + + + + + } + validationSchema={Yup.object().shape({ + bookstack_base_url: Yup.string().required( + "Please enter the base URL for your BookStack instance" + ), + bookstack_api_token_id: Yup.string().required( + "Please enter your BookStack API token ID" + ), + bookstack_api_token_secret: Yup.string().required( + "Please enter your BookStack API token secret" + ), + })} + initialValues={{ + bookstack_base_url: "", + bookstack_api_token_id: "", + bookstack_api_token_secret: "", + }} + onSubmit={(isSuccess) => { + if (isSuccess) { + mutate("/api/manage/credential"); + mutate("/api/manage/admin/connector/indexing-status"); + } + }} + /> +
+ + )} + + {bookstackConnectorIndexingStatuses.length > 0 && ( + <> +

+ BookStack indexing status +

+

+ The latest page, chapter, book and shelf changes are fetched + every 10 minutes. +

+
+ + connectorIndexingStatuses={ + bookstackConnectorIndexingStatuses + } + liveCredential={bookstackCredential} + getCredential={(credential) => { + return ( +
+

+ {credential.credential_json.bookstack_api_token_id} +

+
+ ); + }} + onCredentialLink={async (connectorId) => { + if (bookstackCredential) { + await linkCredential( + connectorId, + bookstackCredential.id + ); + mutate("/api/manage/admin/connector/indexing-status"); + } + }} + onUpdate={() => + mutate("/api/manage/admin/connector/indexing-status") + } + /> +
+ + )} + +
+

Setup Connector

+ + nameBuilder={(values) => + `BookStackConnector` + } + source="bookstack" + inputType="load_state" + formBody={ + <> + + } + validationSchema={Yup.object().shape({ + })} + initialValues={{ + }} + refreshFreq={10 * 60} // 10 minutes + onSubmit={async (isSuccess, responseJson) => { + if (isSuccess && responseJson) { + await linkCredential( + responseJson.id, + bookstackCredential.id + ); + mutate("/api/manage/admin/connector/indexing-status"); + } + }} + /> +
+ + {!bookstackCredential && ( + <> +

+ Please provide your API details in Step 1 first! Once done with that, + you'll be able to see indexing status. +

+ + )} + + ); +}; + +export default function Page() { + return ( +
+
+ +
+
+ +

BookStack

+
+
+
+ ); +} diff --git a/web/src/app/admin/layout.tsx b/web/src/app/admin/layout.tsx index 3a0f4c42b..e8e89db62 100644 --- a/web/src/app/admin/layout.tsx +++ b/web/src/app/admin/layout.tsx @@ -7,6 +7,7 @@ import { GoogleDriveIcon, SlackIcon, KeyIcon, + BookstackIcon, ConfluenceIcon, FileIcon, JiraIcon, @@ -83,6 +84,15 @@ export default async function AdminLayout({ ), link: "/admin/connectors/google-drive", }, + { + name: ( +
+ +
BookStack
+
+ ), + link: "/admin/connectors/bookstack", + }, { name: (
diff --git a/web/src/components/icons/icons.tsx b/web/src/components/icons/icons.tsx index c6c6cdcc7..b8593b7f4 100644 --- a/web/src/components/icons/icons.tsx +++ b/web/src/components/icons/icons.tsx @@ -13,6 +13,7 @@ import { Brain, } from "@phosphor-icons/react"; import { + SiBookstack, SiConfluence, SiGithub, SiGoogledrive, @@ -113,6 +114,13 @@ export const GoogleDriveIcon = ({ return ; }; +export const BookstackIcon = ({ + size = "16", + className = defaultTailwindCSS, +}: IconProps) => { + return ; +}; + export const ConfluenceIcon = ({ size = "16", className = defaultTailwindCSS, diff --git a/web/src/components/source.tsx b/web/src/components/source.tsx index b9ef9e094..360ef6677 100644 --- a/web/src/components/source.tsx +++ b/web/src/components/source.tsx @@ -1,5 +1,6 @@ import { ValidSources } from "@/lib/types"; import { + BookstackIcon, ConfluenceIcon, FileIcon, GithubIcon, @@ -48,6 +49,12 @@ export const getSourceMetadata = (sourceType: ValidSources): SourceMetadata => { displayName: "Github PRs", adminPageLink: "/admin/connectors/github", }; + case "bookstack": + return { + icon: BookstackIcon, + displayName: "BookStack", + adminPageLink: "/admin/connectors/bookstack", + }; case "confluence": return { icon: ConfluenceIcon, diff --git a/web/src/lib/types.ts b/web/src/lib/types.ts index 9c3453275..68274bb50 100644 --- a/web/src/lib/types.ts +++ b/web/src/lib/types.ts @@ -12,6 +12,7 @@ export type ValidSources = | "github" | "slack" | "google_drive" + | "bookstack" | "confluence" | "jira" | "slab" @@ -44,6 +45,9 @@ export interface GithubConfig { repo_name: string; } +export interface BookstackConfig { +} + export interface ConfluenceConfig { wiki_page_url: string; } @@ -90,6 +94,12 @@ export interface GithubCredentialJson { github_access_token: string; } +export interface BookstackCredentialJson { + bookstack_base_url: string; + bookstack_api_token_id: string; + bookstack_api_token_secret: string; +} + export interface ConfluenceCredentialJson { 
confluence_username: string; confluence_access_token: string;