Got basic bookstack connector setup UI/backend working

This commit is contained in:
Dan Brown
2023-07-06 10:50:27 +01:00
parent 7f222f376d
commit bfde5fd809
9 changed files with 399 additions and 0 deletions

View File

@@ -21,6 +21,7 @@ class DocumentSource(str, Enum):
WEB = "web" WEB = "web"
GOOGLE_DRIVE = "google_drive" GOOGLE_DRIVE = "google_drive"
GITHUB = "github" GITHUB = "github"
BOOKSTACK = "bookstack"
CONFLUENCE = "confluence" CONFLUENCE = "confluence"
SLAB = "slab" SLAB = "slab"
JIRA = "jira" JIRA = "jira"

View File

@@ -0,0 +1,118 @@
from collections.abc import Callable
from collections.abc import Generator
from datetime import datetime
from datetime import timezone
from typing import Any
from urllib.parse import urlparse
from atlassian import Confluence # type:ignore
from bs4 import BeautifulSoup
from danswer.configs.app_configs import INDEX_BATCH_SIZE
from danswer.configs.constants import DocumentSource
from danswer.configs.constants import HTML_SEPARATOR
from danswer.connectors.interfaces import GenerateDocumentsOutput
from danswer.connectors.interfaces import LoadConnector
from danswer.connectors.interfaces import PollConnector
from danswer.connectors.interfaces import SecondsSinceUnixEpoch
from danswer.connectors.models import Document
from danswer.connectors.models import Section
class BookstackClientNotSetUpError(PermissionError):
    """Raised when the BookStack connector is used before its client is set up.

    The message points the caller at ``load_credentials`` as the step that is
    expected to have run first.
    """

    def __init__(self) -> None:
        # The message previously named Confluence (copy-paste leftover), which
        # sent anyone debugging a BookStack failure to the wrong connector.
        super().__init__(
            "BookStack Client is not set up, was load_credentials called?"
        )
class BookstackConnector(LoadConnector, PollConnector):
    """Load/poll connector intended to index pages from a BookStack instance.

    NOTE(review): the fetching logic below still drives a Confluence-style
    client (``get_all_pages_from_space`` / ``get_page_child_by_type``) and no
    code in this class ever builds such a client, so ``load_from_state`` and
    ``poll_source`` currently always raise ``BookstackClientNotSetUpError``.
    A real BookStack API client still has to be wired in.
    """

    def __init__(
        self,
        batch_size: int = INDEX_BATCH_SIZE,
    ) -> None:
        """Create the connector.

        Args:
            batch_size: Maximum number of pages requested per API call; also
                used to detect the last page of results.
        """
        self.batch_size = batch_size
        # Declared up front so that the ``is None`` guards below raise
        # BookstackClientNotSetUpError rather than AttributeError.
        self.confluence_client: Any | None = None
        # Populated by load_credentials().
        self.base_url: str | None = None
        self.api_token_id: str | None = None
        self.api_token_secret: str | None = None

    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
        """Store the BookStack credentials on the connector.

        Args:
            credentials: Must contain ``bookstack_base_url``,
                ``bookstack_api_token_id`` and ``bookstack_api_token_secret``.

        Returns:
            Always ``None``.

        Raises:
            KeyError: If any of the three keys is missing.
        """
        # Read every key before assigning anything, so a missing key leaves
        # the connector state untouched.
        base_url = credentials["bookstack_base_url"]
        api_token_id = credentials["bookstack_api_token_id"]
        api_token_secret = credentials["bookstack_api_token_secret"]

        self.base_url = base_url
        self.api_token_id = api_token_id
        self.api_token_secret = api_token_secret
        # NOTE(review): no API client is created from these values yet.
        return None

    def _get_doc_batch(
        self, start_ind: int, time_filter: Callable[[datetime], bool] | None = None
    ) -> tuple[list[Document], int]:
        """Fetch one batch of pages and convert the matching ones to Documents.

        Args:
            start_ind: Offset of the first page to request.
            time_filter: Optional predicate on a page's last-modified time;
                pages for which it returns False are left out of the result.

        Returns:
            ``(documents, pages_fetched)``. ``pages_fetched`` counts every page
            returned by the API, including pages the filter rejected, so the
            caller can advance its offset correctly.

        Raises:
            BookstackClientNotSetUpError: If no client has been set up.
        """
        doc_batch: list[Document] = []

        if self.confluence_client is None:
            raise BookstackClientNotSetUpError()

        # NOTE(review): ``self.space`` and ``self.wiki_base`` (used below) are
        # never assigned in this class, and ``_comment_dfs`` is neither defined
        # nor imported in this module; all three are Confluence leftovers.
        batch = self.confluence_client.get_all_pages_from_space(
            self.space,
            start=start_ind,
            limit=self.batch_size,
            expand="body.storage.value,version",
        )

        for page in batch:
            last_modified_str = page["version"]["when"]
            last_modified = datetime.fromisoformat(last_modified_str)

            if time_filter is None or time_filter(last_modified):
                page_html = page["body"]["storage"]["value"]
                soup = BeautifulSoup(page_html, "html.parser")
                # Title first, then the page body flattened to plain text.
                page_text = page.get("title", "") + "\n" + soup.get_text(HTML_SEPARATOR)

                comment_pages = self.confluence_client.get_page_child_by_type(
                    page["id"],
                    type="comment",
                    start=None,
                    limit=None,
                    expand="body.storage.value",
                )
                comments_text = _comment_dfs("", comment_pages, self.confluence_client)
                page_text += comments_text

                page_url = self.wiki_base + page["_links"]["webui"]

                doc_batch.append(
                    Document(
                        id=page_url,
                        sections=[Section(link=page_url, text=page_text)],
                        # Was DocumentSource.CONFLUENCE, which mislabelled
                        # every document produced by this connector.
                        source=DocumentSource.BOOKSTACK,
                        semantic_identifier=page["title"],
                        metadata={},
                    )
                )
        return doc_batch, len(batch)

    def _generate_doc_batches(
        self, time_filter: Callable[[datetime], bool] | None = None
    ) -> GenerateDocumentsOutput:
        """Page through the source, yielding each non-empty document batch."""
        start_ind = 0
        while True:
            doc_batch, num_pages = self._get_doc_batch(start_ind, time_filter)
            start_ind += num_pages
            if doc_batch:
                yield doc_batch

            # A short page of results means the source is exhausted.
            if num_pages < self.batch_size:
                break

    def load_from_state(self) -> GenerateDocumentsOutput:
        """Yield every page of the source as batches of Documents.

        Raises:
            BookstackClientNotSetUpError: If no client has been set up.
        """
        if self.confluence_client is None:
            raise BookstackClientNotSetUpError()

        yield from self._generate_doc_batches()

    def poll_source(
        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
    ) -> GenerateDocumentsOutput:
        """Yield batches of Documents last modified within ``[start, end]``.

        Args:
            start: Inclusive lower bound, seconds since the Unix epoch.
            end: Inclusive upper bound, seconds since the Unix epoch.

        Raises:
            BookstackClientNotSetUpError: If no client has been set up.
        """
        if self.confluence_client is None:
            raise BookstackClientNotSetUpError()

        start_time = datetime.fromtimestamp(start, tz=timezone.utc)
        end_time = datetime.fromtimestamp(end, tz=timezone.utc)

        yield from self._generate_doc_batches(
            time_filter=lambda t: start_time <= t <= end_time
        )

View File

@@ -2,6 +2,7 @@ from typing import Any
from typing import Type from typing import Type
from danswer.configs.constants import DocumentSource from danswer.configs.constants import DocumentSource
from danswer.connectors.bookstack.connector import BookstackConnector
from danswer.connectors.confluence.connector import ConfluenceConnector from danswer.connectors.confluence.connector import ConfluenceConnector
from danswer.connectors.danswer_jira.connector import JiraConnector from danswer.connectors.danswer_jira.connector import JiraConnector
from danswer.connectors.file.connector import LocalFileConnector from danswer.connectors.file.connector import LocalFileConnector
@@ -37,6 +38,7 @@ def identify_connector_class(
}, },
DocumentSource.GITHUB: GithubConnector, DocumentSource.GITHUB: GithubConnector,
DocumentSource.GOOGLE_DRIVE: GoogleDriveConnector, DocumentSource.GOOGLE_DRIVE: GoogleDriveConnector,
DocumentSource.BOOKSTACK: BookstackConnector,
DocumentSource.CONFLUENCE: ConfluenceConnector, DocumentSource.CONFLUENCE: ConfluenceConnector,
DocumentSource.JIRA: JiraConnector, DocumentSource.JIRA: JiraConnector,
DocumentSource.SLAB: SlabConnector, DocumentSource.SLAB: SlabConnector,

View File

@@ -0,0 +1,243 @@
"use client";
import * as Yup from "yup";
import { BookstackIcon, TrashIcon } from "@/components/icons/icons";
import { TextFormField } from "@/components/admin/connectors/Field";
import { HealthCheckBanner } from "@/components/health/healthcheck";
import { CredentialForm } from "@/components/admin/connectors/CredentialForm";
import {
BookstackCredentialJson,
BookstackConfig,
Credential,
ConnectorIndexingStatus, ConfluenceConfig,
} from "@/lib/types";
import useSWR, { useSWRConfig } from "swr";
import { fetcher } from "@/lib/fetcher";
import { LoadingAnimation } from "@/components/Loading";
import { deleteCredential, linkCredential } from "@/lib/credential";
import { ConnectorForm } from "@/components/admin/connectors/ConnectorForm";
import { ConnectorsTable } from "@/components/admin/connectors/table/ConnectorsTable";
import { usePopup } from "@/components/admin/connectors/Popup";
/**
 * Body of the BookStack connector admin page.
 *
 * Loads the connector indexing statuses and the stored credentials, then
 * renders three parts: the credential step (existing token or entry form),
 * the indexing-status table for existing BookStack connectors, and the
 * connector setup form.
 */
const Main = () => {
  const { popup, setPopup } = usePopup();
  const { mutate } = useSWRConfig();

  const {
    data: connectorIndexingStatuses,
    isLoading: isConnectorIndexingStatusesLoading,
    error: isConnectorIndexingStatusesError,
  } = useSWR<ConnectorIndexingStatus<any>[]>(
    "/api/manage/admin/connector/indexing-status",
    fetcher
  );
  const {
    data: credentialsData,
    isLoading: isCredentialsLoading,
    error: isCredentialsError,
  } = useSWR<Credential<BookstackCredentialJson>[]>(
    "/api/manage/credential",
    fetcher
  );

  // Show the spinner only while there is no data at all for a request.
  if (
    (!connectorIndexingStatuses && isConnectorIndexingStatusesLoading) ||
    (!credentialsData && isCredentialsLoading)
  ) {
    return <LoadingAnimation text="Loading" />;
  }

  if (isConnectorIndexingStatusesError || !connectorIndexingStatuses) {
    return <div>Failed to load connectors</div>;
  }

  if (isCredentialsError || !credentialsData) {
    return <div>Failed to load credentials</div>;
  }

  const bookstackConnectorIndexingStatuses = connectorIndexingStatuses.filter(
    (connectorIndexingStatus) =>
      connectorIndexingStatus.connector.source === "bookstack"
  );
  // First credential carrying a BookStack token id; undefined at runtime when
  // no BookStack credential has been saved yet.
  const bookstackCredential = credentialsData.filter(
    (credential) => credential.credential_json?.bookstack_api_token_id
  )[0];

  return (
    <>
      {popup}
      <h2 className="font-bold mb-2 mt-6 ml-auto mr-auto">
        Step 1: Provide your access token
      </h2>

      {bookstackCredential ? (
        <>
          <div className="flex mb-1 text-sm">
            <p className="my-auto">Existing API Token: </p>
            <p className="ml-1 italic my-auto max-w-md">
              {bookstackCredential.credential_json?.bookstack_api_token_id}
            </p>
            <button
              className="ml-1 hover:bg-gray-700 rounded-full p-1"
              onClick={async () => {
                // Refuse to delete while any BookStack connector still exists.
                if (bookstackConnectorIndexingStatuses.length > 0) {
                  setPopup({
                    type: "error",
                    message:
                      "Must delete all connectors before deleting credentials",
                  });
                  return;
                }
                await deleteCredential(bookstackCredential.id);
                mutate("/api/manage/credential");
              }}
            >
              <TrashIcon />
            </button>
          </div>
        </>
      ) : (
        <>
          <p className="text-sm">
            To get started you'll need API token details for your BookStack instance.
            You can get these by editing your (or another) user account in BookStack
            and creating a token via the "API Tokens" section at the bottom.
            Your user account will require to be assigned a BookStack role which
            has the "Access system API" system permission assigned.
          </p>
          <div className="border-solid border-gray-600 border rounded-md p-6 mt-2 mb-4">
            <CredentialForm<BookstackCredentialJson>
              formBody={
                <>
                  <TextFormField name="bookstack_base_url" label="Instance Base URL:" />
                  <TextFormField name="bookstack_api_token_id" label="API Token ID:" />
                  <TextFormField
                    name="bookstack_api_token_secret"
                    label="API Token Secret:"
                    type="password"
                  />
                </>
              }
              validationSchema={Yup.object().shape({
                bookstack_base_url: Yup.string().required(
                  "Please enter the base URL for your BookStack instance"
                ),
                bookstack_api_token_id: Yup.string().required(
                  "Please enter your BookStack API token ID"
                ),
                bookstack_api_token_secret: Yup.string().required(
                  "Please enter your BookStack API token secret"
                ),
              })}
              initialValues={{
                bookstack_base_url: "",
                bookstack_api_token_id: "",
                bookstack_api_token_secret: "",
              }}
              onSubmit={(isSuccess) => {
                if (isSuccess) {
                  mutate("/api/manage/credential");
                  mutate("/api/manage/admin/connector/indexing-status");
                }
              }}
            />
          </div>
        </>
      )}

      {bookstackConnectorIndexingStatuses.length > 0 && (
        <>
          <h2 className="font-bold mb-2 mt-6 ml-auto mr-auto">
            BookStack indexing status
          </h2>
          <p className="text-sm mb-2">
            The latest page, chapter, book and shelf changes are fetched
            every 10 minutes.
          </p>
          <div className="mb-2">
            <ConnectorsTable<BookstackConfig, BookstackCredentialJson>
              connectorIndexingStatuses={
                bookstackConnectorIndexingStatuses
              }
              liveCredential={bookstackCredential}
              getCredential={(credential) => {
                return (
                  <div>
                    <p>
                      {credential.credential_json.bookstack_api_token_id}
                    </p>
                  </div>
                );
              }}
              onCredentialLink={async (connectorId) => {
                if (bookstackCredential) {
                  await linkCredential(
                    connectorId,
                    bookstackCredential.id
                  );
                  mutate("/api/manage/admin/connector/indexing-status");
                }
              }}
              onUpdate={() =>
                mutate("/api/manage/admin/connector/indexing-status")
              }
            />
          </div>
        </>
      )}

      {/*
        Only offer connector setup once a credential exists: onSubmit links
        the new connector to bookstackCredential.id, which previously threw
        on undefined when the form was submitted before Step 1 was completed.
      */}
      {bookstackCredential && (
        <div className="border-solid border-gray-600 border rounded-md p-6 mt-4">
          <h2 className="font-bold mb-3">Setup Connector</h2>
          <ConnectorForm<BookstackConfig>
            nameBuilder={() => "BookStackConnector"}
            source="bookstack"
            inputType="load_state"
            formBody={<></>}
            validationSchema={Yup.object().shape({})}
            initialValues={{}}
            refreshFreq={10 * 60} // 10 minutes
            onSubmit={async (isSuccess, responseJson) => {
              if (isSuccess && responseJson) {
                await linkCredential(
                  responseJson.id,
                  bookstackCredential.id
                );
                mutate("/api/manage/admin/connector/indexing-status");
              }
            }}
          />
        </div>
      )}

      {!bookstackCredential && (
        <>
          <p className="text-sm mb-4">
            Please provide your API details in Step 1 first! Once done with that,
            you'll be able to see indexing status.
          </p>
        </>
      )}
    </>
  );
};
/**
 * BookStack connector admin page: health-check banner, a title bar with the
 * BookStack icon, and the main connector/credential management body.
 */
export default function Page() {
  // Title bar: icon next to the page heading, underlined by a bottom border.
  const titleBar = (
    <div className="border-solid border-gray-600 border-b mb-4 pb-2 flex">
      <BookstackIcon size="32" />
      <h1 className="text-3xl font-bold pl-2">BookStack</h1>
    </div>
  );

  return (
    <div className="mx-auto container">
      <div className="mb-4">
        <HealthCheckBanner />
      </div>
      {titleBar}
      <Main />
    </div>
  );
}

View File

@@ -7,6 +7,7 @@ import {
GoogleDriveIcon, GoogleDriveIcon,
SlackIcon, SlackIcon,
KeyIcon, KeyIcon,
BookstackIcon,
ConfluenceIcon, ConfluenceIcon,
FileIcon, FileIcon,
JiraIcon, JiraIcon,
@@ -83,6 +84,15 @@ export default async function AdminLayout({
), ),
link: "/admin/connectors/google-drive", link: "/admin/connectors/google-drive",
}, },
{
name: (
<div className="flex">
<BookstackIcon size="16" />
<div className="ml-1">BookStack</div>
</div>
),
link: "/admin/connectors/bookstack",
},
{ {
name: ( name: (
<div className="flex"> <div className="flex">

View File

@@ -13,6 +13,7 @@ import {
Brain, Brain,
} from "@phosphor-icons/react"; } from "@phosphor-icons/react";
import { import {
SiBookstack,
SiConfluence, SiConfluence,
SiGithub, SiGithub,
SiGoogledrive, SiGoogledrive,
@@ -113,6 +114,13 @@ export const GoogleDriveIcon = ({
return <SiGoogledrive size={size} className={className} />; return <SiGoogledrive size={size} className={className} />;
}; };
/**
 * BookStack brand icon.
 *
 * Falls back to size "16" and the shared default Tailwind classes when the
 * corresponding prop is omitted.
 */
export const BookstackIcon = (props: IconProps) => {
  const { size = "16", className = defaultTailwindCSS } = props;
  return <SiBookstack className={className} size={size} />;
};
export const ConfluenceIcon = ({ export const ConfluenceIcon = ({
size = "16", size = "16",
className = defaultTailwindCSS, className = defaultTailwindCSS,

View File

@@ -1,5 +1,6 @@
import { ValidSources } from "@/lib/types"; import { ValidSources } from "@/lib/types";
import { import {
BookstackIcon,
ConfluenceIcon, ConfluenceIcon,
FileIcon, FileIcon,
GithubIcon, GithubIcon,
@@ -48,6 +49,12 @@ export const getSourceMetadata = (sourceType: ValidSources): SourceMetadata => {
displayName: "Github PRs", displayName: "Github PRs",
adminPageLink: "/admin/connectors/github", adminPageLink: "/admin/connectors/github",
}; };
case "bookstack":
return {
icon: BookstackIcon,
displayName: "BookStack",
adminPageLink: "/admin/connectors/bookstack",
};
case "confluence": case "confluence":
return { return {
icon: ConfluenceIcon, icon: ConfluenceIcon,

View File

@@ -12,6 +12,7 @@ export type ValidSources =
| "github" | "github"
| "slack" | "slack"
| "google_drive" | "google_drive"
| "bookstack"
| "confluence" | "confluence"
| "jira" | "jira"
| "slab" | "slab"
@@ -44,6 +45,9 @@ export interface GithubConfig {
repo_name: string; repo_name: string;
} }
/**
 * Connector-specific settings for a BookStack connector.
 * Currently declares no fields.
 */
export interface BookstackConfig {
}
export interface ConfluenceConfig { export interface ConfluenceConfig {
wiki_page_url: string; wiki_page_url: string;
} }
@@ -90,6 +94,12 @@ export interface GithubCredentialJson {
github_access_token: string; github_access_token: string;
} }
/**
 * Credential payload for a BookStack connector.
 * The keys match the names used by the BookStack credential form fields.
 */
export interface BookstackCredentialJson {
/** Base URL of the BookStack instance. */
bookstack_base_url: string;
/** ID part of the BookStack API token. */
bookstack_api_token_id: string;
/** Secret part of the BookStack API token. */
bookstack_api_token_secret: string;
}
export interface ConfluenceCredentialJson { export interface ConfluenceCredentialJson {
confluence_username: string; confluence_username: string;
confluence_access_token: string; confluence_access_token: string;