Got basic bookstack connector setup UI/backend working

This commit is contained in:
Dan Brown 2023-07-06 10:50:27 +01:00
parent 7f222f376d
commit bfde5fd809
No known key found for this signature in database
GPG Key ID: 46D9F943C24A2EF9
9 changed files with 399 additions and 0 deletions

View File

@ -21,6 +21,7 @@ class DocumentSource(str, Enum):
WEB = "web"
GOOGLE_DRIVE = "google_drive"
GITHUB = "github"
BOOKSTACK = "bookstack"
CONFLUENCE = "confluence"
SLAB = "slab"
JIRA = "jira"

View File

@ -0,0 +1,118 @@
from collections.abc import Callable
from collections.abc import Generator
from datetime import datetime
from datetime import timezone
from typing import Any
from urllib.parse import urlparse
from atlassian import Confluence # type:ignore
from bs4 import BeautifulSoup
from danswer.configs.app_configs import INDEX_BATCH_SIZE
from danswer.configs.constants import DocumentSource
from danswer.configs.constants import HTML_SEPARATOR
from danswer.connectors.interfaces import GenerateDocumentsOutput
from danswer.connectors.interfaces import LoadConnector
from danswer.connectors.interfaces import PollConnector
from danswer.connectors.interfaces import SecondsSinceUnixEpoch
from danswer.connectors.models import Document
from danswer.connectors.models import Section
class BookstackClientNotSetUpError(PermissionError):
    """Raised when the BookStack connector is used before its client exists.

    The message points the operator at the most likely cause: the connector
    was asked for documents before ``load_credentials`` was called.
    """

    def __init__(self) -> None:
        # The original message named Confluence (copy-paste from the
        # Confluence connector), which is misleading in BookStack logs.
        super().__init__(
            "BookStack Client is not set up, was load_credentials called?"
        )
class BookstackConnector(LoadConnector, PollConnector):
    """Load/poll connector registered for the BookStack document source.

    NOTE(review): the fetching logic below still talks to a Confluence-style
    client (``self.confluence_client``, ``self.space``, ``self.wiki_base``)
    and nothing in this class ever creates that client, so every fetch
    currently raises ``BookstackClientNotSetUpError``. The BookStack API
    calls still need to be implemented.
    """

    def __init__(
        self,
        batch_size: int = INDEX_BATCH_SIZE,
    ) -> None:
        """Create a connector that fetches ``batch_size`` pages per request."""
        self.batch_size = batch_size
        # Initialise every attribute the other methods read, so that the
        # "client is None" guards raise BookstackClientNotSetUpError rather
        # than an AttributeError on a never-assigned attribute.
        self.confluence_client: Any | None = None
        self.space: str | None = None
        self.wiki_base: str = ""
        # Populated by load_credentials.
        self.base_url: str | None = None
        self.api_token_id: str | None = None
        self.api_token_secret: str | None = None

    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
        """Store the BookStack credentials on the connector.

        Raises:
            KeyError: if any of the three expected credential keys is missing.
        """
        # Read all three first so a missing key leaves the connector state
        # untouched instead of half-updated.
        base_url = credentials["bookstack_base_url"]
        api_token_id = credentials["bookstack_api_token_id"]
        api_token_secret = credentials["bookstack_api_token_secret"]

        self.base_url = base_url
        self.api_token_id = api_token_id
        self.api_token_secret = api_token_secret
        return None

    def _get_doc_batch(
        self, start_ind: int, time_filter: Callable[[datetime], bool] | None = None
    ) -> tuple[list[Document], int]:
        """Fetch one page of results and convert matching pages to Documents.

        Args:
            start_ind: offset of the first page to request.
            time_filter: optional predicate on a page's last-modified time;
                pages for which it returns False are skipped.

        Returns:
            ``(documents, number_of_pages_fetched)``. The second value counts
            every page returned by the client, including filtered-out ones,
            so callers can advance their offset correctly.

        Raises:
            BookstackClientNotSetUpError: if no client has been set up.
        """
        if self.confluence_client is None:
            raise BookstackClientNotSetUpError()

        doc_batch: list[Document] = []
        batch = self.confluence_client.get_all_pages_from_space(
            self.space,
            start=start_ind,
            limit=self.batch_size,
            expand="body.storage.value,version",
        )

        for page in batch:
            last_modified_str = page["version"]["when"]
            last_modified = datetime.fromisoformat(last_modified_str)

            if time_filter is None or time_filter(last_modified):
                page_html = page["body"]["storage"]["value"]
                soup = BeautifulSoup(page_html, "html.parser")
                page_text = page.get("title", "") + "\n" + soup.get_text(HTML_SEPARATOR)
                comment_pages = self.confluence_client.get_page_child_by_type(
                    page["id"],
                    type="comment",
                    start=None,
                    limit=None,
                    expand="body.storage.value",
                )
                # NOTE(review): _comment_dfs is not defined or imported in the
                # visible part of this module - confirm where it comes from.
                comments_text = _comment_dfs("", comment_pages, self.confluence_client)
                page_text += comments_text

                page_url = self.wiki_base + page["_links"]["webui"]

                doc_batch.append(
                    Document(
                        id=page_url,
                        sections=[Section(link=page_url, text=page_text)],
                        # Was DocumentSource.CONFLUENCE, which mislabelled
                        # every document produced by this connector.
                        source=DocumentSource.BOOKSTACK,
                        semantic_identifier=page["title"],
                        metadata={},
                    )
                )
        return doc_batch, len(batch)

    def _iter_doc_batches(
        self, time_filter: Callable[[datetime], bool] | None = None
    ) -> GenerateDocumentsOutput:
        """Yield non-empty document batches until a short page is returned."""
        start_ind = 0
        while True:
            doc_batch, num_pages = self._get_doc_batch(
                start_ind, time_filter=time_filter
            )
            start_ind += num_pages
            if doc_batch:
                yield doc_batch

            # Fewer pages than requested means the listing is exhausted.
            if num_pages < self.batch_size:
                break

    def load_from_state(self) -> GenerateDocumentsOutput:
        """Yield every available document, in batches.

        Raises:
            BookstackClientNotSetUpError: if no client has been set up.
        """
        if self.confluence_client is None:
            raise BookstackClientNotSetUpError()

        yield from self._iter_doc_batches()

    def poll_source(
        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
    ) -> GenerateDocumentsOutput:
        """Yield documents last modified within ``[start, end]``, in batches.

        ``start`` and ``end`` are converted to UTC datetimes before comparing.

        Raises:
            BookstackClientNotSetUpError: if no client has been set up.
        """
        if self.confluence_client is None:
            raise BookstackClientNotSetUpError()

        start_time = datetime.fromtimestamp(start, tz=timezone.utc)
        end_time = datetime.fromtimestamp(end, tz=timezone.utc)

        yield from self._iter_doc_batches(
            time_filter=lambda t: start_time <= t <= end_time
        )

View File

@ -2,6 +2,7 @@ from typing import Any
from typing import Type
from danswer.configs.constants import DocumentSource
from danswer.connectors.bookstack.connector import BookstackConnector
from danswer.connectors.confluence.connector import ConfluenceConnector
from danswer.connectors.danswer_jira.connector import JiraConnector
from danswer.connectors.file.connector import LocalFileConnector
@ -37,6 +38,7 @@ def identify_connector_class(
},
DocumentSource.GITHUB: GithubConnector,
DocumentSource.GOOGLE_DRIVE: GoogleDriveConnector,
DocumentSource.BOOKSTACK: BookstackConnector,
DocumentSource.CONFLUENCE: ConfluenceConnector,
DocumentSource.JIRA: JiraConnector,
DocumentSource.SLAB: SlabConnector,

View File

@ -0,0 +1,243 @@
"use client";
import * as Yup from "yup";
import { BookstackIcon, TrashIcon } from "@/components/icons/icons";
import { TextFormField } from "@/components/admin/connectors/Field";
import { HealthCheckBanner } from "@/components/health/healthcheck";
import { CredentialForm } from "@/components/admin/connectors/CredentialForm";
import {
BookstackCredentialJson,
BookstackConfig,
Credential,
ConnectorIndexingStatus, ConfluenceConfig,
} from "@/lib/types";
import useSWR, { useSWRConfig } from "swr";
import { fetcher } from "@/lib/fetcher";
import { LoadingAnimation } from "@/components/Loading";
import { deleteCredential, linkCredential } from "@/lib/credential";
import { ConnectorForm } from "@/components/admin/connectors/ConnectorForm";
import { ConnectorsTable } from "@/components/admin/connectors/table/ConnectorsTable";
import { usePopup } from "@/components/admin/connectors/Popup";
/**
 * Body of the BookStack connector admin page.
 *
 * Loads the connector indexing statuses and stored credentials, then renders:
 *  1. the credential step (the existing token with a delete button, or the
 *     credential form),
 *  2. the indexing-status table when at least one BookStack connector exists,
 *  3. the connector setup form - only once a credential exists, because the
 *     form's submit handler links the new connector to that credential.
 */
const Main = () => {
  const { popup, setPopup } = usePopup();

  const { mutate } = useSWRConfig();
  const {
    data: connectorIndexingStatuses,
    isLoading: isConnectorIndexingStatusesLoading,
    error: isConnectorIndexingStatusesError,
  } = useSWR<ConnectorIndexingStatus<any>[]>(
    "/api/manage/admin/connector/indexing-status",
    fetcher
  );
  const {
    data: credentialsData,
    isLoading: isCredentialsLoading,
    error: isCredentialsError,
  } = useSWR<Credential<BookstackCredentialJson>[]>(
    "/api/manage/credential",
    fetcher
  );

  if (
    (!connectorIndexingStatuses && isConnectorIndexingStatusesLoading) ||
    (!credentialsData && isCredentialsLoading)
  ) {
    return <LoadingAnimation text="Loading" />;
  }

  if (isConnectorIndexingStatusesError || !connectorIndexingStatuses) {
    return <div>Failed to load connectors</div>;
  }

  if (isCredentialsError || !credentialsData) {
    return <div>Failed to load credentials</div>;
  }

  const bookstackConnectorIndexingStatuses = connectorIndexingStatuses.filter(
    (connectorIndexingStatus) =>
      connectorIndexingStatus.connector.source === "bookstack"
  );
  // The credentials endpoint returns credentials for every source; the first
  // one carrying a BookStack token id is treated as the BookStack credential.
  // This is undefined at runtime when none has been stored yet.
  const bookstackCredential = credentialsData.filter(
    (credential) => credential.credential_json?.bookstack_api_token_id
  )[0];

  return (
    <>
      {popup}
      <h2 className="font-bold mb-2 mt-6 ml-auto mr-auto">
        Step 1: Provide your access token
      </h2>

      {bookstackCredential ? (
        <>
          <div className="flex mb-1 text-sm">
            <p className="my-auto">Existing API Token: </p>
            <p className="ml-1 italic my-auto max-w-md">
              {bookstackCredential.credential_json?.bookstack_api_token_id}
            </p>
            <button
              className="ml-1 hover:bg-gray-700 rounded-full p-1"
              onClick={async () => {
                // Refuse to delete a credential while connectors exist.
                if (bookstackConnectorIndexingStatuses.length > 0) {
                  setPopup({
                    type: "error",
                    message:
                      "Must delete all connectors before deleting credentials",
                  });
                  return;
                }
                await deleteCredential(bookstackCredential.id);
                mutate("/api/manage/credential");
              }}
            >
              <TrashIcon />
            </button>
          </div>
        </>
      ) : (
        <>
          <p className="text-sm">
            To get started you'll need API token details for your BookStack instance.
            You can get these by editing your (or another) user account in BookStack
            and creating a token via the "API Tokens" section at the bottom.
            Your user account will require to be assigned a BookStack role which
            has the "Access system API" system permission assigned.
          </p>
          <div className="border-solid border-gray-600 border rounded-md p-6 mt-2 mb-4">
            <CredentialForm<BookstackCredentialJson>
              formBody={
                <>
                  <TextFormField name="bookstack_base_url" label="Instance Base URL:" />
                  <TextFormField name="bookstack_api_token_id" label="API Token ID:" />
                  <TextFormField
                    name="bookstack_api_token_secret"
                    label="API Token Secret:"
                    type="password"
                  />
                </>
              }
              validationSchema={Yup.object().shape({
                bookstack_base_url: Yup.string().required(
                  "Please enter the base URL for your BookStack instance"
                ),
                bookstack_api_token_id: Yup.string().required(
                  "Please enter your BookStack API token ID"
                ),
                bookstack_api_token_secret: Yup.string().required(
                  "Please enter your BookStack API token secret"
                ),
              })}
              initialValues={{
                bookstack_base_url: "",
                bookstack_api_token_id: "",
                bookstack_api_token_secret: "",
              }}
              onSubmit={(isSuccess) => {
                if (isSuccess) {
                  mutate("/api/manage/credential");
                  mutate("/api/manage/admin/connector/indexing-status");
                }
              }}
            />
          </div>
        </>
      )}

      {bookstackConnectorIndexingStatuses.length > 0 && (
        <>
          <h2 className="font-bold mb-2 mt-6 ml-auto mr-auto">
            BookStack indexing status
          </h2>
          <p className="text-sm mb-2">
            The latest page, chapter, book and shelf changes are fetched
            every 10 minutes.
          </p>
          <div className="mb-2">
            <ConnectorsTable<BookstackConfig, BookstackCredentialJson>
              connectorIndexingStatuses={
                bookstackConnectorIndexingStatuses
              }
              liveCredential={bookstackCredential}
              getCredential={(credential) => {
                return (
                  <div>
                    <p>
                      {credential.credential_json.bookstack_api_token_id}
                    </p>
                  </div>
                );
              }}
              onCredentialLink={async (connectorId) => {
                if (bookstackCredential) {
                  await linkCredential(
                    connectorId,
                    bookstackCredential.id
                  );
                  mutate("/api/manage/admin/connector/indexing-status");
                }
              }}
              onUpdate={() =>
                mutate("/api/manage/admin/connector/indexing-status")
              }
            />
          </div>
        </>
      )}

      {/*
        The setup form is rendered only when a credential exists. Previously
        it was always shown, and submitting it without a credential created
        the connector and then threw on `bookstackCredential.id`.
      */}
      {bookstackCredential ? (
        <div className="border-solid border-gray-600 border rounded-md p-6 mt-4">
          <h2 className="font-bold mb-3">Setup Connector</h2>
          <ConnectorForm<BookstackConfig>
            nameBuilder={() => "BookStackConnector"}
            source="bookstack"
            inputType="load_state"
            formBody={<></>}
            validationSchema={Yup.object().shape({})}
            initialValues={{}}
            refreshFreq={10 * 60} // 10 minutes
            onSubmit={async (isSuccess, responseJson) => {
              if (isSuccess && responseJson) {
                await linkCredential(
                  responseJson.id,
                  bookstackCredential.id
                );
                mutate("/api/manage/admin/connector/indexing-status");
              }
            }}
          />
        </div>
      ) : (
        <>
          <p className="text-sm mb-4">
            Please provide your API details in Step 1 first! Once done with that,
            you'll be able to see indexing status.
          </p>
        </>
      )}
    </>
  );
};
/**
 * BookStack connector admin page: health-check banner, page title with the
 * BookStack icon, and the connector management body.
 */
export default function Page() {
  // Title row, built separately to keep the returned tree flat.
  const titleRow = (
    <div className="border-solid border-gray-600 border-b mb-4 pb-2 flex">
      <BookstackIcon size="32" />
      <h1 className="text-3xl font-bold pl-2">BookStack</h1>
    </div>
  );

  return (
    <div className="mx-auto container">
      <div className="mb-4">
        <HealthCheckBanner />
      </div>
      {titleRow}
      <Main />
    </div>
  );
}

View File

@ -7,6 +7,7 @@ import {
GoogleDriveIcon,
SlackIcon,
KeyIcon,
BookstackIcon,
ConfluenceIcon,
FileIcon,
JiraIcon,
@ -83,6 +84,15 @@ export default async function AdminLayout({
),
link: "/admin/connectors/google-drive",
},
{
name: (
<div className="flex">
<BookstackIcon size="16" />
<div className="ml-1">BookStack</div>
</div>
),
link: "/admin/connectors/bookstack",
},
{
name: (
<div className="flex">

View File

@ -13,6 +13,7 @@ import {
Brain,
} from "@phosphor-icons/react";
import {
SiBookstack,
SiConfluence,
SiGithub,
SiGoogledrive,
@ -113,6 +114,13 @@ export const GoogleDriveIcon = ({
return <SiGoogledrive size={size} className={className} />;
};
/**
 * BookStack icon component.
 *
 * Renders `SiBookstack`, falling back to size "16" and the shared default
 * Tailwind classes when the caller does not provide them.
 */
export const BookstackIcon = (props: IconProps) => {
  const { size = "16", className = defaultTailwindCSS } = props;
  return <SiBookstack size={size} className={className} />;
};
export const ConfluenceIcon = ({
size = "16",
className = defaultTailwindCSS,

View File

@ -1,5 +1,6 @@
import { ValidSources } from "@/lib/types";
import {
BookstackIcon,
ConfluenceIcon,
FileIcon,
GithubIcon,
@ -48,6 +49,12 @@ export const getSourceMetadata = (sourceType: ValidSources): SourceMetadata => {
displayName: "Github PRs",
adminPageLink: "/admin/connectors/github",
};
case "bookstack":
return {
icon: BookstackIcon,
displayName: "BookStack",
adminPageLink: "/admin/connectors/bookstack",
};
case "confluence":
return {
icon: ConfluenceIcon,

View File

@ -12,6 +12,7 @@ export type ValidSources =
| "github"
| "slack"
| "google_drive"
| "bookstack"
| "confluence"
| "jira"
| "slab"
@ -44,6 +45,9 @@ export interface GithubConfig {
repo_name: string;
}
/** Connector-specific configuration for BookStack; currently declares no fields. */
export interface BookstackConfig {}
/** Connector-specific configuration for Confluence. */
export interface ConfluenceConfig {
  /** Wiki page URL supplied for the connector, as a string. */
  wiki_page_url: string;
}
@ -90,6 +94,12 @@ export interface GithubCredentialJson {
github_access_token: string;
}
/** Shape of the credential JSON stored for a BookStack connector. */
export interface BookstackCredentialJson {
  /** Base URL of the BookStack instance. */
  bookstack_base_url: string;
  /** ID half of the BookStack API token. */
  bookstack_api_token_id: string;
  /** Secret half of the BookStack API token. */
  bookstack_api_token_secret: string;
}
export interface ConfluenceCredentialJson {
confluence_username: string;
confluence_access_token: string;