Merge pull request #139 from ssddanbrown/bookstack_connector

BookStack connector
This commit is contained in:
Chris Weaver 2023-07-08 17:18:59 -07:00 committed by GitHub
commit d135bc7efa
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 503 additions and 0 deletions

View File

@ -21,6 +21,7 @@ class DocumentSource(str, Enum):
WEB = "web"
GOOGLE_DRIVE = "google_drive"
GITHUB = "github"
BOOKSTACK = "bookstack"
CONFLUENCE = "confluence"
SLAB = "slab"
JIRA = "jira"

View File

@ -0,0 +1,52 @@
import requests
class BookStackClientRequestFailedError(ConnectionError):
    """Raised when a BookStack API request comes back with a failing status.

    Attributes:
        status: HTTP status code of the failed response.
        error: Error description that is embedded in the exception message.
    """

    def __init__(self, status: int, error: str) -> None:
        # Keep the raw values on the instance so callers can branch on the
        # status code instead of parsing the formatted message.
        self.status = status
        self.error = error
        super().__init__(
            f"BookStack Client request failed with status {status}: {error}"
        )
class BookStackApiClient:
    """Small client for the BookStack REST API using token authentication.

    Args:
        base_url: Root URL of the BookStack instance; a trailing slash is
            tolerated.
        token_id: ID part of the BookStack API token.
        token_secret: Secret part of the BookStack API token.
    """

    def __init__(
        self,
        base_url: str,
        token_id: str,
        token_secret: str,
    ) -> None:
        self.base_url = base_url
        self.token_id = token_id
        self.token_secret = token_secret

    def get(self, endpoint: str, params: dict[str, str]) -> dict:
        """Send a GET request to an API endpoint and return the decoded body.

        Args:
            endpoint: Path below ``/api/``, with or without a leading slash.
            params: Query-string parameters to send with the request.

        Returns:
            The decoded JSON body, or an empty dict when the body is not
            valid JSON.

        Raises:
            BookStackClientRequestFailedError: If the response status code
                is 300 or above.
        """
        url = self._build_url(endpoint)
        headers = self._build_headers()
        # NOTE(review): no timeout is passed, so a server that never answers
        # blocks this call indefinitely.
        response = requests.get(url, headers=headers, params=params)

        try:
            payload = response.json()
        except ValueError:
            # Every JSON decoding error raised by requests derives from
            # ValueError; an undecodable body is treated as an empty one.
            payload = {}

        if response.status_code >= 300:
            error = self._extract_error_message(payload, response.reason)
            raise BookStackClientRequestFailedError(response.status_code, error)

        return payload

    @staticmethod
    def _extract_error_message(payload: object, fallback: str) -> str:
        """Return ``payload["error"]["message"]`` when present, else ``fallback``.

        Any payload shape other than nested dicts yields ``fallback`` rather
        than an exception, so the HTTP failure is always the error reported.
        """
        if isinstance(payload, dict):
            details = payload.get("error")
            if isinstance(details, dict):
                message = details.get("message")
                if message:
                    return str(message)
        return fallback

    def _build_headers(self) -> dict[str, str]:
        """Return the authorization and content-negotiation headers."""
        return {
            "Authorization": f"Token {self.token_id}:{self.token_secret}",
            "Accept": "application/json",
        }

    def _build_url(self, endpoint: str) -> str:
        """Return the absolute URL of ``endpoint`` below ``/api/``."""
        return self.base_url.rstrip("/") + "/api/" + endpoint.lstrip("/")

    def build_app_url(self, endpoint: str) -> str:
        """Return the absolute URL of ``endpoint`` below the instance root."""
        return self.base_url.rstrip("/") + "/" + endpoint.lstrip("/")

View File

@ -0,0 +1,162 @@
import html
import time
from collections.abc import Callable
from datetime import datetime
from typing import Any
from bs4 import BeautifulSoup
from danswer.configs.app_configs import INDEX_BATCH_SIZE
from danswer.configs.constants import DocumentSource
from danswer.configs.constants import HTML_SEPARATOR
from danswer.connectors.interfaces import GenerateDocumentsOutput
from danswer.connectors.interfaces import LoadConnector
from danswer.connectors.interfaces import PollConnector
from danswer.connectors.interfaces import SecondsSinceUnixEpoch
from danswer.connectors.bookstack.client import BookStackApiClient
from danswer.connectors.models import Document
from danswer.connectors.models import Section
class BookstackClientNotSetUpError(PermissionError):
    """Signals that the connector was used before its API client existed."""

    def __init__(self) -> None:
        message = "BookStack Client is not set up, was load_credentials called?"
        super().__init__(message)
class BookstackConnector(LoadConnector, PollConnector):
    """Connector that turns BookStack content into ``Document`` batches.

    Books, chapters, shelves and pages are read through the BookStack API.
    ``load_credentials`` must be called before documents are requested;
    otherwise ``BookstackClientNotSetUpError`` is raised.
    """

    def __init__(
        self,
        batch_size: int = INDEX_BATCH_SIZE,
    ) -> None:
        """Store the page size used for every listing request."""
        self.batch_size = batch_size
        self.bookstack_client: BookStackApiClient | None = None

    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
        """Build the API client from stored credentials.

        Args:
            credentials: Mapping that must contain ``bookstack_base_url``,
                ``bookstack_api_token_id`` and ``bookstack_api_token_secret``.

        Returns:
            Always ``None``.

        Raises:
            KeyError: If one of the required keys is missing.
        """
        self.bookstack_client = BookStackApiClient(
            base_url=credentials["bookstack_base_url"],
            token_id=credentials["bookstack_api_token_id"],
            token_secret=credentials["bookstack_api_token_secret"],
        )
        return None

    def _require_client(self) -> BookStackApiClient:
        """Return the API client or raise if credentials were never loaded."""
        if self.bookstack_client is None:
            raise BookstackClientNotSetUpError()
        return self.bookstack_client

    @staticmethod
    def _format_timestamp(timestamp: SecondsSinceUnixEpoch) -> str:
        """Format epoch seconds as a UTC ``YYYY-MM-DD HH:MM:SS`` string."""
        # time.gmtime avoids datetime.utcfromtimestamp, which is deprecated
        # since Python 3.12.
        return time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime(timestamp))

    @staticmethod
    def _text(value: object) -> str:
        """Return ``value`` as a string, mapping ``None`` to ``""``."""
        return "" if value is None else str(value)

    def _get_doc_batch(
        self,
        endpoint: str,
        transformer: Callable[[dict], Document],
        start_ind: int,
        start: SecondsSinceUnixEpoch | None = None,
        end: SecondsSinceUnixEpoch | None = None,
    ) -> tuple[list[Document], int]:
        """Fetch one page of a listing endpoint and convert every item.

        Args:
            endpoint: Listing endpoint such as ``/books``.
            transformer: Converts one listed item into a ``Document``.
            start_ind: Offset of the first item to request.
            start: Optional lower bound on ``updated_at`` (epoch seconds).
            end: Optional upper bound on ``updated_at`` (epoch seconds).

        Returns:
            The converted documents and the number of items the API returned
            for this page.

        Raises:
            BookstackClientNotSetUpError: If no client has been configured.
        """
        params = {
            "count": str(self.batch_size),
            "offset": str(start_ind),
            "sort": "+id",
        }
        # Compare against None so that a bound of 0 is still applied.
        if start is not None:
            params["filter[updated_at:gte]"] = self._format_timestamp(start)
        if end is not None:
            params["filter[updated_at:lte]"] = self._format_timestamp(end)

        batch = self._require_client().get(endpoint, params=params).get("data", [])
        doc_batch = [transformer(item) for item in batch]
        return doc_batch, len(batch)

    def _container_to_document(self, item: dict, kind: str, path: str) -> Document:
        """Build the document shared by books, chapters and shelves.

        Args:
            item: Listed item; missing or null fields become empty strings.
            kind: Lower-case item type, used for the ID prefix, the metadata
                ``type`` and (capitalized) the semantic identifier.
            path: Path of the item below the instance root.
        """
        name = self._text(item.get("name"))
        description = self._text(item.get("description"))
        url = self._require_client().build_app_url(path)
        return Document(
            id=f"{kind}:{item.get('id')}",
            sections=[Section(link=url, text=f"{name}\n{description}")],
            source=DocumentSource.BOOKSTACK,
            semantic_identifier=f"{kind.capitalize()}: {name}",
            metadata={
                "type": kind,
                "updated_at": item.get("updated_at"),
            },
        )

    def _book_to_document(self, book: dict) -> Document:
        """Convert a listed book into a ``Document``."""
        path = "/books/" + self._text(book.get("slug"))
        return self._container_to_document(book, "book", path)

    def _chapter_to_document(self, chapter: dict) -> Document:
        """Convert a listed chapter into a ``Document``."""
        path = (
            "/books/"
            + self._text(chapter.get("book_slug"))
            + "/chapter/"
            + self._text(chapter.get("slug"))
        )
        return self._container_to_document(chapter, "chapter", path)

    def _shelf_to_document(self, shelf: dict) -> Document:
        """Convert a listed shelf into a ``Document``."""
        path = "/shelves/" + self._text(shelf.get("slug"))
        return self._container_to_document(shelf, "shelf", path)

    def _page_to_document(self, page: dict) -> Document:
        """Fetch a page's full content and convert it into a ``Document``.

        The listed item supplies only ``id`` and ``book_slug``; name, slug,
        HTML and ``updated_at`` come from a separate ``/pages/<id>`` request.
        """
        client = self._require_client()
        page_id = str(page.get("id"))
        page_data = client.get("/pages/" + page_id, {})

        name = self._text(page_data.get("name"))
        url = client.build_app_url(
            "/books/"
            + self._text(page.get("book_slug"))
            + "/page/"
            + self._text(page_data.get("slug"))
        )
        # Prepend the escaped title as a heading so it is part of the text.
        page_html = (
            "<h1>" + html.escape(name) + "</h1>" + self._text(page_data.get("html"))
        )
        text = BeautifulSoup(page_html, "html.parser").get_text(HTML_SEPARATOR)

        # Pause after each detail request - presumably to throttle API usage;
        # TODO confirm against the instance's rate limit.
        time.sleep(0.1)
        return Document(
            id="page:" + page_id,
            sections=[Section(link=url, text=text)],
            source=DocumentSource.BOOKSTACK,
            semantic_identifier="Page: " + name,
            metadata={
                "type": "page",
                "updated_at": page_data.get("updated_at"),
            },
        )

    def load_from_state(self) -> GenerateDocumentsOutput:
        """Yield batches for all content, without any time filter.

        Raises:
            BookstackClientNotSetUpError: If no client has been configured.
        """
        if self.bookstack_client is None:
            raise BookstackClientNotSetUpError()
        return self.poll_source(None, None)

    def poll_source(
        self, start: SecondsSinceUnixEpoch | None, end: SecondsSinceUnixEpoch | None
    ) -> GenerateDocumentsOutput:
        """Yield batches for content updated between ``start`` and ``end``.

        Endpoints are walked in the order books, chapters, shelves, pages.
        This is a generator, so the client check runs on the first iteration.

        Args:
            start: Optional lower bound on ``updated_at`` (epoch seconds).
            end: Optional upper bound on ``updated_at`` (epoch seconds).

        Raises:
            BookstackClientNotSetUpError: If no client has been configured.
        """
        if self.bookstack_client is None:
            raise BookstackClientNotSetUpError()

        transform_by_endpoint: dict[str, Callable[[dict], Document]] = {
            "/books": self._book_to_document,
            "/chapters": self._chapter_to_document,
            "/shelves": self._shelf_to_document,
            "/pages": self._page_to_document,
        }

        for endpoint, transform in transform_by_endpoint.items():
            start_ind = 0
            while True:
                doc_batch, num_results = self._get_doc_batch(
                    endpoint, transform, start_ind, start, end
                )
                start_ind += num_results
                if doc_batch:
                    yield doc_batch
                # A short page means the listing is exhausted.
                if num_results < self.batch_size:
                    break
                # Brief pause between consecutive listing requests.
                time.sleep(0.2)

View File

@ -2,6 +2,7 @@ from typing import Any
from typing import Type
from danswer.configs.constants import DocumentSource
from danswer.connectors.bookstack.connector import BookstackConnector
from danswer.connectors.confluence.connector import ConfluenceConnector
from danswer.connectors.danswer_jira.connector import JiraConnector
from danswer.connectors.file.connector import LocalFileConnector
@ -37,6 +38,7 @@ def identify_connector_class(
},
DocumentSource.GITHUB: GithubConnector,
DocumentSource.GOOGLE_DRIVE: GoogleDriveConnector,
DocumentSource.BOOKSTACK: BookstackConnector,
DocumentSource.CONFLUENCE: ConfluenceConnector,
DocumentSource.JIRA: JiraConnector,
DocumentSource.SLAB: SlabConnector,

View File

@ -0,0 +1,250 @@
"use client";
import * as Yup from "yup";
import { BookstackIcon, TrashIcon } from "@/components/icons/icons";
import { TextFormField } from "@/components/admin/connectors/Field";
import { HealthCheckBanner } from "@/components/health/healthcheck";
import { CredentialForm } from "@/components/admin/connectors/CredentialForm";
import {
BookstackCredentialJson,
BookstackConfig,
Credential,
ConnectorIndexingStatus,
} from "@/lib/types";
import useSWR, { useSWRConfig } from "swr";
import { fetcher } from "@/lib/fetcher";
import { LoadingAnimation } from "@/components/Loading";
import { deleteCredential, linkCredential } from "@/lib/credential";
import { ConnectorForm } from "@/components/admin/connectors/ConnectorForm";
import { ConnectorsTable } from "@/components/admin/connectors/table/ConnectorsTable";
import { usePopup } from "@/components/admin/connectors/Popup";
/**
 * Body of the BookStack admin page.
 *
 * Loads connector indexing statuses and stored credentials, then renders
 * either the stored API token (with a delete button) or the credential form,
 * followed by the indexing-status table or the "Create Connection" form.
 */
const Main = () => {
  // SWR keys; the same endpoints are revalidated after each change below.
  const indexingStatusKey = "/api/manage/admin/connector/indexing-status";
  const credentialKey = "/api/manage/credential";
  const refreshEverySeconds = 10 * 60; // 10 minutes

  const { popup, setPopup } = usePopup();
  const { mutate } = useSWRConfig();

  const {
    data: indexingStatuses,
    isLoading: indexingStatusesLoading,
    error: indexingStatusesError,
  } = useSWR<ConnectorIndexingStatus<any>[]>(indexingStatusKey, fetcher);
  const {
    data: credentials,
    isLoading: credentialsLoading,
    error: credentialsError,
  } = useSWR<Credential<BookstackCredentialJson>[]>(credentialKey, fetcher);

  const stillLoading =
    (!indexingStatuses && indexingStatusesLoading) ||
    (!credentials && credentialsLoading);
  if (stillLoading) {
    return <LoadingAnimation text="Loading" />;
  }
  if (indexingStatusesError || !indexingStatuses) {
    return <div>Failed to load connectors</div>;
  }
  if (credentialsError || !credentials) {
    return <div>Failed to load credentials</div>;
  }

  const bookstackStatuses = indexingStatuses.filter(
    ({ connector }) => connector.source === "bookstack"
  );
  // First stored credential that carries a BookStack token ID, if any.
  const [bookstackCredential] = credentials.filter(
    (candidate) => candidate.credential_json?.bookstack_api_token_id
  );
  const hasConnector = bookstackStatuses.length > 0;

  const refreshIndexingStatus = () => mutate(indexingStatusKey);

  // Deleting is refused while any BookStack connector still exists.
  const handleDeleteCredential = async () => {
    if (hasConnector) {
      setPopup({
        type: "error",
        message: "Must delete all connectors before deleting credentials",
      });
      return;
    }
    await deleteCredential(bookstackCredential.id);
    mutate(credentialKey);
  };

  return (
    <>
      {popup}
      <h2 className="font-bold mb-2 mt-6 ml-auto mr-auto">
        Step 1: Provide your API details
      </h2>

      {bookstackCredential ? (
        <div className="flex mb-1 text-sm">
          <p className="my-auto">Existing API Token: </p>
          <p className="ml-1 italic my-auto max-w-md">
            {bookstackCredential.credential_json?.bookstack_api_token_id}
          </p>
          <button
            className="ml-1 hover:bg-gray-700 rounded-full p-1"
            onClick={handleDeleteCredential}
          >
            <TrashIcon />
          </button>
        </div>
      ) : (
        <>
          <p className="text-sm">
            To get started you&apos;ll need API token details for your BookStack instance.
            You can get these by editing your (or another) user account in BookStack
            and creating a token via the &apos;API Tokens&apos; section at the bottom.
            Your user account will require to be assigned a BookStack role which
            has the &apos;Access system API&apos; system permission assigned.
          </p>
          <div className="border-solid border-gray-600 border rounded-md p-6 mt-2 mb-4">
            <CredentialForm<BookstackCredentialJson>
              formBody={
                <>
                  <TextFormField
                    name="bookstack_base_url"
                    label="Instance Base URL:"
                  />
                  <TextFormField
                    name="bookstack_api_token_id"
                    label="API Token ID:"
                  />
                  <TextFormField
                    name="bookstack_api_token_secret"
                    label="API Token Secret:"
                    type="password"
                  />
                </>
              }
              validationSchema={Yup.object().shape({
                bookstack_base_url: Yup.string().required(
                  "Please enter the base URL for your BookStack instance"
                ),
                bookstack_api_token_id: Yup.string().required(
                  "Please enter your BookStack API token ID"
                ),
                bookstack_api_token_secret: Yup.string().required(
                  "Please enter your BookStack API token secret"
                ),
              })}
              initialValues={{
                bookstack_base_url: "",
                bookstack_api_token_id: "",
                bookstack_api_token_secret: "",
              }}
              onSubmit={(isSuccess) => {
                if (!isSuccess) {
                  return;
                }
                mutate(credentialKey);
                mutate(indexingStatusKey);
              }}
            />
          </div>
        </>
      )}

      {hasConnector && (
        <>
          <h2 className="font-bold mb-2 mt-6 ml-auto mr-auto">
            BookStack indexing status
          </h2>
          <p className="text-sm mb-2">
            The latest page, chapter, book and shelf changes are fetched
            every 10 minutes.
          </p>
          <div className="mb-2">
            <ConnectorsTable<BookstackConfig, BookstackCredentialJson>
              connectorIndexingStatuses={bookstackStatuses}
              liveCredential={bookstackCredential}
              getCredential={(credential) => (
                <div>
                  <p>{credential.credential_json.bookstack_api_token_id}</p>
                </div>
              )}
              onCredentialLink={async (connectorId) => {
                if (!bookstackCredential) {
                  return;
                }
                await linkCredential(connectorId, bookstackCredential.id);
                mutate(indexingStatusKey);
              }}
              onUpdate={refreshIndexingStatus}
            />
          </div>
        </>
      )}

      {bookstackCredential && !hasConnector && (
        <div className="border-solid border-gray-600 border rounded-md p-6 mt-4">
          <h2 className="font-bold mb-3">Create Connection</h2>
          <p className="text-sm mb-4">
            Press connect below to start the connection to your BookStack instance.
          </p>
          <ConnectorForm<BookstackConfig>
            nameBuilder={() => "BookStackConnector"}
            source="bookstack"
            inputType="poll"
            formBody={<></>}
            validationSchema={Yup.object().shape({})}
            initialValues={{}}
            refreshFreq={refreshEverySeconds}
            onSubmit={async (isSuccess, responseJson) => {
              if (!(isSuccess && responseJson)) {
                return;
              }
              await linkCredential(responseJson.id, bookstackCredential.id);
              mutate(indexingStatusKey);
            }}
          />
        </div>
      )}

      {!bookstackCredential && (
        <p className="text-sm mb-4">
          Please provide your API details in Step 1 first! Once done with that,
          you&apos;ll be able to start the connection then see indexing status.
        </p>
      )}
    </>
  );
};
/** BookStack admin page: health banner, titled header and the main body. */
export default function Page() {
  const header = (
    <div className="border-solid border-gray-600 border-b mb-4 pb-2 flex">
      <BookstackIcon size="32" />
      <h1 className="text-3xl font-bold pl-2">BookStack</h1>
    </div>
  );

  return (
    <div className="mx-auto container">
      <div className="mb-4">
        <HealthCheckBanner />
      </div>
      {header}
      <Main />
    </div>
  );
}

View File

@ -7,6 +7,7 @@ import {
GoogleDriveIcon,
SlackIcon,
KeyIcon,
BookstackIcon,
ConfluenceIcon,
FileIcon,
JiraIcon,
@ -83,6 +84,15 @@ export default async function AdminLayout({
),
link: "/admin/connectors/google-drive",
},
{
name: (
<div className="flex">
<BookstackIcon size="16" />
<div className="ml-1">BookStack</div>
</div>
),
link: "/admin/connectors/bookstack",
},
{
name: (
<div className="flex">

View File

@ -13,6 +13,7 @@ import {
Brain,
} from "@phosphor-icons/react";
import {
SiBookstack,
SiConfluence,
SiGithub,
SiGoogledrive,
@ -113,6 +114,13 @@ export const GoogleDriveIcon = ({
return <SiGoogledrive size={size} className={className} />;
};
/** BookStack brand icon; defaults to size "16" and the shared icon classes. */
export const BookstackIcon = ({
  size = "16",
  className = defaultTailwindCSS,
}: IconProps) => <SiBookstack size={size} className={className} />;
export const ConfluenceIcon = ({
size = "16",
className = defaultTailwindCSS,

View File

@ -7,6 +7,7 @@ import { Source } from "@/lib/search/interfaces";
const sources: Source[] = [
{ displayName: "Google Drive", internalName: "google_drive" },
{ displayName: "Slack", internalName: "slack" },
{ displayName: "BookStack", internalName: "bookstack" },
{ displayName: "Confluence", internalName: "confluence" },
{ displayName: "Jira", internalName: "jira" },
{ displayName: "Slab", internalName: "slab" },

View File

@ -1,5 +1,6 @@
import { ValidSources } from "@/lib/types";
import {
BookstackIcon,
ConfluenceIcon,
FileIcon,
GithubIcon,
@ -48,6 +49,12 @@ export const getSourceMetadata = (sourceType: ValidSources): SourceMetadata => {
displayName: "Github PRs",
adminPageLink: "/admin/connectors/github",
};
case "bookstack":
return {
icon: BookstackIcon,
displayName: "BookStack",
adminPageLink: "/admin/connectors/bookstack",
};
case "confluence":
return {
icon: ConfluenceIcon,

View File

@ -12,6 +12,7 @@ export type ValidSources =
| "github"
| "slack"
| "google_drive"
| "bookstack"
| "confluence"
| "jira"
| "slab"
@ -44,6 +45,9 @@ export interface GithubConfig {
repo_name: string;
}
/** Connector-specific settings for BookStack; it declares no fields. */
export interface BookstackConfig {}
/** Connector-specific settings for Confluence. */
export interface ConfluenceConfig {
  wiki_page_url: string;
}
@ -90,6 +94,12 @@ export interface GithubCredentialJson {
github_access_token: string;
}
/** Stored credential payload for a BookStack instance. */
export interface BookstackCredentialJson {
  /** Base URL of the BookStack instance. */
  bookstack_base_url: string;
  /** ID part of the BookStack API token. */
  bookstack_api_token_id: string;
  /** Secret part of the BookStack API token. */
  bookstack_api_token_secret: string;
}
export interface ConfluenceCredentialJson {
confluence_username: string;
confluence_access_token: string;