From bfde5fd809894ef96c859167af0bf887503d6b92 Mon Sep 17 00:00:00 2001
From: Dan Brown <ssddanbrown@googlemail.com>
Date: Thu, 6 Jul 2023 10:50:27 +0100
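Subject: [PATCH 1/6] Got basic BookStack connector setup UI/backend working

The backend connector in this patch is only a scaffold copied from the
Confluence connector: load_credentials() discards the credentials and the
fetch code still references Confluence-only attributes, so indexing does
not work until the following patch replaces it.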
---
 backend/danswer/configs/constants.py          |   1 +
 .../danswer/connectors/bookstack/__init__.py  |   0
 .../danswer/connectors/bookstack/connector.py | 118 +++++++++
 backend/danswer/connectors/factory.py         |   2 +
 .../app/admin/connectors/bookstack/page.tsx   | 243 ++++++++++++++++++
 web/src/app/admin/layout.tsx                  |  10 +
 web/src/components/icons/icons.tsx            |   8 +
 web/src/components/source.tsx                 |   7 +
 web/src/lib/types.ts                          |  10 +
 9 files changed, 399 insertions(+)
 create mode 100644 backend/danswer/connectors/bookstack/__init__.py
 create mode 100644 backend/danswer/connectors/bookstack/connector.py
 create mode 100644 web/src/app/admin/connectors/bookstack/page.tsx

diff --git a/backend/danswer/configs/constants.py b/backend/danswer/configs/constants.py
index cb73f8146..9ede56eed 100644
--- a/backend/danswer/configs/constants.py
+++ b/backend/danswer/configs/constants.py
@@ -21,6 +21,7 @@ class DocumentSource(str, Enum):
     WEB = "web"
     GOOGLE_DRIVE = "google_drive"
     GITHUB = "github"
+    BOOKSTACK = "bookstack"
     CONFLUENCE = "confluence"
     SLAB = "slab"
     JIRA = "jira"
diff --git a/backend/danswer/connectors/bookstack/__init__.py b/backend/danswer/connectors/bookstack/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/backend/danswer/connectors/bookstack/connector.py b/backend/danswer/connectors/bookstack/connector.py
new file mode 100644
index 000000000..9a6dee65f
--- /dev/null
+++ b/backend/danswer/connectors/bookstack/connector.py
@@ -0,0 +1,118 @@
+from collections.abc import Callable
+from collections.abc import Generator
+from datetime import datetime
+from datetime import timezone
+from typing import Any
+from urllib.parse import urlparse
+
+from atlassian import Confluence  # type:ignore
+from bs4 import BeautifulSoup
+from danswer.configs.app_configs import INDEX_BATCH_SIZE
+from danswer.configs.constants import DocumentSource
+from danswer.configs.constants import HTML_SEPARATOR
+from danswer.connectors.interfaces import GenerateDocumentsOutput
+from danswer.connectors.interfaces import LoadConnector
+from danswer.connectors.interfaces import PollConnector
+from danswer.connectors.interfaces import SecondsSinceUnixEpoch
+from danswer.connectors.models import Document
+from danswer.connectors.models import Section
+
+class BookstackClientNotSetUpError(PermissionError):
+    def __init__(self) -> None:
+        super().__init__(
+            "Confluence Client is not set up, was load_credentials called?"
+        )
+
+class BookstackConnector(LoadConnector, PollConnector):
+    def __init__(
+        self,
+        batch_size: int = INDEX_BATCH_SIZE,
+    ) -> None:
+        self.batch_size = batch_size
+
+    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
+        base_url = credentials["bookstack_base_url"]
+        api_token_id = credentials["bookstack_api_token_id"]
+        api_token_secret = credentials["bookstack_api_token_secret"]
+        return None
+
+    def _get_doc_batch(
+        self, start_ind: int, time_filter: Callable[[datetime], bool] | None = None
+    ) -> tuple[list[Document], int]:
+        doc_batch: list[Document] = []
+
+        if self.confluence_client is None:
+            raise BookstackClientNotSetUpError()
+
+        batch = self.confluence_client.get_all_pages_from_space(
+            self.space,
+            start=start_ind,
+            limit=self.batch_size,
+            expand="body.storage.value,version",
+        )
+
+        for page in batch:
+            last_modified_str = page["version"]["when"]
+            last_modified = datetime.fromisoformat(last_modified_str)
+
+            if time_filter is None or time_filter(last_modified):
+                page_html = page["body"]["storage"]["value"]
+                soup = BeautifulSoup(page_html, "html.parser")
+                page_text = page.get("title", "") + "\n" + soup.get_text(HTML_SEPARATOR)
+                comment_pages = self.confluence_client.get_page_child_by_type(
+                    page["id"],
+                    type="comment",
+                    start=None,
+                    limit=None,
+                    expand="body.storage.value",
+                )
+                comments_text = _comment_dfs("", comment_pages, self.confluence_client)
+                page_text += comments_text
+
+                page_url = self.wiki_base + page["_links"]["webui"]
+
+                doc_batch.append(
+                    Document(
+                        id=page_url,
+                        sections=[Section(link=page_url, text=page_text)],
+                        source=DocumentSource.CONFLUENCE,
+                        semantic_identifier=page["title"],
+                        metadata={},
+                    )
+                )
+        return doc_batch, len(batch)
+
+    def load_from_state(self) -> GenerateDocumentsOutput:
+        if self.confluence_client is None:
+            raise BookstackClientNotSetUpError()
+
+        start_ind = 0
+        while True:
+            doc_batch, num_pages = self._get_doc_batch(start_ind)
+            start_ind += num_pages
+            if doc_batch:
+                yield doc_batch
+
+            if num_pages < self.batch_size:
+                break
+
+    def poll_source(
+        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
+    ) -> GenerateDocumentsOutput:
+        if self.confluence_client is None:
+            raise BookstackClientNotSetUpError()
+
+        start_time = datetime.fromtimestamp(start, tz=timezone.utc)
+        end_time = datetime.fromtimestamp(end, tz=timezone.utc)
+
+        start_ind = 0
+        while True:
+            doc_batch, num_pages = self._get_doc_batch(
+                start_ind, time_filter=lambda t: start_time <= t <= end_time
+            )
+            start_ind += num_pages
+            if doc_batch:
+                yield doc_batch
+
+            if num_pages < self.batch_size:
+                break
diff --git a/backend/danswer/connectors/factory.py b/backend/danswer/connectors/factory.py
index abacb0174..0cce12848 100644
--- a/backend/danswer/connectors/factory.py
+++ b/backend/danswer/connectors/factory.py
@@ -2,6 +2,7 @@ from typing import Any
 from typing import Type
 
 from danswer.configs.constants import DocumentSource
+from danswer.connectors.bookstack.connector import BookstackConnector
 from danswer.connectors.confluence.connector import ConfluenceConnector
 from danswer.connectors.danswer_jira.connector import JiraConnector
 from danswer.connectors.file.connector import LocalFileConnector
@@ -37,6 +38,7 @@ def identify_connector_class(
         },
         DocumentSource.GITHUB: GithubConnector,
         DocumentSource.GOOGLE_DRIVE: GoogleDriveConnector,
+        DocumentSource.BOOKSTACK: BookstackConnector,
         DocumentSource.CONFLUENCE: ConfluenceConnector,
         DocumentSource.JIRA: JiraConnector,
         DocumentSource.SLAB: SlabConnector,
diff --git a/web/src/app/admin/connectors/bookstack/page.tsx b/web/src/app/admin/connectors/bookstack/page.tsx
new file mode 100644
index 000000000..2c38bfe6a
--- /dev/null
+++ b/web/src/app/admin/connectors/bookstack/page.tsx
@@ -0,0 +1,243 @@
+"use client";
+
+import * as Yup from "yup";
+import { BookstackIcon, TrashIcon } from "@/components/icons/icons";
+import { TextFormField } from "@/components/admin/connectors/Field";
+import { HealthCheckBanner } from "@/components/health/healthcheck";
+import { CredentialForm } from "@/components/admin/connectors/CredentialForm";
+import {
+  BookstackCredentialJson,
+  BookstackConfig,
+  Credential,
+  ConnectorIndexingStatus, ConfluenceConfig,
+} from "@/lib/types";
+import useSWR, { useSWRConfig } from "swr";
+import { fetcher } from "@/lib/fetcher";
+import { LoadingAnimation } from "@/components/Loading";
+import { deleteCredential, linkCredential } from "@/lib/credential";
+import { ConnectorForm } from "@/components/admin/connectors/ConnectorForm";
+import { ConnectorsTable } from "@/components/admin/connectors/table/ConnectorsTable";
+import { usePopup } from "@/components/admin/connectors/Popup";
+
+const Main = () => {
+  const { popup, setPopup } = usePopup();
+
+  const { mutate } = useSWRConfig();
+  const {
+    data: connectorIndexingStatuses,
+    isLoading: isConnectorIndexingStatusesLoading,
+    error: isConnectorIndexingStatusesError,
+  } = useSWR<ConnectorIndexingStatus<any>[]>(
+    "/api/manage/admin/connector/indexing-status",
+    fetcher
+  );
+  const {
+    data: credentialsData,
+    isLoading: isCredentialsLoading,
+    error: isCredentialsError,
+  } = useSWR<Credential<BookstackCredentialJson>[]>(
+    "/api/manage/credential",
+    fetcher
+  );
+
+  if (
+    (!connectorIndexingStatuses && isConnectorIndexingStatusesLoading) ||
+    (!credentialsData && isCredentialsLoading)
+  ) {
+    return <LoadingAnimation text="Loading" />;
+  }
+
+  if (isConnectorIndexingStatusesError || !connectorIndexingStatuses) {
+    return <div>Failed to load connectors</div>;
+  }
+
+  if (isCredentialsError || !credentialsData) {
+    return <div>Failed to load credentials</div>;
+  }
+
+  const bookstackConnectorIndexingStatuses = connectorIndexingStatuses.filter(
+    (connectorIndexingStatus) =>
+      connectorIndexingStatus.connector.source === "bookstack"
+  );
+  const bookstackCredential = credentialsData.filter(
+    (credential) => credential.credential_json?.bookstack_api_token_id
+  )[0];
+
+  return (
+    <>
+      {popup}
+      <h2 className="font-bold mb-2 mt-6 ml-auto mr-auto">
+        Step 1: Provide your access token
+      </h2>
+
+      {bookstackCredential ? (
+        <>
+          <div className="flex mb-1 text-sm">
+            <p className="my-auto">Existing API Token: </p>
+            <p className="ml-1 italic my-auto max-w-md">
+              {bookstackCredential.credential_json?.bookstack_api_token_id}
+            </p>
+            <button
+              className="ml-1 hover:bg-gray-700 rounded-full p-1"
+              onClick={async () => {
+                if (bookstackConnectorIndexingStatuses.length > 0) {
+                  setPopup({
+                    type: "error",
+                    message:
+                      "Must delete all connectors before deleting credentials",
+                  });
+                  return;
+                }
+                await deleteCredential(bookstackCredential.id);
+                mutate("/api/manage/credential");
+              }}
+            >
+              <TrashIcon />
+            </button>
+          </div>
+        </>
+      ) : (
+        <>
+          <p className="text-sm">
+            To get started you'll need API token details for your BookStack instance.
+            You can get these by editing your (or another) user account in BookStack
+            and creating a token via the "API Tokens" section at the bottom.
+            Your user account will require to be assigned a BookStack role which
+            has the "Access system API" system permission assigned.
+          </p>
+          <div className="border-solid border-gray-600 border rounded-md p-6 mt-2 mb-4">
+            <CredentialForm<BookstackCredentialJson>
+              formBody={
+                <>
+                  <TextFormField name="bookstack_base_url" label="Instance Base URL:" />
+                  <TextFormField name="bookstack_api_token_id" label="API Token ID:" />
+                  <TextFormField
+                    name="bookstack_api_token_secret"
+                    label="API Token Secret:"
+                    type="password"
+                  />
+                </>
+              }
+              validationSchema={Yup.object().shape({
+                bookstack_base_url: Yup.string().required(
+                  "Please enter the base URL for your BookStack instance"
+                ),
+                bookstack_api_token_id: Yup.string().required(
+                  "Please enter your BookStack API token ID"
+                ),
+                bookstack_api_token_secret: Yup.string().required(
+                  "Please enter your BookStack API token secret"
+                ),
+              })}
+              initialValues={{
+                bookstack_base_url: "",
+                bookstack_api_token_id: "",
+                bookstack_api_token_secret: "",
+              }}
+              onSubmit={(isSuccess) => {
+                if (isSuccess) {
+                  mutate("/api/manage/credential");
+                  mutate("/api/manage/admin/connector/indexing-status");
+                }
+              }}
+            />
+          </div>
+        </>
+      )}
+
+      {bookstackConnectorIndexingStatuses.length > 0 && (
+        <>
+          <h2 className="font-bold mb-2 mt-6 ml-auto mr-auto">
+            BookStack indexing status
+          </h2>
+          <p className="text-sm mb-2">
+            The latest page, chapter, book and shelf changes are fetched
+            every 10 minutes.
+          </p>
+          <div className="mb-2">
+            <ConnectorsTable<BookstackConfig, BookstackCredentialJson>
+              connectorIndexingStatuses={
+                bookstackConnectorIndexingStatuses
+              }
+              liveCredential={bookstackCredential}
+              getCredential={(credential) => {
+                return (
+                  <div>
+                    <p>
+                      {credential.credential_json.bookstack_api_token_id}
+                    </p>
+                  </div>
+                );
+              }}
+              onCredentialLink={async (connectorId) => {
+                if (bookstackCredential) {
+                  await linkCredential(
+                    connectorId,
+                    bookstackCredential.id
+                  );
+                  mutate("/api/manage/admin/connector/indexing-status");
+                }
+              }}
+              onUpdate={() =>
+                mutate("/api/manage/admin/connector/indexing-status")
+              }
+            />
+          </div>
+        </>
+      )}
+
+      <div className="border-solid border-gray-600 border rounded-md p-6 mt-4">
+        <h2 className="font-bold mb-3">Setup Connector</h2>
+        <ConnectorForm<BookstackConfig>
+          nameBuilder={(values) =>
+            `BookStackConnector`
+          }
+          source="bookstack"
+          inputType="load_state"
+          formBody={
+            <>
+            </>
+          }
+          validationSchema={Yup.object().shape({
+          })}
+          initialValues={{
+          }}
+          refreshFreq={10 * 60} // 10 minutes
+          onSubmit={async (isSuccess, responseJson) => {
+            if (isSuccess && responseJson) {
+              await linkCredential(
+                responseJson.id,
+                bookstackCredential.id
+              );
+              mutate("/api/manage/admin/connector/indexing-status");
+            }
+          }}
+        />
+      </div>
+
+      {!bookstackCredential && (
+        <>
+          <p className="text-sm mb-4">
+            Please provide your API details in Step 1 first! Once done with that,
+            you'll be able to see indexing status.
+          </p>
+        </>
+      )}
+    </>
+  );
+};
+
+export default function Page() {
+  return (
+    <div className="mx-auto container">
+      <div className="mb-4">
+        <HealthCheckBanner />
+      </div>
+      <div className="border-solid border-gray-600 border-b mb-4 pb-2 flex">
+        <BookstackIcon size="32" />
+        <h1 className="text-3xl font-bold pl-2">BookStack</h1>
+      </div>
+      <Main />
+    </div>
+  );
+}
diff --git a/web/src/app/admin/layout.tsx b/web/src/app/admin/layout.tsx
index 3a0f4c42b..e8e89db62 100644
--- a/web/src/app/admin/layout.tsx
+++ b/web/src/app/admin/layout.tsx
@@ -7,6 +7,7 @@ import {
   GoogleDriveIcon,
   SlackIcon,
   KeyIcon,
+  BookstackIcon,
   ConfluenceIcon,
   FileIcon,
   JiraIcon,
@@ -83,6 +84,15 @@ export default async function AdminLayout({
                   ),
                   link: "/admin/connectors/google-drive",
                 },
+                {
+                  name: (
+                    <div className="flex">
+                      <BookstackIcon size="16" />
+                      <div className="ml-1">BookStack</div>
+                    </div>
+                  ),
+                  link: "/admin/connectors/bookstack",
+                },
                 {
                   name: (
                     <div className="flex">
diff --git a/web/src/components/icons/icons.tsx b/web/src/components/icons/icons.tsx
index c6c6cdcc7..b8593b7f4 100644
--- a/web/src/components/icons/icons.tsx
+++ b/web/src/components/icons/icons.tsx
@@ -13,6 +13,7 @@ import {
   Brain,
 } from "@phosphor-icons/react";
 import {
+  SiBookstack,
   SiConfluence,
   SiGithub,
   SiGoogledrive,
@@ -113,6 +114,13 @@ export const GoogleDriveIcon = ({
   return <SiGoogledrive size={size} className={className} />;
 };
 
+export const BookstackIcon = ({
+  size = "16",
+  className = defaultTailwindCSS,
+}: IconProps) => {
+  return <SiBookstack size={size} className={className} />;
+};
+
 export const ConfluenceIcon = ({
   size = "16",
   className = defaultTailwindCSS,
diff --git a/web/src/components/source.tsx b/web/src/components/source.tsx
index b9ef9e094..360ef6677 100644
--- a/web/src/components/source.tsx
+++ b/web/src/components/source.tsx
@@ -1,5 +1,6 @@
 import { ValidSources } from "@/lib/types";
 import {
+  BookstackIcon,
   ConfluenceIcon,
   FileIcon,
   GithubIcon,
@@ -48,6 +49,12 @@ export const getSourceMetadata = (sourceType: ValidSources): SourceMetadata => {
         displayName: "Github PRs",
         adminPageLink: "/admin/connectors/github",
       };
+    case "bookstack":
+      return {
+        icon: BookstackIcon,
+        displayName: "BookStack",
+        adminPageLink: "/admin/connectors/bookstack",
+      };
     case "confluence":
       return {
         icon: ConfluenceIcon,
diff --git a/web/src/lib/types.ts b/web/src/lib/types.ts
index 9c3453275..68274bb50 100644
--- a/web/src/lib/types.ts
+++ b/web/src/lib/types.ts
@@ -12,6 +12,7 @@ export type ValidSources =
   | "github"
   | "slack"
   | "google_drive"
+  | "bookstack"
   | "confluence"
   | "jira"
   | "slab"
@@ -44,6 +45,9 @@ export interface GithubConfig {
   repo_name: string;
 }
 
+export interface BookstackConfig {
+}
+
 export interface ConfluenceConfig {
   wiki_page_url: string;
 }
@@ -90,6 +94,12 @@ export interface GithubCredentialJson {
   github_access_token: string;
 }
 
+export interface BookstackCredentialJson {
+  bookstack_base_url: string;
+  bookstack_api_token_id: string;
+  bookstack_api_token_secret: string;
+}
+
 export interface ConfluenceCredentialJson {
   confluence_username: string;
   confluence_access_token: string;

From 44f905ef80a6b254f050a0d473af5fab6f5160f1 Mon Sep 17 00:00:00 2001
From: Dan Brown <ssddanbrown@googlemail.com>
Date: Thu, 6 Jul 2023 14:56:28 +0100
Subject: [PATCH 2/6] Added BookStack connector code

Sync now works for shelves, books, chapters and pages.
---
 .../danswer/connectors/bookstack/client.py    |  52 +++++
 .../danswer/connectors/bookstack/connector.py | 180 +++++++++++-------
 .../app/admin/connectors/bookstack/page.tsx   |   2 +-
 3 files changed, 165 insertions(+), 69 deletions(-)
 create mode 100644 backend/danswer/connectors/bookstack/client.py

diff --git a/backend/danswer/connectors/bookstack/client.py b/backend/danswer/connectors/bookstack/client.py
new file mode 100644
index 000000000..7cc38427f
--- /dev/null
+++ b/backend/danswer/connectors/bookstack/client.py
@@ -0,0 +1,52 @@
+import requests
+
+class BookStackClientRequestFailedError(ConnectionError):
+    def __init__(self, status: int, error: str) -> None:
+        super().__init__(
+            "BookStack Client request failed with status {status}: {error}".format(status=status, error=error)
+        )
+
+class BookStackApiClient:
+
+    def __init__(
+        self,
+        base_url: str,
+        token_id: str,
+        token_secret: str,
+    ) -> None:
+        self.base_url = base_url
+        self.token_id = token_id
+        self.token_secret = token_secret
+
+    def get(self, endpoint: str, params: dict[str, str]):
+        url: str = self._build_url(endpoint)
+        headers = self._build_headers()
+        response = requests.get(url, headers=headers, params=params)
+
+        try:
+            json = response.json()
+        except:
+            json = {}
+            pass
+
+        if response.status_code >= 300:
+            error = response.reason
+            response_error = json.get("error", {}).get("message", "")
+            if response_error:
+                error = response_error
+            raise BookStackClientRequestFailedError(response.status_code, error)
+
+        return json
+
+    def _build_headers(self):
+        auth = 'Token ' + self.token_id + ':' + self.token_secret
+        return {
+            'Authorization': auth,
+            'Accept': 'application/json',
+        }
+
+    def _build_url(self, endpoint: str):
+        return self.base_url.rstrip('/') + '/api/' + endpoint.lstrip('/')
+
+    def build_app_url(self, endpoint: str):
+        return self.base_url.rstrip('/') + '/' + endpoint.lstrip('/')
diff --git a/backend/danswer/connectors/bookstack/connector.py b/backend/danswer/connectors/bookstack/connector.py
index 9a6dee65f..bab007875 100644
--- a/backend/danswer/connectors/bookstack/connector.py
+++ b/backend/danswer/connectors/bookstack/connector.py
@@ -1,11 +1,9 @@
+import html
+import time
 from collections.abc import Callable
-from collections.abc import Generator
 from datetime import datetime
-from datetime import timezone
 from typing import Any
-from urllib.parse import urlparse
 
-from atlassian import Confluence  # type:ignore
 from bs4 import BeautifulSoup
 from danswer.configs.app_configs import INDEX_BATCH_SIZE
 from danswer.configs.constants import DocumentSource
@@ -14,105 +12,151 @@ from danswer.connectors.interfaces import GenerateDocumentsOutput
 from danswer.connectors.interfaces import LoadConnector
 from danswer.connectors.interfaces import PollConnector
 from danswer.connectors.interfaces import SecondsSinceUnixEpoch
+from danswer.connectors.bookstack.client import BookStackApiClient
 from danswer.connectors.models import Document
 from danswer.connectors.models import Section
 
+
 class BookstackClientNotSetUpError(PermissionError):
     def __init__(self) -> None:
         super().__init__(
-            "Confluence Client is not set up, was load_credentials called?"
+            "BookStack Client is not set up, was load_credentials called?"
         )
 
+
 class BookstackConnector(LoadConnector, PollConnector):
     def __init__(
         self,
         batch_size: int = INDEX_BATCH_SIZE,
     ) -> None:
         self.batch_size = batch_size
+        self.bookstack_client: BookStackApiClient | None = None
 
     def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
-        base_url = credentials["bookstack_base_url"]
-        api_token_id = credentials["bookstack_api_token_id"]
-        api_token_secret = credentials["bookstack_api_token_secret"]
+        self.bookstack_client = BookStackApiClient(
+            base_url=credentials["bookstack_base_url"],
+            token_id=credentials["bookstack_api_token_id"],
+            token_secret=credentials["bookstack_api_token_secret"],
+        )
         return None
 
     def _get_doc_batch(
-        self, start_ind: int, time_filter: Callable[[datetime], bool] | None = None
+        self,
+        endpoint: str,
+        transformer: Callable[[dict], Document],
+        start_ind: int,
+        start: SecondsSinceUnixEpoch | None = None,
+        end: SecondsSinceUnixEpoch | None = None,
     ) -> tuple[list[Document], int]:
         doc_batch: list[Document] = []
 
-        if self.confluence_client is None:
-            raise BookstackClientNotSetUpError()
+        params = {
+            "count": str(self.batch_size),
+            "offset": str(start_ind),
+            "sort": "+id"
+        }
 
-        batch = self.confluence_client.get_all_pages_from_space(
-            self.space,
-            start=start_ind,
-            limit=self.batch_size,
-            expand="body.storage.value,version",
-        )
+        if start:
+            params["filter[updated_at:gte]"] = datetime.utcfromtimestamp(start).strftime('%Y-%m-%d %H:%M:%S')
 
-        for page in batch:
-            last_modified_str = page["version"]["when"]
-            last_modified = datetime.fromisoformat(last_modified_str)
+        if end:
+            params["filter[updated_at:lte]"] = datetime.utcfromtimestamp(end).strftime('%Y-%m-%d %H:%M:%S')
 
-            if time_filter is None or time_filter(last_modified):
-                page_html = page["body"]["storage"]["value"]
-                soup = BeautifulSoup(page_html, "html.parser")
-                page_text = page.get("title", "") + "\n" + soup.get_text(HTML_SEPARATOR)
-                comment_pages = self.confluence_client.get_page_child_by_type(
-                    page["id"],
-                    type="comment",
-                    start=None,
-                    limit=None,
-                    expand="body.storage.value",
-                )
-                comments_text = _comment_dfs("", comment_pages, self.confluence_client)
-                page_text += comments_text
+        batch = self.bookstack_client.get(endpoint, params=params).get("data", [])
+        for item in batch:
+            doc_batch.append(transformer(item))
 
-                page_url = self.wiki_base + page["_links"]["webui"]
-
-                doc_batch.append(
-                    Document(
-                        id=page_url,
-                        sections=[Section(link=page_url, text=page_text)],
-                        source=DocumentSource.CONFLUENCE,
-                        semantic_identifier=page["title"],
-                        metadata={},
-                    )
-                )
         return doc_batch, len(batch)
 
+    def _book_to_document(self, book: dict):
+        url = self.bookstack_client.build_app_url("/books/" + book.get("slug"))
+        text = book.get("name", "") + "\n" + book.get("description", "")
+        return Document(
+            id=url,
+            sections=[Section(link=url, text=text)],
+            source=DocumentSource.BOOKSTACK,
+            semantic_identifier="Book: " + book.get("name"),
+            metadata={
+                "type": "book",
+                "updated_at": book.get("updated_at")
+            },
+        )
+
+    def _chapter_to_document(self, chapter: dict):
+        url = self.bookstack_client.build_app_url("/books/" + chapter.get("book_slug") + "/chapter/" + chapter.get("slug"))
+        text = chapter.get("name", "") + "\n" + chapter.get("description", "")
+        return Document(
+            id=url,
+            sections=[Section(link=url, text=text)],
+            source=DocumentSource.BOOKSTACK,
+            semantic_identifier="Chapter: " + chapter.get("name"),
+            metadata={
+                "type": "chapter",
+                "updated_at": chapter.get("updated_at")
+            },
+        )
+
+    def _shelf_to_document(self, shelf: dict):
+        url = self.bookstack_client.build_app_url("/shelves/" + shelf.get("slug"))
+        text = shelf.get("name", "") + "\n" + shelf.get("description", "")
+        return Document(
+            id=url,
+            sections=[Section(link=url, text=text)],
+            source=DocumentSource.BOOKSTACK,
+            semantic_identifier="Shelf: " + shelf.get("name"),
+            metadata={
+                "type": "shelf",
+                "updated_at": shelf.get("updated_at")
+            },
+        )
+
+    def _page_to_document(self, page: dict):
+        page_id = str(page.get("id"))
+        page_data = self.bookstack_client.get("/pages/" + page_id, {})
+        url = self.bookstack_client.build_app_url("/books/" + page.get("book_slug") + "/page/" + page_data.get("slug"))
+        page_html = "<h1>" + html.escape(page_data.get("name")) + "</h1>" + page_data.get("html")
+        soup = BeautifulSoup(page_html, "html.parser")
+        text = soup.get_text(HTML_SEPARATOR)
+        time.sleep(0.1)
+        return Document(
+            id=url,
+            sections=[Section(link=url, text=text)],
+            source=DocumentSource.BOOKSTACK,
+            semantic_identifier="Page: " + page_data.get("name"),
+            metadata={
+                "type": "page",
+                "updated_at": page_data.get("updated_at")
+            },
+        )
+
     def load_from_state(self) -> GenerateDocumentsOutput:
-        if self.confluence_client is None:
+        if self.bookstack_client is None:
             raise BookstackClientNotSetUpError()
 
-        start_ind = 0
-        while True:
-            doc_batch, num_pages = self._get_doc_batch(start_ind)
-            start_ind += num_pages
-            if doc_batch:
-                yield doc_batch
-
-            if num_pages < self.batch_size:
-                break
+        return self.poll_source(None, None)
 
     def poll_source(
-        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
+        self, start: SecondsSinceUnixEpoch | None, end: SecondsSinceUnixEpoch | None
     ) -> GenerateDocumentsOutput:
-        if self.confluence_client is None:
+        if self.bookstack_client is None:
             raise BookstackClientNotSetUpError()
 
-        start_time = datetime.fromtimestamp(start, tz=timezone.utc)
-        end_time = datetime.fromtimestamp(end, tz=timezone.utc)
+        transform_by_endpoint: dict[str, Callable[[dict], Document]] = {
+            "/books": self._book_to_document,
+            "/chapters": self._chapter_to_document,
+            "/shelves": self._shelf_to_document,
+            "/pages": self._page_to_document,
+        }
 
-        start_ind = 0
-        while True:
-            doc_batch, num_pages = self._get_doc_batch(
-                start_ind, time_filter=lambda t: start_time <= t <= end_time
-            )
-            start_ind += num_pages
-            if doc_batch:
-                yield doc_batch
+        for endpoint, transform in transform_by_endpoint.items():
+            start_ind = 0
+            while True:
+                doc_batch, num_results = self._get_doc_batch(endpoint, transform, start_ind, start, end)
+                start_ind += num_results
+                if doc_batch:
+                    yield doc_batch
 
-            if num_pages < self.batch_size:
-                break
+                if num_results < self.batch_size:
+                    break
+                else:
+                    time.sleep(0.2)
diff --git a/web/src/app/admin/connectors/bookstack/page.tsx b/web/src/app/admin/connectors/bookstack/page.tsx
index 2c38bfe6a..7c730d3d7 100644
--- a/web/src/app/admin/connectors/bookstack/page.tsx
+++ b/web/src/app/admin/connectors/bookstack/page.tsx
@@ -9,7 +9,7 @@ import {
   BookstackCredentialJson,
   BookstackConfig,
   Credential,
-  ConnectorIndexingStatus, ConfluenceConfig,
+  ConnectorIndexingStatus,
 } from "@/lib/types";
 import useSWR, { useSWRConfig } from "swr";
 import { fetcher } from "@/lib/fetcher";

From f587161577f0c3efab032d19fd5121a8b3f88a5d Mon Sep 17 00:00:00 2001
From: Dan Brown <ssddanbrown@googlemail.com>
Date: Thu, 6 Jul 2023 16:02:53 +0100
Subject: [PATCH 3/6] Added BookStack to filters, changed inputType from load_state to poll

---
 web/src/app/admin/connectors/bookstack/page.tsx | 2 +-
 web/src/components/search/Filters.tsx           | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/web/src/app/admin/connectors/bookstack/page.tsx b/web/src/app/admin/connectors/bookstack/page.tsx
index 7c730d3d7..80f846d6c 100644
--- a/web/src/app/admin/connectors/bookstack/page.tsx
+++ b/web/src/app/admin/connectors/bookstack/page.tsx
@@ -193,7 +193,7 @@ const Main = () => {
             `BookStackConnector`
           }
           source="bookstack"
-          inputType="load_state"
+          inputType="poll"
           formBody={
             <>
             </>
diff --git a/web/src/components/search/Filters.tsx b/web/src/components/search/Filters.tsx
index 852c7fdd1..431e1a727 100644
--- a/web/src/components/search/Filters.tsx
+++ b/web/src/components/search/Filters.tsx
@@ -7,6 +7,7 @@ import { Source } from "@/lib/search/interfaces";
 const sources: Source[] = [
   { displayName: "Google Drive", internalName: "google_drive" },
   { displayName: "Slack", internalName: "slack" },
+  { displayName: "BookStack", internalName: "bookstack" },
   { displayName: "Confluence", internalName: "confluence" },
   { displayName: "Jira", internalName: "jira" },
   { displayName: "Slab", internalName: "slab" },

From 104a248b112a7a6384ac32fb3c597f55be1ae8e7 Mon Sep 17 00:00:00 2001
From: Dan Brown <ssddanbrown@googlemail.com>
Date: Thu, 6 Jul 2023 16:35:21 +0100
Subject: [PATCH 4/6] Cleaned up bookstack connector admin panel

Also fixed ESLint issues (unescaped quotes in JSX text)
---
 .../app/admin/connectors/bookstack/page.tsx   | 75 ++++++++++---------
 1 file changed, 40 insertions(+), 35 deletions(-)

diff --git a/web/src/app/admin/connectors/bookstack/page.tsx b/web/src/app/admin/connectors/bookstack/page.tsx
index 80f846d6c..3d798f635 100644
--- a/web/src/app/admin/connectors/bookstack/page.tsx
+++ b/web/src/app/admin/connectors/bookstack/page.tsx
@@ -67,7 +67,7 @@ const Main = () => {
     <>
       {popup}
       <h2 className="font-bold mb-2 mt-6 ml-auto mr-auto">
-        Step 1: Provide your access token
+        Step 1: Provide your API details
       </h2>
 
       {bookstackCredential ? (
@@ -99,11 +99,11 @@ const Main = () => {
       ) : (
         <>
           <p className="text-sm">
-            To get started you'll need API token details for your BookStack instance.
+            To get started you&apos;ll need API token details for your BookStack instance.
             You can get these by editing your (or another) user account in BookStack
-            and creating a token via the "API Tokens" section at the bottom.
+            and creating a token via the &apos;API Tokens&apos; section at the bottom.
             Your user account will require to be assigned a BookStack role which
-            has the "Access system API" system permission assigned.
+            has the &apos;Access system API&apos; system permission assigned.
           </p>
           <div className="border-solid border-gray-600 border rounded-md p-6 mt-2 mb-4">
             <CredentialForm<BookstackCredentialJson>
@@ -145,7 +145,7 @@ const Main = () => {
         </>
       )}
 
-      {bookstackConnectorIndexingStatuses.length > 0 && (
+      {bookstackConnectorIndexingStatuses.length > 0 ? (
         <>
           <h2 className="font-bold mb-2 mt-6 ml-auto mr-auto">
             BookStack indexing status
@@ -184,42 +184,47 @@ const Main = () => {
             />
           </div>
         </>
+      ) : (
+          <>
+            <div className="border-solid border-gray-600 border rounded-md p-6 mt-4">
+              <h2 className="font-bold mb-3">Create Connection</h2>
+              <p className="text-sm mb-4">
+                Press connect below to start the connection to your BookStack instance.
+              </p>
+              <ConnectorForm<BookstackConfig>
+                nameBuilder={(values) =>
+                  `BookStackConnector`
+                }
+                source="bookstack"
+                inputType="poll"
+                formBody={
+                  <>
+                  </>
+                }
+                validationSchema={Yup.object().shape({
+                })}
+                initialValues={{
+                }}
+                refreshFreq={10 * 60} // 10 minutes
+                onSubmit={async (isSuccess, responseJson) => {
+                  if (isSuccess && responseJson) {
+                    await linkCredential(
+                      responseJson.id,
+                      bookstackCredential.id
+                    );
+                    mutate("/api/manage/admin/connector/indexing-status");
+                  }
+                }}
+              />
+            </div>
+          </>
       )}
 
-      <div className="border-solid border-gray-600 border rounded-md p-6 mt-4">
-        <h2 className="font-bold mb-3">Setup Connector</h2>
-        <ConnectorForm<BookstackConfig>
-          nameBuilder={(values) =>
-            `BookStackConnector`
-          }
-          source="bookstack"
-          inputType="poll"
-          formBody={
-            <>
-            </>
-          }
-          validationSchema={Yup.object().shape({
-          })}
-          initialValues={{
-          }}
-          refreshFreq={10 * 60} // 10 minutes
-          onSubmit={async (isSuccess, responseJson) => {
-            if (isSuccess && responseJson) {
-              await linkCredential(
-                responseJson.id,
-                bookstackCredential.id
-              );
-              mutate("/api/manage/admin/connector/indexing-status");
-            }
-          }}
-        />
-      </div>
-
       {!bookstackCredential && (
         <>
           <p className="text-sm mb-4">
             Please provide your API details in Step 1 first! Once done with that,
-            you'll be able to see indexing status.
+            you&apos;ll be able to start the connection then see indexing status.
           </p>
         </>
       )}

From 019e474a4e1dbccfaff876b7db26d70f1a237cdd Mon Sep 17 00:00:00 2001
From: Dan Brown <ssddanbrown@googlemail.com>
Date: Thu, 6 Jul 2023 17:04:31 +0100
Subject: [PATCH 5/6] BookStack connector: Changed to use ID-based document IDs instead of URLs

---
 backend/danswer/connectors/bookstack/connector.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/backend/danswer/connectors/bookstack/connector.py b/backend/danswer/connectors/bookstack/connector.py
index bab007875..22637cfed 100644
--- a/backend/danswer/connectors/bookstack/connector.py
+++ b/backend/danswer/connectors/bookstack/connector.py
@@ -72,7 +72,7 @@ class BookstackConnector(LoadConnector, PollConnector):
         url = self.bookstack_client.build_app_url("/books/" + book.get("slug"))
         text = book.get("name", "") + "\n" + book.get("description", "")
         return Document(
-            id=url,
+            id="book:" + str(book.get("id")),
             sections=[Section(link=url, text=text)],
             source=DocumentSource.BOOKSTACK,
             semantic_identifier="Book: " + book.get("name"),
@@ -86,7 +86,7 @@ class BookstackConnector(LoadConnector, PollConnector):
         url = self.bookstack_client.build_app_url("/books/" + chapter.get("book_slug") + "/chapter/" + chapter.get("slug"))
         text = chapter.get("name", "") + "\n" + chapter.get("description", "")
         return Document(
-            id=url,
+            id="chapter:" + str(chapter.get("id")),
             sections=[Section(link=url, text=text)],
             source=DocumentSource.BOOKSTACK,
             semantic_identifier="Chapter: " + chapter.get("name"),
@@ -100,7 +100,7 @@ class BookstackConnector(LoadConnector, PollConnector):
         url = self.bookstack_client.build_app_url("/shelves/" + shelf.get("slug"))
         text = shelf.get("name", "") + "\n" + shelf.get("description", "")
         return Document(
-            id=url,
+            id="shelf:" + str(shelf.get("id")),
             sections=[Section(link=url, text=text)],
             source=DocumentSource.BOOKSTACK,
             semantic_identifier="Shelf: " + shelf.get("name"),
@@ -119,7 +119,7 @@ class BookstackConnector(LoadConnector, PollConnector):
         text = soup.get_text(HTML_SEPARATOR)
         time.sleep(0.1)
         return Document(
-            id=url,
+            id="page:" + page_id,
             sections=[Section(link=url, text=text)],
             source=DocumentSource.BOOKSTACK,
             semantic_identifier="Page: " + page_data.get("name"),

From 148d9c358f491e15b1c9a72703263162015cd2d3 Mon Sep 17 00:00:00 2001
From: Dan Brown <ssddanbrown@googlemail.com>
Date: Thu, 6 Jul 2023 17:24:04 +0100
Subject: [PATCH 6/6] Fixed incorrect active panel in BookStack connector

---
 web/src/app/admin/connectors/bookstack/page.tsx | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/web/src/app/admin/connectors/bookstack/page.tsx b/web/src/app/admin/connectors/bookstack/page.tsx
index 3d798f635..564493f95 100644
--- a/web/src/app/admin/connectors/bookstack/page.tsx
+++ b/web/src/app/admin/connectors/bookstack/page.tsx
@@ -145,7 +145,7 @@ const Main = () => {
         </>
       )}
 
-      {bookstackConnectorIndexingStatuses.length > 0 ? (
+      {bookstackConnectorIndexingStatuses.length > 0 && (
         <>
           <h2 className="font-bold mb-2 mt-6 ml-auto mr-auto">
             BookStack indexing status
@@ -184,7 +184,9 @@ const Main = () => {
             />
           </div>
         </>
-      ) : (
+      )}
+
+      {bookstackCredential && bookstackConnectorIndexingStatuses.length === 0 && (
           <>
             <div className="border-solid border-gray-600 border rounded-md p-6 mt-4">
               <h2 className="font-bold mb-3">Create Connection</h2>