Document360 Connector (#552)

This commit is contained in:
nlp8899 2023-10-11 23:10:01 -04:00 committed by GitHub
parent 12442c1c06
commit 90828008e1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 465 additions and 2 deletions

View File

@ -60,6 +60,7 @@ class DocumentSource(str, Enum):
ZULIP = "zulip"
LINEAR = "linear"
HUBSPOT = "hubspot"
DOCUMENT360 = "document360"
GONG = "gong"
GOOGLE_SITES = "google_sites"

View File

@ -0,0 +1,171 @@
from datetime import datetime
from datetime import timezone
from typing import Any
from typing import List
from typing import Optional

import requests
from bs4 import BeautifulSoup

from danswer.configs.app_configs import INDEX_BATCH_SIZE
from danswer.configs.constants import DocumentSource
from danswer.connectors.interfaces import GenerateDocumentsOutput
from danswer.connectors.interfaces import LoadConnector
from danswer.connectors.interfaces import PollConnector
from danswer.connectors.interfaces import SecondsSinceUnixEpoch
from danswer.connectors.models import ConnectorMissingCredentialError
from danswer.connectors.models import Document
from danswer.connectors.models import Section
# Root of the Document360 portal UI; links to individual articles are built from
# it. Note that the value already ends with a slash.
DOCUMENT360_BASE_URL = "https://preview.portal.document360.io/"
# Root of the Document360 REST API (v2); request endpoints are appended to it.
DOCUMENT360_API_BASE_URL = "https://apihub.document360.io/v2"
class Document360Connector(LoadConnector, PollConnector):
    """Connector that turns Document360 articles into ``Document`` batches.

    Articles are looked up for one workspace (matched against the API's
    ``version_code_name``) and can optionally be restricted to a set of
    category names. Both a full load and a time-windowed poll are supported.
    """

    # Upper bound, in seconds, on how long a single API call may block. Without
    # an explicit timeout ``requests`` never gives up on an unresponsive server.
    _REQUEST_TIMEOUT_SECONDS = 60

    def __init__(
        self,
        workspace: str,
        categories: List[str] | None = None,
        batch_size: int = INDEX_BATCH_SIZE,
        portal_id: Optional[str] = None,
        api_token: Optional[str] = None,
    ) -> None:
        """Store the connector configuration.

        Args:
            workspace: Name compared against each project version's
                ``version_code_name`` to pick the workspace to index.
            categories: Category names to index. ``None`` or an empty list
                means "index every category".
            batch_size: Maximum number of documents per yielded batch.
            portal_id: Portal identifier used when building article links.
            api_token: Document360 API token; normally supplied later through
                ``load_credentials``.
        """
        self.portal_id = portal_id
        self.workspace = workspace
        self.categories = categories
        self.batch_size = batch_size
        self.api_token = api_token

    def load_credentials(self, credentials: dict[str, Any]) -> Optional[dict[str, Any]]:
        """Read the API token and portal ID from ``credentials``.

        Missing keys leave the corresponding attribute set to ``None``.
        Always returns ``None``.
        """
        self.api_token = credentials.get("document360_api_token")
        self.portal_id = credentials.get("portal_id")
        return None

    def _make_request(self, endpoint: str, params: Optional[dict] = None) -> Any:
        """GET ``endpoint`` from the Document360 API and return its ``data`` field.

        Raises:
            ConnectorMissingCredentialError: If no API token has been set.
            requests.HTTPError: If the API answers with an error status.
            requests.Timeout: If the API does not answer in time.
        """
        if not self.api_token:
            raise ConnectorMissingCredentialError("Document360")

        headers = {"accept": "application/json", "api_token": self.api_token}
        response = requests.get(
            f"{DOCUMENT360_API_BASE_URL}/{endpoint}",
            headers=headers,
            params=params,
            timeout=self._REQUEST_TIMEOUT_SECONDS,
        )
        response.raise_for_status()
        return response.json()["data"]

    def _get_workspace_id_by_name(self) -> str:
        """Return the ID of the project version whose code name is ``self.workspace``.

        Raises:
            ConnectorMissingCredentialError: If no project version matches.
                NOTE(review): the error type is kept for backward
                compatibility even though the cause is an unknown workspace
                name rather than a missing credential.
        """
        projects = self._make_request("ProjectVersions")
        workspace_id = next(
            (
                project["id"]
                for project in projects
                if project["version_code_name"] == self.workspace
            ),
            None,
        )
        if workspace_id is None:
            raise ConnectorMissingCredentialError("Document360")
        return workspace_id

    def _get_articles_with_category(self, workspace_id: str) -> Any:
        """List ``{"id", "category_name"}`` entries for the selected categories.

        A top-level category is selected when no category filter is configured
        or when its name is in ``self.categories``. Articles of its direct
        child categories are included as well, labelled with the child's name.
        Only one level of child categories is walked.
        """
        all_categories = self._make_request(
            f"ProjectVersions/{workspace_id}/categories"
        )
        articles_with_category = []
        for category in all_categories:
            # An empty list must behave like ``None``: the configuration may
            # carry ``[]`` when no category was specified, which is documented
            # to the user as "index all categories".
            if self.categories and category["name"] not in self.categories:
                continue
            for article in category["articles"]:
                articles_with_category.append(
                    {"id": article["id"], "category_name": category["name"]}
                )
            for child_category in category["child_categories"]:
                for article in child_category["articles"]:
                    articles_with_category.append(
                        {
                            "id": article["id"],
                            "category_name": child_category["name"],
                        }
                    )
        return articles_with_category

    def _process_articles(
        self, start: datetime | None = None, end: datetime | None = None
    ) -> GenerateDocumentsOutput:
        """Yield batches of documents for articles modified within ``[start, end]``.

        Args:
            start: Naive UTC lower bound (inclusive); ``None`` disables it.
            end: Naive UTC upper bound (inclusive); ``None`` disables it.

        Yields:
            Lists of at most ``self.batch_size`` documents.

        Raises:
            ConnectorMissingCredentialError: If no API token has been set.
        """
        if self.api_token is None:
            raise ConnectorMissingCredentialError("Document360")

        workspace_id = self._get_workspace_id_by_name()
        articles = self._get_articles_with_category(workspace_id)

        # The configured base URL ends with "/"; strip it so the link does not
        # contain a double slash after the host.
        portal_base = DOCUMENT360_BASE_URL.rstrip("/")

        doc_batch: List[Document] = []
        for article in articles:
            article_details = self._make_request(
                f"Articles/{article['id']}", {"langCode": "en"}
            )

            # The trailing "Z" marks the timestamp as UTC; it is kept naive so
            # it can be compared with the naive UTC bounds.
            updated_at = datetime.strptime(
                article_details["modified_at"], "%Y-%m-%dT%H:%M:%S.%fZ"
            ).replace(tzinfo=None)
            if start is not None and updated_at < start:
                continue
            if end is not None and updated_at > end:
                continue

            doc_link = (
                f"{portal_base}/{self.portal_id}/document/v1/view/{article['id']}"
            )

            html_content = article_details["html_content"]
            soup = BeautifulSoup(html_content, "html.parser")
            article_content = soup.get_text()
            doc_text = (
                f"workspace: {self.workspace}\n"
                f"category: {article['category_name']}\n"
                f"article: {article_details['title']} - "
                f"{article_details.get('description', '')} - "
                f"{article_content}"
            )

            document = Document(
                id=article_details["id"],
                sections=[Section(link=doc_link, text=doc_text)],
                source=DocumentSource.DOCUMENT360,
                semantic_identifier=article_details["title"],
                metadata={},
            )

            doc_batch.append(document)
            if len(doc_batch) >= self.batch_size:
                yield doc_batch
                doc_batch = []

        if doc_batch:
            yield doc_batch

    def load_from_state(self) -> GenerateDocumentsOutput:
        """Yield batches covering every selected article, with no time filter."""
        return self._process_articles()

    def poll_source(
        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
    ) -> GenerateDocumentsOutput:
        """Yield batches for articles modified between ``start`` and ``end``.

        Both bounds are seconds since the Unix epoch. They are converted to
        naive UTC datetimes because article timestamps are parsed as naive
        UTC; converting to local time would shift the window on any host
        whose timezone is not UTC.
        """
        start_datetime = datetime.fromtimestamp(start, tz=timezone.utc).replace(
            tzinfo=None
        )
        end_datetime = datetime.fromtimestamp(end, tz=timezone.utc).replace(
            tzinfo=None
        )
        return self._process_articles(start_datetime, end_datetime)
if __name__ == "__main__":
    import time

    # Manual smoke test: poll the last 24 hours and print each yielded batch.
    connector = Document360Connector("Your Workspace", ["Your categories"])
    connector.load_credentials(
        {"portal_id": "Your Portal ID", "document360_api_token": "Your API Token"}
    )

    now = time.time()
    seconds_per_day = 24 * 60 * 60
    window_start = now - seconds_per_day  # one day ago

    for batch in connector.poll_source(window_start, now):
        print(batch)

View File

@ -4,6 +4,7 @@ from typing import Type
from danswer.configs.constants import DocumentSource
from danswer.connectors.bookstack.connector import BookstackConnector
from danswer.connectors.confluence.connector import ConfluenceConnector
from danswer.connectors.document360.connector import Document360Connector
from danswer.connectors.danswer_jira.connector import JiraConnector
from danswer.connectors.file.connector import LocalFileConnector
from danswer.connectors.github.connector import GithubConnector
@ -54,6 +55,7 @@ def identify_connector_class(
DocumentSource.GURU: GuruConnector,
DocumentSource.LINEAR: LinearConnector,
DocumentSource.HUBSPOT: HubSpotConnector,
DocumentSource.DOCUMENT360: Document360Connector,
DocumentSource.GONG: GongConnector,
DocumentSource.GOOGLE_SITES: GoogleSitesConnector,
}

BIN
web/public/Document360.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 11 KiB

View File

@ -0,0 +1,244 @@
"use client";
import * as Yup from "yup";
import { TrashIcon, Document360Icon } from "@/components/icons/icons"; // Make sure you have a Document360 icon
import { fetcher } from "@/lib/fetcher";
import useSWR, { useSWRConfig } from "swr";
import { LoadingAnimation } from "@/components/Loading";
import { HealthCheckBanner } from "@/components/health/healthcheck";
import {
Document360Config,
Document360CredentialJson,
ConnectorIndexingStatus,
Credential,
} from "@/lib/types"; // Modify or create these types as required
import { adminDeleteCredential, linkCredential } from "@/lib/credential";
import { CredentialForm } from "@/components/admin/connectors/CredentialForm";
import {
TextFormField,
TextArrayFieldBuilder,
} from "@/components/admin/connectors/Field";
import { ConnectorsTable } from "@/components/admin/connectors/table/ConnectorsTable";
import { ConnectorForm } from "@/components/admin/connectors/ConnectorForm";
import { usePublicCredentials } from "@/lib/hooks";
// Body of the Document360 admin page. Step 1 collects (or shows) the API
// token / portal ID credential; step 2 lists the existing Document360
// connectors and offers a form to create a new one.
const MainSection = () => {
  const { mutate } = useSWRConfig();
  const {
    data: connectorIndexingStatuses,
    isLoading: isConnectorIndexingStatusesLoading,
    error: isConnectorIndexingStatusesError,
  } = useSWR<ConnectorIndexingStatus<any, any>[]>(
    "/api/manage/admin/connector/indexing-status",
    fetcher
  );

  const {
    data: credentialsData,
    isLoading: isCredentialsLoading,
    error: isCredentialsError,
    refreshCredentials,
  } = usePublicCredentials();

  // Show the spinner only while a request is in flight and has no data yet.
  if (
    (!connectorIndexingStatuses && isConnectorIndexingStatusesLoading) ||
    (!credentialsData && isCredentialsLoading)
  ) {
    return <LoadingAnimation text="Loading" />;
  }

  if (isConnectorIndexingStatusesError || !connectorIndexingStatuses) {
    return <div>Failed to load connectors</div>;
  }

  if (isCredentialsError || !credentialsData) {
    return <div>Failed to load credentials</div>;
  }

  // Keep only the connectors that belong to this source.
  const document360ConnectorIndexingStatuses: ConnectorIndexingStatus<
    Document360Config,
    Document360CredentialJson
  >[] = connectorIndexingStatuses.filter(
    (connectorIndexingStatus) =>
      connectorIndexingStatus.connector.source === "document360"
  );

  // A credential counts as a Document360 credential when it carries the token.
  const document360Credential:
    | Credential<Document360CredentialJson>
    | undefined = credentialsData.find(
    (credential) => credential.credential_json?.document360_api_token
  );

  return (
    <>
      <h2 className="font-bold mb-2 mt-6 ml-auto mr-auto">
        Step 1: Provide Credentials
      </h2>
      {document360Credential ? (
        <>
          <div className="flex mb-1 text-sm">
            <p className="my-auto">Existing Document360 API Token: </p>
            <p className="ml-1 italic my-auto">
              {document360Credential.credential_json.document360_api_token}
            </p>
            <button
              className="ml-1 hover:bg-gray-700 rounded-full p-1"
              onClick={async () => {
                await adminDeleteCredential(document360Credential.id);
                refreshCredentials();
              }}
            >
              <TrashIcon />
            </button>
          </div>
        </>
      ) : (
        <>
          <p className="text-sm mb-4">
            To use the Document360 connector, you must first provide the API
            token and portal ID corresponding to your Document360 setup. For
            more details, see the{" "}
            <a
              className="text-blue-500"
              href="https://apidocs.document360.com/apidocs/api-token"
            >
              official Document360 documentation
            </a>
            .
          </p>
          <div className="border-solid border-gray-600 border rounded-md p-6 mt-2">
            <CredentialForm<Document360CredentialJson>
              formBody={
                <>
                  <TextFormField
                    name="document360_api_token"
                    label="Document360 API Token:"
                    type="password"
                  />
                  <TextFormField name="portal_id" label="Portal ID:" />
                </>
              }
              validationSchema={Yup.object().shape({
                document360_api_token: Yup.string().required(
                  "Please enter your Document360 API token"
                ),
                portal_id: Yup.string().required("Please enter your portal ID"),
              })}
              initialValues={{
                document360_api_token: "",
                portal_id: "",
              }}
              onSubmit={(isSuccess) => {
                if (isSuccess) {
                  refreshCredentials();
                }
              }}
            />
          </div>
        </>
      )}

      <h2 className="font-bold mb-2 mt-6 ml-auto mr-auto">
        Step 2: Which categories do you want to make searchable?
      </h2>

      {document360ConnectorIndexingStatuses.length > 0 && (
        <>
          <p className="text-sm mb-2">
            We index the latest articles from each workspace listed below
            regularly.
          </p>
          <div className="mb-2">
            <ConnectorsTable<Document360Config, Document360CredentialJson>
              connectorIndexingStatuses={document360ConnectorIndexingStatuses}
              liveCredential={document360Credential}
              getCredential={(credential) =>
                credential.credential_json.document360_api_token
              }
              specialColumns={[
                {
                  header: "Workspace",
                  key: "workspace",
                  getValue: (ccPairStatus) =>
                    ccPairStatus.connector.connector_specific_config.workspace,
                },
                {
                  header: "Categories",
                  key: "categories",
                  getValue: (ccPairStatus) =>
                    ccPairStatus.connector.connector_specific_config
                      .categories &&
                    ccPairStatus.connector.connector_specific_config.categories
                      .length > 0
                      ? ccPairStatus.connector.connector_specific_config.categories.join(
                          ", "
                        )
                      : "",
                },
              ]}
              onUpdate={() =>
                mutate("/api/manage/admin/connector/indexing-status")
              }
              onCredentialLink={async (connectorId) => {
                if (document360Credential) {
                  await linkCredential(connectorId, document360Credential.id);
                  mutate("/api/manage/admin/connector/indexing-status");
                }
              }}
            />
          </div>
        </>
      )}

      {document360Credential ? (
        <div className="border-solid border-gray-600 border rounded-md p-6 mt-4">
          <h2 className="font-bold mb-3">Connect to a New Workspace</h2>
          <ConnectorForm<Document360Config>
            nameBuilder={(values) =>
              // An empty array is truthy, so check the length explicitly;
              // otherwise a connector without categories would get a name
              // ending in a dangling "-".
              values.categories && values.categories.length > 0
                ? `Document360Connector-${
                    values.workspace
                  }-${values.categories.join("_")}`
                : `Document360Connector-${values.workspace}`
            }
            source="document360"
            inputType="poll"
            formBody={
              <>
                <TextFormField name="workspace" label="Workspace" />
              </>
            }
            formBodyBuilder={TextArrayFieldBuilder({
              name: "categories",
              label: "Categories:",
              subtext:
                "Specify 0 or more categories to index. For instance, specifying the category " +
                "'Help' will cause us to only index all content " +
                "within the 'Help' category. " +
                "If no categories are specified, all categories in your workspace will be indexed.",
            })}
            validationSchema={Yup.object().shape({
              workspace: Yup.string().required(
                "Please enter the workspace to index"
              ),
              categories: Yup.array()
                .of(Yup.string().required("Category names must be strings"))
                .required(),
            })}
            initialValues={{
              workspace: "",
              categories: [],
            }}
            refreshFreq={10 * 60} // 10 minutes
            credentialId={document360Credential.id}
          />
        </div>
      ) : (
        <p className="text-sm">
          Please provide your Document360 API token and portal ID in Step 1
          first! Once done with that, you can then specify which Document360
          categories you want to make searchable.
        </p>
      )}
    </>
  );
};
// Route component for the Document360 admin page: health banner, title bar
// with the connector logo, then the credential/connector sections.
export default function Page() {
  const titleBar = (
    <div className="border-solid border-gray-600 border-b mb-4 pb-2 flex">
      <Document360Icon size={32} />
      <h1 className="text-3xl font-bold pl-2">Document360</h1>
    </div>
  );

  return (
    <div className="mx-auto container">
      <div className="mb-4">
        <HealthCheckBanner />
      </div>
      {titleBar}
      <MainSection />
    </div>
  );
}

View File

@ -6,4 +6,4 @@ export default async function AdminLayout({
children: React.ReactNode;
}) {
return await Layout({ children });
}
}

View File

@ -22,6 +22,7 @@ import {
HubSpotIcon,
BookmarkIcon,
CPUIcon,
Document360Icon,
GoogleSitesIcon,
} from "@/components/icons/icons";
import { getAuthDisabledSS, getCurrentUserSS } from "@/lib/userSS";
@ -210,6 +211,15 @@ export async function Layout({ children }: { children: React.ReactNode }) {
),
link: "/admin/connectors/hubspot",
},
{
name: (
<div className="flex">
<Document360Icon size={16} />
<div className="ml-1">Document360</div>
</div>
),
link: "/admin/connectors/document360",
},
],
},
{

View File

@ -43,6 +43,7 @@ import gongIcon from "../../../public/Gong.png";
import zulipIcon from "../../../public/Zulip.png";
import linearIcon from "../../../public/Linear.png";
import hubSpotIcon from "../../../public/HubSpot.png";
import document360Icon from "../../../public/Document360.png";
import googleSitesIcon from "../../../public/GoogleSites.png";
interface IconProps {
@ -452,6 +453,20 @@ export const HubSpotIcon = ({
);
};
// Document360 logo wrapped in a square box that is 4px larger than `size`.
export const Document360Icon = ({
  size = 16,
  className = defaultTailwindCSS,
}: IconProps) => {
  // Shared edge length for both the inline style and the class names.
  const edge = `${size + 4}px`;
  const wrapperClasses = `w-[${edge}] h-[${edge}] -m-0.5 ${className}`;

  return (
    <div style={{ width: edge, height: edge }} className={wrapperClasses}>
      <Image src={document360Icon} alt="Logo" width="96" height="96" />
    </div>
  );
};
export const GoogleSitesIcon = ({
size = 16,
className = defaultTailwindCSS,

View File

@ -29,6 +29,7 @@ const sources: Source[] = [
{ displayName: "Zulip", internalName: "zulip" },
{ displayName: "Linear", internalName: "linear" },
{ displayName: "HubSpot", internalName: "hubspot" },
{ displayName: "Document360", internalName: "document360" },
{ displayName: "Google Sites", internalName: "google_sites" },
];

View File

@ -16,6 +16,7 @@ import {
SlackIcon,
ZulipIcon,
HubSpotIcon,
Document360Icon,
GoogleSitesIcon,
} from "./icons/icons";
@ -123,6 +124,12 @@ export const getSourceMetadata = (sourceType: ValidSources): SourceMetadata => {
displayName: "HubSpot",
adminPageLink: "/admin/connectors/hubspot",
};
case "document360":
return {
icon: Document360Icon,
displayName: "Document360",
adminPageLink: "/admin/connectors/document360",
};
case "google_sites":
return {
icon: GoogleSitesIcon,

View File

@ -23,8 +23,10 @@ export type ValidSources =
| "zulip"
| "linear"
| "hubspot"
| "file"
| "document360"
| "file";
| "google_sites";
export type ValidInputTypes = "load_state" | "poll" | "event";
export type ValidStatuses =
| "success"
@ -115,6 +117,11 @@ export interface NotionConfig {}
export interface HubSpotConfig {}
// Connector-specific configuration for a Document360 connector.
export interface Document360Config {
  // Name of the workspace to index.
  workspace: string;
  // Optional list of category names that restricts which articles are indexed.
  categories?: string[];
}
export interface GoogleSitesConfig {
zip_path: string;
base_url: string;
@ -227,6 +234,11 @@ export interface HubSpotCredentialJson {
hubspot_access_token: string;
}
// Credential payload stored for the Document360 connector.
export interface Document360CredentialJson {
  // Identifier of the Document360 portal.
  portal_id: string;
  // API token for the Document360 API.
  document360_api_token: string;
}
// DELETION
export interface DeletionAttemptSnapshot {