mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-04-07 19:38:19 +02:00
Document360 Connector (#552)
This commit is contained in:
parent
12442c1c06
commit
90828008e1
@ -60,6 +60,7 @@ class DocumentSource(str, Enum):
|
||||
ZULIP = "zulip"
|
||||
LINEAR = "linear"
|
||||
HUBSPOT = "hubspot"
|
||||
DOCUMENT360 = "document360"
|
||||
GONG = "gong"
|
||||
GOOGLE_SITES = "google_sites"
|
||||
|
||||
|
0
backend/danswer/connectors/document360/__init__.py
Normal file
0
backend/danswer/connectors/document360/__init__.py
Normal file
171
backend/danswer/connectors/document360/connector.py
Normal file
171
backend/danswer/connectors/document360/connector.py
Normal file
@ -0,0 +1,171 @@
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
from typing import List
|
||||
from typing import Optional
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
from danswer.configs.app_configs import INDEX_BATCH_SIZE
|
||||
from danswer.configs.constants import DocumentSource
|
||||
from danswer.connectors.interfaces import GenerateDocumentsOutput
|
||||
from danswer.connectors.interfaces import LoadConnector
|
||||
from danswer.connectors.interfaces import PollConnector
|
||||
from danswer.connectors.interfaces import SecondsSinceUnixEpoch
|
||||
from danswer.connectors.models import ConnectorMissingCredentialError
|
||||
from danswer.connectors.models import Document
|
||||
from danswer.connectors.models import Section
|
||||
|
||||
# Root of the Document360 portal; article links are built as
# f"{DOCUMENT360_BASE_URL}/<portal_id>/document/v1/view/<article_id>".
# Deliberately has NO trailing slash: the link builder inserts its own "/",
# and a trailing slash here produced "...document360.io//<portal_id>/..." URLs.
DOCUMENT360_BASE_URL = "https://preview.portal.document360.io"
# Root of the Document360 REST API; endpoints are appended as "/<endpoint>".
DOCUMENT360_API_BASE_URL = "https://apihub.document360.io/v2"
|
||||
|
||||
|
||||
class Document360Connector(LoadConnector, PollConnector):
    """Index Document360 articles, either in full or for a time window.

    The connector resolves the configured workspace name to a project-version
    id, lists the articles of the selected categories, fetches every article
    individually and yields them as batches of ``Document`` objects.
    """

    # Seconds to wait for the Document360 API before giving up. Without a
    # timeout ``requests`` can block forever on a stalled connection.
    _REQUEST_TIMEOUT_SECONDS = 60

    def __init__(
        self,
        workspace: str,
        categories: List[str] | None = None,
        batch_size: int = INDEX_BATCH_SIZE,
        portal_id: Optional[str] = None,
        api_token: Optional[str] = None,
    ) -> None:
        """Store the connector configuration.

        Args:
            workspace: Value compared against ``version_code_name`` of the
                project versions returned by the API.
            categories: Category names to index. ``None`` or an empty list
                means "index every category".
            batch_size: Maximum number of documents per yielded batch.
            portal_id: Portal id used when building article links.
            api_token: API token; normally supplied via ``load_credentials``.
        """
        self.portal_id = portal_id
        self.workspace = workspace
        self.categories = categories
        self.batch_size = batch_size
        self.api_token = api_token

    def load_credentials(self, credentials: dict[str, Any]) -> Optional[dict[str, Any]]:
        """Read ``document360_api_token`` and ``portal_id`` from *credentials*.

        Always returns ``None``.
        """
        self.api_token = credentials.get("document360_api_token")
        self.portal_id = credentials.get("portal_id")
        return None

    def _make_request(self, endpoint: str, params: Optional[dict] = None) -> Any:
        """GET ``{DOCUMENT360_API_BASE_URL}/{endpoint}`` and return its ``data`` field.

        Raises:
            ConnectorMissingCredentialError: if no API token has been set.
            requests.RequestException: on HTTP error status, connection
                failure or timeout.
        """
        if not self.api_token:
            raise ConnectorMissingCredentialError("Document360")

        headers = {"accept": "application/json", "api_token": self.api_token}

        response = requests.get(
            f"{DOCUMENT360_API_BASE_URL}/{endpoint}",
            headers=headers,
            params=params,
            timeout=self._REQUEST_TIMEOUT_SECONDS,
        )
        response.raise_for_status()

        return response.json()["data"]

    def _get_workspace_id_by_name(self) -> str:
        """Return the id of the project version whose code name is ``self.workspace``.

        Raises:
            ConnectorMissingCredentialError: if no project version matches.
        """
        projects = self._make_request("ProjectVersions")
        workspace_id = next(
            (
                project["id"]
                for project in projects
                if project["version_code_name"] == self.workspace
            ),
            None,
        )
        if workspace_id is None:
            # NOTE(review): an unknown workspace is a configuration problem
            # rather than a missing credential; the exception type is kept
            # unchanged so existing callers keep working.
            raise ConnectorMissingCredentialError("Document360")

        return workspace_id

    def _get_articles_with_category(self, workspace_id: str) -> Any:
        """List ``{"id", "category_name"}`` entries for the articles to index.

        Top-level categories are filtered by ``self.categories``; for each
        selected category its own articles and the articles of its direct
        child categories are included.
        """
        all_categories = self._make_request(
            f"ProjectVersions/{workspace_id}/categories"
        )
        articles_with_category = []

        for category in all_categories:
            # ``None`` *and* an empty list both mean "no filter": the admin
            # form submits ``[]`` when the user lists no categories, which the
            # previous ``is None`` check treated as "match nothing".
            if self.categories and category["name"] not in self.categories:
                continue

            for article in category["articles"]:
                articles_with_category.append(
                    {"id": article["id"], "category_name": category["name"]}
                )
            # Only direct children are walked; deeper nesting levels, if the
            # API returns any, are not visited here.
            for child_category in category["child_categories"]:
                for article in child_category["articles"]:
                    articles_with_category.append(
                        {
                            "id": article["id"],
                            "category_name": child_category["name"],
                        }
                    )
        return articles_with_category

    def _process_articles(
        self, start: datetime | None = None, end: datetime | None = None
    ) -> GenerateDocumentsOutput:
        """Yield batches of documents, optionally limited to ``[start, end]``.

        Args:
            start: Naive UTC lower bound on ``modified_at`` (inclusive), or
                ``None`` for no lower bound.
            end: Naive UTC upper bound on ``modified_at`` (inclusive), or
                ``None`` for no upper bound.

        Raises:
            ConnectorMissingCredentialError: if no API token has been set.
        """
        if self.api_token is None:
            raise ConnectorMissingCredentialError("Document360")

        workspace_id = self._get_workspace_id_by_name()
        articles = self._get_articles_with_category(workspace_id)

        # Tolerate a base URL configured with or without a trailing slash so
        # the link never contains "//" after the host.
        portal_root = DOCUMENT360_BASE_URL.rstrip("/")

        doc_batch: List[Document] = []

        for article in articles:
            # The language is fixed to English.
            article_details = self._make_request(
                f"Articles/{article['id']}", {"langCode": "en"}
            )

            # The trailing "Z" is matched literally, so the parsed value is a
            # naive datetime expressed in UTC.
            updated_at = datetime.strptime(
                article_details["modified_at"], "%Y-%m-%dT%H:%M:%S.%fZ"
            )
            if start is not None and updated_at < start:
                continue
            if end is not None and updated_at > end:
                continue

            doc_link = (
                f"{portal_root}/{self.portal_id}/document/v1/view/{article['id']}"
            )

            html_content = article_details["html_content"]
            soup = BeautifulSoup(html_content, "html.parser")
            article_content = soup.get_text()
            doc_text = (
                f"workspace: {self.workspace}\n"
                f"category: {article['category_name']}\n"
                f"article: {article_details['title']} - "
                f"{article_details.get('description', '')} - "
                f"{article_content}"
            )

            document = Document(
                id=article_details["id"],
                sections=[Section(link=doc_link, text=doc_text)],
                source=DocumentSource.DOCUMENT360,
                semantic_identifier=article_details["title"],
                metadata={},
            )

            doc_batch.append(document)

            if len(doc_batch) >= self.batch_size:
                yield doc_batch
                doc_batch = []

        if doc_batch:
            yield doc_batch

    def load_from_state(self) -> GenerateDocumentsOutput:
        """Yield every article of the selected categories, with no time filter."""
        return self._process_articles()

    def poll_source(
        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
    ) -> GenerateDocumentsOutput:
        """Yield articles whose ``modified_at`` lies within ``[start, end]``.

        Args:
            start: Window start, in seconds since the Unix epoch.
            end: Window end, in seconds since the Unix epoch.
        """
        # Local import: the module only imports ``datetime`` from ``datetime``.
        from datetime import timezone

        # Article timestamps are naive UTC, so the window must be naive UTC as
        # well. A bare ``datetime.fromtimestamp(ts)`` returns *local* time and
        # would shift the window by the host's UTC offset.
        start_datetime = datetime.fromtimestamp(start, tz=timezone.utc).replace(
            tzinfo=None
        )
        end_datetime = datetime.fromtimestamp(end, tz=timezone.utc).replace(
            tzinfo=None
        )
        return self._process_articles(start_datetime, end_datetime)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke test: poll the last 24 hours and print each yielded batch.
    # Replace the placeholder strings with real values before running.
    import time

    connector = Document360Connector("Your Workspace", ["Your categories"])
    connector.load_credentials(
        {"portal_id": "Your Portal ID", "document360_api_token": "Your API Token"}
    )

    window_end = time.time()
    window_start = window_end - 24 * 60 * 60  # one day, in seconds

    for doc_batch in connector.poll_source(window_start, window_end):
        print(doc_batch)
|
@ -4,6 +4,7 @@ from typing import Type
|
||||
from danswer.configs.constants import DocumentSource
|
||||
from danswer.connectors.bookstack.connector import BookstackConnector
|
||||
from danswer.connectors.confluence.connector import ConfluenceConnector
|
||||
from danswer.connectors.document360.connector import Document360Connector
|
||||
from danswer.connectors.danswer_jira.connector import JiraConnector
|
||||
from danswer.connectors.file.connector import LocalFileConnector
|
||||
from danswer.connectors.github.connector import GithubConnector
|
||||
@ -54,6 +55,7 @@ def identify_connector_class(
|
||||
DocumentSource.GURU: GuruConnector,
|
||||
DocumentSource.LINEAR: LinearConnector,
|
||||
DocumentSource.HUBSPOT: HubSpotConnector,
|
||||
DocumentSource.DOCUMENT360: Document360Connector,
|
||||
DocumentSource.GONG: GongConnector,
|
||||
DocumentSource.GOOGLE_SITES: GoogleSitesConnector,
|
||||
}
|
||||
|
BIN
web/public/Document360.png
Normal file
BIN
web/public/Document360.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 11 KiB |
244
web/src/app/admin/connectors/document360/page.tsx
Normal file
244
web/src/app/admin/connectors/document360/page.tsx
Normal file
@ -0,0 +1,244 @@
|
||||
"use client";
|
||||
|
||||
import * as Yup from "yup";
|
||||
import { TrashIcon, Document360Icon } from "@/components/icons/icons"; // Make sure you have a Document360 icon
|
||||
import { fetcher } from "@/lib/fetcher";
|
||||
import useSWR, { useSWRConfig } from "swr";
|
||||
import { LoadingAnimation } from "@/components/Loading";
|
||||
import { HealthCheckBanner } from "@/components/health/healthcheck";
|
||||
import {
|
||||
Document360Config,
|
||||
Document360CredentialJson,
|
||||
ConnectorIndexingStatus,
|
||||
Credential,
|
||||
} from "@/lib/types"; // Modify or create these types as required
|
||||
import { adminDeleteCredential, linkCredential } from "@/lib/credential";
|
||||
import { CredentialForm } from "@/components/admin/connectors/CredentialForm";
|
||||
import {
|
||||
TextFormField,
|
||||
TextArrayFieldBuilder,
|
||||
} from "@/components/admin/connectors/Field";
|
||||
import { ConnectorsTable } from "@/components/admin/connectors/table/ConnectorsTable";
|
||||
import { ConnectorForm } from "@/components/admin/connectors/ConnectorForm";
|
||||
import { usePublicCredentials } from "@/lib/hooks";
|
||||
|
||||
/**
 * Body of the Document360 admin page.
 *
 * Step 1 shows the stored Document360 credential (with a delete button) or a
 * form to create one. Step 2 lists the existing Document360 connectors and,
 * once a credential exists, offers a form to connect a new workspace.
 */
const MainSection = () => {
  const { mutate } = useSWRConfig();
  const {
    data: connectorIndexingStatuses,
    isLoading: isConnectorIndexingStatusesLoading,
    error: isConnectorIndexingStatusesError,
  } = useSWR<ConnectorIndexingStatus<any, any>[]>(
    "/api/manage/admin/connector/indexing-status",
    fetcher
  );

  const {
    data: credentialsData,
    isLoading: isCredentialsLoading,
    error: isCredentialsError,
    refreshCredentials,
  } = usePublicCredentials();

  // Show the spinner only while there is no data yet and a load is in flight.
  if (
    (!connectorIndexingStatuses && isConnectorIndexingStatusesLoading) ||
    (!credentialsData && isCredentialsLoading)
  ) {
    return <LoadingAnimation text="Loading" />;
  }

  if (isConnectorIndexingStatusesError || !connectorIndexingStatuses) {
    return <div>Failed to load connectors</div>;
  }

  if (isCredentialsError || !credentialsData) {
    return <div>Failed to load credentials</div>;
  }

  // The status endpoint returns every connector; keep only Document360 ones.
  const document360ConnectorIndexingStatuses: ConnectorIndexingStatus<
    Document360Config,
    Document360CredentialJson
  >[] = connectorIndexingStatuses.filter(
    (connectorIndexingStatus) =>
      connectorIndexingStatus.connector.source === "document360"
  );

  // A credential counts as a Document360 credential when it carries an API token.
  const document360Credential: Credential<Document360CredentialJson> | undefined =
    credentialsData.find((credential) => credential.credential_json?.document360_api_token);

  return (
    <>
      <h2 className="font-bold mb-2 mt-6 ml-auto mr-auto">
        Step 1: Provide Credentials
      </h2>
      {document360Credential ? (
        <>
          <div className="flex mb-1 text-sm">
            <p className="my-auto">Existing Document360 API Token: </p>
            <p className="ml-1 italic my-auto">
              {document360Credential.credential_json.document360_api_token}
            </p>
            <button
              className="ml-1 hover:bg-gray-700 rounded-full p-1"
              onClick={async () => {
                await adminDeleteCredential(document360Credential.id);
                refreshCredentials();
              }}
            >
              <TrashIcon />
            </button>
          </div>
        </>
      ) : (
        <>
          <p className="text-sm mb-4">
            To use the Document360 connector, you must first provide the API token
            and portal ID corresponding to your Document360 setup. For more details,
            see the <a className="text-blue-500" href="https://apidocs.document360.com/apidocs/api-token">official Document360 documentation</a>.
          </p>
          <div className="border-solid border-gray-600 border rounded-md p-6 mt-2">
            <CredentialForm<Document360CredentialJson>
              formBody={
                <>
                  <TextFormField
                    name="document360_api_token"
                    label="Document360 API Token:"
                    type="password"
                  />
                  <TextFormField
                    name="portal_id"
                    label="Portal ID:"
                  />
                </>
              }
              validationSchema={Yup.object().shape({
                document360_api_token: Yup.string().required("Please enter your Document360 API token"),
                portal_id: Yup.string().required("Please enter your portal ID"),
              })}
              initialValues={{
                document360_api_token: "",
                portal_id: "",
              }}
              onSubmit={(isSuccess) => {
                if (isSuccess) {
                  refreshCredentials();
                }
              }}
            />
          </div>
        </>
      )}

      <h2 className="font-bold mb-2 mt-6 ml-auto mr-auto">
        Step 2: Which categories do you want to make searchable?
      </h2>

      {document360ConnectorIndexingStatuses.length > 0 && (
        <>
          <p className="text-sm mb-2">
            We index the latest articles from each workspace listed below regularly.
          </p>
          <div className="mb-2">
            <ConnectorsTable<Document360Config, Document360CredentialJson>
              connectorIndexingStatuses={document360ConnectorIndexingStatuses}
              liveCredential={document360Credential}
              getCredential={(credential) =>
                credential.credential_json.document360_api_token
              }
              specialColumns={[
                {
                  header: "Workspace",
                  key: "workspace",
                  getValue: (ccPairStatus) =>
                    ccPairStatus.connector.connector_specific_config.workspace,
                },
                {
                  header: "Categories",
                  key: "categories",
                  getValue: (ccPairStatus) =>
                    ccPairStatus.connector.connector_specific_config.categories &&
                    ccPairStatus.connector.connector_specific_config.categories.length > 0
                      ? ccPairStatus.connector.connector_specific_config.categories.join(", ")
                      : "",
                },
              ]}
              onUpdate={() =>
                mutate("/api/manage/admin/connector/indexing-status")
              }
              onCredentialLink={async (connectorId) => {
                if (document360Credential) {
                  await linkCredential(connectorId, document360Credential.id);
                  mutate("/api/manage/admin/connector/indexing-status");
                }
              }}
            />
          </div>
        </>
      )}

      {document360Credential ? (
        <div className="border-solid border-gray-600 border rounded-md p-6 mt-4">
          <h2 className="font-bold mb-3">Connect to a New Workspace</h2>
          <ConnectorForm<Document360Config>
            nameBuilder={(values) =>
              // An empty array is truthy, so test the length explicitly;
              // otherwise a connector without categories would be named
              // "Document360Connector-<workspace>-" with a dangling dash.
              values.categories && values.categories.length > 0
                ? `Document360Connector-${values.workspace}-${values.categories.join("_")}`
                : `Document360Connector-${values.workspace}`
            }
            source="document360"
            inputType="poll"
            formBody={
              <>
                <TextFormField name="workspace" label="Workspace" />
              </>
            }
            formBodyBuilder={TextArrayFieldBuilder({
              name: "categories",
              label: "Categories:",
              subtext:
                "Specify 0 or more categories to index. For instance, specifying the category " +
                "'Help' will cause us to only index all content " +
                "within the 'Help' category. " +
                "If no categories are specified, all categories in your workspace will be indexed.",
            })}
            validationSchema={Yup.object().shape({
              workspace: Yup.string().required(
                "Please enter the workspace to index"
              ),
              categories: Yup.array()
                .of(Yup.string().required("Category names must be strings"))
                .required(),
            })}
            initialValues={{
              workspace: "",
              categories: [],
            }}
            refreshFreq={10 * 60} // 10 minutes
            credentialId={document360Credential.id}
          />
        </div>
      ) : (
        <p className="text-sm">
          Please provide your Document360 API token and portal ID in Step 1 first! Once done with
          that, you can then specify which Document360 categories you want to make
          searchable.
        </p>
      )}
    </>
  );
};
|
||||
|
||||
/**
 * Document360 admin page: health banner, titled header with the Document360
 * icon, then the credential/connector sections rendered by `MainSection`.
 */
export default function Page() {
  const titleBar = (
    <div className="border-solid border-gray-600 border-b mb-4 pb-2 flex">
      <Document360Icon size={32} />
      <h1 className="text-3xl font-bold pl-2">Document360</h1>
    </div>
  );

  return (
    <div className="mx-auto container">
      <div className="mb-4">
        <HealthCheckBanner />
      </div>
      {titleBar}
      <MainSection />
    </div>
  );
}
|
@ -6,4 +6,4 @@ export default async function AdminLayout({
|
||||
children: React.ReactNode;
|
||||
}) {
|
||||
return await Layout({ children });
|
||||
}
|
||||
}
|
@ -22,6 +22,7 @@ import {
|
||||
HubSpotIcon,
|
||||
BookmarkIcon,
|
||||
CPUIcon,
|
||||
Document360Icon,
|
||||
GoogleSitesIcon,
|
||||
} from "@/components/icons/icons";
|
||||
import { getAuthDisabledSS, getCurrentUserSS } from "@/lib/userSS";
|
||||
@ -210,6 +211,15 @@ export async function Layout({ children }: { children: React.ReactNode }) {
|
||||
),
|
||||
link: "/admin/connectors/hubspot",
|
||||
},
|
||||
{
|
||||
name: (
|
||||
<div className="flex">
|
||||
<Document360Icon size={16} />
|
||||
<div className="ml-1">Document360</div>
|
||||
</div>
|
||||
),
|
||||
link: "/admin/connectors/document360",
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
|
@ -43,6 +43,7 @@ import gongIcon from "../../../public/Gong.png";
|
||||
import zulipIcon from "../../../public/Zulip.png";
|
||||
import linearIcon from "../../../public/Linear.png";
|
||||
import hubSpotIcon from "../../../public/HubSpot.png";
|
||||
import document360Icon from "../../../public/Document360.png";
|
||||
import googleSitesIcon from "../../../public/GoogleSites.png";
|
||||
|
||||
interface IconProps {
|
||||
@ -452,6 +453,20 @@ export const HubSpotIcon = ({
|
||||
);
|
||||
};
|
||||
|
||||
/**
 * Document360 logo rendered inside a square box that is 4px larger than
 * `size` on each axis. The box size is applied both as an inline style and
 * as `w-[..] h-[..]` class names, followed by the caller's `className`.
 */
export const Document360Icon = ({
  size = 16,
  className = defaultTailwindCSS,
}: IconProps) => {
  const boxPx = `${size + 4}px`;
  const boxClasses = `w-[${boxPx}] h-[${boxPx}] -m-0.5 ${className}`;

  return (
    <div style={{ width: boxPx, height: boxPx }} className={boxClasses}>
      <Image src={document360Icon} alt="Logo" width="96" height="96" />
    </div>
  );
};
|
||||
|
||||
export const GoogleSitesIcon = ({
|
||||
size = 16,
|
||||
className = defaultTailwindCSS,
|
||||
|
@ -29,6 +29,7 @@ const sources: Source[] = [
|
||||
{ displayName: "Zulip", internalName: "zulip" },
|
||||
{ displayName: "Linear", internalName: "linear" },
|
||||
{ displayName: "HubSpot", internalName: "hubspot" },
|
||||
{ displayName: "Document360", internalName: "document360" },
|
||||
{ displayName: "Google Sites", internalName: "google_sites" },
|
||||
];
|
||||
|
||||
|
@ -16,6 +16,7 @@ import {
|
||||
SlackIcon,
|
||||
ZulipIcon,
|
||||
HubSpotIcon,
|
||||
Document360Icon,
|
||||
GoogleSitesIcon,
|
||||
} from "./icons/icons";
|
||||
|
||||
@ -123,6 +124,12 @@ export const getSourceMetadata = (sourceType: ValidSources): SourceMetadata => {
|
||||
displayName: "HubSpot",
|
||||
adminPageLink: "/admin/connectors/hubspot",
|
||||
};
|
||||
case "document360":
|
||||
return {
|
||||
icon: Document360Icon,
|
||||
displayName: "Document360",
|
||||
adminPageLink: "/admin/connectors/document360",
|
||||
};
|
||||
case "google_sites":
|
||||
return {
|
||||
icon: GoogleSitesIcon,
|
||||
|
@ -23,8 +23,10 @@ export type ValidSources =
|
||||
| "zulip"
|
||||
| "linear"
|
||||
| "hubspot"
|
||||
| "file"
|
||||
| "document360"
|
||||
| "file";
|
||||
| "google_sites";
|
||||
|
||||
export type ValidInputTypes = "load_state" | "poll" | "event";
|
||||
export type ValidStatuses =
|
||||
| "success"
|
||||
@ -115,6 +117,11 @@ export interface NotionConfig {}
|
||||
|
||||
export interface HubSpotConfig {}
|
||||
|
||||
/** Connector-specific settings submitted by the Document360 connector form. */
export interface Document360Config {
  /** Workspace to index, as entered in the "Workspace" field. */
  workspace: string;
  /** Category names to restrict indexing to; optional. */
  categories?: Array<string>;
}
|
||||
|
||||
export interface GoogleSitesConfig {
|
||||
zip_path: string;
|
||||
base_url: string;
|
||||
@ -227,6 +234,11 @@ export interface HubSpotCredentialJson {
|
||||
hubspot_access_token: string;
|
||||
}
|
||||
|
||||
/** Credential payload collected by the Document360 credential form. */
export interface Document360CredentialJson {
  /** Value of the "Portal ID" field. */
  portal_id: string;
  /** Value of the "Document360 API Token" field. */
  document360_api_token: string;
}
|
||||
|
||||
// DELETION
|
||||
|
||||
export interface DeletionAttemptSnapshot {
|
||||
|
Loading…
x
Reference in New Issue
Block a user