from datetime import datetime
from datetime import timezone
from io import BytesIO
from typing import Any

from dropbox import Dropbox  # type: ignore
from dropbox.exceptions import ApiError  # type:ignore
from dropbox.files import FileMetadata  # type:ignore
from dropbox.files import FolderMetadata  # type:ignore

from danswer.configs.app_configs import INDEX_BATCH_SIZE
from danswer.configs.constants import DocumentSource
from danswer.connectors.interfaces import GenerateDocumentsOutput
from danswer.connectors.interfaces import LoadConnector
from danswer.connectors.interfaces import PollConnector
from danswer.connectors.interfaces import SecondsSinceUnixEpoch
from danswer.connectors.models import ConnectorMissingCredentialError
from danswer.connectors.models import Document
from danswer.connectors.models import Section
from danswer.file_processing.extract_file_text import extract_file_text
from danswer.utils.logger import setup_logger


logger = setup_logger()


def _as_utc(moment: datetime) -> datetime:
    """Return ``moment`` as a timezone-aware UTC datetime.

    A naive value is assumed to already be expressed in UTC; an aware value
    is converted to UTC.
    """
    if moment.tzinfo is None:
        return moment.replace(tzinfo=timezone.utc)
    return moment.astimezone(timezone.utc)


class DropboxConnector(LoadConnector, PollConnector):
    """Connector that walks a Dropbox account and yields its files as Documents.

    Supports both a full load (``load_from_state``) and a time-windowed poll
    (``poll_source``). ``load_credentials`` must be called before either.
    """

    def __init__(self, batch_size: int = INDEX_BATCH_SIZE) -> None:
        # Used as the page-size hint passed to Dropbox's folder listing; each
        # yielded batch holds at most the files of one listing page.
        self.batch_size = batch_size
        self.dropbox_client: Dropbox | None = None

    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
        """Create the Dropbox client from ``credentials["dropbox_access_token"]``.

        Raises:
            KeyError: if the access token key is missing from ``credentials``.
        """
        self.dropbox_client = Dropbox(credentials["dropbox_access_token"])
        return None

    def _download_file(self, path: str) -> bytes:
        """Download a single file from Dropbox and return its raw bytes."""
        if self.dropbox_client is None:
            raise ConnectorMissingCredentialError("Dropbox")
        _, resp = self.dropbox_client.files_download(path)
        # Reading ``content`` consumes the whole response body.
        return resp.content

    def _get_shared_link(self, path: str) -> str:
        """Return a shared link for the file at ``path``, creating one if needed.

        Returns an empty string when Dropbox rejects the request (ApiError).
        """
        if self.dropbox_client is None:
            raise ConnectorMissingCredentialError("Dropbox")

        try:
            # Check if a shared link already exists. ``direct_only=True`` is
            # required: without it Dropbox also returns links to parent
            # folders, and the first entry may not point at this file.
            shared_links = self.dropbox_client.sharing_list_shared_links(
                path=path, direct_only=True
            )
            if shared_links.links:
                return shared_links.links[0].url

            link_metadata = (
                self.dropbox_client.sharing_create_shared_link_with_settings(path)
            )
            return link_metadata.url
        except ApiError as err:
            logger.exception(f"Failed to create a shared link for {path}: {err}")
            return ""

    def _yield_files_recursive(
        self,
        path: str,
        start: SecondsSinceUnixEpoch | None,
        end: SecondsSinceUnixEpoch | None,
    ) -> GenerateDocumentsOutput:
        """Yield files in batches from a Dropbox folder, including subfolders.

        Args:
            path: Dropbox folder path to list ("" is the root).
            start: inclusive lower bound (epoch seconds) on the modification
                time, or None for no lower bound.
            end: inclusive upper bound (epoch seconds) on the modification
                time, or None for no upper bound.

        Files whose text cannot be extracted are logged and skipped.
        """
        if self.dropbox_client is None:
            raise ConnectorMissingCredentialError("Dropbox")

        result = self.dropbox_client.files_list_folder(
            path,
            limit=self.batch_size,
            recursive=False,
            include_non_downloadable_files=False,
        )

        while True:
            batch: list[Document] = []
            for entry in result.entries:
                if isinstance(entry, FileMetadata):
                    # Filter on server_modified: Dropbox documents
                    # client_modified as an unverified, display-only value, so
                    # a file uploaded now with an old mtime would otherwise
                    # never fall inside a poll window.
                    changed_at = int(_as_utc(entry.server_modified).timestamp())
                    # Explicit None checks so that a bound of 0 is honoured.
                    if start is not None and changed_at < start:
                        continue
                    if end is not None and changed_at > end:
                        continue

                    # The user-facing "last updated" time stays the client one.
                    modified_time = _as_utc(entry.client_modified)

                    downloaded_file = self._download_file(entry.path_display)
                    link = self._get_shared_link(entry.path_display)
                    try:
                        text = extract_file_text(entry.name, BytesIO(downloaded_file))
                        batch.append(
                            Document(
                                id=f"doc:{entry.id}",
                                sections=[Section(link=link, text=text)],
                                source=DocumentSource.DROPBOX,
                                semantic_identifier=entry.name,
                                doc_updated_at=modified_time,
                                metadata={"type": "article"},
                            )
                        )
                    except Exception as e:
                        logger.exception(
                            f"Error decoding file {entry.path_display} as utf-8 error occurred: {e}"
                        )

                elif isinstance(entry, FolderMetadata):
                    # Sub-folder batches are emitted before the current page's
                    # own batch.
                    yield from self._yield_files_recursive(entry.path_lower, start, end)

            if batch:
                yield batch

            if not result.has_more:
                break

            result = self.dropbox_client.files_list_folder_continue(result.cursor)

    def load_from_state(self) -> GenerateDocumentsOutput:
        """Yield every file in the account (a poll with no time bounds)."""
        return self.poll_source(None, None)

    def poll_source(
        self, start: SecondsSinceUnixEpoch | None, end: SecondsSinceUnixEpoch | None
    ) -> GenerateDocumentsOutput:
        """Yield batches of files modified within ``[start, end]``, from the root.

        Raises:
            ConnectorMissingCredentialError: if credentials were never loaded
                (raised when iteration starts, since this is a generator).
        """
        if self.dropbox_client is None:
            raise ConnectorMissingCredentialError("Dropbox")

        yield from self._yield_files_recursive("", start, end)


if __name__ == "__main__":
    import os

    connector = DropboxConnector()
    connector.load_credentials(
        {
            "dropbox_access_token": os.environ["DROPBOX_ACCESS_TOKEN"],
        }
    )
    document_batches = connector.load_from_state()
    print(next(document_batches))
import Document360Connector +from danswer.connectors.dropbox.connector import DropboxConnector from danswer.connectors.file.connector import LocalFileConnector from danswer.connectors.github.connector import GithubConnector from danswer.connectors.gitlab.connector import GitlabConnector @@ -74,6 +75,7 @@ def identify_connector_class( DocumentSource.GOOGLE_SITES: GoogleSitesConnector, DocumentSource.ZENDESK: ZendeskConnector, DocumentSource.LOOPIO: LoopioConnector, + DocumentSource.DROPBOX: DropboxConnector, DocumentSource.SHAREPOINT: SharepointConnector, DocumentSource.TEAMS: TeamsConnector, DocumentSource.DISCOURSE: DiscourseConnector, diff --git a/backend/requirements/default.txt b/backend/requirements/default.txt index 6052624ad..e25f02b7a 100644 --- a/backend/requirements/default.txt +++ b/backend/requirements/default.txt @@ -69,3 +69,4 @@ uvicorn==0.21.1 zulip==0.8.2 hubspot-api-client==8.1.0 zenpy==2.0.41 +dropbox==11.36.2 diff --git a/web/public/Dropbox.png b/web/public/Dropbox.png new file mode 100644 index 000000000..cd83e09eb Binary files /dev/null and b/web/public/Dropbox.png differ diff --git a/web/src/app/admin/connectors/dropbox/page.tsx b/web/src/app/admin/connectors/dropbox/page.tsx new file mode 100644 index 000000000..a42c8d141 --- /dev/null +++ b/web/src/app/admin/connectors/dropbox/page.tsx @@ -0,0 +1,209 @@ +"use client"; + +import { AdminPageTitle } from "@/components/admin/Title"; +import { HealthCheckBanner } from "@/components/health/healthcheck"; +import { DropboxIcon } from "@/components/icons/icons"; +import { LoadingAnimation } from "@/components/Loading"; +import { ConnectorForm } from "@/components/admin/connectors/ConnectorForm"; +import { CredentialForm } from "@/components/admin/connectors/CredentialForm"; +import { TextFormField } from "@/components/admin/connectors/Field"; +import { usePopup } from "@/components/admin/connectors/Popup"; +import { ConnectorsTable } from "@/components/admin/connectors/table/ConnectorsTable"; 
+import { TrashIcon } from "@/components/icons/icons"; +import { adminDeleteCredential, linkCredential } from "@/lib/credential"; +import { fetcher } from "@/lib/fetcher"; +import { usePublicCredentials } from "@/lib/hooks"; +import { + ConnectorIndexingStatus, + Credential, + DropboxConfig, + DropboxCredentialJson, +} from "@/lib/types"; +import { Card, Text, Title } from "@tremor/react"; +import useSWR, { useSWRConfig } from "swr"; +import * as Yup from "yup"; + +const Main = () => { + const { popup, setPopup } = usePopup(); + + const { mutate } = useSWRConfig(); + const { + data: connectorIndexingStatuses, + isLoading: isConnectorIndexingStatusesLoading, + error: isConnectorIndexingStatusesError, + } = useSWR[]>( + "/api/manage/admin/connector/indexing-status", + fetcher + ); + const { + data: credentialsData, + isLoading: isCredentialsLoading, + error: isCredentialsError, + refreshCredentials, + } = usePublicCredentials(); + + if ( + (!connectorIndexingStatuses && isConnectorIndexingStatusesLoading) || + (!credentialsData && isCredentialsLoading) + ) { + return ; + } + + if (isConnectorIndexingStatusesError || !connectorIndexingStatuses) { + return
Failed to load connectors
; + } + + if (isCredentialsError || !credentialsData) { + return
Failed to load credentials
; + } + + const dropboxConnectorIndexingStatuses: ConnectorIndexingStatus< + DropboxConfig, + DropboxCredentialJson + >[] = connectorIndexingStatuses.filter( + (connectorIndexingStatus) => + connectorIndexingStatus.connector.source === "dropbox" + ); + const dropboxCredential: Credential | undefined = + credentialsData.find( + (credential) => credential.credential_json?.dropbox_access_token + ); + + return ( + <> + {popup} + + Provide your API details + + + {dropboxCredential ? ( + <> +
+

Existing API Token:

+

+ {dropboxCredential.credential_json?.dropbox_access_token} +

+ +
+ + ) : ( + <> + + See the Dropbox connector{" "} + + setup guide + {" "} + on the Danswer docs to obtain a Dropbox token. + + + + formBody={ + <> + + + } + validationSchema={Yup.object().shape({ + dropbox_access_token: Yup.string().required( + "Please enter your Dropbox API token" + ), + })} + initialValues={{ + dropbox_access_token: "", + }} + onSubmit={(isSuccess) => { + if (isSuccess) { + refreshCredentials(); + mutate("/api/manage/admin/connector/indexing-status"); + } + }} + /> + + + )} + + {dropboxConnectorIndexingStatuses.length > 0 && ( + <> + + Dropbox indexing status + + + The latest article changes are fetched every 10 minutes. + +
+ + connectorIndexingStatuses={dropboxConnectorIndexingStatuses} + liveCredential={dropboxCredential} + onCredentialLink={async (connectorId) => { + if (dropboxCredential) { + await linkCredential(connectorId, dropboxCredential.id); + mutate("/api/manage/admin/connector/indexing-status"); + } + }} + onUpdate={() => + mutate("/api/manage/admin/connector/indexing-status") + } + /> +
+ + )} + + {dropboxCredential && dropboxConnectorIndexingStatuses.length === 0 && ( + <> + +

Create Connection

+

+ Press connect below to start the connection to your Dropbox + instance. +

+ + nameBuilder={(values) => `Dropbox`} + ccPairNameBuilder={(values) => `Dropbox`} + source="dropbox" + inputType="poll" + formBody={<>} + validationSchema={Yup.object().shape({})} + initialValues={{}} + refreshFreq={10 * 60} // 10 minutes + credentialId={dropboxCredential.id} + /> +
+ + )} + + ); +}; + +export default function Page() { + return ( +
+
+ +
+ } title="Dropbox" /> +
+
+ ); +} diff --git a/web/src/components/icons/icons.tsx b/web/src/components/icons/icons.tsx index 8cbec72c4..04d003a59 100644 --- a/web/src/components/icons/icons.tsx +++ b/web/src/components/icons/icons.tsx @@ -51,6 +51,7 @@ import hubSpotIcon from "../../../public/HubSpot.png"; import document360Icon from "../../../public/Document360.png"; import googleSitesIcon from "../../../public/GoogleSites.png"; import zendeskIcon from "../../../public/Zendesk.svg"; +import dropboxIcon from "../../../public/Dropbox.png"; import sharepointIcon from "../../../public/Sharepoint.png"; import teamsIcon from "../../../public/Teams.png"; import mediawikiIcon from "../../../public/MediaWiki.svg"; @@ -617,6 +618,18 @@ export const ZendeskIcon = ({ ); +export const DropboxIcon = ({ + size = 16, + className = defaultTailwindCSS, +}: IconProps) => ( +
+ Logo +
+); + export const DiscourseIcon = ({ size = 16, className = defaultTailwindCSS, diff --git a/web/src/lib/sources.ts b/web/src/lib/sources.ts index 303108394..597f45f4d 100644 --- a/web/src/lib/sources.ts +++ b/web/src/lib/sources.ts @@ -4,6 +4,7 @@ import { ConfluenceIcon, DiscourseIcon, Document360Icon, + DropboxIcon, FileIcon, GithubIcon, GitlabIcon, @@ -154,6 +155,11 @@ const SOURCE_METADATA_MAP: SourceMap = { displayName: "Loopio", category: SourceCategory.AppConnection, }, + dropbox: { + icon: DropboxIcon, + displayName: "Dropbox", + category: SourceCategory.AppConnection, + }, sharepoint: { icon: SharepointIcon, displayName: "Sharepoint", diff --git a/web/src/lib/types.ts b/web/src/lib/types.ts index 55cfe700c..f20173d1d 100644 --- a/web/src/lib/types.ts +++ b/web/src/lib/types.ts @@ -42,6 +42,7 @@ export type ValidSources = | "file" | "google_sites" | "loopio" + | "dropbox" | "sharepoint" | "teams" | "zendesk" @@ -191,6 +192,8 @@ export interface GoogleSitesConfig { export interface ZendeskConfig {} +export interface DropboxConfig {} + export interface MediaWikiBaseConfig { connector_name: string; language_code: string; @@ -198,6 +201,7 @@ export interface MediaWikiBaseConfig { pages?: string[]; recurse_depth?: number; } + export interface MediaWikiConfig extends MediaWikiBaseConfig { hostname: string; } @@ -362,6 +366,10 @@ export interface ZendeskCredentialJson { zendesk_token: string; } +export interface DropboxCredentialJson { + dropbox_access_token: string; +} + export interface SharepointCredentialJson { sp_client_id: string; sp_client_secret: string;