Add Dropbox connector (#956)

* start dropbox connector

* add wip ui

* polish ui

* Fix some ci

* ignore types

* addressed, fixed, and tested all comments

* ran prettier

* ran mypy fixes

---------

Co-authored-by: Bill Yang <bill@Bills-MacBook-Pro.local>
Co-authored-by: hagen-danswer <hagen@danswer.ai>
This commit is contained in:
Bill Yang 2024-06-04 17:58:01 -07:00 committed by Hagen O'Neill
parent 0b6e85c26b
commit 914dc27a8f
10 changed files with 391 additions and 0 deletions

View File

@ -93,6 +93,7 @@ class DocumentSource(str, Enum):
GOOGLE_SITES = "google_sites"
ZENDESK = "zendesk"
LOOPIO = "loopio"
DROPBOX = "dropbox"
SHAREPOINT = "sharepoint"
TEAMS = "teams"
DISCOURSE = "discourse"

View File

@ -0,0 +1,151 @@
from datetime import datetime
from datetime import timezone
from io import BytesIO
from typing import Any

from dropbox import Dropbox  # type: ignore
from dropbox.exceptions import ApiError  # type:ignore
from dropbox.files import FileMetadata  # type:ignore
from dropbox.files import FolderMetadata  # type:ignore

from danswer.configs.app_configs import INDEX_BATCH_SIZE
from danswer.configs.constants import DocumentSource
from danswer.connectors.interfaces import GenerateDocumentsOutput
from danswer.connectors.interfaces import LoadConnector
from danswer.connectors.interfaces import PollConnector
from danswer.connectors.interfaces import SecondsSinceUnixEpoch
from danswer.connectors.models import ConnectorMissingCredentialError
from danswer.connectors.models import Document
from danswer.connectors.models import Section
from danswer.file_processing.extract_file_text import extract_file_text
from danswer.utils.logger import setup_logger
logger = setup_logger()
class DropboxConnector(LoadConnector, PollConnector):
    """Connector that turns the files of a Dropbox account into documents.

    Starting at the account root, every folder is listed page by page, each
    downloadable file is downloaded, its text is extracted, and the resulting
    documents are yielded in batches. Both a full load and a time-window poll
    are supported; the full load is simply a poll without bounds.
    """

    def __init__(self, batch_size: int = INDEX_BATCH_SIZE) -> None:
        """Create a connector without credentials.

        Args:
            batch_size: Page size requested from the Dropbox folder listing.
                Each listing page produces at most one batch of documents.
        """
        self.batch_size = batch_size
        # Stays None until load_credentials() is called.
        self.dropbox_client: Dropbox | None = None

    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
        """Build the Dropbox client from ``credentials["dropbox_access_token"]``.

        Raises:
            KeyError: If the access token is missing from ``credentials``.
        """
        self.dropbox_client = Dropbox(credentials["dropbox_access_token"])
        return None

    def _download_file(self, path: str) -> bytes:
        """Download a single file from Dropbox and return its raw content."""
        if self.dropbox_client is None:
            raise ConnectorMissingCredentialError("Dropbox")
        _, resp = self.dropbox_client.files_download(path)
        return resp.content

    def _get_shared_link(self, path: str) -> str:
        """Return a shared-link URL for ``path``, creating one if needed.

        An already existing shared link is reused. If the Dropbox API rejects
        the request, the error is logged and an empty string is returned so
        that the file can still be indexed without a link.
        """
        if self.dropbox_client is None:
            raise ConnectorMissingCredentialError("Dropbox")

        try:
            # Check if a shared link already exists
            shared_links = self.dropbox_client.sharing_list_shared_links(path=path)
            if shared_links.links:
                return shared_links.links[0].url

            link_metadata = (
                self.dropbox_client.sharing_create_shared_link_with_settings(path)
            )
            return link_metadata.url
        except ApiError as err:
            logger.exception(f"Failed to create a shared link for {path}: {err}")
            return ""

    @staticmethod
    def _to_utc(timestamp: datetime) -> datetime:
        """Return ``timestamp`` as a timezone-aware UTC datetime.

        A naive timestamp is assumed to already be expressed in UTC.
        """
        if timestamp.tzinfo is None:
            return timestamp.replace(tzinfo=timezone.utc)
        return timestamp.astimezone(timezone.utc)

    def _is_within_window(
        self,
        entry: FileMetadata,
        start: SecondsSinceUnixEpoch | None,
        end: SecondsSinceUnixEpoch | None,
    ) -> bool:
        """Tell whether ``entry`` changed inside the inclusive [start, end] window.

        The check uses ``server_modified``: ``client_modified`` is whatever the
        uploading client reported and Dropbox documents it as unsuitable for
        deciding whether a file has changed. A bound of ``None`` means "no
        bound"; a bound of ``0`` is a real bound.
        """
        changed_at = int(self._to_utc(entry.server_modified).timestamp())
        if start is not None and changed_at < start:
            return False
        if end is not None and changed_at > end:
            return False
        return True

    def _build_document(self, entry: FileMetadata) -> Document | None:
        """Download ``entry`` and convert it to a Document.

        Returns ``None`` (after logging) when the file cannot be downloaded or
        its text cannot be extracted, so one bad file does not stop the run.
        """
        try:
            downloaded_file = self._download_file(entry.path_display)
        except ApiError as err:
            logger.exception(f"Failed to download file {entry.path_display}: {err}")
            return None

        link = self._get_shared_link(entry.path_display)

        try:
            text = extract_file_text(entry.name, BytesIO(downloaded_file))
            return Document(
                id=f"doc:{entry.id}",
                sections=[Section(link=link, text=text)],
                source=DocumentSource.DROPBOX,
                semantic_identifier=entry.name,
                doc_updated_at=self._to_utc(entry.client_modified),
                metadata={"type": "article"},
            )
        except Exception as err:
            logger.exception(
                f"Failed to extract text from file {entry.path_display}: {err}"
            )
            return None

    def _yield_files_recursive(
        self,
        path: str,
        start: SecondsSinceUnixEpoch | None,
        end: SecondsSinceUnixEpoch | None,
    ) -> GenerateDocumentsOutput:
        """Yield files in batches from a specified Dropbox folder, including subfolders.

        Args:
            path: Dropbox folder path to list; ``""`` is the account root.
            start: Lower bound (seconds since epoch) of the change window, or None.
            end: Upper bound (seconds since epoch) of the change window, or None.

        Raises:
            ConnectorMissingCredentialError: If no credentials were loaded.
        """
        if self.dropbox_client is None:
            raise ConnectorMissingCredentialError("Dropbox")

        result = self.dropbox_client.files_list_folder(
            path,
            limit=self.batch_size,
            recursive=False,
            include_non_downloadable_files=False,
        )

        while True:
            batch: list[Document] = []
            for entry in result.entries:
                if isinstance(entry, FileMetadata):
                    if not self._is_within_window(entry, start, end):
                        continue
                    document = self._build_document(entry)
                    if document is not None:
                        batch.append(document)
                elif isinstance(entry, FolderMetadata):
                    # Batches of a subfolder are emitted before the rest of
                    # the current listing page.
                    yield from self._yield_files_recursive(
                        entry.path_lower, start, end
                    )

            if batch:
                yield batch

            if not result.has_more:
                break

            result = self.dropbox_client.files_list_folder_continue(result.cursor)

    def load_from_state(self) -> GenerateDocumentsOutput:
        """Index every file of the account (a poll without time bounds)."""
        return self.poll_source(None, None)

    def poll_source(
        self, start: SecondsSinceUnixEpoch | None, end: SecondsSinceUnixEpoch | None
    ) -> GenerateDocumentsOutput:
        """Yield batches of documents for files changed between ``start`` and ``end``.

        Raises:
            ConnectorMissingCredentialError: If no credentials were loaded. As
                this is a generator, the error surfaces on first iteration.
        """
        if self.dropbox_client is None:
            raise ConnectorMissingCredentialError("Dropbox")

        yield from self._yield_files_recursive("", start, end)
if __name__ == "__main__":
    # Manual smoke test: index the account behind DROPBOX_ACCESS_TOKEN and
    # print the first batch of documents.
    import os

    connector = DropboxConnector()
    credentials = {"dropbox_access_token": os.environ["DROPBOX_ACCESS_TOKEN"]}
    connector.load_credentials(credentials)

    first_batch = next(connector.load_from_state())
    print(first_batch)

View File

@ -8,6 +8,7 @@ from danswer.connectors.confluence.connector import ConfluenceConnector
from danswer.connectors.danswer_jira.connector import JiraConnector
from danswer.connectors.discourse.connector import DiscourseConnector
from danswer.connectors.document360.connector import Document360Connector
from danswer.connectors.dropbox.connector import DropboxConnector
from danswer.connectors.file.connector import LocalFileConnector
from danswer.connectors.github.connector import GithubConnector
from danswer.connectors.gitlab.connector import GitlabConnector
@ -74,6 +75,7 @@ def identify_connector_class(
DocumentSource.GOOGLE_SITES: GoogleSitesConnector,
DocumentSource.ZENDESK: ZendeskConnector,
DocumentSource.LOOPIO: LoopioConnector,
DocumentSource.DROPBOX: DropboxConnector,
DocumentSource.SHAREPOINT: SharepointConnector,
DocumentSource.TEAMS: TeamsConnector,
DocumentSource.DISCOURSE: DiscourseConnector,

View File

@ -69,3 +69,4 @@ uvicorn==0.21.1
zulip==0.8.2
hubspot-api-client==8.1.0
zenpy==2.0.41
dropbox==11.36.2

BIN
web/public/Dropbox.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 42 KiB

View File

@ -0,0 +1,209 @@
"use client";
import { AdminPageTitle } from "@/components/admin/Title";
import { HealthCheckBanner } from "@/components/health/healthcheck";
import { DropboxIcon } from "@/components/icons/icons";
import { LoadingAnimation } from "@/components/Loading";
import { ConnectorForm } from "@/components/admin/connectors/ConnectorForm";
import { CredentialForm } from "@/components/admin/connectors/CredentialForm";
import { TextFormField } from "@/components/admin/connectors/Field";
import { usePopup } from "@/components/admin/connectors/Popup";
import { ConnectorsTable } from "@/components/admin/connectors/table/ConnectorsTable";
import { TrashIcon } from "@/components/icons/icons";
import { adminDeleteCredential, linkCredential } from "@/lib/credential";
import { fetcher } from "@/lib/fetcher";
import { usePublicCredentials } from "@/lib/hooks";
import {
ConnectorIndexingStatus,
Credential,
DropboxConfig,
DropboxCredentialJson,
} from "@/lib/types";
import { Card, Text, Title } from "@tremor/react";
import useSWR, { useSWRConfig } from "swr";
import * as Yup from "yup";
/**
 * Body of the Dropbox connector admin page.
 *
 * Loads the connector indexing statuses and the stored credentials, then
 * renders, in order:
 *  - the credential section (the existing token with a delete button, or a
 *    form to enter one),
 *  - the indexing-status table when Dropbox connectors already exist,
 *  - the "Create Connection" form when a credential exists but no Dropbox
 *    connector does.
 */
const Main = () => {
  // SWR key of the indexing-status endpoint; reused to revalidate after changes.
  const indexingStatusUrl = "/api/manage/admin/connector/indexing-status";

  const { popup, setPopup } = usePopup();
  const { mutate } = useSWRConfig();

  const {
    data: connectorIndexingStatuses,
    isLoading: isConnectorIndexingStatusesLoading,
    error: connectorIndexingStatusesError,
  } = useSWR<ConnectorIndexingStatus<any, any>[]>(indexingStatusUrl, fetcher);

  const {
    data: credentialsData,
    isLoading: isCredentialsLoading,
    error: credentialsError,
    refreshCredentials,
  } = usePublicCredentials();

  // Show the spinner only while there is no data yet for either request.
  if (
    (!connectorIndexingStatuses && isConnectorIndexingStatusesLoading) ||
    (!credentialsData && isCredentialsLoading)
  ) {
    return <LoadingAnimation text="Loading" />;
  }

  if (connectorIndexingStatusesError || !connectorIndexingStatuses) {
    return <div>Failed to load connectors</div>;
  }

  if (credentialsError || !credentialsData) {
    return <div>Failed to load credentials</div>;
  }

  // Only the connectors whose source is Dropbox are relevant on this page.
  const dropboxConnectorIndexingStatuses: ConnectorIndexingStatus<
    DropboxConfig,
    DropboxCredentialJson
  >[] = connectorIndexingStatuses.filter(
    (connectorIndexingStatus) =>
      connectorIndexingStatus.connector.source === "dropbox"
  );

  // The first credential carrying a Dropbox access token, if any.
  const dropboxCredential: Credential<DropboxCredentialJson> | undefined =
    credentialsData.find(
      (credential) => credential.credential_json?.dropbox_access_token
    );

  return (
    <>
      {popup}
      <Title className="mb-2 mt-6 ml-auto mr-auto">
        Provide your API details
      </Title>
      {dropboxCredential ? (
        <>
          <div className="flex mb-1 text-sm">
            <p className="my-auto">Existing API Token: </p>
            <p className="ml-1 italic my-auto max-w-md">
              {dropboxCredential.credential_json?.dropbox_access_token}
            </p>
            <button
              className="ml-1 hover:bg-hover rounded p-1"
              onClick={async () => {
                // A credential still used by a connector must not be deleted.
                if (dropboxConnectorIndexingStatuses.length > 0) {
                  setPopup({
                    type: "error",
                    message:
                      "Must delete all connectors before deleting credentials",
                  });
                  return;
                }
                await adminDeleteCredential(dropboxCredential.id);
                refreshCredentials();
              }}
            >
              <TrashIcon />
            </button>
          </div>
        </>
      ) : (
        <>
          <Text>
            See the Dropbox connector{" "}
            <a
              className="text-blue-500"
              href="https://docs.danswer.dev/connectors/dropbox/overview"
            >
              setup guide
            </a>{" "}
            on the Danswer docs to obtain a Dropbox token.
          </Text>
          <Card className="mt-4 mb-4">
            <CredentialForm<DropboxCredentialJson>
              formBody={
                <>
                  <TextFormField
                    name="dropbox_access_token"
                    label="Dropbox API Token:"
                    type="password"
                  />
                </>
              }
              validationSchema={Yup.object().shape({
                dropbox_access_token: Yup.string().required(
                  "Please enter your Dropbox API token"
                ),
              })}
              initialValues={{
                dropbox_access_token: "",
              }}
              onSubmit={(isSuccess) => {
                if (isSuccess) {
                  refreshCredentials();
                  mutate(indexingStatusUrl);
                }
              }}
            />
          </Card>
        </>
      )}

      {dropboxConnectorIndexingStatuses.length > 0 && (
        <>
          <Title className="mb-2 mt-6 ml-auto mr-auto">
            Dropbox indexing status
          </Title>
          <Text className="mb-2">
            The latest file changes are fetched every 10 minutes.
          </Text>
          <div className="mb-2">
            <ConnectorsTable<DropboxConfig, DropboxCredentialJson>
              connectorIndexingStatuses={dropboxConnectorIndexingStatuses}
              liveCredential={dropboxCredential}
              onCredentialLink={async (connectorId) => {
                if (dropboxCredential) {
                  await linkCredential(connectorId, dropboxCredential.id);
                  mutate(indexingStatusUrl);
                }
              }}
              onUpdate={() => mutate(indexingStatusUrl)}
            />
          </div>
        </>
      )}

      {dropboxCredential && dropboxConnectorIndexingStatuses.length === 0 && (
        <>
          <Card className="mt-4">
            <h2 className="font-bold mb-3">Create Connection</h2>
            <p className="text-sm mb-4">
              Press connect below to start the connection to your Dropbox
              instance.
            </p>
            <ConnectorForm<DropboxConfig>
              nameBuilder={() => `Dropbox`}
              ccPairNameBuilder={() => `Dropbox`}
              source="dropbox"
              inputType="poll"
              formBody={<></>}
              validationSchema={Yup.object().shape({})}
              initialValues={{}}
              refreshFreq={10 * 60} // 10 minutes
              credentialId={dropboxCredential.id}
            />
          </Card>
        </>
      )}
    </>
  );
};
/**
 * Dropbox connector admin page: health banner, page title with the Dropbox
 * icon, and the connector management body.
 */
export default function Page() {
  const titleIcon = <DropboxIcon size={32} />;

  return (
    <div className="mx-auto container">
      <div className="mb-4">
        <HealthCheckBanner />
      </div>

      <AdminPageTitle icon={titleIcon} title="Dropbox" />

      <Main />
    </div>
  );
}

View File

@ -51,6 +51,7 @@ import hubSpotIcon from "../../../public/HubSpot.png";
import document360Icon from "../../../public/Document360.png";
import googleSitesIcon from "../../../public/GoogleSites.png";
import zendeskIcon from "../../../public/Zendesk.svg";
import dropboxIcon from "../../../public/Dropbox.png";
import sharepointIcon from "../../../public/Sharepoint.png";
import teamsIcon from "../../../public/Teams.png";
import mediawikiIcon from "../../../public/MediaWiki.svg";
@ -617,6 +618,18 @@ export const ZendeskIcon = ({
</div>
);
/**
 * Dropbox logo rendered inside a square wrapper of `size` pixels.
 * The wrapper gets both an inline width/height and the matching
 * `w-[..] h-[..]` class names, followed by the caller's `className`.
 */
export const DropboxIcon = ({
  size = 16,
  className = defaultTailwindCSS,
}: IconProps) => {
  const sizePx = `${size}px`;

  return (
    <div
      style={{ width: sizePx, height: sizePx }}
      className={`w-[${sizePx}] h-[${sizePx}] ${className}`}
    >
      <Image src={dropboxIcon} alt="Logo" width="96" height="96" />
    </div>
  );
};
export const DiscourseIcon = ({
size = 16,
className = defaultTailwindCSS,

View File

@ -4,6 +4,7 @@ import {
ConfluenceIcon,
DiscourseIcon,
Document360Icon,
DropboxIcon,
FileIcon,
GithubIcon,
GitlabIcon,
@ -154,6 +155,11 @@ const SOURCE_METADATA_MAP: SourceMap = {
displayName: "Loopio",
category: SourceCategory.AppConnection,
},
dropbox: {
icon: DropboxIcon,
displayName: "Dropbox",
category: SourceCategory.AppConnection,
},
sharepoint: {
icon: SharepointIcon,
displayName: "Sharepoint",

View File

@ -42,6 +42,7 @@ export type ValidSources =
| "file"
| "google_sites"
| "loopio"
| "dropbox"
| "sharepoint"
| "teams"
| "zendesk"
@ -191,6 +192,8 @@ export interface GoogleSitesConfig {
export interface ZendeskConfig {}
// Connector-specific settings for the Dropbox source. Empty: the Dropbox
// connector form submits no fields (its initial values are `{}`).
export interface DropboxConfig {}
export interface MediaWikiBaseConfig {
connector_name: string;
language_code: string;
@ -198,6 +201,7 @@ export interface MediaWikiBaseConfig {
pages?: string[];
recurse_depth?: number;
}
export interface MediaWikiConfig extends MediaWikiBaseConfig {
hostname: string;
}
@ -362,6 +366,10 @@ export interface ZendeskCredentialJson {
zendesk_token: string;
}
// Credential payload stored for the Dropbox connector.
export interface DropboxCredentialJson {
// Access token entered on the Dropbox admin page; the backend connector
// reads this key to construct its Dropbox client.
dropbox_access_token: string;
}
export interface SharepointCredentialJson {
sp_client_id: string;
sp_client_secret: string;