mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-04-07 11:28:09 +02:00
Add Dropbox connector (#956)
* start dropbox connector * add wip ui * polish ui * Fix some ci * ignore types * addressed, fixed, and tested all comments * ran prettier * ran mypy fixes --------- Co-authored-by: Bill Yang <bill@Bills-MacBook-Pro.local> Co-authored-by: hagen-danswer <hagen@danswer.ai>
This commit is contained in:
parent
0b6e85c26b
commit
914dc27a8f
@ -93,6 +93,7 @@ class DocumentSource(str, Enum):
|
||||
GOOGLE_SITES = "google_sites"
|
||||
ZENDESK = "zendesk"
|
||||
LOOPIO = "loopio"
|
||||
DROPBOX = "dropbox"
|
||||
SHAREPOINT = "sharepoint"
|
||||
TEAMS = "teams"
|
||||
DISCOURSE = "discourse"
|
||||
|
0
backend/danswer/connectors/dropbox/__init__.py
Normal file
0
backend/danswer/connectors/dropbox/__init__.py
Normal file
151
backend/danswer/connectors/dropbox/connector.py
Normal file
151
backend/danswer/connectors/dropbox/connector.py
Normal file
@ -0,0 +1,151 @@
|
||||
from datetime import timezone
|
||||
from io import BytesIO
|
||||
from typing import Any
|
||||
|
||||
from dropbox import Dropbox # type: ignore
|
||||
from dropbox.exceptions import ApiError # type:ignore
|
||||
from dropbox.files import FileMetadata # type:ignore
|
||||
from dropbox.files import FolderMetadata # type:ignore
|
||||
|
||||
from danswer.configs.app_configs import INDEX_BATCH_SIZE
|
||||
from danswer.configs.constants import DocumentSource
|
||||
from danswer.connectors.interfaces import GenerateDocumentsOutput
|
||||
from danswer.connectors.interfaces import LoadConnector
|
||||
from danswer.connectors.interfaces import PollConnector
|
||||
from danswer.connectors.interfaces import SecondsSinceUnixEpoch
|
||||
from danswer.connectors.models import ConnectorMissingCredentialError
|
||||
from danswer.connectors.models import Document
|
||||
from danswer.connectors.models import Section
|
||||
from danswer.file_processing.extract_file_text import extract_file_text
|
||||
from danswer.utils.logger import setup_logger
|
||||
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
class DropboxConnector(LoadConnector, PollConnector):
    """Index the files of a Dropbox account as documents.

    The connector walks the folder tree starting at the root path (""),
    downloads each file entry, extracts its text with ``extract_file_text``
    and yields the resulting ``Document`` objects in batches.
    """

    def __init__(self, batch_size: int = INDEX_BATCH_SIZE) -> None:
        """Create a connector without credentials.

        Args:
            batch_size: Used as the ``limit`` of every folder-listing
                request; each yielded batch is built from one listing page.
        """
        self.batch_size = batch_size
        # Stays None until load_credentials() is called.
        self.dropbox_client: Dropbox | None = None

    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
        """Build the Dropbox client from the ``dropbox_access_token`` entry.

        Raises:
            KeyError: If ``credentials`` has no ``dropbox_access_token`` key.

        Returns:
            Always None.
        """
        self.dropbox_client = Dropbox(credentials["dropbox_access_token"])
        return None

    def _download_file(self, path: str) -> bytes:
        """Download a single file from Dropbox and return its raw bytes.

        Raises:
            ConnectorMissingCredentialError: If no client has been created.
        """
        if self.dropbox_client is None:
            raise ConnectorMissingCredentialError("Dropbox")
        # files_download returns a (metadata, response) pair; only the
        # response body is needed here.
        _, resp = self.dropbox_client.files_download(path)
        return resp.content

    def _get_shared_link(self, path: str) -> str:
        """Return a shared link for ``path``, creating one if none exists.

        Returns:
            The URL of the first existing shared link, otherwise the URL of a
            newly created one. An empty string is returned when the Dropbox
            API reports an error, so a missing link never blocks indexing.

        Raises:
            ConnectorMissingCredentialError: If no client has been created.
        """
        if self.dropbox_client is None:
            raise ConnectorMissingCredentialError("Dropbox")

        try:
            # Check if a shared link already exists
            shared_links = self.dropbox_client.sharing_list_shared_links(path=path)
            if shared_links.links:
                return shared_links.links[0].url

            link_metadata = (
                self.dropbox_client.sharing_create_shared_link_with_settings(path)
            )
            return link_metadata.url
        except ApiError as err:
            logger.exception(f"Failed to create a shared link for {path}: {err}")
            return ""

    def _file_to_document(
        self,
        entry: FileMetadata,
        start: SecondsSinceUnixEpoch | None,
        end: SecondsSinceUnixEpoch | None,
    ) -> Document | None:
        """Convert one Dropbox file entry into a ``Document``.

        Returns:
            The document, or None when the file was modified outside the
            ``[start, end]`` window, could not be downloaded, or its text
            could not be extracted. Failures are logged, not raised, so one
            bad file does not abort the whole run.
        """
        modified_time = entry.client_modified
        if modified_time.tzinfo is None:
            # If no timezone info, assume it is UTC
            modified_time = modified_time.replace(tzinfo=timezone.utc)
        else:
            # If not in UTC, translate it
            modified_time = modified_time.astimezone(timezone.utc)

        # Compare against None explicitly so that a bound of 0 is still
        # treated as a real bound rather than as "no bound".
        time_as_seconds = int(modified_time.timestamp())
        if start is not None and time_as_seconds < start:
            return None
        if end is not None and time_as_seconds > end:
            return None

        try:
            downloaded_file = self._download_file(entry.path_display)
        except ApiError as err:
            # A per-file API failure must not stop the remaining files from
            # being indexed. Other errors (auth, network) still propagate.
            logger.exception(
                f"Failed to download file {entry.path_display}: {err}"
            )
            return None

        link = self._get_shared_link(entry.path_display)

        try:
            text = extract_file_text(entry.name, BytesIO(downloaded_file))
        except Exception as e:
            # Best effort: skip files whose content cannot be parsed.
            logger.exception(
                f"Failed to extract text from file {entry.path_display}: {e}"
            )
            return None

        return Document(
            id=f"doc:{entry.id}",
            sections=[Section(link=link, text=text)],
            source=DocumentSource.DROPBOX,
            semantic_identifier=entry.name,
            doc_updated_at=modified_time,
            # NOTE(review): "article" looks inherited from another connector;
            # kept unchanged because it is stored with the document.
            metadata={"type": "article"},
        )

    def _yield_files_recursive(
        self,
        path: str,
        start: SecondsSinceUnixEpoch | None,
        end: SecondsSinceUnixEpoch | None,
    ) -> GenerateDocumentsOutput:
        """Yield files in batches from a specified Dropbox folder, including subfolders.

        The folder is listed page by page (non-recursively); subfolders are
        descended into by calling this method again, and their batches are
        yielded before the current page's own batch.

        Args:
            path: Dropbox folder path to list ("" is used for the root).
            start: Lower bound on the file modification time, in seconds
                since the Unix epoch, or None for no lower bound.
            end: Upper bound on the file modification time, in seconds since
                the Unix epoch, or None for no upper bound.

        Raises:
            ConnectorMissingCredentialError: If no client has been created.
        """
        if self.dropbox_client is None:
            raise ConnectorMissingCredentialError("Dropbox")

        result = self.dropbox_client.files_list_folder(
            path,
            limit=self.batch_size,
            recursive=False,
            include_non_downloadable_files=False,
        )

        while True:
            batch: list[Document] = []
            for entry in result.entries:
                if isinstance(entry, FileMetadata):
                    document = self._file_to_document(entry, start, end)
                    if document is not None:
                        batch.append(document)
                elif isinstance(entry, FolderMetadata):
                    yield from self._yield_files_recursive(
                        entry.path_lower, start, end
                    )

            if batch:
                yield batch

            if not result.has_more:
                break

            # Fetch the next page of the same folder listing.
            result = self.dropbox_client.files_list_folder_continue(result.cursor)

    def load_from_state(self) -> GenerateDocumentsOutput:
        """Yield every file in the account, without any time filtering."""
        return self.poll_source(None, None)

    def poll_source(
        self, start: SecondsSinceUnixEpoch | None, end: SecondsSinceUnixEpoch | None
    ) -> GenerateDocumentsOutput:
        """Yield batches of files modified between ``start`` and ``end``.

        Raises:
            ConnectorMissingCredentialError: If no client has been created.
                Because this is a generator, the error surfaces when the
                first batch is requested.
        """
        if self.dropbox_client is None:
            raise ConnectorMissingCredentialError("Dropbox")

        yield from self._yield_files_recursive("", start, end)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke test: index the account behind DROPBOX_ACCESS_TOKEN and
    # print the first batch of documents.
    import os

    dropbox_connector = DropboxConnector()
    credentials = {"dropbox_access_token": os.environ["DROPBOX_ACCESS_TOKEN"]}
    dropbox_connector.load_credentials(credentials)

    first_batch = next(dropbox_connector.load_from_state())
    print(first_batch)
|
@ -8,6 +8,7 @@ from danswer.connectors.confluence.connector import ConfluenceConnector
|
||||
from danswer.connectors.danswer_jira.connector import JiraConnector
|
||||
from danswer.connectors.discourse.connector import DiscourseConnector
|
||||
from danswer.connectors.document360.connector import Document360Connector
|
||||
from danswer.connectors.dropbox.connector import DropboxConnector
|
||||
from danswer.connectors.file.connector import LocalFileConnector
|
||||
from danswer.connectors.github.connector import GithubConnector
|
||||
from danswer.connectors.gitlab.connector import GitlabConnector
|
||||
@ -74,6 +75,7 @@ def identify_connector_class(
|
||||
DocumentSource.GOOGLE_SITES: GoogleSitesConnector,
|
||||
DocumentSource.ZENDESK: ZendeskConnector,
|
||||
DocumentSource.LOOPIO: LoopioConnector,
|
||||
DocumentSource.DROPBOX: DropboxConnector,
|
||||
DocumentSource.SHAREPOINT: SharepointConnector,
|
||||
DocumentSource.TEAMS: TeamsConnector,
|
||||
DocumentSource.DISCOURSE: DiscourseConnector,
|
||||
|
@ -69,3 +69,4 @@ uvicorn==0.21.1
|
||||
zulip==0.8.2
|
||||
hubspot-api-client==8.1.0
|
||||
zenpy==2.0.41
|
||||
dropbox==11.36.2
|
||||
|
BIN
web/public/Dropbox.png
Normal file
BIN
web/public/Dropbox.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 42 KiB |
209
web/src/app/admin/connectors/dropbox/page.tsx
Normal file
209
web/src/app/admin/connectors/dropbox/page.tsx
Normal file
@ -0,0 +1,209 @@
|
||||
"use client";
|
||||
|
||||
import { AdminPageTitle } from "@/components/admin/Title";
|
||||
import { HealthCheckBanner } from "@/components/health/healthcheck";
|
||||
import { DropboxIcon } from "@/components/icons/icons";
|
||||
import { LoadingAnimation } from "@/components/Loading";
|
||||
import { ConnectorForm } from "@/components/admin/connectors/ConnectorForm";
|
||||
import { CredentialForm } from "@/components/admin/connectors/CredentialForm";
|
||||
import { TextFormField } from "@/components/admin/connectors/Field";
|
||||
import { usePopup } from "@/components/admin/connectors/Popup";
|
||||
import { ConnectorsTable } from "@/components/admin/connectors/table/ConnectorsTable";
|
||||
import { TrashIcon } from "@/components/icons/icons";
|
||||
import { adminDeleteCredential, linkCredential } from "@/lib/credential";
|
||||
import { fetcher } from "@/lib/fetcher";
|
||||
import { usePublicCredentials } from "@/lib/hooks";
|
||||
import {
|
||||
ConnectorIndexingStatus,
|
||||
Credential,
|
||||
DropboxConfig,
|
||||
DropboxCredentialJson,
|
||||
} from "@/lib/types";
|
||||
import { Card, Text, Title } from "@tremor/react";
|
||||
import useSWR, { useSWRConfig } from "swr";
|
||||
import * as Yup from "yup";
|
||||
|
||||
// Body of the Dropbox admin page. It loads the connector indexing statuses
// and the stored credentials, then renders three sections:
//   1. the credential section (existing token with a delete button, or a
//      form to enter a new token),
//   2. the indexing-status table when at least one Dropbox connector exists,
//   3. the "Create Connection" form when a credential exists but no
//      connector has been created yet.
const Main = () => {
  // SWR key shared by the initial fetch and every later revalidation.
  const indexingStatusUrl = "/api/manage/admin/connector/indexing-status";

  const { popup, setPopup } = usePopup();

  const { mutate } = useSWRConfig();
  const {
    data: connectorIndexingStatuses,
    isLoading: isConnectorIndexingStatusesLoading,
    error: isConnectorIndexingStatusesError,
  } = useSWR<ConnectorIndexingStatus<any, any>[]>(indexingStatusUrl, fetcher);
  const {
    data: credentialsData,
    isLoading: isCredentialsLoading,
    error: isCredentialsError,
    refreshCredentials,
  } = usePublicCredentials();

  // Show the spinner only while there is no data at all yet.
  if (
    (!connectorIndexingStatuses && isConnectorIndexingStatusesLoading) ||
    (!credentialsData && isCredentialsLoading)
  ) {
    return <LoadingAnimation text="Loading" />;
  }

  if (isConnectorIndexingStatusesError || !connectorIndexingStatuses) {
    return <div>Failed to load connectors</div>;
  }

  if (isCredentialsError || !credentialsData) {
    return <div>Failed to load credentials</div>;
  }

  // Only the connectors whose source is Dropbox are relevant on this page.
  const dropboxConnectorIndexingStatuses: ConnectorIndexingStatus<
    DropboxConfig,
    DropboxCredentialJson
  >[] = connectorIndexingStatuses.filter(
    (connectorIndexingStatus) =>
      connectorIndexingStatus.connector.source === "dropbox"
  );
  // The first credential that carries a Dropbox access token, if any.
  const dropboxCredential: Credential<DropboxCredentialJson> | undefined =
    credentialsData.find(
      (credential) => credential.credential_json?.dropbox_access_token
    );

  return (
    <>
      {popup}
      <Title className="mb-2 mt-6 ml-auto mr-auto">
        Provide your API details
      </Title>

      {dropboxCredential ? (
        <>
          <div className="flex mb-1 text-sm">
            <p className="my-auto">Existing API Token: </p>
            <p className="ml-1 italic my-auto max-w-md">
              {dropboxCredential.credential_json?.dropbox_access_token}
            </p>
            <button
              className="ml-1 hover:bg-hover rounded p-1"
              onClick={async () => {
                // A credential that is still used by a connector must not
                // be deleted.
                if (dropboxConnectorIndexingStatuses.length > 0) {
                  setPopup({
                    type: "error",
                    message:
                      "Must delete all connectors before deleting credentials",
                  });
                  return;
                }
                await adminDeleteCredential(dropboxCredential.id);
                refreshCredentials();
              }}
            >
              <TrashIcon />
            </button>
          </div>
        </>
      ) : (
        <>
          <Text>
            See the Dropbox connector{" "}
            <a
              className="text-blue-500"
              href="https://docs.danswer.dev/connectors/dropbox/overview"
            >
              setup guide
            </a>{" "}
            on the Danswer docs to obtain a Dropbox token.
          </Text>
          <Card className="mt-4 mb-4">
            <CredentialForm<DropboxCredentialJson>
              formBody={
                <>
                  <TextFormField
                    name="dropbox_access_token"
                    label="Dropbox API Token:"
                    type="password"
                  />
                </>
              }
              validationSchema={Yup.object().shape({
                dropbox_access_token: Yup.string().required(
                  "Please enter your Dropbox API token"
                ),
              })}
              initialValues={{
                dropbox_access_token: "",
              }}
              onSubmit={(isSuccess) => {
                if (isSuccess) {
                  refreshCredentials();
                  mutate(indexingStatusUrl);
                }
              }}
            />
          </Card>
        </>
      )}

      {dropboxConnectorIndexingStatuses.length > 0 && (
        <>
          <Title className="mb-2 mt-6 ml-auto mr-auto">
            Dropbox indexing status
          </Title>
          <Text className="mb-2">
            The latest file changes are fetched every 10 minutes.
          </Text>
          <div className="mb-2">
            <ConnectorsTable<DropboxConfig, DropboxCredentialJson>
              connectorIndexingStatuses={dropboxConnectorIndexingStatuses}
              liveCredential={dropboxCredential}
              onCredentialLink={async (connectorId) => {
                if (dropboxCredential) {
                  await linkCredential(connectorId, dropboxCredential.id);
                  mutate(indexingStatusUrl);
                }
              }}
              onUpdate={() => mutate(indexingStatusUrl)}
            />
          </div>
        </>
      )}

      {dropboxCredential && dropboxConnectorIndexingStatuses.length === 0 && (
        <>
          <Card className="mt-4">
            <h2 className="font-bold mb-3">Create Connection</h2>
            <p className="text-sm mb-4">
              Press connect below to start the connection to your Dropbox
              instance.
            </p>
            <ConnectorForm<DropboxConfig>
              nameBuilder={() => `Dropbox`}
              ccPairNameBuilder={() => `Dropbox`}
              source="dropbox"
              inputType="poll"
              formBody={<></>}
              validationSchema={Yup.object().shape({})}
              initialValues={{}}
              refreshFreq={10 * 60} // 10 minutes
              credentialId={dropboxCredential.id}
            />
          </Card>
        </>
      )}
    </>
  );
};
|
||||
|
||||
// Route entry point for the Dropbox admin page: health banner, page title
// and the connector management body.
export default function Page() {
  const titleIcon = <DropboxIcon size={32} />;

  return (
    <div className="mx-auto container">
      <div className="mb-4">
        <HealthCheckBanner />
      </div>
      <AdminPageTitle icon={titleIcon} title="Dropbox" />
      <Main />
    </div>
  );
}
|
@ -51,6 +51,7 @@ import hubSpotIcon from "../../../public/HubSpot.png";
|
||||
import document360Icon from "../../../public/Document360.png";
|
||||
import googleSitesIcon from "../../../public/GoogleSites.png";
|
||||
import zendeskIcon from "../../../public/Zendesk.svg";
|
||||
import dropboxIcon from "../../../public/Dropbox.png";
|
||||
import sharepointIcon from "../../../public/Sharepoint.png";
|
||||
import teamsIcon from "../../../public/Teams.png";
|
||||
import mediawikiIcon from "../../../public/MediaWiki.svg";
|
||||
@ -617,6 +618,18 @@ export const ZendeskIcon = ({
|
||||
</div>
|
||||
);
|
||||
|
||||
// Dropbox logo rendered inside a square wrapper that is `size` pixels wide
// and high; `className` is appended to the wrapper's size classes.
export const DropboxIcon = ({
  size = 16,
  className = defaultTailwindCSS,
}: IconProps) => {
  const sidePx = `${size}px`;
  const wrapperStyle = { width: sidePx, height: sidePx };
  const wrapperClassName = `w-[${sidePx}] h-[${sidePx}] ${className}`;

  return (
    <div style={wrapperStyle} className={wrapperClassName}>
      <Image src={dropboxIcon} alt="Logo" width="96" height="96" />
    </div>
  );
};
|
||||
|
||||
export const DiscourseIcon = ({
|
||||
size = 16,
|
||||
className = defaultTailwindCSS,
|
||||
|
@ -4,6 +4,7 @@ import {
|
||||
ConfluenceIcon,
|
||||
DiscourseIcon,
|
||||
Document360Icon,
|
||||
DropboxIcon,
|
||||
FileIcon,
|
||||
GithubIcon,
|
||||
GitlabIcon,
|
||||
@ -154,6 +155,11 @@ const SOURCE_METADATA_MAP: SourceMap = {
|
||||
displayName: "Loopio",
|
||||
category: SourceCategory.AppConnection,
|
||||
},
|
||||
dropbox: {
|
||||
icon: DropboxIcon,
|
||||
displayName: "Dropbox",
|
||||
category: SourceCategory.AppConnection,
|
||||
},
|
||||
sharepoint: {
|
||||
icon: SharepointIcon,
|
||||
displayName: "Sharepoint",
|
||||
|
@ -42,6 +42,7 @@ export type ValidSources =
|
||||
| "file"
|
||||
| "google_sites"
|
||||
| "loopio"
|
||||
| "dropbox"
|
||||
| "sharepoint"
|
||||
| "teams"
|
||||
| "zendesk"
|
||||
@ -191,6 +192,8 @@ export interface GoogleSitesConfig {
|
||||
|
||||
export interface ZendeskConfig {}
|
||||
|
||||
// Connector-specific configuration for the Dropbox connector. It declares no
// fields: the Dropbox connection form submits an empty set of values.
export interface DropboxConfig {}
|
||||
|
||||
export interface MediaWikiBaseConfig {
|
||||
connector_name: string;
|
||||
language_code: string;
|
||||
@ -198,6 +201,7 @@ export interface MediaWikiBaseConfig {
|
||||
pages?: string[];
|
||||
recurse_depth?: number;
|
||||
}
|
||||
|
||||
export interface MediaWikiConfig extends MediaWikiBaseConfig {
|
||||
hostname: string;
|
||||
}
|
||||
@ -362,6 +366,10 @@ export interface ZendeskCredentialJson {
|
||||
zendesk_token: string;
|
||||
}
|
||||
|
||||
// Credential payload for the Dropbox connector. `dropbox_access_token` is the
// value entered in the "Dropbox API Token" form field and is the token the
// backend connector passes to the Dropbox client.
export interface DropboxCredentialJson {
|
||||
dropbox_access_token: string;
|
||||
}
|
||||
|
||||
export interface SharepointCredentialJson {
|
||||
sp_client_id: string;
|
||||
sp_client_secret: string;
|
||||
|
Loading…
x
Reference in New Issue
Block a user