diff --git a/backend/danswer/configs/constants.py b/backend/danswer/configs/constants.py index 84579ca33..b29d3558b 100644 --- a/backend/danswer/configs/constants.py +++ b/backend/danswer/configs/constants.py @@ -100,6 +100,17 @@ class DocumentSource(str, Enum): CLICKUP = "clickup" MEDIAWIKI = "mediawiki" WIKIPEDIA = "wikipedia" + S3 = "s3" + R2 = "r2" + GOOGLE_CLOUD_STORAGE = "google_cloud_storage" + OCI_STORAGE = "oci_storage" + + +class BlobType(str, Enum): + R2 = "r2" + S3 = "s3" + GOOGLE_CLOUD_STORAGE = "google_cloud_storage" + OCI_STORAGE = "oci_storage" class DocumentIndexType(str, Enum): diff --git a/backend/danswer/connectors/blob/__init__.py b/backend/danswer/connectors/blob/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/backend/danswer/connectors/blob/connector.py b/backend/danswer/connectors/blob/connector.py new file mode 100644 index 000000000..2446bfd16 --- /dev/null +++ b/backend/danswer/connectors/blob/connector.py @@ -0,0 +1,277 @@ +import os +from datetime import datetime +from datetime import timezone +from io import BytesIO +from typing import Any +from typing import Optional + +import boto3 +from botocore.client import Config +from mypy_boto3_s3 import S3Client + +from danswer.configs.app_configs import INDEX_BATCH_SIZE +from danswer.configs.constants import BlobType +from danswer.configs.constants import DocumentSource +from danswer.connectors.interfaces import GenerateDocumentsOutput +from danswer.connectors.interfaces import LoadConnector +from danswer.connectors.interfaces import PollConnector +from danswer.connectors.interfaces import SecondsSinceUnixEpoch +from danswer.connectors.models import ConnectorMissingCredentialError +from danswer.connectors.models import Document +from danswer.connectors.models import Section +from danswer.file_processing.extract_file_text import extract_file_text +from danswer.utils.logger import setup_logger + +logger = setup_logger() + + +class BlobStorageConnector(LoadConnector, PollConnector): + def __init__( + self, + bucket_type: str, + bucket_name: str, + prefix: str = "", + batch_size: int = INDEX_BATCH_SIZE, + ) -> None: + self.bucket_type: BlobType = BlobType(bucket_type) + self.bucket_name = bucket_name + self.prefix = prefix if not prefix or prefix.endswith("/") else prefix + "/" + self.batch_size = batch_size + self.s3_client: Optional[S3Client] = None + + def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None: + """Checks for boto3 credentials based on the bucket type. + (1) R2: Access Key ID, Secret Access Key, Account ID + (2) S3: AWS Access Key ID, AWS Secret Access Key + (3) GOOGLE_CLOUD_STORAGE: Access Key ID, Secret Access Key, Project ID + (4) OCI_STORAGE: Namespace, Region, Access Key ID, Secret Access Key + + For each bucket type, the method initializes the appropriate S3 client: + - R2: Uses Cloudflare R2 endpoint with S3v4 signature + - S3: Creates a standard boto3 S3 client + - GOOGLE_CLOUD_STORAGE: Uses Google Cloud Storage endpoint + - OCI_STORAGE: Uses Oracle Cloud Infrastructure Object Storage endpoint + + Raises ConnectorMissingCredentialError if required credentials are missing. + Raises ValueError for unsupported bucket types. + """ + + logger.info( + f"Loading credentials for {self.bucket_name} or type {self.bucket_type}" + ) + + if self.bucket_type == BlobType.R2: + if not all( + credentials.get(key) + for key in ["r2_access_key_id", "r2_secret_access_key", "account_id"] + ): + raise ConnectorMissingCredentialError("Cloudflare R2") + self.s3_client = boto3.client( + "s3", + endpoint_url=f"https://{credentials['account_id']}.r2.cloudflarestorage.com", + aws_access_key_id=credentials["r2_access_key_id"], + aws_secret_access_key=credentials["r2_secret_access_key"], + region_name="auto", + config=Config(signature_version="s3v4"), + ) + + elif self.bucket_type == BlobType.S3: + if not all( + credentials.get(key) + for key in ["aws_access_key_id", "aws_secret_access_key"] + ): + raise ConnectorMissingCredentialError("Google Cloud Storage") + + session = boto3.Session( + aws_access_key_id=credentials["aws_access_key_id"], + aws_secret_access_key=credentials["aws_secret_access_key"], + ) + self.s3_client = session.client("s3") + + elif self.bucket_type == BlobType.GOOGLE_CLOUD_STORAGE: + if not all( + credentials.get(key) for key in ["access_key_id", "secret_access_key"] + ): + raise ConnectorMissingCredentialError("Google Cloud Storage") + + self.s3_client = boto3.client( + "s3", + endpoint_url="https://storage.googleapis.com", + aws_access_key_id=credentials["access_key_id"], + aws_secret_access_key=credentials["secret_access_key"], + region_name="auto", + ) + + elif self.bucket_type == BlobType.OCI_STORAGE: + if not all( + credentials.get(key) + for key in ["namespace", "region", "access_key_id", "secret_access_key"] + ): + raise ConnectorMissingCredentialError("Oracle Cloud Infrastructure") + + self.s3_client = boto3.client( + "s3", + endpoint_url=f"https://{credentials['namespace']}.compat.objectstorage.{credentials['region']}.oraclecloud.com", + aws_access_key_id=credentials["access_key_id"], + aws_secret_access_key=credentials["secret_access_key"], + region_name=credentials["region"], + ) + + else: + raise ValueError(f"Unsupported bucket type: {self.bucket_type}") + + return None + + def _download_object(self, key: str) -> bytes: + if self.s3_client is None: + raise ConnectorMissingCredentialError("Blob storage") + object = self.s3_client.get_object(Bucket=self.bucket_name, Key=key) + return object["Body"].read() + + # NOTE: Left in as may be useful for one-off access to documents and sharing across orgs. + # def _get_presigned_url(self, key: str) -> str: + # if self.s3_client is None: + # raise ConnectorMissingCredentialError("Blog storage") + + # url = self.s3_client.generate_presigned_url( + # "get_object", + # Params={"Bucket": self.bucket_name, "Key": key}, + # ExpiresIn=self.presign_length, + # ) + # return url + + def _get_blob_link(self, key: str) -> str: + if self.s3_client is None: + raise ConnectorMissingCredentialError("Blob storage") + + if self.bucket_type == BlobType.R2: + account_id = self.s3_client.meta.endpoint_url.split("//")[1].split(".")[0] + return f"https://{account_id}.r2.cloudflarestorage.com/{self.bucket_name}/{key}" + + elif self.bucket_type == BlobType.S3: + region = self.s3_client.meta.region_name + return f"https://{self.bucket_name}.s3.{region}.amazonaws.com/{key}" + + elif self.bucket_type == BlobType.GOOGLE_CLOUD_STORAGE: + return f"https://storage.cloud.google.com/{self.bucket_name}/{key}" + + elif self.bucket_type == BlobType.OCI_STORAGE: + namespace = self.s3_client.meta.endpoint_url.split("//")[1].split(".")[0] + region = self.s3_client.meta.region_name + return f"https://objectstorage.{region}.oraclecloud.com/n/{namespace}/b/{self.bucket_name}/o/{key}" + + else: + raise ValueError(f"Unsupported bucket type: {self.bucket_type}") + + def _yield_blob_objects( + self, + start: datetime, + end: datetime, + ) -> GenerateDocumentsOutput: + if self.s3_client is None: + raise ConnectorMissingCredentialError("Blog storage") + + paginator = self.s3_client.get_paginator("list_objects_v2") + pages = paginator.paginate(Bucket=self.bucket_name, Prefix=self.prefix) + + batch: list[Document] = [] + for page in pages: + if "Contents" not in page: + continue + + for obj in page["Contents"]: + if obj["Key"].endswith("/"): + continue + + last_modified = obj["LastModified"].replace(tzinfo=timezone.utc) + + if not start <= last_modified <= end: + continue + + downloaded_file = self._download_object(obj["Key"]) + link = self._get_blob_link(obj["Key"]) + name = os.path.basename(obj["Key"]) + + try: + text = extract_file_text( + name, + BytesIO(downloaded_file), + break_on_unprocessable=False, + ) + batch.append( + Document( + id=f"{self.bucket_type}:{self.bucket_name}:{obj['Key']}", + sections=[Section(link=link, text=text)], + source=DocumentSource(self.bucket_type.value), + semantic_identifier=name, + doc_updated_at=last_modified, + metadata={}, + ) + ) + if len(batch) == self.batch_size: + yield batch + batch = [] + + except Exception as e: + logger.exception( + f"Error decoding object {obj['Key']} as UTF-8: {e}" + ) + if batch: + yield batch + + def load_from_state(self) -> GenerateDocumentsOutput: + logger.info("Loading blob objects") + return self._yield_blob_objects( + start=datetime(1970, 1, 1, tzinfo=timezone.utc), + end=datetime.now(timezone.utc), + ) + + def poll_source( + self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch + ) -> GenerateDocumentsOutput: + if self.s3_client is None: + raise ConnectorMissingCredentialError("Blog storage") + + start_datetime = datetime.fromtimestamp(start, tz=timezone.utc) + end_datetime = datetime.fromtimestamp(end, tz=timezone.utc) + + for batch in self._yield_blob_objects(start_datetime, end_datetime): + yield batch + + return None + + +if __name__ == "__main__": + credentials_dict = { + "aws_access_key_id": os.environ.get("AWS_ACCESS_KEY_ID"), + "aws_secret_access_key": os.environ.get("AWS_SECRET_ACCESS_KEY"), + } + + # Initialize the connector + connector = BlobStorageConnector( + bucket_type=os.environ.get("BUCKET_TYPE") or "s3", + bucket_name=os.environ.get("BUCKET_NAME") or "test", + prefix="", + ) + + try: + connector.load_credentials(credentials_dict) + document_batch_generator = connector.load_from_state() + for document_batch in document_batch_generator: + print("First batch of documents:") + for doc in document_batch: + print(f"Document ID: {doc.id}") + print(f"Semantic Identifier: {doc.semantic_identifier}") + print(f"Source: {doc.source}") + print(f"Updated At: {doc.doc_updated_at}") + print("Sections:") + for section in doc.sections: + print(f" - Link: {section.link}") + print(f" - Text: {section.text[:100]}...") + print("---") + break + + except ConnectorMissingCredentialError as e: + print(f"Error: {e}") + except Exception as e: + print(f"An unexpected error occurred: {e}") diff --git a/backend/danswer/connectors/factory.py b/backend/danswer/connectors/factory.py index f70980a8d..1a3d605d3 100644 --- a/backend/danswer/connectors/factory.py +++ b/backend/danswer/connectors/factory.py @@ -5,6 +5,7 @@ from sqlalchemy.orm import Session from danswer.configs.constants import DocumentSource from danswer.connectors.axero.connector import AxeroConnector +from danswer.connectors.blob.connector import BlobStorageConnector from danswer.connectors.bookstack.connector import BookstackConnector from danswer.connectors.clickup.connector import ClickupConnector from danswer.connectors.confluence.connector import ConfluenceConnector @@ -90,6 +91,10 @@ def identify_connector_class( DocumentSource.CLICKUP: ClickupConnector, DocumentSource.MEDIAWIKI: MediaWikiConnector, DocumentSource.WIKIPEDIA: WikipediaConnector, + DocumentSource.S3: BlobStorageConnector, + DocumentSource.R2: BlobStorageConnector, + DocumentSource.GOOGLE_CLOUD_STORAGE: BlobStorageConnector, + DocumentSource.OCI_STORAGE: BlobStorageConnector, } connector_by_source = connector_map.get(source, {}) @@ -115,7 +120,6 @@ def identify_connector_class( raise ConnectorMissingException( f"Connector for source={source} does not accept input_type={input_type}" ) - return connector diff --git a/backend/requirements/default.txt b/backend/requirements/default.txt index 2d8745f33..2252551b9 100644 --- a/backend/requirements/default.txt +++ b/backend/requirements/default.txt @@ -71,4 +71,4 @@ uvicorn==0.21.1 zulip==0.8.2 hubspot-api-client==8.1.0 zenpy==2.0.41 -dropbox==11.36.2 +dropbox==11.36.2 \ No newline at end of file diff --git a/backend/requirements/dev.txt b/backend/requirements/dev.txt index 4a9bd21d3..3a062a859 100644 --- a/backend/requirements/dev.txt +++ b/backend/requirements/dev.txt @@ -19,3 +19,4 @@ types-regex==2023.3.23.1 types-requests==2.28.11.17 types-retry==0.9.9.3 types-urllib3==1.26.25.11 +boto3-stubs[s3]==1.34.133 \ No newline at end of file diff --git a/web/public/GoogleCloudStorage.png b/web/public/GoogleCloudStorage.png new file mode 100644 index 000000000..4790f2c52 Binary files /dev/null and b/web/public/GoogleCloudStorage.png differ diff --git a/web/public/OCI.svg b/web/public/OCI.svg new file mode 100644 index 000000000..75d359b00 --- /dev/null +++ b/web/public/OCI.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/web/public/S3.png b/web/public/S3.png new file mode 100644 index 000000000..75eb1f19b Binary files /dev/null and b/web/public/S3.png differ diff --git a/web/public/r2.webp b/web/public/r2.webp new file mode 100644 index 000000000..d79134ce7 Binary files /dev/null and b/web/public/r2.webp differ diff --git a/web/src/app/admin/connectors/google-storage/page.tsx b/web/src/app/admin/connectors/google-storage/page.tsx new file mode 100644 index 000000000..a836df21f --- /dev/null +++ b/web/src/app/admin/connectors/google-storage/page.tsx @@ -0,0 +1,257 @@ +"use client"; + +import { AdminPageTitle } from "@/components/admin/Title"; +import { HealthCheckBanner } from "@/components/health/healthcheck"; +import { GoogleStorageIcon, TrashIcon } from "@/components/icons/icons"; +import { LoadingAnimation } from "@/components/Loading"; +import { ConnectorForm } from "@/components/admin/connectors/ConnectorForm"; +import { CredentialForm } from "@/components/admin/connectors/CredentialForm"; +import { TextFormField } from "@/components/admin/connectors/Field"; +import { usePopup } from "@/components/admin/connectors/Popup"; +import { ConnectorsTable } from "@/components/admin/connectors/table/ConnectorsTable"; +import { adminDeleteCredential, linkCredential } from "@/lib/credential"; +import { errorHandlingFetcher } from "@/lib/fetcher"; +import { ErrorCallout } from "@/components/ErrorCallout"; +import { usePublicCredentials } from "@/lib/hooks"; +import { ConnectorIndexingStatus, Credential } from "@/lib/types"; + +import { GCSConfig, GCSCredentialJson } from "@/lib/types"; + +import { Card, Select, SelectItem, Text, Title } from "@tremor/react"; +import useSWR, { useSWRConfig } from "swr"; +import * as Yup from "yup"; +import { useState } from "react"; + +const GCSMain = () => { + const { popup, setPopup } = usePopup(); + const { mutate } = useSWRConfig(); + const { + data: connectorIndexingStatuses, + isLoading: isConnectorIndexingStatusesLoading, + error: connectorIndexingStatusesError, + } = useSWR[]>( + "/api/manage/admin/connector/indexing-status", + errorHandlingFetcher + ); + const { + data: credentialsData, + isLoading: isCredentialsLoading, + error: credentialsError, + refreshCredentials, + } = usePublicCredentials(); + + if ( + (!connectorIndexingStatuses && isConnectorIndexingStatusesLoading) || + (!credentialsData && isCredentialsLoading) + ) { + return ; + } + + if (connectorIndexingStatusesError || !connectorIndexingStatuses) { + return ( + + ); + } + + if (credentialsError || !credentialsData) { + return ( + + ); + } + + const gcsConnectorIndexingStatuses: ConnectorIndexingStatus< + GCSConfig, + GCSCredentialJson + >[] = connectorIndexingStatuses.filter( + (connectorIndexingStatus) => + connectorIndexingStatus.connector.source === "google_cloud_storage" + ); + + const gcsCredential: Credential | undefined = + credentialsData.find( + (credential) => credential.credential_json?.project_id + ); + + return ( + <> + {popup} + + Step 1: Provide your GCS access info + + {gcsCredential ? ( + <> +
+

Existing GCS Access Key ID:

+

+ {gcsCredential.credential_json.access_key_id} +

+ {", "} +

Secret Access Key:

+

+ {gcsCredential.credential_json.secret_access_key} +

{" "} + +
+ + ) : ( + <> + +
    +
  • + Provide your GCS Project ID, Client Email, and Private Key for + authentication. +
  • +
  • + These credentials will be used to access your GCS buckets. +
  • +
+
+ + + formBody={ + <> + + + + + } + validationSchema={Yup.object().shape({ + secret_access_key: Yup.string().required( + "Client Email is required" + ), + access_key_id: Yup.string().required("Private Key is required"), + })} + initialValues={{ + secret_access_key: "", + access_key_id: "", + }} + onSubmit={(isSuccess) => { + if (isSuccess) { + refreshCredentials(); + } + }} + /> + + + )} + + + Step 2: Which GCS bucket do you want to make searchable? + + + {gcsConnectorIndexingStatuses.length > 0 && ( + <> + + GCS indexing status + + + The latest changes are fetched every 10 minutes. + +
+ + includeName={true} + connectorIndexingStatuses={gcsConnectorIndexingStatuses} + liveCredential={gcsCredential} + getCredential={(credential) => { + return
; + }} + onCredentialLink={async (connectorId) => { + if (gcsCredential) { + await linkCredential(connectorId, gcsCredential.id); + mutate("/api/manage/admin/connector/indexing-status"); + } + }} + onUpdate={() => + mutate("/api/manage/admin/connector/indexing-status") + } + /> +
+ + )} + + {gcsCredential && ( + <> + +

Create Connection

+ + Press connect below to start the connection to your GCS bucket. + + + nameBuilder={(values) => `GCSConnector-${values.bucket_name}`} + ccPairNameBuilder={(values) => + `GCSConnector-${values.bucket_name}` + } + source="google_cloud_storage" + inputType="poll" + formBodyBuilder={(values) => ( +
+ + +
+ )} + validationSchema={Yup.object().shape({ + bucket_type: Yup.string() + .oneOf(["google_cloud_storage"]) + .required("Bucket type must be google_cloud_storage"), + bucket_name: Yup.string().required( + "Please enter the name of the GCS bucket to index, e.g. my-gcs-bucket" + ), + prefix: Yup.string().default(""), + })} + initialValues={{ + bucket_type: "google_cloud_storage", + bucket_name: "", + prefix: "", + }} + refreshFreq={60 * 60 * 24} // 1 day + credentialId={gcsCredential.id} + /> +
+ + )} + + ); +}; + +export default function Page() { + return ( +
+
+ +
+ } + title="Google Cloud Storage" + /> + +
+ ); +} diff --git a/web/src/app/admin/connectors/oracle-storage/page.tsx b/web/src/app/admin/connectors/oracle-storage/page.tsx new file mode 100644 index 000000000..34847a4b9 --- /dev/null +++ b/web/src/app/admin/connectors/oracle-storage/page.tsx @@ -0,0 +1,272 @@ +"use client"; + +import { AdminPageTitle } from "@/components/admin/Title"; +import { HealthCheckBanner } from "@/components/health/healthcheck"; +import { OCIStorageIcon, TrashIcon } from "@/components/icons/icons"; +import { LoadingAnimation } from "@/components/Loading"; +import { ConnectorForm } from "@/components/admin/connectors/ConnectorForm"; +import { CredentialForm } from "@/components/admin/connectors/CredentialForm"; +import { TextFormField } from "@/components/admin/connectors/Field"; +import { usePopup } from "@/components/admin/connectors/Popup"; +import { ConnectorsTable } from "@/components/admin/connectors/table/ConnectorsTable"; +import { adminDeleteCredential, linkCredential } from "@/lib/credential"; +import { errorHandlingFetcher } from "@/lib/fetcher"; +import { ErrorCallout } from "@/components/ErrorCallout"; +import { usePublicCredentials } from "@/lib/hooks"; + +import { + ConnectorIndexingStatus, + Credential, + OCIConfig, + OCICredentialJson, + R2Config, + R2CredentialJson, +} from "@/lib/types"; +import { Card, Select, SelectItem, Text, Title } from "@tremor/react"; +import useSWR, { useSWRConfig } from "swr"; +import * as Yup from "yup"; +import { useState } from "react"; + +const OCIMain = () => { + const { popup, setPopup } = usePopup(); + + const { mutate } = useSWRConfig(); + const { + data: connectorIndexingStatuses, + isLoading: isConnectorIndexingStatusesLoading, + error: connectorIndexingStatusesError, + } = useSWR[]>( + "/api/manage/admin/connector/indexing-status", + errorHandlingFetcher + ); + const { + data: credentialsData, + isLoading: isCredentialsLoading, + error: credentialsError, + refreshCredentials, + } = usePublicCredentials(); + + if ( + (!connectorIndexingStatuses && isConnectorIndexingStatusesLoading) || + (!credentialsData && isCredentialsLoading) + ) { + return ; + } + + if (connectorIndexingStatusesError || !connectorIndexingStatuses) { + return ( + + ); + } + + if (credentialsError || !credentialsData) { + return ( + + ); + } + + const ociConnectorIndexingStatuses: ConnectorIndexingStatus< + OCIConfig, + OCICredentialJson + >[] = connectorIndexingStatuses.filter( + (connectorIndexingStatus) => + connectorIndexingStatus.connector.source === "oci_storage" + ); + + const ociCredential: Credential | undefined = + credentialsData.find((credential) => credential.credential_json?.namespace); + + return ( + <> + {popup} + + Step 1: Provide your access info + + {ociCredential ? ( + <> + {" "} +
+

Existing OCI Access Key ID:

+

+ {ociCredential.credential_json.access_key_id} +

+ {", "} +

Namespace:

+

+ {ociCredential.credential_json.namespace} +

{" "} + +
+ + ) : ( + <> + +
    +
  • + Provide your OCI Access Key ID, Secret Access Key, Namespace, + and Region for authentication. +
  • +
  • + These credentials will be used to access your OCI buckets. +
  • +
+
+ + + formBody={ + <> + + + + + + } + validationSchema={Yup.object().shape({ + access_key_id: Yup.string().required( + "OCI Access Key ID is required" + ), + secret_access_key: Yup.string().required( + "OCI Secret Access Key is required" + ), + namespace: Yup.string().required("Namespace is required"), + region: Yup.string().required("Region is required"), + })} + initialValues={{ + access_key_id: "", + secret_access_key: "", + namespace: "", + region: "", + }} + onSubmit={(isSuccess) => { + if (isSuccess) { + refreshCredentials(); + } + }} + /> + + + )} + + + Step 2: Which OCI bucket do you want to make searchable? + + + {ociConnectorIndexingStatuses.length > 0 && ( + <> + + OCI indexing status + + + The latest changes are fetched every 10 minutes. + +
+ + includeName={true} + connectorIndexingStatuses={ociConnectorIndexingStatuses} + liveCredential={ociCredential} + getCredential={(credential) => { + return
; + }} + onCredentialLink={async (connectorId) => { + if (ociCredential) { + await linkCredential(connectorId, ociCredential.id); + mutate("/api/manage/admin/connector/indexing-status"); + } + }} + onUpdate={() => + mutate("/api/manage/admin/connector/indexing-status") + } + /> +
+ + )} + + {ociCredential && ( + <> + +

Create Connection

+ + Press connect below to start the connection to your OCI bucket. + + + nameBuilder={(values) => `OCIConnector-${values.bucket_name}`} + ccPairNameBuilder={(values) => + `OCIConnector-${values.bucket_name}` + } + source="oci_storage" + inputType="poll" + formBodyBuilder={(values) => ( +
+ + +
+ )} + validationSchema={Yup.object().shape({ + bucket_type: Yup.string() + .oneOf(["oci_storage"]) + .required("Bucket type must be oci_storage"), + bucket_name: Yup.string().required( + "Please enter the name of the OCI bucket to index, e.g. my-test-bucket" + ), + prefix: Yup.string().default(""), + })} + initialValues={{ + bucket_type: "oci_storage", + bucket_name: "", + prefix: "", + }} + refreshFreq={60 * 60 * 24} // 1 day + credentialId={ociCredential.id} + /> +
+ + )} + + ); +}; + +export default function Page() { + return ( +
+
+ +
+ } + title="Oracle Cloud Infrastructure" + /> + +
+ ); +} diff --git a/web/src/app/admin/connectors/r2/page.tsx b/web/src/app/admin/connectors/r2/page.tsx new file mode 100644 index 000000000..372660acc --- /dev/null +++ b/web/src/app/admin/connectors/r2/page.tsx @@ -0,0 +1,265 @@ +"use client"; + +import { AdminPageTitle } from "@/components/admin/Title"; +import { HealthCheckBanner } from "@/components/health/healthcheck"; +import { R2Icon, S3Icon, TrashIcon } from "@/components/icons/icons"; +import { LoadingAnimation } from "@/components/Loading"; +import { ConnectorForm } from "@/components/admin/connectors/ConnectorForm"; +import { CredentialForm } from "@/components/admin/connectors/CredentialForm"; +import { TextFormField } from "@/components/admin/connectors/Field"; +import { usePopup } from "@/components/admin/connectors/Popup"; +import { ConnectorsTable } from "@/components/admin/connectors/table/ConnectorsTable"; +import { adminDeleteCredential, linkCredential } from "@/lib/credential"; +import { errorHandlingFetcher } from "@/lib/fetcher"; +import { ErrorCallout } from "@/components/ErrorCallout"; +import { usePublicCredentials } from "@/lib/hooks"; +import { + ConnectorIndexingStatus, + Credential, + R2Config, + R2CredentialJson, +} from "@/lib/types"; +import { Card, Select, SelectItem, Text, Title } from "@tremor/react"; +import useSWR, { useSWRConfig } from "swr"; +import * as Yup from "yup"; +import { useState } from "react"; + +const R2Main = () => { + const { popup, setPopup } = usePopup(); + + const { mutate } = useSWRConfig(); + const { + data: connectorIndexingStatuses, + isLoading: isConnectorIndexingStatusesLoading, + error: connectorIndexingStatusesError, + } = useSWR[]>( + "/api/manage/admin/connector/indexing-status", + errorHandlingFetcher + ); + const { + data: credentialsData, + isLoading: isCredentialsLoading, + error: credentialsError, + refreshCredentials, + } = usePublicCredentials(); + + if ( + (!connectorIndexingStatuses && isConnectorIndexingStatusesLoading) || + (!credentialsData && isCredentialsLoading) + ) { + return ; + } + + if (connectorIndexingStatusesError || !connectorIndexingStatuses) { + return ( + + ); + } + + if (credentialsError || !credentialsData) { + return ( + + ); + } + + const r2ConnectorIndexingStatuses: ConnectorIndexingStatus< + R2Config, + R2CredentialJson + >[] = connectorIndexingStatuses.filter( + (connectorIndexingStatus) => + connectorIndexingStatus.connector.source === "r2" + ); + + const r2Credential: Credential | undefined = + credentialsData.find( + (credential) => credential.credential_json?.account_id + ); + + return ( + <> + {popup} + + Step 1: Provide your access info + + {r2Credential ? ( + <> + {" "} +
+

Existing R2 Access Key ID:

+

+ {r2Credential.credential_json.r2_access_key_id} +

+ {", "} +

Account ID:

+

+ {r2Credential.credential_json.account_id} +

{" "} + +
+ + ) : ( + <> + +
    +
  • + Provide your R2 Access Key ID, Secret Access Key, and Account ID + for authentication. +
  • +
  • These credentials will be used to access your R2 buckets.
  • +
+
+ + + formBody={ + <> + + + + + } + validationSchema={Yup.object().shape({ + r2_access_key_id: Yup.string().required( + "R2 Access Key ID is required" + ), + r2_secret_access_key: Yup.string().required( + "R2 Secret Access Key is required" + ), + account_id: Yup.string().required("Account ID is required"), + })} + initialValues={{ + r2_access_key_id: "", + r2_secret_access_key: "", + account_id: "", + }} + onSubmit={(isSuccess) => { + if (isSuccess) { + refreshCredentials(); + } + }} + /> + + + )} + + + Step 2: Which R2 bucket do you want to make searchable? + + + {r2ConnectorIndexingStatuses.length > 0 && ( + <> + + R2 indexing status + + + The latest changes are fetched every 10 minutes. + +
+ + includeName={true} + connectorIndexingStatuses={r2ConnectorIndexingStatuses} + liveCredential={r2Credential} + getCredential={(credential) => { + return
; + }} + onCredentialLink={async (connectorId) => { + if (r2Credential) { + await linkCredential(connectorId, r2Credential.id); + mutate("/api/manage/admin/connector/indexing-status"); + } + }} + onUpdate={() => + mutate("/api/manage/admin/connector/indexing-status") + } + /> +
+ + )} + + {r2Credential && ( + <> + +

Create Connection

+ + Press connect below to start the connection to your R2 bucket. + + + nameBuilder={(values) => `R2Connector-${values.bucket_name}`} + ccPairNameBuilder={(values) => + `R2Connector-${values.bucket_name}` + } + source="r2" + inputType="poll" + formBodyBuilder={(values) => ( +
+ + +
+ )} + validationSchema={Yup.object().shape({ + bucket_type: Yup.string() + .oneOf(["r2"]) + .required("Bucket type must be r2"), + bucket_name: Yup.string().required( + "Please enter the name of the r2 bucket to index, e.g. my-test-bucket" + ), + prefix: Yup.string().default(""), + })} + initialValues={{ + bucket_type: "r2", + bucket_name: "", + prefix: "", + }} + refreshFreq={60 * 60 * 24} // 1 day + credentialId={r2Credential.id} + /> +
+ + )} + + ); +}; + +export default function Page() { + const [selectedStorage, setSelectedStorage] = useState("s3"); + + return ( +
+
+ +
+ } title="R2 Storage" /> + +
+ ); +} diff --git a/web/src/app/admin/connectors/s3/page.tsx b/web/src/app/admin/connectors/s3/page.tsx new file mode 100644 index 000000000..81064a70b --- /dev/null +++ b/web/src/app/admin/connectors/s3/page.tsx @@ -0,0 +1,258 @@ +"use client"; + +import { AdminPageTitle } from "@/components/admin/Title"; +import { HealthCheckBanner } from "@/components/health/healthcheck"; +import { S3Icon, TrashIcon } from "@/components/icons/icons"; +import { LoadingAnimation } from "@/components/Loading"; +import { ConnectorForm } from "@/components/admin/connectors/ConnectorForm"; +import { CredentialForm } from "@/components/admin/connectors/CredentialForm"; +import { TextFormField } from "@/components/admin/connectors/Field"; +import { usePopup } from "@/components/admin/connectors/Popup"; +import { ConnectorsTable } from "@/components/admin/connectors/table/ConnectorsTable"; +import { adminDeleteCredential, linkCredential } from "@/lib/credential"; +import { errorHandlingFetcher } from "@/lib/fetcher"; +import { ErrorCallout } from "@/components/ErrorCallout"; +import { usePublicCredentials } from "@/lib/hooks"; +import { + ConnectorIndexingStatus, + Credential, + S3Config, + S3CredentialJson, +} from "@/lib/types"; +import { Card, Text, Title } from "@tremor/react"; +import useSWR, { useSWRConfig } from "swr"; +import * as Yup from "yup"; +import { useState } from "react"; + +const S3Main = () => { + const { popup, setPopup } = usePopup(); + + const { mutate } = useSWRConfig(); + const { + data: connectorIndexingStatuses, + isLoading: isConnectorIndexingStatusesLoading, + error: connectorIndexingStatusesError, + } = useSWR[]>( + "/api/manage/admin/connector/indexing-status", + errorHandlingFetcher + ); + const { + data: credentialsData, + isLoading: isCredentialsLoading, + error: credentialsError, + refreshCredentials, + } = usePublicCredentials(); + + if ( + (!connectorIndexingStatuses && isConnectorIndexingStatusesLoading) || + (!credentialsData && isCredentialsLoading) + ) { + return ; + } + + if (connectorIndexingStatusesError || !connectorIndexingStatuses) { + return ( + + ); + } + + if (credentialsError || !credentialsData) { + return ( + + ); + } + + const s3ConnectorIndexingStatuses: ConnectorIndexingStatus< + S3Config, + S3CredentialJson + >[] = connectorIndexingStatuses.filter( + (connectorIndexingStatus) => + connectorIndexingStatus.connector.source === "s3" + ); + + const s3Credential: Credential | undefined = + credentialsData.find( + (credential) => credential.credential_json?.aws_access_key_id + ); + + return ( + <> + {popup} + + Step 1: Provide your access info + + {s3Credential ? ( + <> + {" "} +
+

Existing AWS Access Key ID:

+

+ {s3Credential.credential_json.aws_access_key_id} +

+ +
+ + ) : ( + <> + +
    +
  • + If AWS Access Key ID and AWS Secret Access Key are provided, + they will be used for authenticating the connector. +
  • +
  • Otherwise, the Profile Name will be used (if provided).
  • +
  • + If no credentials are provided, then the connector will try to + authenticate with any default AWS credentials available. +
  • +
+
+ + + formBody={ + <> + + + + } + validationSchema={Yup.object().shape({ + aws_access_key_id: Yup.string().default(""), + aws_secret_access_key: Yup.string().default(""), + })} + initialValues={{ + aws_access_key_id: "", + aws_secret_access_key: "", + }} + onSubmit={(isSuccess) => { + if (isSuccess) { + refreshCredentials(); + } + }} + /> + + + )} + + + Step 2: Which S3 bucket do you want to make searchable? + + + {s3ConnectorIndexingStatuses.length > 0 && ( + <> + + S3 indexing status + + + The latest changes are fetched every 10 minutes. + +
+ + includeName={true} + connectorIndexingStatuses={s3ConnectorIndexingStatuses} + liveCredential={s3Credential} + getCredential={(credential) => { + return
; + }} + onCredentialLink={async (connectorId) => { + if (s3Credential) { + await linkCredential(connectorId, s3Credential.id); + mutate("/api/manage/admin/connector/indexing-status"); + } + }} + onUpdate={() => + mutate("/api/manage/admin/connector/indexing-status") + } + /> +
+ + )} + + {s3Credential && ( + <> + +

Create Connection

+ + Press connect below to start the connection to your S3 bucket. + + + nameBuilder={(values) => `S3Connector-${values.bucket_name}`} + ccPairNameBuilder={(values) => + `S3Connector-${values.bucket_name}` + } + source="s3" + inputType="poll" + formBodyBuilder={(values) => ( +
+ + +
+ )} + validationSchema={Yup.object().shape({ + bucket_type: Yup.string() + .oneOf(["s3"]) + .required("Bucket type must be s3"), + bucket_name: Yup.string().required( + "Please enter the name of the s3 bucket to index, e.g. my-test-bucket" + ), + prefix: Yup.string().default(""), + })} + initialValues={{ + bucket_type: "s3", + bucket_name: "", + prefix: "", + }} + refreshFreq={60 * 60 * 24} // 1 day + credentialId={s3Credential.id} + /> +
+ + )} + + ); +}; + +export default function Page() { + const [selectedStorage, setSelectedStorage] = useState("s3"); + + return ( +
+
+ +
+ } title="S3 Storage" /> + + +
+ ); +} diff --git a/web/src/components/admin/connectors/ConnectorForm.tsx b/web/src/components/admin/connectors/ConnectorForm.tsx index fd269501d..626eb0d18 100644 --- a/web/src/components/admin/connectors/ConnectorForm.tsx +++ b/web/src/components/admin/connectors/ConnectorForm.tsx @@ -27,7 +27,6 @@ export async function submitConnector( ): Promise<{ message: string; isSuccess: boolean; response?: Connector }> { const isUpdate = connectorId !== undefined; - let isSuccess = false; try { const response = await fetch( BASE_CONNECTOR_URL + (isUpdate ? `/${connectorId}` : ""), @@ -41,7 +40,6 @@ export async function submitConnector( ); if (response.ok) { - isSuccess = true; const responseJson = await response.json(); return { message: "Success!", isSuccess: true, response: responseJson }; } else { @@ -162,7 +160,6 @@ export function ConnectorForm({ }); return; } - const { message, isSuccess, response } = await submitConnector({ name: connectorName, source, diff --git a/web/src/components/icons/icons.tsx b/web/src/components/icons/icons.tsx index e66afd9e2..b82ef5f31 100644 --- a/web/src/components/icons/icons.tsx +++ b/web/src/components/icons/icons.tsx @@ -44,6 +44,8 @@ import { SiBookstack } from "react-icons/si"; import Image from "next/image"; import jiraSVG from "../../../public/Jira.svg"; import confluenceSVG from "../../../public/Confluence.svg"; +import OCIStorageSVG from "../../../public/OCI.svg"; +import googleCloudStorageIcon from "../../../public/GoogleCloudStorage.png"; import guruIcon from "../../../public/Guru.svg"; import gongIcon from "../../../public/Gong.png"; import requestTrackerIcon from "../../../public/RequestTracker.png"; @@ -54,6 +56,8 @@ import document360Icon from "../../../public/Document360.png"; import googleSitesIcon from "../../../public/GoogleSites.png"; import zendeskIcon from "../../../public/Zendesk.svg"; import dropboxIcon from "../../../public/Dropbox.png"; +import s3Icon from "../../../public/S3.png"; +import r2Icon from "../../../public/r2.webp"; import salesforceIcon from "../../../public/Salesforce.png"; import sharepointIcon from "../../../public/Sharepoint.png"; import teamsIcon from "../../../public/Teams.png"; @@ -423,6 +427,20 @@ export const ConfluenceIcon = ({ ); }; +export const OCIStorageIcon = ({ + size = 16, + className = defaultTailwindCSS, +}: IconProps) => { + return ( +
+ Logo +
+ ); +}; + export const JiraIcon = ({ size = 16, className = defaultTailwindCSS, @@ -452,6 +470,20 @@ export const ZulipIcon = ({ ); }; +export const GoogleStorageIcon = ({ + size = 16, + className = defaultTailwindCSS, +}: IconProps) => { + return ( +
+ Logo +
+ ); +}; + export const ProductboardIcon = ({ size = 16, className = defaultTailwindCSS, @@ -543,6 +575,30 @@ export const SalesforceIcon = ({ ); +export const R2Icon = ({ + size = 16, + className = defaultTailwindCSS, +}: IconProps) => ( +
+ Logo +
+); + +export const S3Icon = ({ + size = 16, + className = defaultTailwindCSS, +}: IconProps) => ( +
+ Logo +
+); + export const SharepointIcon = ({ size = 16, className = defaultTailwindCSS, diff --git a/web/src/lib/sources.ts b/web/src/lib/sources.ts index 23affabde..f141e42ed 100644 --- a/web/src/lib/sources.ts +++ b/web/src/lib/sources.ts @@ -22,6 +22,7 @@ import { NotionIcon, ProductboardIcon, RequestTrackerIcon, + R2Icon, SalesforceIcon, SharepointIcon, TeamsIcon, @@ -31,10 +32,14 @@ import { ZulipIcon, MediaWikiIcon, WikipediaIcon, + S3Icon, + OCIStorageIcon, + GoogleStorageIcon, } from "@/components/icons/icons"; import { ValidSources } from "./types"; import { SourceCategory, SourceMetadata } from "./search/interfaces"; import { Persona } from "@/app/admin/assistants/interfaces"; +import internal from "stream"; interface PartialSourceMetadata { icon: React.FC<{ size?: number; className?: string }>; @@ -207,6 +212,26 @@ const SOURCE_METADATA_MAP: SourceMap = { displayName: "Clickup", category: SourceCategory.AppConnection, }, + s3: { + icon: S3Icon, + displayName: "S3", + category: SourceCategory.AppConnection, + }, + r2: { + icon: R2Icon, + displayName: "R2", + category: SourceCategory.AppConnection, + }, + oci_storage: { + icon: OCIStorageIcon, + displayName: "Oracle Storage", + category: SourceCategory.AppConnection, + }, + google_cloud_storage: { + icon: GoogleStorageIcon, + displayName: "Google Storage", + category: SourceCategory.AppConnection, + }, }; function fillSourceMetadata( @@ -223,13 +248,21 @@ function fillSourceMetadata( } export function getSourceMetadata(sourceType: ValidSources): SourceMetadata { - return fillSourceMetadata(SOURCE_METADATA_MAP[sourceType], sourceType); + const response = fillSourceMetadata( + SOURCE_METADATA_MAP[sourceType], + sourceType + ); + + return response; } export function listSourceMetadata(): SourceMetadata[] { - return Object.entries(SOURCE_METADATA_MAP).map(([source, metadata]) => { - return fillSourceMetadata(metadata, source as ValidSources); - }); + const entries = Object.entries(SOURCE_METADATA_MAP).map( + ([source, metadata]) => { + return fillSourceMetadata(metadata, source as ValidSources); + } + ); + return entries; } export function getSourceDisplayName(sourceType: ValidSources): string | null { diff --git a/web/src/lib/types.ts b/web/src/lib/types.ts index 30c336f97..67adca87c 100644 --- a/web/src/lib/types.ts +++ b/web/src/lib/types.ts @@ -59,7 +59,11 @@ export type ValidSources = | "clickup" | "axero" | "wikipedia" - | "mediawiki"; + | "mediawiki" + | "s3" + | "r2" + | "google_cloud_storage" + | "oci_storage"; export type ValidInputTypes = "load_state" | "poll" | "event"; export type ValidStatuses = @@ -219,6 +223,30 @@ export interface ZendeskConfig {} export interface DropboxConfig {} +export interface S3Config { + bucket_type: "s3"; + bucket_name: string; + prefix: string; +} + +export interface R2Config { + bucket_type: "r2"; + bucket_name: string; + prefix: string; +} + +export interface GCSConfig { + bucket_type: "google_cloud_storage"; + bucket_name: string; + prefix: string; +} + +export interface OCIConfig { + bucket_type: "oci_storage"; + bucket_name: string; + prefix: string; +} + export interface MediaWikiBaseConfig { connector_name: string; language_code: string; @@ -400,6 +428,28 @@ export interface DropboxCredentialJson { dropbox_access_token: string; } +export interface R2CredentialJson { + account_id: string; + r2_access_key_id: string; + r2_secret_access_key: string; +} + +export interface S3CredentialJson { + aws_access_key_id: string; + aws_secret_access_key: string; +} + +export interface GCSCredentialJson { + access_key_id: string; + secret_access_key: string; +} + +export interface OCICredentialJson { + namespace: string; + region: string; + access_key_id: string; + secret_access_key: string; +} export interface SalesforceCredentialJson { sf_username: string; sf_password: string;