Blob Storage (#1705)

S3 + OCI + Google Cloud Storage + R2
---------

Co-authored-by: Art Matsak <5328078+artmatsak@users.noreply.github.com>
This commit is contained in:
pablodanswer 2024-06-27 17:12:20 -07:00 committed by GitHub
parent 145cdb69b7
commit f03f97307f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
18 changed files with 1492 additions and 10 deletions

View File

@ -100,6 +100,17 @@ class DocumentSource(str, Enum):
CLICKUP = "clickup"
MEDIAWIKI = "mediawiki"
WIKIPEDIA = "wikipedia"
S3 = "s3"
R2 = "r2"
GOOGLE_CLOUD_STORAGE = "google_cloud_storage"
OCI_STORAGE = "oci_storage"
class BlobType(str, Enum):
R2 = "r2"
S3 = "s3"
GOOGLE_CLOUD_STORAGE = "google_cloud_storage"
OCI_STORAGE = "oci_storage"
class DocumentIndexType(str, Enum):

View File

@ -0,0 +1,277 @@
import os
from datetime import datetime
from datetime import timezone
from io import BytesIO
from typing import Any
from typing import Optional
import boto3
from botocore.client import Config
from mypy_boto3_s3 import S3Client
from danswer.configs.app_configs import INDEX_BATCH_SIZE
from danswer.configs.constants import BlobType
from danswer.configs.constants import DocumentSource
from danswer.connectors.interfaces import GenerateDocumentsOutput
from danswer.connectors.interfaces import LoadConnector
from danswer.connectors.interfaces import PollConnector
from danswer.connectors.interfaces import SecondsSinceUnixEpoch
from danswer.connectors.models import ConnectorMissingCredentialError
from danswer.connectors.models import Document
from danswer.connectors.models import Section
from danswer.file_processing.extract_file_text import extract_file_text
from danswer.utils.logger import setup_logger
logger = setup_logger()
class BlobStorageConnector(LoadConnector, PollConnector):
def __init__(
self,
bucket_type: str,
bucket_name: str,
prefix: str = "",
batch_size: int = INDEX_BATCH_SIZE,
) -> None:
self.bucket_type: BlobType = BlobType(bucket_type)
self.bucket_name = bucket_name
self.prefix = prefix if not prefix or prefix.endswith("/") else prefix + "/"
self.batch_size = batch_size
self.s3_client: Optional[S3Client] = None
def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
"""Checks for boto3 credentials based on the bucket type.
(1) R2: Access Key ID, Secret Access Key, Account ID
(2) S3: AWS Access Key ID, AWS Secret Access Key
(3) GOOGLE_CLOUD_STORAGE: Access Key ID, Secret Access Key, Project ID
(4) OCI_STORAGE: Namespace, Region, Access Key ID, Secret Access Key
For each bucket type, the method initializes the appropriate S3 client:
- R2: Uses Cloudflare R2 endpoint with S3v4 signature
- S3: Creates a standard boto3 S3 client
- GOOGLE_CLOUD_STORAGE: Uses Google Cloud Storage endpoint
- OCI_STORAGE: Uses Oracle Cloud Infrastructure Object Storage endpoint
Raises ConnectorMissingCredentialError if required credentials are missing.
Raises ValueError for unsupported bucket types.
"""
logger.info(
f"Loading credentials for {self.bucket_name} or type {self.bucket_type}"
)
if self.bucket_type == BlobType.R2:
if not all(
credentials.get(key)
for key in ["r2_access_key_id", "r2_secret_access_key", "account_id"]
):
raise ConnectorMissingCredentialError("Cloudflare R2")
self.s3_client = boto3.client(
"s3",
endpoint_url=f"https://{credentials['account_id']}.r2.cloudflarestorage.com",
aws_access_key_id=credentials["r2_access_key_id"],
aws_secret_access_key=credentials["r2_secret_access_key"],
region_name="auto",
config=Config(signature_version="s3v4"),
)
elif self.bucket_type == BlobType.S3:
if not all(
credentials.get(key)
for key in ["aws_access_key_id", "aws_secret_access_key"]
):
raise ConnectorMissingCredentialError("Google Cloud Storage")
session = boto3.Session(
aws_access_key_id=credentials["aws_access_key_id"],
aws_secret_access_key=credentials["aws_secret_access_key"],
)
self.s3_client = session.client("s3")
elif self.bucket_type == BlobType.GOOGLE_CLOUD_STORAGE:
if not all(
credentials.get(key) for key in ["access_key_id", "secret_access_key"]
):
raise ConnectorMissingCredentialError("Google Cloud Storage")
self.s3_client = boto3.client(
"s3",
endpoint_url="https://storage.googleapis.com",
aws_access_key_id=credentials["access_key_id"],
aws_secret_access_key=credentials["secret_access_key"],
region_name="auto",
)
elif self.bucket_type == BlobType.OCI_STORAGE:
if not all(
credentials.get(key)
for key in ["namespace", "region", "access_key_id", "secret_access_key"]
):
raise ConnectorMissingCredentialError("Oracle Cloud Infrastructure")
self.s3_client = boto3.client(
"s3",
endpoint_url=f"https://{credentials['namespace']}.compat.objectstorage.{credentials['region']}.oraclecloud.com",
aws_access_key_id=credentials["access_key_id"],
aws_secret_access_key=credentials["secret_access_key"],
region_name=credentials["region"],
)
else:
raise ValueError(f"Unsupported bucket type: {self.bucket_type}")
return None
def _download_object(self, key: str) -> bytes:
if self.s3_client is None:
raise ConnectorMissingCredentialError("Blob storage")
object = self.s3_client.get_object(Bucket=self.bucket_name, Key=key)
return object["Body"].read()
# NOTE: Left in as may be useful for one-off access to documents and sharing across orgs.
# def _get_presigned_url(self, key: str) -> str:
# if self.s3_client is None:
# raise ConnectorMissingCredentialError("Blog storage")
# url = self.s3_client.generate_presigned_url(
# "get_object",
# Params={"Bucket": self.bucket_name, "Key": key},
# ExpiresIn=self.presign_length,
# )
# return url
def _get_blob_link(self, key: str) -> str:
if self.s3_client is None:
raise ConnectorMissingCredentialError("Blob storage")
if self.bucket_type == BlobType.R2:
account_id = self.s3_client.meta.endpoint_url.split("//")[1].split(".")[0]
return f"https://{account_id}.r2.cloudflarestorage.com/{self.bucket_name}/{key}"
elif self.bucket_type == BlobType.S3:
region = self.s3_client.meta.region_name
return f"https://{self.bucket_name}.s3.{region}.amazonaws.com/{key}"
elif self.bucket_type == BlobType.GOOGLE_CLOUD_STORAGE:
return f"https://storage.cloud.google.com/{self.bucket_name}/{key}"
elif self.bucket_type == BlobType.OCI_STORAGE:
namespace = self.s3_client.meta.endpoint_url.split("//")[1].split(".")[0]
region = self.s3_client.meta.region_name
return f"https://objectstorage.{region}.oraclecloud.com/n/{namespace}/b/{self.bucket_name}/o/{key}"
else:
raise ValueError(f"Unsupported bucket type: {self.bucket_type}")
def _yield_blob_objects(
self,
start: datetime,
end: datetime,
) -> GenerateDocumentsOutput:
if self.s3_client is None:
raise ConnectorMissingCredentialError("Blog storage")
paginator = self.s3_client.get_paginator("list_objects_v2")
pages = paginator.paginate(Bucket=self.bucket_name, Prefix=self.prefix)
batch: list[Document] = []
for page in pages:
if "Contents" not in page:
continue
for obj in page["Contents"]:
if obj["Key"].endswith("/"):
continue
last_modified = obj["LastModified"].replace(tzinfo=timezone.utc)
if not start <= last_modified <= end:
continue
downloaded_file = self._download_object(obj["Key"])
link = self._get_blob_link(obj["Key"])
name = os.path.basename(obj["Key"])
try:
text = extract_file_text(
name,
BytesIO(downloaded_file),
break_on_unprocessable=False,
)
batch.append(
Document(
id=f"{self.bucket_type}:{self.bucket_name}:{obj['Key']}",
sections=[Section(link=link, text=text)],
source=DocumentSource(self.bucket_type.value),
semantic_identifier=name,
doc_updated_at=last_modified,
metadata={},
)
)
if len(batch) == self.batch_size:
yield batch
batch = []
except Exception as e:
logger.exception(
f"Error decoding object {obj['Key']} as UTF-8: {e}"
)
if batch:
yield batch
def load_from_state(self) -> GenerateDocumentsOutput:
logger.info("Loading blob objects")
return self._yield_blob_objects(
start=datetime(1970, 1, 1, tzinfo=timezone.utc),
end=datetime.now(timezone.utc),
)
def poll_source(
self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
) -> GenerateDocumentsOutput:
if self.s3_client is None:
raise ConnectorMissingCredentialError("Blog storage")
start_datetime = datetime.fromtimestamp(start, tz=timezone.utc)
end_datetime = datetime.fromtimestamp(end, tz=timezone.utc)
for batch in self._yield_blob_objects(start_datetime, end_datetime):
yield batch
return None
if __name__ == "__main__":
credentials_dict = {
"aws_access_key_id": os.environ.get("AWS_ACCESS_KEY_ID"),
"aws_secret_access_key": os.environ.get("AWS_SECRET_ACCESS_KEY"),
}
# Initialize the connector
connector = BlobStorageConnector(
bucket_type=os.environ.get("BUCKET_TYPE") or "s3",
bucket_name=os.environ.get("BUCKET_NAME") or "test",
prefix="",
)
try:
connector.load_credentials(credentials_dict)
document_batch_generator = connector.load_from_state()
for document_batch in document_batch_generator:
print("First batch of documents:")
for doc in document_batch:
print(f"Document ID: {doc.id}")
print(f"Semantic Identifier: {doc.semantic_identifier}")
print(f"Source: {doc.source}")
print(f"Updated At: {doc.doc_updated_at}")
print("Sections:")
for section in doc.sections:
print(f" - Link: {section.link}")
print(f" - Text: {section.text[:100]}...")
print("---")
break
except ConnectorMissingCredentialError as e:
print(f"Error: {e}")
except Exception as e:
print(f"An unexpected error occurred: {e}")

View File

@ -5,6 +5,7 @@ from sqlalchemy.orm import Session
from danswer.configs.constants import DocumentSource
from danswer.connectors.axero.connector import AxeroConnector
from danswer.connectors.blob.connector import BlobStorageConnector
from danswer.connectors.bookstack.connector import BookstackConnector
from danswer.connectors.clickup.connector import ClickupConnector
from danswer.connectors.confluence.connector import ConfluenceConnector
@ -90,6 +91,10 @@ def identify_connector_class(
DocumentSource.CLICKUP: ClickupConnector,
DocumentSource.MEDIAWIKI: MediaWikiConnector,
DocumentSource.WIKIPEDIA: WikipediaConnector,
DocumentSource.S3: BlobStorageConnector,
DocumentSource.R2: BlobStorageConnector,
DocumentSource.GOOGLE_CLOUD_STORAGE: BlobStorageConnector,
DocumentSource.OCI_STORAGE: BlobStorageConnector,
}
connector_by_source = connector_map.get(source, {})
@ -115,7 +120,6 @@ def identify_connector_class(
raise ConnectorMissingException(
f"Connector for source={source} does not accept input_type={input_type}"
)
return connector

View File

@ -71,4 +71,4 @@ uvicorn==0.21.1
zulip==0.8.2
hubspot-api-client==8.1.0
zenpy==2.0.41
dropbox==11.36.2
dropbox==11.36.2

View File

@ -19,3 +19,4 @@ types-regex==2023.3.23.1
types-requests==2.28.11.17
types-retry==0.9.9.3
types-urllib3==1.26.25.11
boto3-stubs[s3]==1.34.133

Binary file not shown.

After

Width:  |  Height:  |  Size: 22 KiB

1
web/public/OCI.svg Normal file
View File

@ -0,0 +1 @@
<svg style="display:block" class="u30-oicn" xmlns="http://www.w3.org/2000/svg" width="32" height="21" viewBox="0 0 32 21"><path fill="#C74634" d="M9.9,20.1c-5.5,0-9.9-4.4-9.9-9.9c0-5.5,4.4-9.9,9.9-9.9h11.6c5.5,0,9.9,4.4,9.9,9.9c0,5.5-4.4,9.9-9.9,9.9H9.9 M21.2,16.6c3.6,0,6.4-2.9,6.4-6.4c0-3.6-2.9-6.4-6.4-6.4h-11c-3.6,0-6.4,2.9-6.4,6.4s2.9,6.4,6.4,6.4H21.2"></path></svg>

After

Width:  |  Height:  |  Size: 371 B

BIN
web/public/S3.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 51 KiB

BIN
web/public/r2.webp Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 938 B

View File

@ -0,0 +1,257 @@
"use client";
import { AdminPageTitle } from "@/components/admin/Title";
import { HealthCheckBanner } from "@/components/health/healthcheck";
import { GoogleStorageIcon, TrashIcon } from "@/components/icons/icons";
import { LoadingAnimation } from "@/components/Loading";
import { ConnectorForm } from "@/components/admin/connectors/ConnectorForm";
import { CredentialForm } from "@/components/admin/connectors/CredentialForm";
import { TextFormField } from "@/components/admin/connectors/Field";
import { usePopup } from "@/components/admin/connectors/Popup";
import { ConnectorsTable } from "@/components/admin/connectors/table/ConnectorsTable";
import { adminDeleteCredential, linkCredential } from "@/lib/credential";
import { errorHandlingFetcher } from "@/lib/fetcher";
import { ErrorCallout } from "@/components/ErrorCallout";
import { usePublicCredentials } from "@/lib/hooks";
import { ConnectorIndexingStatus, Credential } from "@/lib/types";
import { GCSConfig, GCSCredentialJson } from "@/lib/types";
import { Card, Select, SelectItem, Text, Title } from "@tremor/react";
import useSWR, { useSWRConfig } from "swr";
import * as Yup from "yup";
import { useState } from "react";
const GCSMain = () => {
const { popup, setPopup } = usePopup();
const { mutate } = useSWRConfig();
const {
data: connectorIndexingStatuses,
isLoading: isConnectorIndexingStatusesLoading,
error: connectorIndexingStatusesError,
} = useSWR<ConnectorIndexingStatus<any, any>[]>(
"/api/manage/admin/connector/indexing-status",
errorHandlingFetcher
);
const {
data: credentialsData,
isLoading: isCredentialsLoading,
error: credentialsError,
refreshCredentials,
} = usePublicCredentials();
if (
(!connectorIndexingStatuses && isConnectorIndexingStatusesLoading) ||
(!credentialsData && isCredentialsLoading)
) {
return <LoadingAnimation text="Loading" />;
}
if (connectorIndexingStatusesError || !connectorIndexingStatuses) {
return (
<ErrorCallout
errorTitle="Something went wrong :("
errorMsg={connectorIndexingStatusesError?.info?.detail}
/>
);
}
if (credentialsError || !credentialsData) {
return (
<ErrorCallout
errorTitle="Something went wrong :("
errorMsg={credentialsError?.info?.detail}
/>
);
}
const gcsConnectorIndexingStatuses: ConnectorIndexingStatus<
GCSConfig,
GCSCredentialJson
>[] = connectorIndexingStatuses.filter(
(connectorIndexingStatus) =>
connectorIndexingStatus.connector.source === "google_cloud_storage"
);
const gcsCredential: Credential<GCSCredentialJson> | undefined =
credentialsData.find(
(credential) => credential.credential_json?.project_id
);
return (
<>
{popup}
<Title className="mb-2 mt-6 ml-auto mr-auto">
Step 1: Provide your GCS access info
</Title>
{gcsCredential ? (
<>
<div className="flex mb-1 text-sm">
<p className="my-auto">Existing GCS Access Key ID: </p>
<p className="ml-1 italic my-auto">
{gcsCredential.credential_json.access_key_id}
</p>
{", "}
<p className="ml-1 my-auto">Secret Access Key: </p>
<p className="ml-1 italic my-auto">
{gcsCredential.credential_json.secret_access_key}
</p>{" "}
<button
className="ml-1 hover:bg-hover rounded p-1"
onClick={async () => {
if (gcsConnectorIndexingStatuses.length > 0) {
setPopup({
type: "error",
message:
"Must delete all connectors before deleting credentials",
});
return;
}
await adminDeleteCredential(gcsCredential.id);
refreshCredentials();
}}
>
<TrashIcon />
</button>
</div>
</>
) : (
<>
<Text>
<ul className="list-disc mt-2 ml-4">
<li>
Provide your GCS Project ID, Client Email, and Private Key for
authentication.
</li>
<li>
These credentials will be used to access your GCS buckets.
</li>
</ul>
</Text>
<Card className="mt-4">
<CredentialForm<GCSCredentialJson>
formBody={
<>
<TextFormField name="project_id" label="GCS Project ID:" />
<TextFormField name="access_key_id" label="Access Key ID:" />
<TextFormField
name="secret_access_key"
label="Secret Access Key:"
/>
</>
}
validationSchema={Yup.object().shape({
secret_access_key: Yup.string().required(
"Client Email is required"
),
access_key_id: Yup.string().required("Private Key is required"),
})}
initialValues={{
secret_access_key: "",
access_key_id: "",
}}
onSubmit={(isSuccess) => {
if (isSuccess) {
refreshCredentials();
}
}}
/>
</Card>
</>
)}
<Title className="mb-2 mt-6 ml-auto mr-auto">
Step 2: Which GCS bucket do you want to make searchable?
</Title>
{gcsConnectorIndexingStatuses.length > 0 && (
<>
<Title className="mb-2 mt-6 ml-auto mr-auto">
GCS indexing status
</Title>
<Text className="mb-2">
The latest changes are fetched every 10 minutes.
</Text>
<div className="mb-2">
<ConnectorsTable<GCSConfig, GCSCredentialJson>
includeName={true}
connectorIndexingStatuses={gcsConnectorIndexingStatuses}
liveCredential={gcsCredential}
getCredential={(credential) => {
return <div></div>;
}}
onCredentialLink={async (connectorId) => {
if (gcsCredential) {
await linkCredential(connectorId, gcsCredential.id);
mutate("/api/manage/admin/connector/indexing-status");
}
}}
onUpdate={() =>
mutate("/api/manage/admin/connector/indexing-status")
}
/>
</div>
</>
)}
{gcsCredential && (
<>
<Card className="mt-4">
<h2 className="font-bold mb-3">Create Connection</h2>
<Text className="mb-4">
Press connect below to start the connection to your GCS bucket.
</Text>
<ConnectorForm<GCSConfig>
nameBuilder={(values) => `GCSConnector-${values.bucket_name}`}
ccPairNameBuilder={(values) =>
`GCSConnector-${values.bucket_name}`
}
source="google_cloud_storage"
inputType="poll"
formBodyBuilder={(values) => (
<div>
<TextFormField name="bucket_name" label="Bucket Name:" />
<TextFormField
name="prefix"
label="Path Prefix (optional):"
/>
</div>
)}
validationSchema={Yup.object().shape({
bucket_type: Yup.string()
.oneOf(["google_cloud_storage"])
.required("Bucket type must be google_cloud_storage"),
bucket_name: Yup.string().required(
"Please enter the name of the GCS bucket to index, e.g. my-gcs-bucket"
),
prefix: Yup.string().default(""),
})}
initialValues={{
bucket_type: "google_cloud_storage",
bucket_name: "",
prefix: "",
}}
refreshFreq={60 * 60 * 24} // 1 day
credentialId={gcsCredential.id}
/>
</Card>
</>
)}
</>
);
};
export default function Page() {
return (
<div className="mx-auto container">
<div className="mb-4">
<HealthCheckBanner />
</div>
<AdminPageTitle
icon={<GoogleStorageIcon size={32} />}
title="Google Cloud Storage"
/>
<GCSMain />
</div>
);
}

View File

@ -0,0 +1,272 @@
"use client";
import { AdminPageTitle } from "@/components/admin/Title";
import { HealthCheckBanner } from "@/components/health/healthcheck";
import { OCIStorageIcon, TrashIcon } from "@/components/icons/icons";
import { LoadingAnimation } from "@/components/Loading";
import { ConnectorForm } from "@/components/admin/connectors/ConnectorForm";
import { CredentialForm } from "@/components/admin/connectors/CredentialForm";
import { TextFormField } from "@/components/admin/connectors/Field";
import { usePopup } from "@/components/admin/connectors/Popup";
import { ConnectorsTable } from "@/components/admin/connectors/table/ConnectorsTable";
import { adminDeleteCredential, linkCredential } from "@/lib/credential";
import { errorHandlingFetcher } from "@/lib/fetcher";
import { ErrorCallout } from "@/components/ErrorCallout";
import { usePublicCredentials } from "@/lib/hooks";
import {
ConnectorIndexingStatus,
Credential,
OCIConfig,
OCICredentialJson,
R2Config,
R2CredentialJson,
} from "@/lib/types";
import { Card, Select, SelectItem, Text, Title } from "@tremor/react";
import useSWR, { useSWRConfig } from "swr";
import * as Yup from "yup";
import { useState } from "react";
const OCIMain = () => {
const { popup, setPopup } = usePopup();
const { mutate } = useSWRConfig();
const {
data: connectorIndexingStatuses,
isLoading: isConnectorIndexingStatusesLoading,
error: connectorIndexingStatusesError,
} = useSWR<ConnectorIndexingStatus<any, any>[]>(
"/api/manage/admin/connector/indexing-status",
errorHandlingFetcher
);
const {
data: credentialsData,
isLoading: isCredentialsLoading,
error: credentialsError,
refreshCredentials,
} = usePublicCredentials();
if (
(!connectorIndexingStatuses && isConnectorIndexingStatusesLoading) ||
(!credentialsData && isCredentialsLoading)
) {
return <LoadingAnimation text="Loading" />;
}
if (connectorIndexingStatusesError || !connectorIndexingStatuses) {
return (
<ErrorCallout
errorTitle="Something went wrong :("
errorMsg={connectorIndexingStatusesError?.info?.detail}
/>
);
}
if (credentialsError || !credentialsData) {
return (
<ErrorCallout
errorTitle="Something went wrong :("
errorMsg={credentialsError?.info?.detail}
/>
);
}
const ociConnectorIndexingStatuses: ConnectorIndexingStatus<
OCIConfig,
OCICredentialJson
>[] = connectorIndexingStatuses.filter(
(connectorIndexingStatus) =>
connectorIndexingStatus.connector.source === "oci_storage"
);
const ociCredential: Credential<OCICredentialJson> | undefined =
credentialsData.find((credential) => credential.credential_json?.namespace);
return (
<>
{popup}
<Title className="mb-2 mt-6 ml-auto mr-auto">
Step 1: Provide your access info
</Title>
{ociCredential ? (
<>
{" "}
<div className="flex mb-1 text-sm">
<p className="my-auto">Existing OCI Access Key ID: </p>
<p className="ml-1 italic my-auto">
{ociCredential.credential_json.access_key_id}
</p>
{", "}
<p className="ml-1 my-auto">Namespace: </p>
<p className="ml-1 italic my-auto">
{ociCredential.credential_json.namespace}
</p>{" "}
<button
className="ml-1 hover:bg-hover rounded p-1"
onClick={async () => {
if (ociConnectorIndexingStatuses.length > 0) {
setPopup({
type: "error",
message:
"Must delete all connectors before deleting credentials",
});
return;
}
await adminDeleteCredential(ociCredential.id);
refreshCredentials();
}}
>
<TrashIcon />
</button>
</div>
</>
) : (
<>
<Text>
<ul className="list-disc mt-2 ml-4">
<li>
Provide your OCI Access Key ID, Secret Access Key, Namespace,
and Region for authentication.
</li>
<li>
These credentials will be used to access your OCI buckets.
</li>
</ul>
</Text>
<Card className="mt-4">
<CredentialForm<OCICredentialJson>
formBody={
<>
<TextFormField
name="access_key_id"
label="OCI Access Key ID:"
/>
<TextFormField
name="secret_access_key"
label="OCI Secret Access Key:"
/>
<TextFormField name="namespace" label="Namespace:" />
<TextFormField name="region" label="Region:" />
</>
}
validationSchema={Yup.object().shape({
access_key_id: Yup.string().required(
"OCI Access Key ID is required"
),
secret_access_key: Yup.string().required(
"OCI Secret Access Key is required"
),
namespace: Yup.string().required("Namespace is required"),
region: Yup.string().required("Region is required"),
})}
initialValues={{
access_key_id: "",
secret_access_key: "",
namespace: "",
region: "",
}}
onSubmit={(isSuccess) => {
if (isSuccess) {
refreshCredentials();
}
}}
/>
</Card>
</>
)}
<Title className="mb-2 mt-6 ml-auto mr-auto">
Step 2: Which OCI bucket do you want to make searchable?
</Title>
{ociConnectorIndexingStatuses.length > 0 && (
<>
<Title className="mb-2 mt-6 ml-auto mr-auto">
OCI indexing status
</Title>
<Text className="mb-2">
The latest changes are fetched every 10 minutes.
</Text>
<div className="mb-2">
<ConnectorsTable<OCIConfig, OCICredentialJson>
includeName={true}
connectorIndexingStatuses={ociConnectorIndexingStatuses}
liveCredential={ociCredential}
getCredential={(credential) => {
return <div></div>;
}}
onCredentialLink={async (connectorId) => {
if (ociCredential) {
await linkCredential(connectorId, ociCredential.id);
mutate("/api/manage/admin/connector/indexing-status");
}
}}
onUpdate={() =>
mutate("/api/manage/admin/connector/indexing-status")
}
/>
</div>
</>
)}
{ociCredential && (
<>
<Card className="mt-4">
<h2 className="font-bold mb-3">Create Connection</h2>
<Text className="mb-4">
Press connect below to start the connection to your OCI bucket.
</Text>
<ConnectorForm<OCIConfig>
nameBuilder={(values) => `OCIConnector-${values.bucket_name}`}
ccPairNameBuilder={(values) =>
`OCIConnector-${values.bucket_name}`
}
source="oci_storage"
inputType="poll"
formBodyBuilder={(values) => (
<div>
<TextFormField name="bucket_name" label="Bucket Name:" />
<TextFormField
name="prefix"
label="Path Prefix (optional):"
/>
</div>
)}
validationSchema={Yup.object().shape({
bucket_type: Yup.string()
.oneOf(["oci_storage"])
.required("Bucket type must be oci_storage"),
bucket_name: Yup.string().required(
"Please enter the name of the OCI bucket to index, e.g. my-test-bucket"
),
prefix: Yup.string().default(""),
})}
initialValues={{
bucket_type: "oci_storage",
bucket_name: "",
prefix: "",
}}
refreshFreq={60 * 60 * 24} // 1 day
credentialId={ociCredential.id}
/>
</Card>
</>
)}
</>
);
};
export default function Page() {
return (
<div className="mx-auto container">
<div className="mb-4">
<HealthCheckBanner />
</div>
<AdminPageTitle
icon={<OCIStorageIcon size={32} />}
title="Oracle Cloud Infrastructure"
/>
<OCIMain />
</div>
);
}

View File

@ -0,0 +1,265 @@
"use client";
import { AdminPageTitle } from "@/components/admin/Title";
import { HealthCheckBanner } from "@/components/health/healthcheck";
import { R2Icon, S3Icon, TrashIcon } from "@/components/icons/icons";
import { LoadingAnimation } from "@/components/Loading";
import { ConnectorForm } from "@/components/admin/connectors/ConnectorForm";
import { CredentialForm } from "@/components/admin/connectors/CredentialForm";
import { TextFormField } from "@/components/admin/connectors/Field";
import { usePopup } from "@/components/admin/connectors/Popup";
import { ConnectorsTable } from "@/components/admin/connectors/table/ConnectorsTable";
import { adminDeleteCredential, linkCredential } from "@/lib/credential";
import { errorHandlingFetcher } from "@/lib/fetcher";
import { ErrorCallout } from "@/components/ErrorCallout";
import { usePublicCredentials } from "@/lib/hooks";
import {
ConnectorIndexingStatus,
Credential,
R2Config,
R2CredentialJson,
} from "@/lib/types";
import { Card, Select, SelectItem, Text, Title } from "@tremor/react";
import useSWR, { useSWRConfig } from "swr";
import * as Yup from "yup";
import { useState } from "react";
const R2Main = () => {
const { popup, setPopup } = usePopup();
const { mutate } = useSWRConfig();
const {
data: connectorIndexingStatuses,
isLoading: isConnectorIndexingStatusesLoading,
error: connectorIndexingStatusesError,
} = useSWR<ConnectorIndexingStatus<any, any>[]>(
"/api/manage/admin/connector/indexing-status",
errorHandlingFetcher
);
const {
data: credentialsData,
isLoading: isCredentialsLoading,
error: credentialsError,
refreshCredentials,
} = usePublicCredentials();
if (
(!connectorIndexingStatuses && isConnectorIndexingStatusesLoading) ||
(!credentialsData && isCredentialsLoading)
) {
return <LoadingAnimation text="Loading" />;
}
if (connectorIndexingStatusesError || !connectorIndexingStatuses) {
return (
<ErrorCallout
errorTitle="Something went wrong :("
errorMsg={connectorIndexingStatusesError?.info?.detail}
/>
);
}
if (credentialsError || !credentialsData) {
return (
<ErrorCallout
errorTitle="Something went wrong :("
errorMsg={credentialsError?.info?.detail}
/>
);
}
const r2ConnectorIndexingStatuses: ConnectorIndexingStatus<
R2Config,
R2CredentialJson
>[] = connectorIndexingStatuses.filter(
(connectorIndexingStatus) =>
connectorIndexingStatus.connector.source === "r2"
);
const r2Credential: Credential<R2CredentialJson> | undefined =
credentialsData.find(
(credential) => credential.credential_json?.account_id
);
return (
<>
{popup}
<Title className="mb-2 mt-6 ml-auto mr-auto">
Step 1: Provide your access info
</Title>
{r2Credential ? (
<>
{" "}
<div className="flex mb-1 text-sm">
<p className="my-auto">Existing R2 Access Key ID: </p>
<p className="ml-1 italic my-auto">
{r2Credential.credential_json.r2_access_key_id}
</p>
{", "}
<p className="ml-1 my-auto">Account ID: </p>
<p className="ml-1 italic my-auto">
{r2Credential.credential_json.account_id}
</p>{" "}
<button
className="ml-1 hover:bg-hover rounded p-1"
onClick={async () => {
if (r2ConnectorIndexingStatuses.length > 0) {
setPopup({
type: "error",
message:
"Must delete all connectors before deleting credentials",
});
return;
}
await adminDeleteCredential(r2Credential.id);
refreshCredentials();
}}
>
<TrashIcon />
</button>
</div>
</>
) : (
<>
<Text>
<ul className="list-disc mt-2 ml-4">
<li>
Provide your R2 Access Key ID, Secret Access Key, and Account ID
for authentication.
</li>
<li>These credentials will be used to access your R2 buckets.</li>
</ul>
</Text>
<Card className="mt-4">
<CredentialForm<R2CredentialJson>
formBody={
<>
<TextFormField
name="r2_access_key_id"
label="R2 Access Key ID:"
/>
<TextFormField
name="r2_secret_access_key"
label="R2 Secret Access Key:"
/>
<TextFormField name="account_id" label="Account ID:" />
</>
}
validationSchema={Yup.object().shape({
r2_access_key_id: Yup.string().required(
"R2 Access Key ID is required"
),
r2_secret_access_key: Yup.string().required(
"R2 Secret Access Key is required"
),
account_id: Yup.string().required("Account ID is required"),
})}
initialValues={{
r2_access_key_id: "",
r2_secret_access_key: "",
account_id: "",
}}
onSubmit={(isSuccess) => {
if (isSuccess) {
refreshCredentials();
}
}}
/>
</Card>
</>
)}
<Title className="mb-2 mt-6 ml-auto mr-auto">
Step 2: Which R2 bucket do you want to make searchable?
</Title>
{r2ConnectorIndexingStatuses.length > 0 && (
<>
<Title className="mb-2 mt-6 ml-auto mr-auto">
R2 indexing status
</Title>
<Text className="mb-2">
The latest changes are fetched every 10 minutes.
</Text>
<div className="mb-2">
<ConnectorsTable<R2Config, R2CredentialJson>
includeName={true}
connectorIndexingStatuses={r2ConnectorIndexingStatuses}
liveCredential={r2Credential}
getCredential={(credential) => {
return <div></div>;
}}
onCredentialLink={async (connectorId) => {
if (r2Credential) {
await linkCredential(connectorId, r2Credential.id);
mutate("/api/manage/admin/connector/indexing-status");
}
}}
onUpdate={() =>
mutate("/api/manage/admin/connector/indexing-status")
}
/>
</div>
</>
)}
{r2Credential && (
<>
<Card className="mt-4">
<h2 className="font-bold mb-3">Create Connection</h2>
<Text className="mb-4">
Press connect below to start the connection to your R2 bucket.
</Text>
<ConnectorForm<R2Config>
nameBuilder={(values) => `R2Connector-${values.bucket_name}`}
ccPairNameBuilder={(values) =>
`R2Connector-${values.bucket_name}`
}
source="r2"
inputType="poll"
formBodyBuilder={(values) => (
<div>
<TextFormField name="bucket_name" label="Bucket Name:" />
<TextFormField
name="prefix"
label="Path Prefix (optional):"
/>
</div>
)}
validationSchema={Yup.object().shape({
bucket_type: Yup.string()
.oneOf(["r2"])
.required("Bucket type must be r2"),
bucket_name: Yup.string().required(
"Please enter the name of the r2 bucket to index, e.g. my-test-bucket"
),
prefix: Yup.string().default(""),
})}
initialValues={{
bucket_type: "r2",
bucket_name: "",
prefix: "",
}}
refreshFreq={60 * 60 * 24} // 1 day
credentialId={r2Credential.id}
/>
</Card>
</>
)}
</>
);
};
export default function Page() {
const [selectedStorage, setSelectedStorage] = useState<string>("s3");
return (
<div className="mx-auto container">
<div className="mb-4">
<HealthCheckBanner />
</div>
<AdminPageTitle icon={<R2Icon size={32} />} title="R2 Storage" />
<R2Main key={2} />
</div>
);
}

View File

@ -0,0 +1,258 @@
"use client";
import { AdminPageTitle } from "@/components/admin/Title";
import { HealthCheckBanner } from "@/components/health/healthcheck";
import { S3Icon, TrashIcon } from "@/components/icons/icons";
import { LoadingAnimation } from "@/components/Loading";
import { ConnectorForm } from "@/components/admin/connectors/ConnectorForm";
import { CredentialForm } from "@/components/admin/connectors/CredentialForm";
import { TextFormField } from "@/components/admin/connectors/Field";
import { usePopup } from "@/components/admin/connectors/Popup";
import { ConnectorsTable } from "@/components/admin/connectors/table/ConnectorsTable";
import { adminDeleteCredential, linkCredential } from "@/lib/credential";
import { errorHandlingFetcher } from "@/lib/fetcher";
import { ErrorCallout } from "@/components/ErrorCallout";
import { usePublicCredentials } from "@/lib/hooks";
import {
ConnectorIndexingStatus,
Credential,
S3Config,
S3CredentialJson,
} from "@/lib/types";
import { Card, Text, Title } from "@tremor/react";
import useSWR, { useSWRConfig } from "swr";
import * as Yup from "yup";
import { useState } from "react";
const S3Main = () => {
const { popup, setPopup } = usePopup();
const { mutate } = useSWRConfig();
const {
data: connectorIndexingStatuses,
isLoading: isConnectorIndexingStatusesLoading,
error: connectorIndexingStatusesError,
} = useSWR<ConnectorIndexingStatus<any, any>[]>(
"/api/manage/admin/connector/indexing-status",
errorHandlingFetcher
);
const {
data: credentialsData,
isLoading: isCredentialsLoading,
error: credentialsError,
refreshCredentials,
} = usePublicCredentials();
if (
(!connectorIndexingStatuses && isConnectorIndexingStatusesLoading) ||
(!credentialsData && isCredentialsLoading)
) {
return <LoadingAnimation text="Loading" />;
}
if (connectorIndexingStatusesError || !connectorIndexingStatuses) {
return (
<ErrorCallout
errorTitle="Something went wrong :("
errorMsg={connectorIndexingStatusesError?.info?.detail}
/>
);
}
if (credentialsError || !credentialsData) {
return (
<ErrorCallout
errorTitle="Something went wrong :("
errorMsg={credentialsError?.info?.detail}
/>
);
}
const s3ConnectorIndexingStatuses: ConnectorIndexingStatus<
S3Config,
S3CredentialJson
>[] = connectorIndexingStatuses.filter(
(connectorIndexingStatus) =>
connectorIndexingStatus.connector.source === "s3"
);
const s3Credential: Credential<S3CredentialJson> | undefined =
credentialsData.find(
(credential) => credential.credential_json?.aws_access_key_id
);
return (
<>
{popup}
<Title className="mb-2 mt-6 ml-auto mr-auto">
Step 1: Provide your access info
</Title>
{s3Credential ? (
<>
{" "}
<div className="flex mb-1 text-sm">
<p className="my-auto">Existing AWS Access Key ID: </p>
<p className="ml-1 italic my-auto">
{s3Credential.credential_json.aws_access_key_id}
</p>
<button
className="ml-1 hover:bg-hover rounded p-1"
onClick={async () => {
if (s3ConnectorIndexingStatuses.length > 0) {
setPopup({
type: "error",
message:
"Must delete all connectors before deleting credentials",
});
return;
}
await adminDeleteCredential(s3Credential.id);
refreshCredentials();
}}
>
<TrashIcon />
</button>
</div>
</>
) : (
<>
<Text>
<ul className="list-disc mt-2 ml-4">
<li>
If AWS Access Key ID and AWS Secret Access Key are provided,
they will be used for authenticating the connector.
</li>
<li>Otherwise, the Profile Name will be used (if provided).</li>
<li>
If no credentials are provided, then the connector will try to
authenticate with any default AWS credentials available.
</li>
</ul>
</Text>
<Card className="mt-4">
<CredentialForm<S3CredentialJson>
formBody={
<>
<TextFormField
name="aws_access_key_id"
label="AWS Access Key ID:"
/>
<TextFormField
name="aws_secret_access_key"
label="AWS Secret Access Key:"
/>
</>
}
validationSchema={Yup.object().shape({
aws_access_key_id: Yup.string().default(""),
aws_secret_access_key: Yup.string().default(""),
})}
initialValues={{
aws_access_key_id: "",
aws_secret_access_key: "",
}}
onSubmit={(isSuccess) => {
if (isSuccess) {
refreshCredentials();
}
}}
/>
</Card>
</>
)}
<Title className="mb-2 mt-6 ml-auto mr-auto">
Step 2: Which S3 bucket do you want to make searchable?
</Title>
{s3ConnectorIndexingStatuses.length > 0 && (
<>
<Title className="mb-2 mt-6 ml-auto mr-auto">
S3 indexing status
</Title>
<Text className="mb-2">
The latest changes are fetched every 10 minutes.
</Text>
<div className="mb-2">
<ConnectorsTable<S3Config, S3CredentialJson>
includeName={true}
connectorIndexingStatuses={s3ConnectorIndexingStatuses}
liveCredential={s3Credential}
getCredential={(credential) => {
return <div></div>;
}}
onCredentialLink={async (connectorId) => {
if (s3Credential) {
await linkCredential(connectorId, s3Credential.id);
mutate("/api/manage/admin/connector/indexing-status");
}
}}
onUpdate={() =>
mutate("/api/manage/admin/connector/indexing-status")
}
/>
</div>
</>
)}
{s3Credential && (
<>
<Card className="mt-4">
<h2 className="font-bold mb-3">Create Connection</h2>
<Text className="mb-4">
Press connect below to start the connection to your S3 bucket.
</Text>
<ConnectorForm<S3Config>
nameBuilder={(values) => `S3Connector-${values.bucket_name}`}
ccPairNameBuilder={(values) =>
`S3Connector-${values.bucket_name}`
}
source="s3"
inputType="poll"
formBodyBuilder={(values) => (
<div>
<TextFormField name="bucket_name" label="Bucket Name:" />
<TextFormField
name="prefix"
label="Path Prefix (optional):"
/>
</div>
)}
validationSchema={Yup.object().shape({
bucket_type: Yup.string()
.oneOf(["s3"])
.required("Bucket type must be s3"),
bucket_name: Yup.string().required(
"Please enter the name of the s3 bucket to index, e.g. my-test-bucket"
),
prefix: Yup.string().default(""),
})}
initialValues={{
bucket_type: "s3",
bucket_name: "",
prefix: "",
}}
refreshFreq={60 * 60 * 24} // 1 day
credentialId={s3Credential.id}
/>
</Card>
</>
)}
</>
);
};
export default function Page() {
const [selectedStorage, setSelectedStorage] = useState<string>("s3");
return (
<div className="mx-auto container">
<div className="mb-4">
<HealthCheckBanner />
</div>
<AdminPageTitle icon={<S3Icon size={32} />} title="S3 Storage" />
<S3Main key={1} />
</div>
);
}

View File

@ -27,7 +27,6 @@ export async function submitConnector<T>(
): Promise<{ message: string; isSuccess: boolean; response?: Connector<T> }> {
const isUpdate = connectorId !== undefined;
let isSuccess = false;
try {
const response = await fetch(
BASE_CONNECTOR_URL + (isUpdate ? `/${connectorId}` : ""),
@ -41,7 +40,6 @@ export async function submitConnector<T>(
);
if (response.ok) {
isSuccess = true;
const responseJson = await response.json();
return { message: "Success!", isSuccess: true, response: responseJson };
} else {
@ -162,7 +160,6 @@ export function ConnectorForm<T extends Yup.AnyObject>({
});
return;
}
const { message, isSuccess, response } = await submitConnector<T>({
name: connectorName,
source,

View File

@ -44,6 +44,8 @@ import { SiBookstack } from "react-icons/si";
import Image from "next/image";
import jiraSVG from "../../../public/Jira.svg";
import confluenceSVG from "../../../public/Confluence.svg";
import OCIStorageSVG from "../../../public/OCI.svg";
import googleCloudStorageIcon from "../../../public/GoogleCloudStorage.png";
import guruIcon from "../../../public/Guru.svg";
import gongIcon from "../../../public/Gong.png";
import requestTrackerIcon from "../../../public/RequestTracker.png";
@ -54,6 +56,8 @@ import document360Icon from "../../../public/Document360.png";
import googleSitesIcon from "../../../public/GoogleSites.png";
import zendeskIcon from "../../../public/Zendesk.svg";
import dropboxIcon from "../../../public/Dropbox.png";
import s3Icon from "../../../public/S3.png";
import r2Icon from "../../../public/r2.webp";
import salesforceIcon from "../../../public/Salesforce.png";
import sharepointIcon from "../../../public/Sharepoint.png";
import teamsIcon from "../../../public/Teams.png";
@ -423,6 +427,20 @@ export const ConfluenceIcon = ({
);
};
export const OCIStorageIcon = ({
size = 16,
className = defaultTailwindCSS,
}: IconProps) => {
return (
<div
style={{ width: `${size + 4}px`, height: `${size + 4}px` }}
className={`w-[${size + 4}px] h-[${size + 4}px] -m-0.5 ` + className}
>
<Image src={OCIStorageSVG} alt="Logo" width="96" height="96" />
</div>
);
};
export const JiraIcon = ({
size = 16,
className = defaultTailwindCSS,
@ -452,6 +470,20 @@ export const ZulipIcon = ({
);
};
export const GoogleStorageIcon = ({
size = 16,
className = defaultTailwindCSS,
}: IconProps) => {
return (
<div
style={{ width: `${size + 4}px`, height: `${size + 4}px` }}
className={`w-[${size + 4}px] h-[${size + 4}px] -m-0.5 ` + className}
>
<Image src={googleCloudStorageIcon} alt="Logo" width="96" height="96" />
</div>
);
};
export const ProductboardIcon = ({
size = 16,
className = defaultTailwindCSS,
@ -543,6 +575,30 @@ export const SalesforceIcon = ({
</div>
);
export const R2Icon = ({
size = 16,
className = defaultTailwindCSS,
}: IconProps) => (
<div
style={{ width: `${size}px`, height: `${size}px` }}
className={`w-[${size}px] h-[${size}px] ` + className}
>
<Image src={r2Icon} alt="Logo" width="96" height="96" />
</div>
);
export const S3Icon = ({
size = 16,
className = defaultTailwindCSS,
}: IconProps) => (
<div
style={{ width: `${size}px`, height: `${size}px` }}
className={`w-[${size}px] h-[${size}px] ` + className}
>
<Image src={s3Icon} alt="Logo" width="96" height="96" />
</div>
);
export const SharepointIcon = ({
size = 16,
className = defaultTailwindCSS,

View File

@ -22,6 +22,7 @@ import {
NotionIcon,
ProductboardIcon,
RequestTrackerIcon,
R2Icon,
SalesforceIcon,
SharepointIcon,
TeamsIcon,
@ -31,10 +32,14 @@ import {
ZulipIcon,
MediaWikiIcon,
WikipediaIcon,
S3Icon,
OCIStorageIcon,
GoogleStorageIcon,
} from "@/components/icons/icons";
import { ValidSources } from "./types";
import { SourceCategory, SourceMetadata } from "./search/interfaces";
import { Persona } from "@/app/admin/assistants/interfaces";
import internal from "stream";
interface PartialSourceMetadata {
icon: React.FC<{ size?: number; className?: string }>;
@ -207,6 +212,26 @@ const SOURCE_METADATA_MAP: SourceMap = {
displayName: "Clickup",
category: SourceCategory.AppConnection,
},
s3: {
icon: S3Icon,
displayName: "S3",
category: SourceCategory.AppConnection,
},
r2: {
icon: R2Icon,
displayName: "R2",
category: SourceCategory.AppConnection,
},
oci_storage: {
icon: OCIStorageIcon,
displayName: "Oracle Storage",
category: SourceCategory.AppConnection,
},
google_cloud_storage: {
icon: GoogleStorageIcon,
displayName: "Google Storage",
category: SourceCategory.AppConnection,
},
};
function fillSourceMetadata(
@ -223,13 +248,21 @@ function fillSourceMetadata(
}
export function getSourceMetadata(sourceType: ValidSources): SourceMetadata {
return fillSourceMetadata(SOURCE_METADATA_MAP[sourceType], sourceType);
const response = fillSourceMetadata(
SOURCE_METADATA_MAP[sourceType],
sourceType
);
return response;
}
export function listSourceMetadata(): SourceMetadata[] {
return Object.entries(SOURCE_METADATA_MAP).map(([source, metadata]) => {
return fillSourceMetadata(metadata, source as ValidSources);
});
const entries = Object.entries(SOURCE_METADATA_MAP).map(
([source, metadata]) => {
return fillSourceMetadata(metadata, source as ValidSources);
}
);
return entries;
}
export function getSourceDisplayName(sourceType: ValidSources): string | null {

View File

@ -59,7 +59,11 @@ export type ValidSources =
| "clickup"
| "axero"
| "wikipedia"
| "mediawiki";
| "mediawiki"
| "s3"
| "r2"
| "google_cloud_storage"
| "oci_storage";
export type ValidInputTypes = "load_state" | "poll" | "event";
export type ValidStatuses =
@ -219,6 +223,30 @@ export interface ZendeskConfig {}
export interface DropboxConfig {}
export interface S3Config {
bucket_type: "s3";
bucket_name: string;
prefix: string;
}
export interface R2Config {
bucket_type: "r2";
bucket_name: string;
prefix: string;
}
export interface GCSConfig {
bucket_type: "google_cloud_storage";
bucket_name: string;
prefix: string;
}
export interface OCIConfig {
bucket_type: "oci_storage";
bucket_name: string;
prefix: string;
}
export interface MediaWikiBaseConfig {
connector_name: string;
language_code: string;
@ -400,6 +428,28 @@ export interface DropboxCredentialJson {
dropbox_access_token: string;
}
export interface R2CredentialJson {
account_id: string;
r2_access_key_id: string;
r2_secret_access_key: string;
}
export interface S3CredentialJson {
aws_access_key_id: string;
aws_secret_access_key: string;
}
export interface GCSCredentialJson {
access_key_id: string;
secret_access_key: string;
}
export interface OCICredentialJson {
namespace: string;
region: string;
access_key_id: string;
secret_access_key: string;
}
export interface SalesforceCredentialJson {
sf_username: string;
sf_password: string;