Add Google Sites connector (#532)

Chris Weaver
2023-10-08 19:20:38 -07:00
committed by GitHub
parent fb1fbbee5c
commit d95da554ea
17 changed files with 561 additions and 77 deletions

backend/danswer/configs/constants.py

@@ -61,6 +61,7 @@ class DocumentSource(str, Enum):
     LINEAR = "linear"
     HUBSPOT = "hubspot"
     GONG = "gong"
+    GOOGLE_SITES = "google_sites"


 class DocumentIndexType(str, Enum):

backend/danswer/connectors/cross_connector_utils/file_utils.py Normal file

@@ -0,0 +1,49 @@
+import json
+import os
+import zipfile
+from collections.abc import Generator
+from pathlib import Path
+from typing import Any
+from typing import IO
+
+
+_METADATA_FLAG = "#DANSWER_METADATA="
+
+
+def is_macos_resource_fork_file(file_name: str) -> bool:
+    return os.path.basename(file_name).startswith("._") and file_name.startswith(
+        "__MACOSX"
+    )
+
+
+def load_files_from_zip(
+    zip_location: str | Path,
+    ignore_macos_resource_fork_files: bool = True,
+    ignore_dirs: bool = True,
+) -> Generator[tuple[zipfile.ZipInfo, IO[Any]], None, None]:
+    with zipfile.ZipFile(zip_location, "r") as zip_file:
+        for file_info in zip_file.infolist():
+            with zip_file.open(file_info.filename, "r") as file:
+                if ignore_dirs and file_info.is_dir():
+                    continue
+                if ignore_macos_resource_fork_files and is_macos_resource_fork_file(
+                    file_info.filename
+                ):
+                    continue
+                yield file_info, file
+
+
+def read_file(file_reader: IO[Any]) -> tuple[str, dict[str, Any]]:
+    metadata = {}
+    file_content_raw = ""
+    for ind, line in enumerate(file_reader):
+        if isinstance(line, bytes):
+            line = line.decode("utf-8")
+        line = str(line)
+
+        if ind == 0 and line.startswith(_METADATA_FLAG):
+            metadata = json.loads(line.replace(_METADATA_FLAG, "", 1).strip())
+        else:
+            file_content_raw += line
+
+    return file_content_raw, metadata
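The metadata convention these helpers implement can be exercised with nothing but the standard library. A minimal, self-contained sketch; the zip entry name and metadata values below are invented for illustration:

    import io
    import json
    import zipfile

    _METADATA_FLAG = "#DANSWER_METADATA="

    # Build a tiny zip in memory; the first line of the text file carries the
    # JSON metadata header that read_file() recognizes.
    buf = io.BytesIO()
    with zipfile.ZipFile(buf, "w") as zf:
        zf.writestr(
            "notes.txt",
            '#DANSWER_METADATA={"link": "https://example.com/notes"}\n'
            "The actual document body goes here.\n",
        )
    buf.seek(0)

    # Mirror the parsing that read_file() performs on each zip entry.
    with zipfile.ZipFile(buf, "r") as zf:
        for info in zf.infolist():
            with zf.open(info.filename, "r") as f:
                first_line = f.readline().decode("utf-8")
                metadata = {}
                if first_line.startswith(_METADATA_FLAG):
                    metadata = json.loads(
                        first_line.replace(_METADATA_FLAG, "", 1).strip()
                    )
                body = f.read().decode("utf-8")
                print(info.filename, metadata, body)
    # prints: notes.txt {'link': 'https://example.com/notes'} The actual document body...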

backend/danswer/connectors/cross_connector_utils/html_utils.py Normal file

@@ -0,0 +1,57 @@
+from copy import copy
+from dataclasses import dataclass
+
+from bs4 import BeautifulSoup
+
+from danswer.configs.app_configs import WEB_CONNECTOR_IGNORED_CLASSES
+from danswer.configs.app_configs import WEB_CONNECTOR_IGNORED_ELEMENTS
+from danswer.utils.text_processing import format_document_soup
+
+
+MINTLIFY_UNWANTED = ["sticky", "hidden"]
+
+
+@dataclass
+class ParsedHTML:
+    title: str | None
+    cleaned_text: str
+
+
+def standard_html_cleanup(
+    page_content: str | BeautifulSoup,
+    mintlify_cleanup_enabled: bool = True,
+    additional_element_types_to_discard: list[str] | None = None,
+) -> ParsedHTML:
+    if isinstance(page_content, str):
+        soup = BeautifulSoup(page_content, "html.parser")
+    else:
+        soup = page_content
+
+    title_tag = soup.find("title")
+    title = None
+    if title_tag and title_tag.text:
+        title = title_tag.text
+        title_tag.extract()
+
+    # Heuristics based cleaning of elements based on css classes
+    unwanted_classes = copy(WEB_CONNECTOR_IGNORED_CLASSES)
+    if mintlify_cleanup_enabled:
+        unwanted_classes.extend(MINTLIFY_UNWANTED)
+    for undesired_element in unwanted_classes:
+        [
+            tag.extract()
+            for tag in soup.find_all(
+                class_=lambda x: x and undesired_element in x.split()
+            )
+        ]
+
+    for undesired_tag in WEB_CONNECTOR_IGNORED_ELEMENTS:
+        [tag.extract() for tag in soup.find_all(undesired_tag)]
+
+    if additional_element_types_to_discard:
+        for undesired_tag in additional_element_types_to_discard:
+            [tag.extract() for tag in soup.find_all(undesired_tag)]
+
+    # 200B is ZeroWidthSpace which we don't care for
+    page_text = format_document_soup(soup).replace("\u200B", "")
+    return ParsedHTML(title=title, cleaned_text=page_text)
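As a rough illustration of what standard_html_cleanup does (title extraction plus class-based element stripping), here is a standalone sketch that needs only bs4. It substitutes a plain get_text() call for danswer's format_document_soup, and the HTML is invented:

    from bs4 import BeautifulSoup

    html = """
    <html><head><title>Demo Page</title></head>
    <body>
      <nav class="sticky">site navigation</nav>
      <p>Real content.</p>
    </body></html>
    """
    soup = BeautifulSoup(html, "html.parser")

    # Pull the title out, then drop the tag so it is not repeated in the body text.
    title_tag = soup.find("title")
    title = title_tag.text if title_tag and title_tag.text else None
    if title_tag:
        title_tag.extract()

    # Strip any element whose class list contains an unwanted class
    # (here just "sticky", standing in for WEB_CONNECTOR_IGNORED_CLASSES).
    for tag in soup.find_all(class_=lambda x: x and "sticky" in x.split()):
        tag.extract()

    print(title)  # Demo Page
    print(soup.get_text(" ", strip=True))  # Real content.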

backend/danswer/connectors/factory.py

@@ -9,6 +9,7 @@ from danswer.connectors.file.connector import LocalFileConnector
 from danswer.connectors.github.connector import GithubConnector
 from danswer.connectors.gong.connector import GongConnector
 from danswer.connectors.google_drive.connector import GoogleDriveConnector
+from danswer.connectors.google_site.connector import GoogleSitesConnector
 from danswer.connectors.guru.connector import GuruConnector
 from danswer.connectors.hubspot.connector import HubSpotConnector
 from danswer.connectors.interfaces import BaseConnector
@@ -54,6 +55,7 @@ def identify_connector_class(
         DocumentSource.LINEAR: LinearConnector,
         DocumentSource.HUBSPOT: HubSpotConnector,
         DocumentSource.GONG: GongConnector,
+        DocumentSource.GOOGLE_SITES: GoogleSitesConnector,
     }
     connector_by_source = connector_map.get(source, {})

backend/danswer/connectors/file/connector.py

@@ -1,6 +1,4 @@
-import json
 import os
-import zipfile
 from collections.abc import Generator
 from pathlib import Path
 from typing import Any
@@ -10,6 +8,8 @@ from PyPDF2 import PdfReader
 from danswer.configs.app_configs import INDEX_BATCH_SIZE
 from danswer.configs.constants import DocumentSource
+from danswer.connectors.cross_connector_utils.file_utils import load_files_from_zip
+from danswer.connectors.cross_connector_utils.file_utils import read_file
 from danswer.connectors.file.utils import check_file_ext_is_valid
 from danswer.connectors.file.utils import get_file_ext
 from danswer.connectors.interfaces import GenerateDocumentsOutput
@@ -21,17 +21,6 @@ from danswer.utils.logger import setup_logger

 logger = setup_logger()

-_METADATA_FLAG = "#DANSWER_METADATA="
-
-
-def _get_files_from_zip(
-    zip_location: str | Path,
-) -> Generator[tuple[str, IO[Any]], None, None]:
-    with zipfile.ZipFile(zip_location, "r") as zip_file:
-        for file_name in zip_file.namelist():
-            with zip_file.open(file_name, "r") as file:
-                yield os.path.basename(file_name), file
-

 def _open_files_at_location(
     file_path: str | Path,
@@ -39,7 +28,8 @@ def _open_files_at_location(
     extension = get_file_ext(file_path)

     if extension == ".zip":
-        yield from _get_files_from_zip(file_path)
+        for file_info, file in load_files_from_zip(file_path, ignore_dirs=True):
+            yield file_info.filename, file
     elif extension == ".txt" or extension == ".pdf":
         mode = "r"
         if extension == ".pdf":
@@ -56,7 +46,7 @@ def _process_file(file_name: str, file: IO[Any]) -> list[Document]:
         logger.warning(f"Skipping file '{file_name}' with extension '{extension}'")
         return []

-    metadata = {}
+    metadata: dict[str, Any] = {}
     file_content_raw = ""
     if extension == ".pdf":
         pdf_reader = PdfReader(file)
@@ -65,15 +55,7 @@ def _process_file(file_name: str, file: IO[Any]) -> list[Document]:
             page.extract_text() for page in pdf_reader.pages
         )
     else:
-        for ind, line in enumerate(file):
-            if isinstance(line, bytes):
-                line = line.decode("utf-8")
-            line = str(line)
-
-            if ind == 0 and line.startswith(_METADATA_FLAG):
-                metadata = json.loads(line.replace(_METADATA_FLAG, "", 1).strip())
-            else:
-                file_content_raw += line
+        file_content_raw, metadata = read_file(file)

     return [
         Document(

backend/danswer/connectors/google_site/connector.py Normal file

@@ -0,0 +1,139 @@
+import os
+import urllib.parse
+from typing import Any
+from typing import cast
+
+from bs4 import BeautifulSoup
+from bs4 import Tag
+
+from danswer.configs.app_configs import INDEX_BATCH_SIZE
+from danswer.configs.constants import DocumentSource
+from danswer.connectors.cross_connector_utils.file_utils import load_files_from_zip
+from danswer.connectors.cross_connector_utils.file_utils import read_file
+from danswer.connectors.cross_connector_utils.html_utils import standard_html_cleanup
+from danswer.connectors.interfaces import GenerateDocumentsOutput
+from danswer.connectors.interfaces import LoadConnector
+from danswer.connectors.models import Document
+from danswer.connectors.models import Section
+
+
+def process_link(element: BeautifulSoup | Tag) -> str:
+    href = cast(str | None, element.get("href"))
+    if not href:
+        raise RuntimeError(f"Invalid link - {element}")
+
+    # cleanup href
+    href = urllib.parse.unquote(href)
+    href = href.rstrip(".html").lower()
+    href = href.replace("_", "")
+    href = href.replace(" ", "-")
+
+    return href
+
+
+def find_google_sites_page_path_from_navbar(
+    element: BeautifulSoup | Tag, path: str, is_initial: bool
+) -> str | None:
+    ul = cast(Tag | None, element.find("ul"))
+    if ul:
+        if not is_initial:
+            a = cast(Tag, element.find("a"))
+            new_path = f"{path}/{process_link(a)}"
+            if a.get("aria-selected") == "true":
+                return new_path
+        else:
+            new_path = ""
+        for li in ul.find_all("li", recursive=False):
+            found_link = find_google_sites_page_path_from_navbar(li, new_path, False)
+            if found_link:
+                return found_link
+    else:
+        a = cast(Tag, element.find("a"))
+        if a:
+            href = process_link(a)
+            if href and a.get("aria-selected") == "true":
+                return path + "/" + href
+
+    return None
+
+
+class GoogleSitesConnector(LoadConnector):
+    def __init__(
+        self,
+        zip_path: str,
+        base_url: str,
+        batch_size: int = INDEX_BATCH_SIZE,
+    ):
+        self.zip_path = zip_path
+        self.base_url = base_url
+        self.batch_size = batch_size
+
+    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
+        pass
+
+    def load_from_state(self) -> GenerateDocumentsOutput:
+        documents: list[Document] = []
+
+        # load the HTML files
+        files = load_files_from_zip(self.zip_path)
+        for file_info, file_io in files:
+            # skip non-published files
+            if "/PUBLISHED/" not in file_info.filename:
+                continue
+
+            file_path, extension = os.path.splitext(file_info.filename)
+            if extension != ".html":
+                continue
+
+            file_content, _ = read_file(file_io)
+            soup = BeautifulSoup(file_content, "html.parser")
+
+            # get the link out of the navbar
+            header = cast(Tag, soup.find("header"))
+            nav = cast(Tag, header.find("nav"))
+            path = find_google_sites_page_path_from_navbar(nav, "", True)
+            if not path:
+                raise RuntimeError(f"Could not find path for {file_info.filename}")
+
+            # cleanup the hidden `Skip to main content` and `Skip to navigation`
+            # that appears at the top of every page
+            for div in soup.find_all("div", attrs={"data-is-touch-wrapper": "true"}):
+                div.extract()
+
+            # get the body of the page
+            parsed_html = standard_html_cleanup(
+                soup, additional_element_types_to_discard=["header", "nav"]
+            )
+
+            title = parsed_html.title or file_path.split("/")[-1]
+            documents.append(
+                Document(
+                    id=f"{DocumentSource.GOOGLE_SITES.value}:{path}",
+                    source=DocumentSource.GOOGLE_SITES,
+                    semantic_identifier=title,
+                    sections=[
+                        Section(
+                            link=self.base_url.rstrip("/") + "/" + path.lstrip("/"),
+                            text=parsed_html.cleaned_text,
+                        )
+                    ],
+                    metadata={},
+                )
+            )
+            if len(documents) >= self.batch_size:
+                yield documents
+                documents = []
+
+        if documents:
+            yield documents
+
+
+if __name__ == "__main__":
+    connector = GoogleSitesConnector(
+        os.environ["GOOGLE_SITES_ZIP_PATH"],
+        os.environ.get("GOOGLE_SITES_BASE_URL", ""),
+    )
+    for doc_batch in connector.load_from_state():
+        for doc in doc_batch:
+            print(doc)
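To make the navbar walk concrete: a small sketch of how find_google_sites_page_path_from_navbar resolves the currently selected page's path. It assumes the danswer package is importable, and the nav HTML below is a simplified, invented stand-in for what a real Google Sites export contains:

    from bs4 import BeautifulSoup
    from danswer.connectors.google_site.connector import (
        find_google_sites_page_path_from_navbar,
    )

    # "aria-selected" marks the page the exported HTML file corresponds to.
    nav_html = """
    <nav><ul>
      <li><a href="home.html">Home</a></li>
      <li><a href="docs.html">Docs</a>
        <ul><li><a href="setup.html" aria-selected="true">Setup</a></li></ul>
      </li>
    </ul></nav>
    """
    nav = BeautifulSoup(nav_html, "html.parser")

    # Walks the <ul>/<li> tree, accumulating cleaned hrefs until it hits
    # the selected anchor; here that yields "/docs/setup".
    print(find_google_sites_page_path_from_navbar(nav, "", True))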

backend/danswer/connectors/web/connector.py

@@ -1,5 +1,4 @@
 import io
-from copy import copy
 from datetime import datetime
 from enum import Enum
 from typing import Any
@@ -18,25 +17,20 @@ from PyPDF2 import PdfReader
 from requests_oauthlib import OAuth2Session  # type:ignore

 from danswer.configs.app_configs import INDEX_BATCH_SIZE
-from danswer.configs.app_configs import WEB_CONNECTOR_IGNORED_CLASSES
-from danswer.configs.app_configs import WEB_CONNECTOR_IGNORED_ELEMENTS
 from danswer.configs.app_configs import WEB_CONNECTOR_OAUTH_CLIENT_ID
 from danswer.configs.app_configs import WEB_CONNECTOR_OAUTH_CLIENT_SECRET
 from danswer.configs.app_configs import WEB_CONNECTOR_OAUTH_TOKEN_URL
 from danswer.configs.constants import DocumentSource
+from danswer.connectors.cross_connector_utils.html_utils import standard_html_cleanup
 from danswer.connectors.interfaces import GenerateDocumentsOutput
 from danswer.connectors.interfaces import LoadConnector
 from danswer.connectors.models import Document
 from danswer.connectors.models import Section
 from danswer.utils.logger import setup_logger
-from danswer.utils.text_processing import format_document_soup

 logger = setup_logger()

-MINTLIFY_UNWANTED = ["sticky", "hidden"]
-

 class WEB_CONNECTOR_VALID_SETTINGS(str, Enum):
     # Given a base site, index everything under that path
     RECURSIVE = "recursive"
@@ -224,36 +218,16 @@ class WebConnector(LoadConnector):
                         if link not in visited_links:
                             to_visit.append(link)

-                title_tag = soup.find("title")
-                title = None
-                if title_tag and title_tag.text:
-                    title = title_tag.text
-                    title_tag.extract()
-
-                # Heuristics based cleaning of elements based on css classes
-                unwanted_classes = copy(WEB_CONNECTOR_IGNORED_CLASSES)
-                if self.mintlify_cleanup:
-                    unwanted_classes.extend(MINTLIFY_UNWANTED)
-                for undesired_element in unwanted_classes:
-                    [
-                        tag.extract()
-                        for tag in soup.find_all(
-                            class_=lambda x: x and undesired_element in x.split()
-                        )
-                    ]
-
-                for undesired_tag in WEB_CONNECTOR_IGNORED_ELEMENTS:
-                    [tag.extract() for tag in soup.find_all(undesired_tag)]
-
-                # 200B is ZeroWidthSpace which we don't care for
-                page_text = format_document_soup(soup).replace("\u200B", "")
+                parsed_html = standard_html_cleanup(soup, self.mintlify_cleanup)

                 doc_batch.append(
                     Document(
                         id=current_url,
-                        sections=[Section(link=current_url, text=page_text)],
+                        sections=[
+                            Section(link=current_url, text=parsed_html.cleaned_text)
+                        ],
                         source=DocumentSource.WEB,
-                        semantic_identifier=title or current_url,
+                        semantic_identifier=parsed_html.title or current_url,
                         metadata={},
                     )
                 )

BIN  web/public/GoogleSites.png Normal file (binary image, 5.4 KiB, not shown)

web/src/app/admin/connectors/file/page.tsx

@@ -8,7 +8,6 @@ import { fetcher } from "@/lib/fetcher";
 import { HealthCheckBanner } from "@/components/health/healthcheck";
 import { ConnectorIndexingStatus, FileConfig } from "@/lib/types";
 import { linkCredential } from "@/lib/credential";
-import { FileUpload } from "./FileUpload";
 import { useState } from "react";
 import { usePopup } from "@/components/admin/connectors/Popup";
 import { createConnector, runConnector } from "@/lib/connector";
@@ -17,6 +16,7 @@ import { SingleUseConnectorsTable } from "@/components/admin/connectors/table/Si
 import { LoadingAnimation } from "@/components/Loading";
 import { Form, Formik } from "formik";
 import { TextFormField } from "@/components/admin/connectors/Field";
+import { FileUpload } from "@/components/admin/connectors/FileUpload";

 const getNameFromPath = (path: string) => {
   const pathParts = path.split("/");

web/src/app/admin/connectors/google-sites/page.tsx Normal file

@@ -0,0 +1,241 @@
+"use client";
+
+import useSWR, { useSWRConfig } from "swr";
+import * as Yup from "yup";
+import { LoadingAnimation } from "@/components/Loading";
+import { GoogleSitesIcon } from "@/components/icons/icons";
+import { fetcher } from "@/lib/fetcher";
+import { TextFormField } from "@/components/admin/connectors/Field";
+import { HealthCheckBanner } from "@/components/health/healthcheck";
+import { ConnectorIndexingStatus, GoogleSitesConfig } from "@/lib/types";
+import { Form, Formik } from "formik";
+import { useState } from "react";
+import { usePopup } from "@/components/admin/connectors/Popup";
+import { createConnector, runConnector } from "@/lib/connector";
+import { linkCredential } from "@/lib/credential";
+import { FileUpload } from "@/components/admin/connectors/FileUpload";
+import { SingleUseConnectorsTable } from "@/components/admin/connectors/table/SingleUseConnectorsTable";
+import { Spinner } from "@/components/Spinner";
+
+export default function GoogleSites() {
+  const { mutate } = useSWRConfig();
+  const [selectedFiles, setSelectedFiles] = useState<File[]>([]);
+  const [filesAreUploading, setFilesAreUploading] = useState<boolean>(false);
+  const { popup, setPopup } = usePopup();
+
+  const {
+    data: connectorIndexingStatuses,
+    isLoading: isConnectorIndexingStatusesLoading,
+    error: isConnectorIndexingStatusesError,
+  } = useSWR<ConnectorIndexingStatus<any, any>[]>(
+    "/api/manage/admin/connector/indexing-status",
+    fetcher
+  );
+
+  const googleSitesIndexingStatuses: ConnectorIndexingStatus<
+    GoogleSitesConfig,
+    {}
+  >[] =
+    connectorIndexingStatuses?.filter(
+      (connectorIndexingStatus) =>
+        connectorIndexingStatus.connector.source === "google_sites"
+    ) ?? [];
+
+  return (
+    <>
+      {popup}
+      {filesAreUploading && <Spinner />}
+      <div className="mx-auto container">
+        <div className="mb-4">
+          <HealthCheckBanner />
+        </div>
+        <div className="border-solid border-gray-600 border-b pb-2 mb-4 flex">
+          <GoogleSitesIcon size={32} />
+          <h1 className="text-3xl font-bold pl-2">Google Sites</h1>
+        </div>
+        <p className="text-sm mb-2">
+          For an in-depth guide on how to set up this connector, check out{" "}
+          <a
+            href="https://docs.danswer.dev/connectors/google-sites"
+            target="_blank"
+            className="text-blue-500"
+          >
+            the documentation
+          </a>
+          .
+        </p>
+        <div className="mt-4">
+          <h2 className="font-bold text-xl mb-2">Upload Files</h2>
+          <div className="mx-auto w-full">
+            <Formik
+              initialValues={{
+                base_url: "",
+              }}
+              validationSchema={Yup.object().shape({
+                base_url: Yup.string().required("Base URL is required"),
+              })}
+              onSubmit={async (values, formikHelpers) => {
+                const uploadCreateAndTriggerConnector = async () => {
+                  const formData = new FormData();
+
+                  selectedFiles.forEach((file) => {
+                    formData.append("files", file);
+                  });
+
+                  const response = await fetch(
+                    "/api/manage/admin/connector/file/upload",
+                    { method: "POST", body: formData }
+                  );
+                  const responseJson = await response.json();
+                  if (!response.ok) {
+                    setPopup({
+                      message: `Unable to upload files - ${responseJson.detail}`,
+                      type: "error",
+                    });
+                    return;
+                  }
+
+                  const filePaths = responseJson.file_paths as string[];
+                  const [connectorErrorMsg, connector] =
+                    await createConnector<GoogleSitesConfig>({
+                      name: `GoogleSitesConnector-${values.base_url}`,
+                      source: "google_sites",
+                      input_type: "load_state",
+                      connector_specific_config: {
+                        base_url: values.base_url,
+                        zip_path: filePaths[0],
+                      },
+                      refresh_freq: null,
+                      disabled: false,
+                    });
+                  if (connectorErrorMsg || !connector) {
+                    setPopup({
+                      message: `Unable to create connector - ${connectorErrorMsg}`,
+                      type: "error",
+                    });
+                    return;
+                  }
+
+                  const credentialResponse = await linkCredential(
+                    connector.id,
+                    0,
+                    values.base_url
+                  );
+                  if (!credentialResponse.ok) {
+                    const credentialResponseJson =
+                      await credentialResponse.json();
+                    setPopup({
+                      message: `Unable to link connector to credential - ${credentialResponseJson.detail}`,
+                      type: "error",
+                    });
+                    return;
+                  }
+
+                  const runConnectorErrorMsg = await runConnector(
+                    connector.id,
+                    [0]
+                  );
+                  if (runConnectorErrorMsg) {
+                    setPopup({
+                      message: `Unable to run connector - ${runConnectorErrorMsg}`,
+                      type: "error",
+                    });
+                    return;
+                  }
+
+                  mutate("/api/manage/admin/connector/indexing-status");
+                  setSelectedFiles([]);
+                  formikHelpers.resetForm();
+                  setPopup({
+                    type: "success",
+                    message: "Successfully uploaded files!",
+                  });
+                };
+
+                setFilesAreUploading(true);
+                try {
+                  await uploadCreateAndTriggerConnector();
+                } catch (e) {
+                  console.log("Failed to index files: ", e);
+                }
+                setFilesAreUploading(false);
+              }}
+            >
+              {({ values, isSubmitting }) => (
+                <Form className="p-3 border border-gray-600 rounded">
+                  <TextFormField
+                    name="base_url"
+                    label="Base URL:"
+                    placeholder={`Base URL of your Google Site e.g. https://sites.google.com/view/your-site`}
+                    subtext="This will be used to generate links for each page."
+                    autoCompleteDisabled={true}
+                  />
+
+                  <p className="mb-1 font-medium">Files:</p>
+                  <FileUpload
+                    selectedFiles={selectedFiles}
+                    setSelectedFiles={setSelectedFiles}
+                    message="Upload a zip file containing the HTML of your Google Site"
+                  />
+
+                  <button
+                    className={
+                      "bg-slate-500 hover:bg-slate-700 text-white " +
+                      "font-bold py-2 px-4 rounded focus:outline-none " +
+                      "focus:shadow-outline w-full mx-auto mt-4"
+                    }
+                    type="submit"
+                    disabled={
+                      selectedFiles.length !== 1 ||
+                      !values.base_url ||
+                      isSubmitting
+                    }
+                  >
+                    Upload!
+                  </button>
+                </Form>
+              )}
+            </Formik>
+          </div>
+        </div>
+
+        <h2 className="font-bold mb-2 mt-6 ml-auto mr-auto">
+          Existing Google Site Connectors
+        </h2>
+        {isConnectorIndexingStatusesLoading ? (
+          <LoadingAnimation text="Loading" />
+        ) : isConnectorIndexingStatusesError || !connectorIndexingStatuses ? (
+          <div>Error loading indexing history</div>
+        ) : googleSitesIndexingStatuses.length > 0 ? (
+          <SingleUseConnectorsTable<GoogleSitesConfig, {}>
+            connectorIndexingStatuses={googleSitesIndexingStatuses}
+            specialColumns={[
+              {
+                header: "Base URL",
+                key: "base_url",
+                getValue: (ccPairStatus) => {
+                  const connectorConfig =
+                    ccPairStatus.connector.connector_specific_config;
+                  return (
+                    <a
+                      className="text-blue-500"
+                      href={connectorConfig.base_url}
+                    >
+                      {connectorConfig.base_url}
+                    </a>
+                  );
+                },
+              },
+            ]}
+            onUpdate={() =>
+              mutate("/api/manage/admin/connector/indexing-status")
+            }
+          />
+        ) : (
+          <p className="text-sm">No indexed Google Sites found</p>
+        )}
+      </div>
+    </>
+  );
+}
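The flow this page drives (upload the zip, create the connector, link a credential, trigger a run) can also be scripted against the backend. A rough sketch using requests: the upload route and payload fields are taken from the page above, while the host and the connector-create route are assumptions, and the credential/run steps are omitted because their routes are not shown in this diff:

    import requests

    BASE = "http://localhost:8080"  # assumed Danswer API server address
    BASE_URL = "https://sites.google.com/view/your-site"  # your site's base URL

    # 1) Upload the Google Sites export zip (route taken from the page above).
    with open("google-sites-export.zip", "rb") as f:
        upload = requests.post(
            f"{BASE}/api/manage/admin/connector/file/upload",
            files={"files": f},
        )
    upload.raise_for_status()
    zip_path = upload.json()["file_paths"][0]

    # 2) Create the connector. The payload mirrors createConnector() above;
    #    the route itself is an assumption, not shown in this diff.
    connector = requests.post(
        f"{BASE}/api/manage/admin/connector",
        json={
            "name": f"GoogleSitesConnector-{BASE_URL}",
            "source": "google_sites",
            "input_type": "load_state",
            "connector_specific_config": {
                "base_url": BASE_URL,
                "zip_path": zip_path,
            },
            "refresh_freq": None,
            "disabled": False,
        },
    )
    connector.raise_for_status()
    print(connector.json())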

web/src/app/admin/layout.tsx

@@ -22,6 +22,7 @@ import {
   HubSpotIcon,
   BookmarkIcon,
   CPUIcon,
+  GoogleSitesIcon,
 } from "@/components/icons/icons";
 import { getAuthDisabledSS, getCurrentUserSS } from "@/lib/userSS";
 import { redirect } from "next/navigation";
@@ -173,6 +174,15 @@ export async function Layout({ children }: { children: React.ReactNode }) {
             ),
             link: "/admin/connectors/zulip",
           },
+          {
+            name: (
+              <div className="flex">
+                <GoogleSitesIcon size={16} />
+                <div className="ml-1">Google Sites</div>
+              </div>
+            ),
+            link: "/admin/connectors/google-sites",
+          },
           {
             name: (
               <div className="flex">

web/src/components/admin/connectors/FileUpload.tsx

@@ -1,16 +1,17 @@
-// components/FileUpload.tsx
-import { ChangeEvent, FC, useState } from "react";
+import { FC, useState } from "react";
 import React from "react";
 import Dropzone from "react-dropzone";

 interface FileUploadProps {
   selectedFiles: File[];
   setSelectedFiles: (files: File[]) => void;
+  message?: string;
 }

 export const FileUpload: FC<FileUploadProps> = ({
   selectedFiles,
   setSelectedFiles,
+  message,
 }) => {
   const [dragActive, setDragActive] = useState(false);
@@ -35,7 +36,10 @@ export const FileUpload: FC<FileUploadProps> = ({
           }
         >
           <input {...getInputProps()} />
-          <b>Drag and drop some files here, or click to select files</b>
+          <b>
+            {message ||
+              "Drag and drop some files here, or click to select files"}
+          </b>
         </div>
       </section>
     )}

web/src/components/admin/connectors/table/SingleUseConnectorsTable.tsx

@@ -1,10 +1,4 @@
-import {
-  Connector,
-  ConnectorIndexingStatus,
-  Credential,
-  DeletionAttemptSnapshot,
-  ValidStatuses,
-} from "@/lib/types";
+import { DeletionAttemptSnapshot, ValidStatuses } from "@/lib/types";
 import { BasicTable } from "@/components/admin/connectors/BasicTable";
 import { Popup } from "@/components/admin/connectors/Popup";
 import { useState } from "react";
@@ -64,17 +58,19 @@ export function SingleUseConnectorsTable<
   const connectorIncludesCredential =
     getCredential !== undefined && onCredentialLink !== undefined;

-  const columns = [
-    {
-      header: "Name",
-      key: "name",
-    },
-    ...(specialColumns ?? []),
-    {
-      header: "Status",
-      key: "status",
-    },
-  ];
+  const columns = [];
+
+  if (includeName) {
+    columns.push({
+      header: "Name",
+      key: "name",
+    });
+  }
+  columns.push(...(specialColumns ?? []));
+  columns.push({
+    header: "Status",
+    key: "status",
+  });
+
   if (connectorIncludesCredential) {
     columns.push({
       header: "Credential",

web/src/components/icons/icons.tsx

@@ -43,6 +43,7 @@ import gongIcon from "../../../public/Gong.png";
 import zulipIcon from "../../../public/Zulip.png";
 import linearIcon from "../../../public/Linear.png";
 import hubSpotIcon from "../../../public/HubSpot.png";
+import googleSitesIcon from "../../../public/GoogleSites.png";

 interface IconProps {
   size?: number;
@@ -450,3 +451,17 @@ export const HubSpotIcon = ({
     </div>
   );
 };
+
+export const GoogleSitesIcon = ({
+  size = 16,
+  className = defaultTailwindCSS,
+}: IconProps) => {
+  return (
+    <div
+      style={{ width: `${size}px`, height: `${size}px` }}
+      className={`w-[${size}px] h-[${size}px] ` + className}
+    >
+      <Image src={googleSitesIcon} alt="Logo" width="96" height="96" />
+    </div>
+  );
+};


@@ -29,6 +29,7 @@ const sources: Source[] = [
   { displayName: "Zulip", internalName: "zulip" },
   { displayName: "Linear", internalName: "linear" },
   { displayName: "HubSpot", internalName: "hubspot" },
+  { displayName: "Google Sites", internalName: "google_sites" },
 ];

 interface SourceSelectorProps {


@@ -16,6 +16,7 @@ import {
   SlackIcon,
   ZulipIcon,
   HubSpotIcon,
+  GoogleSitesIcon,
 } from "./icons/icons";

 interface SourceMetadata {
@@ -122,6 +123,12 @@ export const getSourceMetadata = (sourceType: ValidSources): SourceMetadata => {
         displayName: "HubSpot",
         adminPageLink: "/admin/connectors/hubspot",
       };
+    case "google_sites":
+      return {
+        icon: GoogleSitesIcon,
+        displayName: "Google Sites",
+        adminPageLink: "/admin/connectors/google-sites",
+      };
     default:
       throw new Error("Invalid source type");
   }

web/src/lib/types.ts

@@ -23,7 +23,8 @@ export type ValidSources =
   | "zulip"
   | "linear"
   | "hubspot"
-  | "file";
+  | "file"
+  | "google_sites";

 export type ValidInputTypes = "load_state" | "poll" | "event";
 export type ValidStatuses =
   | "success"
@@ -114,6 +115,11 @@ export interface NotionConfig {}

 export interface HubSpotConfig {}

+export interface GoogleSitesConfig {
+  zip_path: string;
+  base_url: string;
+}
+
 export interface IndexAttemptSnapshot {
   status: ValidStatuses | null;
   num_docs_indexed: number;