Add Google Sites connector (#532)

Chris Weaver authored 2023-10-08 19:20:38 -07:00 (committed by GitHub)
parent fb1fbbee5c
commit d95da554ea
17 changed files with 561 additions and 77 deletions


@@ -61,6 +61,7 @@ class DocumentSource(str, Enum):
LINEAR = "linear"
HUBSPOT = "hubspot"
GONG = "gong"
GOOGLE_SITES = "google_sites"
class DocumentIndexType(str, Enum):


@@ -0,0 +1,49 @@
import json
import os
import zipfile
from collections.abc import Generator
from pathlib import Path
from typing import Any
from typing import IO
_METADATA_FLAG = "#DANSWER_METADATA="
def is_macos_resource_fork_file(file_name: str) -> bool:
return os.path.basename(file_name).startswith("._") and file_name.startswith(
"__MACOSX"
)
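# e.g. (hypothetical archive entries):
#   is_macos_resource_fork_file("__MACOSX/._index.html")     -> True
#   is_macos_resource_fork_file("__MACOSX/site/._page.html") -> True
#   is_macos_resource_fork_file("site/index.html")           -> False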
def load_files_from_zip(
zip_location: str | Path,
ignore_macos_resource_fork_files: bool = True,
ignore_dirs: bool = True,
) -> Generator[tuple[zipfile.ZipInfo, IO[Any]], None, None]:
    with zipfile.ZipFile(zip_location, "r") as zip_file:
        for file_info in zip_file.infolist():
            # check skip conditions before opening the entry
            if ignore_dirs and file_info.is_dir():
                continue
            if ignore_macos_resource_fork_files and is_macos_resource_fork_file(
                file_info.filename
            ):
                continue
            with zip_file.open(file_info.filename, "r") as file:
                yield file_info, file
def read_file(file_reader: IO[Any]) -> tuple[str, dict[str, Any]]:
    metadata: dict[str, Any] = {}
file_content_raw = ""
for ind, line in enumerate(file_reader):
if isinstance(line, bytes):
line = line.decode("utf-8")
line = str(line)
if ind == 0 and line.startswith(_METADATA_FLAG):
metadata = json.loads(line.replace(_METADATA_FLAG, "", 1).strip())
else:
file_content_raw += line
return file_content_raw, metadata
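Together, these helpers let any connector walk a zip archive and pull optional per-file metadata from a "#DANSWER_METADATA=" JSON header on the first line. A minimal sketch of read_file on an in-memory file (the link value here is made up):

import io

raw = io.BytesIO(
    b'#DANSWER_METADATA={"link": "https://example.com/doc"}\n'
    b"Actual file content starts here\n"
)
content, metadata = read_file(raw)
# metadata == {"link": "https://example.com/doc"}
# content == "Actual file content starts here\n"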


@@ -0,0 +1,57 @@
from copy import copy
from dataclasses import dataclass
from bs4 import BeautifulSoup
from danswer.configs.app_configs import WEB_CONNECTOR_IGNORED_CLASSES
from danswer.configs.app_configs import WEB_CONNECTOR_IGNORED_ELEMENTS
from danswer.utils.text_processing import format_document_soup
MINTLIFY_UNWANTED = ["sticky", "hidden"]
@dataclass
class ParsedHTML:
title: str | None
cleaned_text: str
def standard_html_cleanup(
page_content: str | BeautifulSoup,
mintlify_cleanup_enabled: bool = True,
additional_element_types_to_discard: list[str] | None = None,
) -> ParsedHTML:
if isinstance(page_content, str):
soup = BeautifulSoup(page_content, "html.parser")
else:
soup = page_content
title_tag = soup.find("title")
title = None
if title_tag and title_tag.text:
title = title_tag.text
title_tag.extract()
    # Heuristic cleaning: drop elements whose CSS classes mark them as unwanted
    unwanted_classes = copy(WEB_CONNECTOR_IGNORED_CLASSES)
    if mintlify_cleanup_enabled:
        unwanted_classes.extend(MINTLIFY_UNWANTED)
    for undesired_element in unwanted_classes:
        for tag in soup.find_all(
            class_=lambda x: x and undesired_element in x.split()
        ):
            tag.extract()
    for undesired_tag in WEB_CONNECTOR_IGNORED_ELEMENTS:
        for tag in soup.find_all(undesired_tag):
            tag.extract()
    if additional_element_types_to_discard:
        for undesired_tag in additional_element_types_to_discard:
            for tag in soup.find_all(undesired_tag):
                tag.extract()
    # strip U+200B (zero-width space) characters, which we don't want in the output
page_text = format_document_soup(soup).replace("\u200B", "")
return ParsedHTML(title=title, cleaned_text=page_text)
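A rough usage sketch (the HTML here is invented, and the exact whitespace of cleaned_text depends on format_document_soup, which is defined elsewhere):

html = (
    "<html><head><title>My Page</title></head>"
    '<body><div class="sticky">nav junk</div>'
    "<p>Real content</p></body></html>"
)
parsed = standard_html_cleanup(html)
# parsed.title == "My Page" (the <title> tag is also removed from the soup)
# parsed.cleaned_text contains "Real content" but not "nav junk", since
# "sticky" is in MINTLIFY_UNWANTED and mintlify cleanup defaults to enabled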


@@ -9,6 +9,7 @@ from danswer.connectors.file.connector import LocalFileConnector
from danswer.connectors.github.connector import GithubConnector
from danswer.connectors.gong.connector import GongConnector
from danswer.connectors.google_drive.connector import GoogleDriveConnector
from danswer.connectors.google_site.connector import GoogleSitesConnector
from danswer.connectors.guru.connector import GuruConnector
from danswer.connectors.hubspot.connector import HubSpotConnector
from danswer.connectors.interfaces import BaseConnector
@@ -54,6 +55,7 @@ def identify_connector_class(
DocumentSource.LINEAR: LinearConnector,
DocumentSource.HUBSPOT: HubSpotConnector,
DocumentSource.GONG: GongConnector,
DocumentSource.GOOGLE_SITES: GoogleSitesConnector,
}
connector_by_source = connector_map.get(source, {})


@@ -1,6 +1,4 @@
-import json
 import os
-import zipfile
 from collections.abc import Generator
 from pathlib import Path
 from typing import Any
@@ -10,6 +8,8 @@ from PyPDF2 import PdfReader
from danswer.configs.app_configs import INDEX_BATCH_SIZE
from danswer.configs.constants import DocumentSource
from danswer.connectors.cross_connector_utils.file_utils import load_files_from_zip
from danswer.connectors.cross_connector_utils.file_utils import read_file
from danswer.connectors.file.utils import check_file_ext_is_valid
from danswer.connectors.file.utils import get_file_ext
from danswer.connectors.interfaces import GenerateDocumentsOutput
@@ -21,17 +21,6 @@ from danswer.utils.logger import setup_logger
 logger = setup_logger()
-_METADATA_FLAG = "#DANSWER_METADATA="
-def _get_files_from_zip(
-    zip_location: str | Path,
-) -> Generator[tuple[str, IO[Any]], None, None]:
-    with zipfile.ZipFile(zip_location, "r") as zip_file:
-        for file_name in zip_file.namelist():
-            with zip_file.open(file_name, "r") as file:
-                yield os.path.basename(file_name), file
 def _open_files_at_location(
     file_path: str | Path,
@@ -39,7 +28,8 @@ def _open_files_at_location(
     extension = get_file_ext(file_path)
     if extension == ".zip":
-        yield from _get_files_from_zip(file_path)
+        for file_info, file in load_files_from_zip(file_path, ignore_dirs=True):
+            yield file_info.filename, file
     elif extension == ".txt" or extension == ".pdf":
         mode = "r"
         if extension == ".pdf":
@@ -56,7 +46,7 @@ def _process_file(file_name: str, file: IO[Any]) -> list[Document]:
         logger.warning(f"Skipping file '{file_name}' with extension '{extension}'")
         return []
-    metadata = {}
+    metadata: dict[str, Any] = {}
     file_content_raw = ""
     if extension == ".pdf":
         pdf_reader = PdfReader(file)
@@ -65,15 +55,7 @@ def _process_file(file_name: str, file: IO[Any]) -> list[Document]:
             page.extract_text() for page in pdf_reader.pages
         )
     else:
-        for ind, line in enumerate(file):
-            if isinstance(line, bytes):
-                line = line.decode("utf-8")
-            line = str(line)
-            if ind == 0 and line.startswith(_METADATA_FLAG):
-                metadata = json.loads(line.replace(_METADATA_FLAG, "", 1).strip())
-            else:
-                file_content_raw += line
+        file_content_raw, metadata = read_file(file)
     return [
         Document(


@@ -0,0 +1,139 @@
import os
import urllib.parse
from typing import Any
from typing import cast
from bs4 import BeautifulSoup
from bs4 import Tag
from danswer.configs.app_configs import INDEX_BATCH_SIZE
from danswer.configs.constants import DocumentSource
from danswer.connectors.cross_connector_utils.file_utils import load_files_from_zip
from danswer.connectors.cross_connector_utils.file_utils import read_file
from danswer.connectors.cross_connector_utils.html_utils import standard_html_cleanup
from danswer.connectors.interfaces import GenerateDocumentsOutput
from danswer.connectors.interfaces import LoadConnector
from danswer.connectors.models import Document
from danswer.connectors.models import Section
def process_link(element: BeautifulSoup | Tag) -> str:
href = cast(str | None, element.get("href"))
if not href:
raise RuntimeError(f"Invalid link - {element}")
# cleanup href
href = urllib.parse.unquote(href)
    # removesuffix, not rstrip: rstrip(".html") strips any trailing [.html]
    # characters (e.g. "math.html" -> "ma") rather than the literal suffix
    href = href.removesuffix(".html").lower()
href = href.replace("_", "")
href = href.replace(" ", "-")
return href
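# e.g. a (hypothetical) exported anchor <a href="My%20Page.html"> resolves to
# "my-page": URL-decoded, ".html" suffix removed, lowercased, spaces dashed:
#   process_link(BeautifulSoup('<a href="My%20Page.html">x</a>', "html.parser").a)
#   -> "my-page"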
def find_google_sites_page_path_from_navbar(
element: BeautifulSoup | Tag, path: str, is_initial: bool
) -> str | None:
ul = cast(Tag | None, element.find("ul"))
if ul:
if not is_initial:
a = cast(Tag, element.find("a"))
new_path = f"{path}/{process_link(a)}"
if a.get("aria-selected") == "true":
return new_path
else:
new_path = ""
for li in ul.find_all("li", recursive=False):
found_link = find_google_sites_page_path_from_navbar(li, new_path, False)
if found_link:
return found_link
else:
a = cast(Tag, element.find("a"))
if a:
href = process_link(a)
if href and a.get("aria-selected") == "true":
return path + "/" + href
return None
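# For a (hypothetical, simplified) exported navbar such as:
#   <nav><ul>
#     <li><a href="home.html">Home</a></li>
#     <li><a href="docs.html">Docs</a>
#       <ul><li><a href="setup.html" aria-selected="true">Setup</a></li></ul>
#     </li>
#   </ul></nav>
# the recursion descends li -> nested ul, appending each level's processed link,
# and returns "/docs/setup" for the aria-selected page.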
class GoogleSitesConnector(LoadConnector):
def __init__(
self,
zip_path: str,
base_url: str,
batch_size: int = INDEX_BATCH_SIZE,
):
self.zip_path = zip_path
self.base_url = base_url
self.batch_size = batch_size
def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
pass
def load_from_state(self) -> GenerateDocumentsOutput:
documents: list[Document] = []
# load the HTML files
files = load_files_from_zip(self.zip_path)
for file_info, file_io in files:
# skip non-published files
if "/PUBLISHED/" not in file_info.filename:
continue
file_path, extension = os.path.splitext(file_info.filename)
if extension != ".html":
continue
file_content, _ = read_file(file_io)
soup = BeautifulSoup(file_content, "html.parser")
# get the link out of the navbar
header = cast(Tag, soup.find("header"))
nav = cast(Tag, header.find("nav"))
path = find_google_sites_page_path_from_navbar(nav, "", True)
if not path:
raise RuntimeError(f"Could not find path for {file_info.filename}")
# cleanup the hidden `Skip to main content` and `Skip to navigation` that
# appears at the top of every page
for div in soup.find_all("div", attrs={"data-is-touch-wrapper": "true"}):
div.extract()
# get the body of the page
parsed_html = standard_html_cleanup(
soup, additional_element_types_to_discard=["header", "nav"]
)
title = parsed_html.title or file_path.split("/")[-1]
documents.append(
Document(
id=f"{DocumentSource.GOOGLE_SITES.value}:{path}",
source=DocumentSource.GOOGLE_SITES,
semantic_identifier=title,
sections=[
Section(
link=self.base_url.rstrip("/") + "/" + path.lstrip("/"),
text=parsed_html.cleaned_text,
)
],
metadata={},
)
)
if len(documents) >= self.batch_size:
yield documents
documents = []
if documents:
yield documents
if __name__ == "__main__":
connector = GoogleSitesConnector(
os.environ["GOOGLE_SITES_ZIP_PATH"],
os.environ.get("GOOGLE_SITES_BASE_URL", ""),
)
for doc_batch in connector.load_from_state():
for doc in doc_batch:
print(doc)


@@ -1,5 +1,4 @@
 import io
-from copy import copy
 from datetime import datetime
 from enum import Enum
 from typing import Any
@@ -18,25 +17,20 @@ from PyPDF2 import PdfReader
 from requests_oauthlib import OAuth2Session  # type:ignore
 from danswer.configs.app_configs import INDEX_BATCH_SIZE
-from danswer.configs.app_configs import WEB_CONNECTOR_IGNORED_CLASSES
-from danswer.configs.app_configs import WEB_CONNECTOR_IGNORED_ELEMENTS
 from danswer.configs.app_configs import WEB_CONNECTOR_OAUTH_CLIENT_ID
 from danswer.configs.app_configs import WEB_CONNECTOR_OAUTH_CLIENT_SECRET
 from danswer.configs.app_configs import WEB_CONNECTOR_OAUTH_TOKEN_URL
 from danswer.configs.constants import DocumentSource
+from danswer.connectors.cross_connector_utils.html_utils import standard_html_cleanup
 from danswer.connectors.interfaces import GenerateDocumentsOutput
 from danswer.connectors.interfaces import LoadConnector
 from danswer.connectors.models import Document
 from danswer.connectors.models import Section
 from danswer.utils.logger import setup_logger
-from danswer.utils.text_processing import format_document_soup
 
 logger = setup_logger()
 
-MINTLIFY_UNWANTED = ["sticky", "hidden"]
-
 
 class WEB_CONNECTOR_VALID_SETTINGS(str, Enum):
     # Given a base site, index everything under that path
     RECURSIVE = "recursive"
@@ -224,36 +218,16 @@ class WebConnector(LoadConnector):
                         if link not in visited_links:
                             to_visit.append(link)
 
-                title_tag = soup.find("title")
-                title = None
-                if title_tag and title_tag.text:
-                    title = title_tag.text
-                    title_tag.extract()
-
-                # Heuristics based cleaning of elements based on css classes
-                unwanted_classes = copy(WEB_CONNECTOR_IGNORED_CLASSES)
-                if self.mintlify_cleanup:
-                    unwanted_classes.extend(MINTLIFY_UNWANTED)
-                for undesired_element in unwanted_classes:
-                    [
-                        tag.extract()
-                        for tag in soup.find_all(
-                            class_=lambda x: x and undesired_element in x.split()
-                        )
-                    ]
-                for undesired_tag in WEB_CONNECTOR_IGNORED_ELEMENTS:
-                    [tag.extract() for tag in soup.find_all(undesired_tag)]
-
-                # 200B is ZeroWidthSpace which we don't care for
-                page_text = format_document_soup(soup).replace("\u200B", "")
+                parsed_html = standard_html_cleanup(soup, self.mintlify_cleanup)
 
                 doc_batch.append(
                     Document(
                         id=current_url,
-                        sections=[Section(link=current_url, text=page_text)],
+                        sections=[
+                            Section(link=current_url, text=parsed_html.cleaned_text)
+                        ],
                         source=DocumentSource.WEB,
-                        semantic_identifier=title or current_url,
+                        semantic_identifier=parsed_html.title or current_url,
                         metadata={},
                     )
                 )

BIN web/public/GoogleSites.png (new binary file, 5.4 KiB, not shown)


@@ -8,7 +8,6 @@ import { fetcher } from "@/lib/fetcher";
 import { HealthCheckBanner } from "@/components/health/healthcheck";
 import { ConnectorIndexingStatus, FileConfig } from "@/lib/types";
 import { linkCredential } from "@/lib/credential";
-import { FileUpload } from "./FileUpload";
 import { useState } from "react";
 import { usePopup } from "@/components/admin/connectors/Popup";
 import { createConnector, runConnector } from "@/lib/connector";
@@ -17,6 +16,7 @@ import { SingleUseConnectorsTable } from "@/components/admin/connectors/table/Si
 import { LoadingAnimation } from "@/components/Loading";
 import { Form, Formik } from "formik";
 import { TextFormField } from "@/components/admin/connectors/Field";
+import { FileUpload } from "@/components/admin/connectors/FileUpload";
const getNameFromPath = (path: string) => {
const pathParts = path.split("/");


@@ -0,0 +1,241 @@
"use client";
import useSWR, { useSWRConfig } from "swr";
import * as Yup from "yup";
import { LoadingAnimation } from "@/components/Loading";
import { GoogleSitesIcon } from "@/components/icons/icons";
import { fetcher } from "@/lib/fetcher";
import { TextFormField } from "@/components/admin/connectors/Field";
import { HealthCheckBanner } from "@/components/health/healthcheck";
import { ConnectorIndexingStatus, GoogleSitesConfig } from "@/lib/types";
import { Form, Formik } from "formik";
import { useState } from "react";
import { usePopup } from "@/components/admin/connectors/Popup";
import { createConnector, runConnector } from "@/lib/connector";
import { linkCredential } from "@/lib/credential";
import { FileUpload } from "@/components/admin/connectors/FileUpload";
import { SingleUseConnectorsTable } from "@/components/admin/connectors/table/SingleUseConnectorsTable";
import { Spinner } from "@/components/Spinner";
export default function GoogleSites() {
const { mutate } = useSWRConfig();
const [selectedFiles, setSelectedFiles] = useState<File[]>([]);
const [filesAreUploading, setFilesAreUploading] = useState<boolean>(false);
const { popup, setPopup } = usePopup();
const {
data: connectorIndexingStatuses,
isLoading: isConnectorIndexingStatusesLoading,
error: isConnectorIndexingStatusesError,
} = useSWR<ConnectorIndexingStatus<any, any>[]>(
"/api/manage/admin/connector/indexing-status",
fetcher
);
const googleSitesIndexingStatuses: ConnectorIndexingStatus<
GoogleSitesConfig,
{}
>[] =
connectorIndexingStatuses?.filter(
(connectorIndexingStatus) =>
connectorIndexingStatus.connector.source === "google_sites"
) ?? [];
return (
<>
{popup}
{filesAreUploading && <Spinner />}
<div className="mx-auto container">
<div className="mb-4">
<HealthCheckBanner />
</div>
<div className="border-solid border-gray-600 border-b pb-2 mb-4 flex">
<GoogleSitesIcon size={32} />
<h1 className="text-3xl font-bold pl-2">Google Sites</h1>
</div>
<p className="text-sm mb-2">
For an in-depth guide on how to set up this connector, check out{" "}
<a
href="https://docs.danswer.dev/connectors/google-sites"
target="_blank"
className="text-blue-500"
>
the documentation
</a>
.
</p>
<div className="mt-4">
<h2 className="font-bold text-xl mb-2">Upload Files</h2>
<div className="mx-auto w-full">
<Formik
initialValues={{
base_url: "",
}}
validationSchema={Yup.object().shape({
base_url: Yup.string().required("Base URL is required"),
})}
onSubmit={async (values, formikHelpers) => {
const uploadCreateAndTriggerConnector = async () => {
const formData = new FormData();
selectedFiles.forEach((file) => {
formData.append("files", file);
});
const response = await fetch(
"/api/manage/admin/connector/file/upload",
{ method: "POST", body: formData }
);
const responseJson = await response.json();
if (!response.ok) {
setPopup({
message: `Unable to upload files - ${responseJson.detail}`,
type: "error",
});
return;
}
const filePaths = responseJson.file_paths as string[];
const [connectorErrorMsg, connector] =
await createConnector<GoogleSitesConfig>({
name: `GoogleSitesConnector-${values.base_url}`,
source: "google_sites",
input_type: "load_state",
connector_specific_config: {
base_url: values.base_url,
zip_path: filePaths[0],
},
refresh_freq: null,
disabled: false,
});
if (connectorErrorMsg || !connector) {
setPopup({
message: `Unable to create connector - ${connectorErrorMsg}`,
type: "error",
});
return;
}
const credentialResponse = await linkCredential(
connector.id,
0,
values.base_url
);
if (!credentialResponse.ok) {
const credentialResponseJson =
await credentialResponse.json();
setPopup({
message: `Unable to link connector to credential - ${credentialResponseJson.detail}`,
type: "error",
});
return;
}
const runConnectorErrorMsg = await runConnector(
connector.id,
[0]
);
if (runConnectorErrorMsg) {
setPopup({
message: `Unable to run connector - ${runConnectorErrorMsg}`,
type: "error",
});
return;
}
mutate("/api/manage/admin/connector/indexing-status");
setSelectedFiles([]);
formikHelpers.resetForm();
setPopup({
type: "success",
message: "Successfully uploaded files!",
});
};
setFilesAreUploading(true);
try {
await uploadCreateAndTriggerConnector();
} catch (e) {
console.log("Failed to index filels: ", e);
}
setFilesAreUploading(false);
}}
>
{({ values, isSubmitting }) => (
<Form className="p-3 border border-gray-600 rounded">
<TextFormField
name="base_url"
label="Base URL:"
placeholder={`Base URL of your Google Site e.g. https://sites.google.com/view/your-site`}
subtext="This will be used to generate links for each page."
autoCompleteDisabled={true}
/>
<p className="mb-1 font-medium">Files:</p>
<FileUpload
selectedFiles={selectedFiles}
setSelectedFiles={setSelectedFiles}
message="Upload a zip file containing the HTML of your Google Site"
/>
<button
className={
"bg-slate-500 hover:bg-slate-700 text-white " +
"font-bold py-2 px-4 rounded focus:outline-none " +
"focus:shadow-outline w-full mx-auto mt-4"
}
type="submit"
disabled={
selectedFiles.length !== 1 ||
!values.base_url ||
isSubmitting
}
>
Upload!
</button>
</Form>
)}
</Formik>
</div>
</div>
<h2 className="font-bold mb-2 mt-6 ml-auto mr-auto">
Existing Google Site Connectors
</h2>
{isConnectorIndexingStatusesLoading ? (
<LoadingAnimation text="Loading" />
) : isConnectorIndexingStatusesError || !connectorIndexingStatuses ? (
<div>Error loading indexing history</div>
) : googleSitesIndexingStatuses.length > 0 ? (
<SingleUseConnectorsTable<GoogleSitesConfig, {}>
connectorIndexingStatuses={googleSitesIndexingStatuses}
specialColumns={[
{
header: "Base URL",
key: "base_url",
getValue: (ccPairStatus) => {
const connectorConfig =
ccPairStatus.connector.connector_specific_config;
return (
<a
className="text-blue-500"
href={connectorConfig.base_url}
>
{connectorConfig.base_url}
</a>
);
},
},
]}
onUpdate={() =>
mutate("/api/manage/admin/connector/indexing-status")
}
/>
) : (
<p className="text-sm">No indexed Google Sites found</p>
)}
</div>
</>
);
}
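For reference, the upload step above posts the zip to the file-upload endpoint and wires the returned path into connector_specific_config. A rough equivalent in Python (the endpoint path, "files" field name, and file_paths response shape come from the page above; the host, port, and file name are assumptions, and auth is omitted):

import requests

with open("google-site-export.zip", "rb") as f:
    resp = requests.post(
        "http://localhost:3000/api/manage/admin/connector/file/upload",
        files={"files": f},
    )
zip_path = resp.json()["file_paths"][0]
# zip_path becomes connector_specific_config.zip_path, alongside base_url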


@@ -22,6 +22,7 @@ import {
HubSpotIcon,
BookmarkIcon,
CPUIcon,
GoogleSitesIcon,
} from "@/components/icons/icons";
import { getAuthDisabledSS, getCurrentUserSS } from "@/lib/userSS";
import { redirect } from "next/navigation";
@@ -173,6 +174,15 @@ export async function Layout({ children }: { children: React.ReactNode }) {
),
link: "/admin/connectors/zulip",
},
{
name: (
<div className="flex">
<GoogleSitesIcon size={16} />
<div className="ml-1">Google Sites</div>
</div>
),
link: "/admin/connectors/google-sites",
},
{
name: (
<div className="flex">


@@ -1,16 +1,17 @@
 // components/FileUpload.tsx
-import { ChangeEvent, FC, useState } from "react";
+import { FC, useState } from "react";
 import React from "react";
 import Dropzone from "react-dropzone";
 
 interface FileUploadProps {
   selectedFiles: File[];
   setSelectedFiles: (files: File[]) => void;
+  message?: string;
 }
 
 export const FileUpload: FC<FileUploadProps> = ({
   selectedFiles,
   setSelectedFiles,
+  message,
 }) => {
   const [dragActive, setDragActive] = useState(false);
@@ -35,7 +36,10 @@ export const FileUpload: FC<FileUploadProps> = ({
         }
       >
         <input {...getInputProps()} />
-        <b>Drag and drop some files here, or click to select files</b>
+        <b>
+          {message ||
+            "Drag and drop some files here, or click to select files"}
+        </b>
       </div>
     </section>
   )}


@@ -1,10 +1,4 @@
-import {
-  Connector,
-  ConnectorIndexingStatus,
-  Credential,
-  DeletionAttemptSnapshot,
-  ValidStatuses,
-} from "@/lib/types";
+import { DeletionAttemptSnapshot, ValidStatuses } from "@/lib/types";
 import { BasicTable } from "@/components/admin/connectors/BasicTable";
 import { Popup } from "@/components/admin/connectors/Popup";
 import { useState } from "react";
@@ -64,17 +58,19 @@ export function SingleUseConnectorsTable<
   const connectorIncludesCredential =
     getCredential !== undefined && onCredentialLink !== undefined;
 
-  const columns = [
-    {
+  const columns = [];
+  if (includeName) {
+    columns.push({
       header: "Name",
       key: "name",
-    },
-    ...(specialColumns ?? []),
-    {
-      header: "Status",
-      key: "status",
-    },
-  ];
+    });
+  }
+  columns.push(...(specialColumns ?? []));
+  columns.push({
+    header: "Status",
+    key: "status",
+  });
 
   if (connectorIncludesCredential) {
     columns.push({
       header: "Credential",


@@ -43,6 +43,7 @@ import gongIcon from "../../../public/Gong.png";
import zulipIcon from "../../../public/Zulip.png";
import linearIcon from "../../../public/Linear.png";
import hubSpotIcon from "../../../public/HubSpot.png";
import googleSitesIcon from "../../../public/GoogleSites.png";
interface IconProps {
size?: number;
@@ -450,3 +451,17 @@ export const HubSpotIcon = ({
</div>
);
};
export const GoogleSitesIcon = ({
size = 16,
className = defaultTailwindCSS,
}: IconProps) => {
return (
<div
style={{ width: `${size}px`, height: `${size}px` }}
className={`w-[${size}px] h-[${size}px] ` + className}
>
<Image src={googleSitesIcon} alt="Logo" width="96" height="96" />
</div>
);
};


@@ -29,6 +29,7 @@ const sources: Source[] = [
{ displayName: "Zulip", internalName: "zulip" },
{ displayName: "Linear", internalName: "linear" },
{ displayName: "HubSpot", internalName: "hubspot" },
{ displayName: "Google Sites", internalName: "google_sites" },
];
interface SourceSelectorProps {


@@ -16,6 +16,7 @@ import {
SlackIcon,
ZulipIcon,
HubSpotIcon,
GoogleSitesIcon,
} from "./icons/icons";
interface SourceMetadata {
@@ -122,6 +123,12 @@ export const getSourceMetadata = (sourceType: ValidSources): SourceMetadata => {
displayName: "HubSpot",
adminPageLink: "/admin/connectors/hubspot",
};
case "google_sites":
return {
icon: GoogleSitesIcon,
displayName: "Google Sites",
adminPageLink: "/admin/connectors/google-sites",
};
default:
throw new Error("Invalid source type");
}


@@ -23,7 +23,8 @@ export type ValidSources =
   | "zulip"
   | "linear"
   | "hubspot"
-  | "file";
+  | "file"
+  | "google_sites";
export type ValidInputTypes = "load_state" | "poll" | "event";
export type ValidStatuses =
| "success"
@@ -114,6 +115,11 @@ export interface NotionConfig {}
export interface HubSpotConfig {}
export interface GoogleSitesConfig {
zip_path: string;
base_url: string;
}
export interface IndexAttemptSnapshot {
status: ValidStatuses | null;
num_docs_indexed: number;