Add Google Sites connector (#532)
@@ -61,6 +61,7 @@ class DocumentSource(str, Enum):
    LINEAR = "linear"
    HUBSPOT = "hubspot"
    GONG = "gong"
    GOOGLE_SITES = "google_sites"


class DocumentIndexType(str, Enum):
@@ -0,0 +1,49 @@
import json
import os
import zipfile
from collections.abc import Generator
from pathlib import Path
from typing import Any
from typing import IO

_METADATA_FLAG = "#DANSWER_METADATA="


def is_macos_resource_fork_file(file_name: str) -> bool:
    return os.path.basename(file_name).startswith("._") and file_name.startswith(
        "__MACOSX"
    )


def load_files_from_zip(
    zip_location: str | Path,
    ignore_macos_resource_fork_files: bool = True,
    ignore_dirs: bool = True,
) -> Generator[tuple[zipfile.ZipInfo, IO[Any]], None, None]:
    with zipfile.ZipFile(zip_location, "r") as zip_file:
        for file_info in zip_file.infolist():
            with zip_file.open(file_info.filename, "r") as file:
                if ignore_dirs and file_info.is_dir():
                    continue

                if ignore_macos_resource_fork_files and is_macos_resource_fork_file(
                    file_info.filename
                ):
                    continue
                yield file_info, file


def read_file(file_reader: IO[Any]) -> tuple[str, dict[str, Any]]:
    metadata = {}
    file_content_raw = ""
    for ind, line in enumerate(file_reader):
        if isinstance(line, bytes):
            line = line.decode("utf-8")
        line = str(line)

        if ind == 0 and line.startswith(_METADATA_FLAG):
            metadata = json.loads(line.replace(_METADATA_FLAG, "", 1).strip())
        else:
            file_content_raw += line

    return file_content_raw, metadata
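For orientation, a minimal usage sketch of the two helpers added above; the zip path is an invented example and not part of this commit:

# Hypothetical usage of load_files_from_zip / read_file; "site_export.zip" is a made-up path.
from danswer.connectors.cross_connector_utils.file_utils import load_files_from_zip
from danswer.connectors.cross_connector_utils.file_utils import read_file

for file_info, file_io in load_files_from_zip("site_export.zip"):
    # file_info is a zipfile.ZipInfo for the entry, file_io is the opened entry
    content, metadata = read_file(file_io)
    print(file_info.filename, len(content), metadata)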
@@ -0,0 +1,57 @@
from copy import copy
from dataclasses import dataclass

from bs4 import BeautifulSoup

from danswer.configs.app_configs import WEB_CONNECTOR_IGNORED_CLASSES
from danswer.configs.app_configs import WEB_CONNECTOR_IGNORED_ELEMENTS
from danswer.utils.text_processing import format_document_soup

MINTLIFY_UNWANTED = ["sticky", "hidden"]


@dataclass
class ParsedHTML:
    title: str | None
    cleaned_text: str


def standard_html_cleanup(
    page_content: str | BeautifulSoup,
    mintlify_cleanup_enabled: bool = True,
    additional_element_types_to_discard: list[str] | None = None,
) -> ParsedHTML:
    if isinstance(page_content, str):
        soup = BeautifulSoup(page_content, "html.parser")
    else:
        soup = page_content

    title_tag = soup.find("title")
    title = None
    if title_tag and title_tag.text:
        title = title_tag.text
        title_tag.extract()

    # Heuristics based cleaning of elements based on css classes
    unwanted_classes = copy(WEB_CONNECTOR_IGNORED_CLASSES)
    if mintlify_cleanup_enabled:
        unwanted_classes.extend(MINTLIFY_UNWANTED)
    for undesired_element in unwanted_classes:
        [
            tag.extract()
            for tag in soup.find_all(
                class_=lambda x: x and undesired_element in x.split()
            )
        ]

    for undesired_tag in WEB_CONNECTOR_IGNORED_ELEMENTS:
        [tag.extract() for tag in soup.find_all(undesired_tag)]

    if additional_element_types_to_discard:
        for undesired_tag in additional_element_types_to_discard:
            [tag.extract() for tag in soup.find_all(undesired_tag)]

    # 200B is ZeroWidthSpace which we don't care for
    page_text = format_document_soup(soup).replace("\u200B", "")

    return ParsedHTML(title=title, cleaned_text=page_text)
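A small illustrative call of standard_html_cleanup; the HTML snippet is invented and the exact cleaned text depends on format_document_soup:

# Illustrative only; the markup below is not from the commit.
from danswer.connectors.cross_connector_utils.html_utils import standard_html_cleanup

html = "<html><head><title>Team Wiki</title></head><body><nav>menu</nav><p>Hello</p></body></html>"
parsed = standard_html_cleanup(html, additional_element_types_to_discard=["nav"])
print(parsed.title)         # "Team Wiki" -- the <title> tag is pulled out before cleaning
print(parsed.cleaned_text)  # remaining body text, with the discarded <nav> element removed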
@@ -9,6 +9,7 @@ from danswer.connectors.file.connector import LocalFileConnector
from danswer.connectors.github.connector import GithubConnector
from danswer.connectors.gong.connector import GongConnector
from danswer.connectors.google_drive.connector import GoogleDriveConnector
from danswer.connectors.google_site.connector import GoogleSitesConnector
from danswer.connectors.guru.connector import GuruConnector
from danswer.connectors.hubspot.connector import HubSpotConnector
from danswer.connectors.interfaces import BaseConnector
@@ -54,6 +55,7 @@ def identify_connector_class(
        DocumentSource.LINEAR: LinearConnector,
        DocumentSource.HUBSPOT: HubSpotConnector,
        DocumentSource.GONG: GongConnector,
        DocumentSource.GOOGLE_SITES: GoogleSitesConnector,
    }
    connector_by_source = connector_map.get(source, {})
@@ -1,6 +1,4 @@
import json
import os
import zipfile
from collections.abc import Generator
from pathlib import Path
from typing import Any
@@ -10,6 +8,8 @@ from PyPDF2 import PdfReader

from danswer.configs.app_configs import INDEX_BATCH_SIZE
from danswer.configs.constants import DocumentSource
from danswer.connectors.cross_connector_utils.file_utils import load_files_from_zip
from danswer.connectors.cross_connector_utils.file_utils import read_file
from danswer.connectors.file.utils import check_file_ext_is_valid
from danswer.connectors.file.utils import get_file_ext
from danswer.connectors.interfaces import GenerateDocumentsOutput
@@ -21,17 +21,6 @@ from danswer.utils.logger import setup_logger

logger = setup_logger()

_METADATA_FLAG = "#DANSWER_METADATA="


def _get_files_from_zip(
    zip_location: str | Path,
) -> Generator[tuple[str, IO[Any]], None, None]:
    with zipfile.ZipFile(zip_location, "r") as zip_file:
        for file_name in zip_file.namelist():
            with zip_file.open(file_name, "r") as file:
                yield os.path.basename(file_name), file


def _open_files_at_location(
    file_path: str | Path,
@@ -39,7 +28,8 @@ def _open_files_at_location(
    extension = get_file_ext(file_path)

    if extension == ".zip":
        yield from _get_files_from_zip(file_path)
        for file_info, file in load_files_from_zip(file_path, ignore_dirs=True):
            yield file_info.filename, file
    elif extension == ".txt" or extension == ".pdf":
        mode = "r"
        if extension == ".pdf":
@@ -56,7 +46,7 @@ def _process_file(file_name: str, file: IO[Any]) -> list[Document]:
        logger.warning(f"Skipping file '{file_name}' with extension '{extension}'")
        return []

    metadata = {}
    metadata: dict[str, Any] = {}
    file_content_raw = ""
    if extension == ".pdf":
        pdf_reader = PdfReader(file)
@@ -65,15 +55,7 @@ def _process_file(file_name: str, file: IO[Any]) -> list[Document]:
            page.extract_text() for page in pdf_reader.pages
        )
    else:
        for ind, line in enumerate(file):
            if isinstance(line, bytes):
                line = line.decode("utf-8")
            line = str(line)

            if ind == 0 and line.startswith(_METADATA_FLAG):
                metadata = json.loads(line.replace(_METADATA_FLAG, "", 1).strip())
            else:
                file_content_raw += line
        file_content_raw, metadata = read_file(file)

    return [
        Document(
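For reference, the plain-text format the refactored branch above still supports (now parsed by read_file) is a file whose optional first line carries JSON metadata; the contents and key below are invented examples:

# An example .txt payload; the first line is parsed because it starts with
# "#DANSWER_METADATA=", everything after it becomes the document text.
example_txt = (
    '#DANSWER_METADATA={"tag": "example"}\n'
    "The actual document body starts here.\n"
)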
backend/danswer/connectors/google_site/connector.py (new file, 139 lines)
@@ -0,0 +1,139 @@
import os
import urllib.parse
from typing import Any
from typing import cast

from bs4 import BeautifulSoup
from bs4 import Tag

from danswer.configs.app_configs import INDEX_BATCH_SIZE
from danswer.configs.constants import DocumentSource
from danswer.connectors.cross_connector_utils.file_utils import load_files_from_zip
from danswer.connectors.cross_connector_utils.file_utils import read_file
from danswer.connectors.cross_connector_utils.html_utils import standard_html_cleanup
from danswer.connectors.interfaces import GenerateDocumentsOutput
from danswer.connectors.interfaces import LoadConnector
from danswer.connectors.models import Document
from danswer.connectors.models import Section


def process_link(element: BeautifulSoup | Tag) -> str:
    href = cast(str | None, element.get("href"))
    if not href:
        raise RuntimeError(f"Invalid link - {element}")

    # cleanup href
    href = urllib.parse.unquote(href)
    href = href.rstrip(".html").lower()
    href = href.replace("_", "")
    href = href.replace(" ", "-")

    return href


def find_google_sites_page_path_from_navbar(
    element: BeautifulSoup | Tag, path: str, is_initial: bool
) -> str | None:
    ul = cast(Tag | None, element.find("ul"))
    if ul:
        if not is_initial:
            a = cast(Tag, element.find("a"))
            new_path = f"{path}/{process_link(a)}"
            if a.get("aria-selected") == "true":
                return new_path
        else:
            new_path = ""
        for li in ul.find_all("li", recursive=False):
            found_link = find_google_sites_page_path_from_navbar(li, new_path, False)
            if found_link:
                return found_link
    else:
        a = cast(Tag, element.find("a"))
        if a:
            href = process_link(a)
            if href and a.get("aria-selected") == "true":
                return path + "/" + href

    return None


class GoogleSitesConnector(LoadConnector):
    def __init__(
        self,
        zip_path: str,
        base_url: str,
        batch_size: int = INDEX_BATCH_SIZE,
    ):
        self.zip_path = zip_path
        self.base_url = base_url
        self.batch_size = batch_size

    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
        pass

    def load_from_state(self) -> GenerateDocumentsOutput:
        documents: list[Document] = []

        # load the HTML files
        files = load_files_from_zip(self.zip_path)
        for file_info, file_io in files:
            # skip non-published files
            if "/PUBLISHED/" not in file_info.filename:
                continue

            file_path, extension = os.path.splitext(file_info.filename)
            if extension != ".html":
                continue

            file_content, _ = read_file(file_io)
            soup = BeautifulSoup(file_content, "html.parser")

            # get the link out of the navbar
            header = cast(Tag, soup.find("header"))
            nav = cast(Tag, header.find("nav"))
            path = find_google_sites_page_path_from_navbar(nav, "", True)
            if not path:
                raise RuntimeError(f"Could not find path for {file_info.filename}")

            # cleanup the hidden `Skip to main content` and `Skip to navigation` that
            # appears at the top of every page
            for div in soup.find_all("div", attrs={"data-is-touch-wrapper": "true"}):
                div.extract()

            # get the body of the page
            parsed_html = standard_html_cleanup(
                soup, additional_element_types_to_discard=["header", "nav"]
            )

            title = parsed_html.title or file_path.split("/")[-1]
            documents.append(
                Document(
                    id=f"{DocumentSource.GOOGLE_SITES.value}:{path}",
                    source=DocumentSource.GOOGLE_SITES,
                    semantic_identifier=title,
                    sections=[
                        Section(
                            link=self.base_url.rstrip("/") + "/" + path.lstrip("/"),
                            text=parsed_html.cleaned_text,
                        )
                    ],
                    metadata={},
                )
            )

            if len(documents) >= self.batch_size:
                yield documents
                documents = []

        if documents:
            yield documents


if __name__ == "__main__":
    connector = GoogleSitesConnector(
        os.environ["GOOGLE_SITES_ZIP_PATH"],
        os.environ.get("GOOGLE_SITES_BASE_URL", ""),
    )
    for doc_batch in connector.load_from_state():
        for doc in doc_batch:
            print(doc)
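To make the navbar traversal in find_google_sites_page_path_from_navbar concrete, here is a toy check; the HTML fragment is invented and much simpler than a real Google Sites export:

from bs4 import BeautifulSoup

from danswer.connectors.google_site.connector import find_google_sites_page_path_from_navbar

# The currently open page is marked with aria-selected="true", as in exported pages.
navbar_html = """
<nav><ul>
  <li><a href="Home.html">Home</a></li>
  <li><a href="Docs.html">Docs</a>
    <ul><li><a href="Set up.html" aria-selected="true">Setup</a></li></ul>
  </li>
</ul></nav>
"""
nav = BeautifulSoup(navbar_html, "html.parser")
# Expected to print "/docs/set-up": process_link lowercases, trims the ".html"
# suffix, drops underscores, and turns spaces into dashes.
print(find_google_sites_page_path_from_navbar(nav, "", True))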
@@ -1,5 +1,4 @@
import io
from copy import copy
from datetime import datetime
from enum import Enum
from typing import Any
@@ -18,25 +17,20 @@ from PyPDF2 import PdfReader
from requests_oauthlib import OAuth2Session  # type:ignore

from danswer.configs.app_configs import INDEX_BATCH_SIZE
from danswer.configs.app_configs import WEB_CONNECTOR_IGNORED_CLASSES
from danswer.configs.app_configs import WEB_CONNECTOR_IGNORED_ELEMENTS
from danswer.configs.app_configs import WEB_CONNECTOR_OAUTH_CLIENT_ID
from danswer.configs.app_configs import WEB_CONNECTOR_OAUTH_CLIENT_SECRET
from danswer.configs.app_configs import WEB_CONNECTOR_OAUTH_TOKEN_URL
from danswer.configs.constants import DocumentSource
from danswer.connectors.cross_connector_utils.html_utils import standard_html_cleanup
from danswer.connectors.interfaces import GenerateDocumentsOutput
from danswer.connectors.interfaces import LoadConnector
from danswer.connectors.models import Document
from danswer.connectors.models import Section
from danswer.utils.logger import setup_logger
from danswer.utils.text_processing import format_document_soup

logger = setup_logger()


MINTLIFY_UNWANTED = ["sticky", "hidden"]


class WEB_CONNECTOR_VALID_SETTINGS(str, Enum):
    # Given a base site, index everything under that path
    RECURSIVE = "recursive"
@@ -224,36 +218,16 @@ class WebConnector(LoadConnector):
                if link not in visited_links:
                    to_visit.append(link)

            title_tag = soup.find("title")
            title = None
            if title_tag and title_tag.text:
                title = title_tag.text
                title_tag.extract()

            # Heuristics based cleaning of elements based on css classes
            unwanted_classes = copy(WEB_CONNECTOR_IGNORED_CLASSES)
            if self.mintlify_cleanup:
                unwanted_classes.extend(MINTLIFY_UNWANTED)
            for undesired_element in unwanted_classes:
                [
                    tag.extract()
                    for tag in soup.find_all(
                        class_=lambda x: x and undesired_element in x.split()
                    )
                ]

            for undesired_tag in WEB_CONNECTOR_IGNORED_ELEMENTS:
                [tag.extract() for tag in soup.find_all(undesired_tag)]

            # 200B is ZeroWidthSpace which we don't care for
            page_text = format_document_soup(soup).replace("\u200B", "")
            parsed_html = standard_html_cleanup(soup, self.mintlify_cleanup)

            doc_batch.append(
                Document(
                    id=current_url,
                    sections=[Section(link=current_url, text=page_text)],
                    sections=[
                        Section(link=current_url, text=parsed_html.cleaned_text)
                    ],
                    source=DocumentSource.WEB,
                    semantic_identifier=title or current_url,
                    semantic_identifier=parsed_html.title or current_url,
                    metadata={},
                )
            )
BIN  web/public/GoogleSites.png (new file, 5.4 KiB; binary file not shown)
@@ -8,7 +8,6 @@ import { fetcher } from "@/lib/fetcher";
import { HealthCheckBanner } from "@/components/health/healthcheck";
import { ConnectorIndexingStatus, FileConfig } from "@/lib/types";
import { linkCredential } from "@/lib/credential";
import { FileUpload } from "./FileUpload";
import { useState } from "react";
import { usePopup } from "@/components/admin/connectors/Popup";
import { createConnector, runConnector } from "@/lib/connector";
@@ -17,6 +16,7 @@ import { SingleUseConnectorsTable } from "@/components/admin/connectors/table/Si
import { LoadingAnimation } from "@/components/Loading";
import { Form, Formik } from "formik";
import { TextFormField } from "@/components/admin/connectors/Field";
import { FileUpload } from "@/components/admin/connectors/FileUpload";

const getNameFromPath = (path: string) => {
  const pathParts = path.split("/");
web/src/app/admin/connectors/google-sites/page.tsx (new file, 241 lines)
@@ -0,0 +1,241 @@
"use client";

import useSWR, { useSWRConfig } from "swr";
import * as Yup from "yup";

import { LoadingAnimation } from "@/components/Loading";
import { GoogleSitesIcon } from "@/components/icons/icons";
import { fetcher } from "@/lib/fetcher";
import { TextFormField } from "@/components/admin/connectors/Field";
import { HealthCheckBanner } from "@/components/health/healthcheck";
import { ConnectorIndexingStatus, GoogleSitesConfig } from "@/lib/types";
import { Form, Formik } from "formik";
import { useState } from "react";
import { usePopup } from "@/components/admin/connectors/Popup";
import { createConnector, runConnector } from "@/lib/connector";
import { linkCredential } from "@/lib/credential";
import { FileUpload } from "@/components/admin/connectors/FileUpload";
import { SingleUseConnectorsTable } from "@/components/admin/connectors/table/SingleUseConnectorsTable";
import { Spinner } from "@/components/Spinner";

export default function GoogleSites() {
  const { mutate } = useSWRConfig();
  const [selectedFiles, setSelectedFiles] = useState<File[]>([]);
  const [filesAreUploading, setFilesAreUploading] = useState<boolean>(false);
  const { popup, setPopup } = usePopup();

  const {
    data: connectorIndexingStatuses,
    isLoading: isConnectorIndexingStatusesLoading,
    error: isConnectorIndexingStatusesError,
  } = useSWR<ConnectorIndexingStatus<any, any>[]>(
    "/api/manage/admin/connector/indexing-status",
    fetcher
  );

  const googleSitesIndexingStatuses: ConnectorIndexingStatus<
    GoogleSitesConfig,
    {}
  >[] =
    connectorIndexingStatuses?.filter(
      (connectorIndexingStatus) =>
        connectorIndexingStatus.connector.source === "google_sites"
    ) ?? [];

  return (
    <>
      {popup}
      {filesAreUploading && <Spinner />}
      <div className="mx-auto container">
        <div className="mb-4">
          <HealthCheckBanner />
        </div>
        <div className="border-solid border-gray-600 border-b pb-2 mb-4 flex">
          <GoogleSitesIcon size={32} />
          <h1 className="text-3xl font-bold pl-2">Google Sites</h1>
        </div>
        <p className="text-sm mb-2">
          For an in-depth guide on how to setup this connector, check out{" "}
          <a
            href="https://docs.danswer.dev/connectors/google-sites"
            target="_blank"
            className="text-blue-500"
          >
            the documentation
          </a>
          .
        </p>

        <div className="mt-4">
          <h2 className="font-bold text-xl mb-2">Upload Files</h2>
          <div className="mx-auto w-full">
            <Formik
              initialValues={{
                base_url: "",
              }}
              validationSchema={Yup.object().shape({
                base_url: Yup.string().required("Base URL is required"),
              })}
              onSubmit={async (values, formikHelpers) => {
                const uploadCreateAndTriggerConnector = async () => {
                  const formData = new FormData();

                  selectedFiles.forEach((file) => {
                    formData.append("files", file);
                  });

                  const response = await fetch(
                    "/api/manage/admin/connector/file/upload",
                    { method: "POST", body: formData }
                  );
                  const responseJson = await response.json();
                  if (!response.ok) {
                    setPopup({
                      message: `Unable to upload files - ${responseJson.detail}`,
                      type: "error",
                    });
                    return;
                  }

                  const filePaths = responseJson.file_paths as string[];
                  const [connectorErrorMsg, connector] =
                    await createConnector<GoogleSitesConfig>({
                      name: `GoogleSitesConnector-${values.base_url}`,
                      source: "google_sites",
                      input_type: "load_state",
                      connector_specific_config: {
                        base_url: values.base_url,
                        zip_path: filePaths[0],
                      },
                      refresh_freq: null,
                      disabled: false,
                    });
                  if (connectorErrorMsg || !connector) {
                    setPopup({
                      message: `Unable to create connector - ${connectorErrorMsg}`,
                      type: "error",
                    });
                    return;
                  }

                  const credentialResponse = await linkCredential(
                    connector.id,
                    0,
                    values.base_url
                  );
                  if (!credentialResponse.ok) {
                    const credentialResponseJson =
                      await credentialResponse.json();
                    setPopup({
                      message: `Unable to link connector to credential - ${credentialResponseJson.detail}`,
                      type: "error",
                    });
                    return;
                  }

                  const runConnectorErrorMsg = await runConnector(
                    connector.id,
                    [0]
                  );
                  if (runConnectorErrorMsg) {
                    setPopup({
                      message: `Unable to run connector - ${runConnectorErrorMsg}`,
                      type: "error",
                    });
                    return;
                  }

                  mutate("/api/manage/admin/connector/indexing-status");
                  setSelectedFiles([]);
                  formikHelpers.resetForm();
                  setPopup({
                    type: "success",
                    message: "Successfully uploaded files!",
                  });
                };

                setFilesAreUploading(true);
                try {
                  await uploadCreateAndTriggerConnector();
                } catch (e) {
                  console.log("Failed to index filels: ", e);
                }
                setFilesAreUploading(false);
              }}
            >
              {({ values, isSubmitting }) => (
                <Form className="p-3 border border-gray-600 rounded">
                  <TextFormField
                    name="base_url"
                    label="Base URL:"
                    placeholder={`Base URL of your Google Site e.g. https://sites.google.com/view/your-site`}
                    subtext="This will be used to generate links for each page."
                    autoCompleteDisabled={true}
                  />

                  <p className="mb-1 font-medium">Files:</p>
                  <FileUpload
                    selectedFiles={selectedFiles}
                    setSelectedFiles={setSelectedFiles}
                    message="Upload a zip file containing the HTML of your Google Site"
                  />
                  <button
                    className={
                      "bg-slate-500 hover:bg-slate-700 text-white " +
                      "font-bold py-2 px-4 rounded focus:outline-none " +
                      "focus:shadow-outline w-full mx-auto mt-4"
                    }
                    type="submit"
                    disabled={
                      selectedFiles.length !== 1 ||
                      !values.base_url ||
                      isSubmitting
                    }
                  >
                    Upload!
                  </button>
                </Form>
              )}
            </Formik>
          </div>
        </div>

        <h2 className="font-bold mb-2 mt-6 ml-auto mr-auto">
          Existing Google Site Connectors
        </h2>
        {isConnectorIndexingStatusesLoading ? (
          <LoadingAnimation text="Loading" />
        ) : isConnectorIndexingStatusesError || !connectorIndexingStatuses ? (
          <div>Error loading indexing history</div>
        ) : googleSitesIndexingStatuses.length > 0 ? (
          <SingleUseConnectorsTable<GoogleSitesConfig, {}>
            connectorIndexingStatuses={googleSitesIndexingStatuses}
            specialColumns={[
              {
                header: "Base URL",
                key: "base_url",
                getValue: (ccPairStatus) => {
                  const connectorConfig =
                    ccPairStatus.connector.connector_specific_config;
                  return (
                    <a
                      className="text-blue-500"
                      href={connectorConfig.base_url}
                    >
                      {connectorConfig.base_url}
                    </a>
                  );
                },
              },
            ]}
            onUpdate={() =>
              mutate("/api/manage/admin/connector/indexing-status")
            }
          />
        ) : (
          <p className="text-sm">No indexed Google Sites found</p>
        )}
      </div>
    </>
  );
}
@@ -22,6 +22,7 @@ import {
  HubSpotIcon,
  BookmarkIcon,
  CPUIcon,
  GoogleSitesIcon,
} from "@/components/icons/icons";
import { getAuthDisabledSS, getCurrentUserSS } from "@/lib/userSS";
import { redirect } from "next/navigation";
@@ -173,6 +174,15 @@ export async function Layout({ children }: { children: React.ReactNode }) {
            ),
            link: "/admin/connectors/zulip",
          },
          {
            name: (
              <div className="flex">
                <GoogleSitesIcon size={16} />
                <div className="ml-1">Google Sites</div>
              </div>
            ),
            link: "/admin/connectors/google-sites",
          },
          {
            name: (
              <div className="flex">
@@ -1,16 +1,17 @@
// components/FileUpload.tsx
import { ChangeEvent, FC, useState } from "react";
import { FC, useState } from "react";
import React from "react";
import Dropzone from "react-dropzone";

interface FileUploadProps {
  selectedFiles: File[];
  setSelectedFiles: (files: File[]) => void;
  message?: string;
}

export const FileUpload: FC<FileUploadProps> = ({
  selectedFiles,
  setSelectedFiles,
  message,
}) => {
  const [dragActive, setDragActive] = useState(false);
@@ -35,7 +36,10 @@ export const FileUpload: FC<FileUploadProps> = ({
          }
        >
          <input {...getInputProps()} />
          <b>Drag and drop some files here, or click to select files</b>
          <b>
            {message ||
              "Drag and drop some files here, or click to select files"}
          </b>
        </div>
      </section>
    )}
@@ -1,10 +1,4 @@
import {
  Connector,
  ConnectorIndexingStatus,
  Credential,
  DeletionAttemptSnapshot,
  ValidStatuses,
} from "@/lib/types";
import { DeletionAttemptSnapshot, ValidStatuses } from "@/lib/types";
import { BasicTable } from "@/components/admin/connectors/BasicTable";
import { Popup } from "@/components/admin/connectors/Popup";
import { useState } from "react";
@@ -64,17 +58,19 @@ export function SingleUseConnectorsTable<
  const connectorIncludesCredential =
    getCredential !== undefined && onCredentialLink !== undefined;

  const columns = [
    {
  const columns = [];

  if (includeName) {
    columns.push({
      header: "Name",
      key: "name",
    },
    ...(specialColumns ?? []),
    {
      header: "Status",
      key: "status",
    },
  ];
    });
  }
  columns.push(...(specialColumns ?? []));
  columns.push({
    header: "Status",
    key: "status",
  });
  if (connectorIncludesCredential) {
    columns.push({
      header: "Credential",
@@ -43,6 +43,7 @@ import gongIcon from "../../../public/Gong.png";
import zulipIcon from "../../../public/Zulip.png";
import linearIcon from "../../../public/Linear.png";
import hubSpotIcon from "../../../public/HubSpot.png";
import googleSitesIcon from "../../../public/GoogleSites.png";

interface IconProps {
  size?: number;
@@ -450,3 +451,17 @@ export const HubSpotIcon = ({
    </div>
  );
};

export const GoogleSitesIcon = ({
  size = 16,
  className = defaultTailwindCSS,
}: IconProps) => {
  return (
    <div
      style={{ width: `${size}px`, height: `${size}px` }}
      className={`w-[${size}px] h-[${size}px] ` + className}
    >
      <Image src={googleSitesIcon} alt="Logo" width="96" height="96" />
    </div>
  );
};
@@ -29,6 +29,7 @@ const sources: Source[] = [
  { displayName: "Zulip", internalName: "zulip" },
  { displayName: "Linear", internalName: "linear" },
  { displayName: "HubSpot", internalName: "hubspot" },
  { displayName: "Google Sites", internalName: "google_sites" },
];

interface SourceSelectorProps {
@@ -16,6 +16,7 @@ import {
  SlackIcon,
  ZulipIcon,
  HubSpotIcon,
  GoogleSitesIcon,
} from "./icons/icons";

interface SourceMetadata {
@@ -122,6 +123,12 @@ export const getSourceMetadata = (sourceType: ValidSources): SourceMetadata => {
        displayName: "HubSpot",
        adminPageLink: "/admin/connectors/hubspot",
      };
    case "google_sites":
      return {
        icon: GoogleSitesIcon,
        displayName: "Google Sites",
        adminPageLink: "/admin/connectors/google-sites",
      };
    default:
      throw new Error("Invalid source type");
  }
@@ -23,7 +23,8 @@ export type ValidSources =
  | "zulip"
  | "linear"
  | "hubspot"
  | "file";
  | "file"
  | "google_sites";
export type ValidInputTypes = "load_state" | "poll" | "event";
export type ValidStatuses =
  | "success"
@@ -114,6 +115,11 @@ export interface NotionConfig {}

export interface HubSpotConfig {}

export interface GoogleSitesConfig {
  zip_path: string;
  base_url: string;
}

export interface IndexAttemptSnapshot {
  status: ValidStatuses | null;
  num_docs_indexed: number;
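For reference, the connector_specific_config payload that the admin page above submits matches this interface; a sketch with placeholder values:

# Placeholder values only; zip_path is taken from the file_paths returned by
# the /api/manage/admin/connector/file/upload endpoint used in the admin page.
google_sites_connector_specific_config = {
    "base_url": "https://sites.google.com/view/your-site",
    "zip_path": "<path returned by the file upload endpoint>",
}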