Add Google Sites connector (#532)
Repository: https://github.com/danswer-ai/danswer.git
@@ -61,6 +61,7 @@ class DocumentSource(str, Enum):
     LINEAR = "linear"
     HUBSPOT = "hubspot"
     GONG = "gong"
+    GOOGLE_SITES = "google_sites"


 class DocumentIndexType(str, Enum):
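Since DocumentSource subclasses both str and Enum, each member is interchangeable with its raw string value, which is what lets the frontend send "google_sites" and the backend match it. A standalone sketch of this behavior (editor's illustration, not part of the diff):

    from enum import Enum

    class DocumentSource(str, Enum):
        GOOGLE_SITES = "google_sites"

    # Members compare equal to plain strings, so API payloads like
    # {"source": "google_sites"} round-trip cleanly:
    assert DocumentSource.GOOGLE_SITES == "google_sites"
    assert DocumentSource("google_sites") is DocumentSource.GOOGLE_SITES
    # .value is used later in this commit to build IDs like "google_sites:<path>"
    assert DocumentSource.GOOGLE_SITES.value == "google_sites"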
backend/danswer/connectors/cross_connector_utils/file_utils.py (new file)
@@ -0,0 +1,49 @@
import json
import os
import zipfile
from collections.abc import Generator
from pathlib import Path
from typing import Any
from typing import IO

_METADATA_FLAG = "#DANSWER_METADATA="


def is_macos_resource_fork_file(file_name: str) -> bool:
    return os.path.basename(file_name).startswith("._") and file_name.startswith(
        "__MACOSX"
    )


def load_files_from_zip(
    zip_location: str | Path,
    ignore_macos_resource_fork_files: bool = True,
    ignore_dirs: bool = True,
) -> Generator[tuple[zipfile.ZipInfo, IO[Any]], None, None]:
    with zipfile.ZipFile(zip_location, "r") as zip_file:
        for file_info in zip_file.infolist():
            with zip_file.open(file_info.filename, "r") as file:
                if ignore_dirs and file_info.is_dir():
                    continue

                if ignore_macos_resource_fork_files and is_macos_resource_fork_file(
                    file_info.filename
                ):
                    continue
                yield file_info, file


def read_file(file_reader: IO[Any]) -> tuple[str, dict[str, Any]]:
    metadata = {}
    file_content_raw = ""
    for ind, line in enumerate(file_reader):
        if isinstance(line, bytes):
            line = line.decode("utf-8")
        line = str(line)

        if ind == 0 and line.startswith(_METADATA_FLAG):
            metadata = json.loads(line.replace(_METADATA_FLAG, "", 1).strip())
        else:
            file_content_raw += line

    return file_content_raw, metadata
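The two helpers above compose: load_files_from_zip yields (ZipInfo, file handle) pairs with directory entries and macOS "__MACOSX/._*" resource forks filtered out by default, and read_file turns a handle into text plus optional metadata. A minimal usage sketch (editor's illustration, not part of the diff; the zip name is hypothetical):

    from danswer.connectors.cross_connector_utils.file_utils import (
        load_files_from_zip,
        read_file,
    )

    # Walk every real file in the archive; dirs and resource forks are
    # skipped by the default flags.
    for file_info, file_io in load_files_from_zip("site_export.zip"):
        content, metadata = read_file(file_io)
        print(file_info.filename, len(content), metadata)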
backend/danswer/connectors/cross_connector_utils/html_utils.py (new file)
@@ -0,0 +1,57 @@
from copy import copy
from dataclasses import dataclass

from bs4 import BeautifulSoup

from danswer.configs.app_configs import WEB_CONNECTOR_IGNORED_CLASSES
from danswer.configs.app_configs import WEB_CONNECTOR_IGNORED_ELEMENTS
from danswer.utils.text_processing import format_document_soup

MINTLIFY_UNWANTED = ["sticky", "hidden"]


@dataclass
class ParsedHTML:
    title: str | None
    cleaned_text: str


def standard_html_cleanup(
    page_content: str | BeautifulSoup,
    mintlify_cleanup_enabled: bool = True,
    additional_element_types_to_discard: list[str] | None = None,
) -> ParsedHTML:
    if isinstance(page_content, str):
        soup = BeautifulSoup(page_content, "html.parser")
    else:
        soup = page_content

    title_tag = soup.find("title")
    title = None
    if title_tag and title_tag.text:
        title = title_tag.text
        title_tag.extract()

    # Heuristics based cleaning of elements based on css classes
    unwanted_classes = copy(WEB_CONNECTOR_IGNORED_CLASSES)
    if mintlify_cleanup_enabled:
        unwanted_classes.extend(MINTLIFY_UNWANTED)
    for undesired_element in unwanted_classes:
        [
            tag.extract()
            for tag in soup.find_all(
                class_=lambda x: x and undesired_element in x.split()
            )
        ]

    for undesired_tag in WEB_CONNECTOR_IGNORED_ELEMENTS:
        [tag.extract() for tag in soup.find_all(undesired_tag)]

    if additional_element_types_to_discard:
        for undesired_tag in additional_element_types_to_discard:
            [tag.extract() for tag in soup.find_all(undesired_tag)]

    # 200B is ZeroWidthSpace which we don't care for
    page_text = format_document_soup(soup).replace("\u200B", "")

    return ParsedHTML(title=title, cleaned_text=page_text)
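standard_html_cleanup accepts either raw HTML or an existing BeautifulSoup object, pulls the <title> out so it is not duplicated in the body text, strips configured classes/tags, and returns both pieces as a ParsedHTML. A sketch (editor's illustration, not part of the diff; exact output depends on the WEB_CONNECTOR_IGNORED_* app configs):

    from danswer.connectors.cross_connector_utils.html_utils import standard_html_cleanup

    html = """
    <html><head><title>FAQ</title></head>
    <body><nav class="sticky">Menu</nav><p>How do I reset my password?</p></body>
    </html>
    """
    parsed = standard_html_cleanup(html, additional_element_types_to_discard=["nav"])
    print(parsed.title)         # "FAQ"
    print(parsed.cleaned_text)  # body text; the "sticky" nav is discarded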
@@ -9,6 +9,7 @@ from danswer.connectors.file.connector import LocalFileConnector
 from danswer.connectors.github.connector import GithubConnector
 from danswer.connectors.gong.connector import GongConnector
 from danswer.connectors.google_drive.connector import GoogleDriveConnector
+from danswer.connectors.google_site.connector import GoogleSitesConnector
 from danswer.connectors.guru.connector import GuruConnector
 from danswer.connectors.hubspot.connector import HubSpotConnector
 from danswer.connectors.interfaces import BaseConnector
@@ -54,6 +55,7 @@ def identify_connector_class(
         DocumentSource.LINEAR: LinearConnector,
         DocumentSource.HUBSPOT: HubSpotConnector,
         DocumentSource.GONG: GongConnector,
+        DocumentSource.GOOGLE_SITES: GoogleSitesConnector,
     }
     connector_by_source = connector_map.get(source, {})
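For context, identify_connector_class resolves a DocumentSource to a connector class via this map, and the class is then instantiated from the connector's stored config. A condensed sketch of that dispatch (editor's illustration; assumes connector_specific_config keys match the connector's __init__ kwargs, which holds for GoogleSitesConnector as defined below; paths and URLs are hypothetical):

    from danswer.configs.constants import DocumentSource
    from danswer.connectors.google_site.connector import GoogleSitesConnector

    connector_map = {DocumentSource.GOOGLE_SITES: GoogleSitesConnector}
    connector = connector_map[DocumentSource.GOOGLE_SITES](
        zip_path="/tmp/danswer/site_export.zip",  # hypothetical upload location
        base_url="https://sites.google.com/view/your-site",
    )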
@@ -1,6 +1,4 @@
-import json
 import os
-import zipfile
 from collections.abc import Generator
 from pathlib import Path
 from typing import Any
@@ -10,6 +8,8 @@ from PyPDF2 import PdfReader

 from danswer.configs.app_configs import INDEX_BATCH_SIZE
 from danswer.configs.constants import DocumentSource
+from danswer.connectors.cross_connector_utils.file_utils import load_files_from_zip
+from danswer.connectors.cross_connector_utils.file_utils import read_file
 from danswer.connectors.file.utils import check_file_ext_is_valid
 from danswer.connectors.file.utils import get_file_ext
 from danswer.connectors.interfaces import GenerateDocumentsOutput
@@ -21,17 +21,6 @@ from danswer.utils.logger import setup_logger

 logger = setup_logger()

-_METADATA_FLAG = "#DANSWER_METADATA="
-
-
-def _get_files_from_zip(
-    zip_location: str | Path,
-) -> Generator[tuple[str, IO[Any]], None, None]:
-    with zipfile.ZipFile(zip_location, "r") as zip_file:
-        for file_name in zip_file.namelist():
-            with zip_file.open(file_name, "r") as file:
-                yield os.path.basename(file_name), file
-
-
 def _open_files_at_location(
     file_path: str | Path,
@@ -39,7 +28,8 @@ def _open_files_at_location(
     extension = get_file_ext(file_path)

     if extension == ".zip":
-        yield from _get_files_from_zip(file_path)
+        for file_info, file in load_files_from_zip(file_path, ignore_dirs=True):
+            yield file_info.filename, file
     elif extension == ".txt" or extension == ".pdf":
         mode = "r"
         if extension == ".pdf":
@@ -56,7 +46,7 @@ def _process_file(file_name: str, file: IO[Any]) -> list[Document]:
         logger.warning(f"Skipping file '{file_name}' with extension '{extension}'")
         return []

-    metadata = {}
+    metadata: dict[str, Any] = {}
     file_content_raw = ""
     if extension == ".pdf":
         pdf_reader = PdfReader(file)
@@ -65,15 +55,7 @@ def _process_file(file_name: str, file: IO[Any]) -> list[Document]:
             page.extract_text() for page in pdf_reader.pages
         )
     else:
-        for ind, line in enumerate(file):
-            if isinstance(line, bytes):
-                line = line.decode("utf-8")
-            line = str(line)
-
-            if ind == 0 and line.startswith(_METADATA_FLAG):
-                metadata = json.loads(line.replace(_METADATA_FLAG, "", 1).strip())
-            else:
-                file_content_raw += line
+        file_content_raw, metadata = read_file(file)

     return [
         Document(
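read_file also takes over the #DANSWER_METADATA= convention that the deleted loop implemented inline: if the first line of a file starts with that flag, the rest of the line is parsed as JSON metadata and excluded from the content. A sketch (editor's illustration, not part of the diff; the payload is invented):

    import io

    from danswer.connectors.cross_connector_utils.file_utils import read_file

    raw = b'#DANSWER_METADATA={"link": "https://example.com/doc"}\nActual document text.\n'
    content, metadata = read_file(io.BytesIO(raw))
    assert metadata == {"link": "https://example.com/doc"}
    assert content == "Actual document text.\n"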
backend/danswer/connectors/google_site/connector.py (new file, 139 lines)
@@ -0,0 +1,139 @@
import os
import urllib.parse
from typing import Any
from typing import cast

from bs4 import BeautifulSoup
from bs4 import Tag

from danswer.configs.app_configs import INDEX_BATCH_SIZE
from danswer.configs.constants import DocumentSource
from danswer.connectors.cross_connector_utils.file_utils import load_files_from_zip
from danswer.connectors.cross_connector_utils.file_utils import read_file
from danswer.connectors.cross_connector_utils.html_utils import standard_html_cleanup
from danswer.connectors.interfaces import GenerateDocumentsOutput
from danswer.connectors.interfaces import LoadConnector
from danswer.connectors.models import Document
from danswer.connectors.models import Section


def process_link(element: BeautifulSoup | Tag) -> str:
    href = cast(str | None, element.get("href"))
    if not href:
        raise RuntimeError(f"Invalid link - {element}")

    # cleanup href
    href = urllib.parse.unquote(href)
    href = href.rstrip(".html").lower()
    href = href.replace("_", "")
    href = href.replace(" ", "-")

    return href


def find_google_sites_page_path_from_navbar(
    element: BeautifulSoup | Tag, path: str, is_initial: bool
) -> str | None:
    ul = cast(Tag | None, element.find("ul"))
    if ul:
        if not is_initial:
            a = cast(Tag, element.find("a"))
            new_path = f"{path}/{process_link(a)}"
            if a.get("aria-selected") == "true":
                return new_path
        else:
            new_path = ""
        for li in ul.find_all("li", recursive=False):
            found_link = find_google_sites_page_path_from_navbar(li, new_path, False)
            if found_link:
                return found_link
    else:
        a = cast(Tag, element.find("a"))
        if a:
            href = process_link(a)
            if href and a.get("aria-selected") == "true":
                return path + "/" + href

    return None


class GoogleSitesConnector(LoadConnector):
    def __init__(
        self,
        zip_path: str,
        base_url: str,
        batch_size: int = INDEX_BATCH_SIZE,
    ):
        self.zip_path = zip_path
        self.base_url = base_url
        self.batch_size = batch_size

    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
        pass

    def load_from_state(self) -> GenerateDocumentsOutput:
        documents: list[Document] = []

        # load the HTML files
        files = load_files_from_zip(self.zip_path)
        for file_info, file_io in files:
            # skip non-published files
            if "/PUBLISHED/" not in file_info.filename:
                continue

            file_path, extension = os.path.splitext(file_info.filename)
            if extension != ".html":
                continue

            file_content, _ = read_file(file_io)
            soup = BeautifulSoup(file_content, "html.parser")

            # get the link out of the navbar
            header = cast(Tag, soup.find("header"))
            nav = cast(Tag, header.find("nav"))
            path = find_google_sites_page_path_from_navbar(nav, "", True)
            if not path:
                raise RuntimeError(f"Could not find path for {file_info.filename}")

            # cleanup the hidden `Skip to main content` and `Skip to navigation` that
            # appears at the top of every page
            for div in soup.find_all("div", attrs={"data-is-touch-wrapper": "true"}):
                div.extract()

            # get the body of the page
            parsed_html = standard_html_cleanup(
                soup, additional_element_types_to_discard=["header", "nav"]
            )

            title = parsed_html.title or file_path.split("/")[-1]
            documents.append(
                Document(
                    id=f"{DocumentSource.GOOGLE_SITES.value}:{path}",
                    source=DocumentSource.GOOGLE_SITES,
                    semantic_identifier=title,
                    sections=[
                        Section(
                            link=self.base_url.rstrip("/") + "/" + path.lstrip("/"),
                            text=parsed_html.cleaned_text,
                        )
                    ],
                    metadata={},
                )
            )

            if len(documents) >= self.batch_size:
                yield documents
                documents = []

        if documents:
            yield documents


if __name__ == "__main__":
    connector = GoogleSitesConnector(
        os.environ["GOOGLE_SITES_ZIP_PATH"],
        os.environ.get("GOOGLE_SITES_BASE_URL", ""),
    )
    for doc_batch in connector.load_from_state():
        for doc in doc_batch:
            print(doc)
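The connector recovers each page's URL path by walking the exported navbar and returning the entry marked aria-selected="true", with hrefs normalized by process_link. A sketch against invented markup shaped like the <ul>/<li>/<a> nesting the walk expects (editor's illustration, not part of the diff):

    from bs4 import BeautifulSoup

    from danswer.connectors.google_site.connector import (
        find_google_sites_page_path_from_navbar,
    )

    nav = BeautifulSoup(
        """
        <nav><ul>
          <li><a href="home.html">Home</a></li>
          <li><a href="faq.html" aria-selected="true">FAQ</a></li>
        </ul></nav>
        """,
        "html.parser",
    )
    # Only the selected entry produces a path; "faq.html" normalizes to "faq"
    print(find_google_sites_page_path_from_navbar(nav, "", True))  # "/faq"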
@@ -1,5 +1,4 @@
 import io
-from copy import copy
 from datetime import datetime
 from enum import Enum
 from typing import Any
@@ -18,25 +17,20 @@ from PyPDF2 import PdfReader
 from requests_oauthlib import OAuth2Session  # type:ignore

 from danswer.configs.app_configs import INDEX_BATCH_SIZE
-from danswer.configs.app_configs import WEB_CONNECTOR_IGNORED_CLASSES
-from danswer.configs.app_configs import WEB_CONNECTOR_IGNORED_ELEMENTS
 from danswer.configs.app_configs import WEB_CONNECTOR_OAUTH_CLIENT_ID
 from danswer.configs.app_configs import WEB_CONNECTOR_OAUTH_CLIENT_SECRET
 from danswer.configs.app_configs import WEB_CONNECTOR_OAUTH_TOKEN_URL
 from danswer.configs.constants import DocumentSource
+from danswer.connectors.cross_connector_utils.html_utils import standard_html_cleanup
 from danswer.connectors.interfaces import GenerateDocumentsOutput
 from danswer.connectors.interfaces import LoadConnector
 from danswer.connectors.models import Document
 from danswer.connectors.models import Section
 from danswer.utils.logger import setup_logger
-from danswer.utils.text_processing import format_document_soup

 logger = setup_logger()

-MINTLIFY_UNWANTED = ["sticky", "hidden"]
-

 class WEB_CONNECTOR_VALID_SETTINGS(str, Enum):
     # Given a base site, index everything under that path
     RECURSIVE = "recursive"
@@ -224,36 +218,16 @@ class WebConnector(LoadConnector):
                 if link not in visited_links:
                     to_visit.append(link)

-            title_tag = soup.find("title")
-            title = None
-            if title_tag and title_tag.text:
-                title = title_tag.text
-                title_tag.extract()
-
-            # Heuristics based cleaning of elements based on css classes
-            unwanted_classes = copy(WEB_CONNECTOR_IGNORED_CLASSES)
-            if self.mintlify_cleanup:
-                unwanted_classes.extend(MINTLIFY_UNWANTED)
-            for undesired_element in unwanted_classes:
-                [
-                    tag.extract()
-                    for tag in soup.find_all(
-                        class_=lambda x: x and undesired_element in x.split()
-                    )
-                ]
-
-            for undesired_tag in WEB_CONNECTOR_IGNORED_ELEMENTS:
-                [tag.extract() for tag in soup.find_all(undesired_tag)]
-
-            # 200B is ZeroWidthSpace which we don't care for
-            page_text = format_document_soup(soup).replace("\u200B", "")
+            parsed_html = standard_html_cleanup(soup, self.mintlify_cleanup)

             doc_batch.append(
                 Document(
                     id=current_url,
-                    sections=[Section(link=current_url, text=page_text)],
+                    sections=[
+                        Section(link=current_url, text=parsed_html.cleaned_text)
+                    ],
                     source=DocumentSource.WEB,
-                    semantic_identifier=title or current_url,
+                    semantic_identifier=parsed_html.title or current_url,
                     metadata={},
                 )
             )
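With the duplicated cleanup deleted, the web connector and the new Google Sites connector both funnel through standard_html_cleanup; the second positional argument above maps to mintlify_cleanup_enabled. A condensed sketch of the resulting flow (editor's illustration, not part of the diff; the URL is hypothetical):

    import requests
    from bs4 import BeautifulSoup

    from danswer.connectors.cross_connector_utils.html_utils import standard_html_cleanup

    soup = BeautifulSoup(
        requests.get("https://docs.example.com/page").text, "html.parser"
    )
    parsed_html = standard_html_cleanup(soup, mintlify_cleanup_enabled=True)
    title = parsed_html.title             # replaces the manual <title> handling
    page_text = parsed_html.cleaned_text  # replaces the inline class/tag stripping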
BIN  web/public/GoogleSites.png (new file, 5.4 KiB; binary not shown)
@@ -8,7 +8,6 @@ import { fetcher } from "@/lib/fetcher";
 import { HealthCheckBanner } from "@/components/health/healthcheck";
 import { ConnectorIndexingStatus, FileConfig } from "@/lib/types";
 import { linkCredential } from "@/lib/credential";
-import { FileUpload } from "./FileUpload";
 import { useState } from "react";
 import { usePopup } from "@/components/admin/connectors/Popup";
 import { createConnector, runConnector } from "@/lib/connector";
@@ -17,6 +16,7 @@ import { SingleUseConnectorsTable } from "@/components/admin/connectors/table/Si
 import { LoadingAnimation } from "@/components/Loading";
 import { Form, Formik } from "formik";
 import { TextFormField } from "@/components/admin/connectors/Field";
+import { FileUpload } from "@/components/admin/connectors/FileUpload";

 const getNameFromPath = (path: string) => {
   const pathParts = path.split("/");
web/src/app/admin/connectors/google-sites/page.tsx (new file, 241 lines)
@@ -0,0 +1,241 @@
"use client";

import useSWR, { useSWRConfig } from "swr";
import * as Yup from "yup";

import { LoadingAnimation } from "@/components/Loading";
import { GoogleSitesIcon } from "@/components/icons/icons";
import { fetcher } from "@/lib/fetcher";
import { TextFormField } from "@/components/admin/connectors/Field";
import { HealthCheckBanner } from "@/components/health/healthcheck";
import { ConnectorIndexingStatus, GoogleSitesConfig } from "@/lib/types";
import { Form, Formik } from "formik";
import { useState } from "react";
import { usePopup } from "@/components/admin/connectors/Popup";
import { createConnector, runConnector } from "@/lib/connector";
import { linkCredential } from "@/lib/credential";
import { FileUpload } from "@/components/admin/connectors/FileUpload";
import { SingleUseConnectorsTable } from "@/components/admin/connectors/table/SingleUseConnectorsTable";
import { Spinner } from "@/components/Spinner";

export default function GoogleSites() {
  const { mutate } = useSWRConfig();
  const [selectedFiles, setSelectedFiles] = useState<File[]>([]);
  const [filesAreUploading, setFilesAreUploading] = useState<boolean>(false);
  const { popup, setPopup } = usePopup();

  const {
    data: connectorIndexingStatuses,
    isLoading: isConnectorIndexingStatusesLoading,
    error: isConnectorIndexingStatusesError,
  } = useSWR<ConnectorIndexingStatus<any, any>[]>(
    "/api/manage/admin/connector/indexing-status",
    fetcher
  );

  const googleSitesIndexingStatuses: ConnectorIndexingStatus<
    GoogleSitesConfig,
    {}
  >[] =
    connectorIndexingStatuses?.filter(
      (connectorIndexingStatus) =>
        connectorIndexingStatus.connector.source === "google_sites"
    ) ?? [];

  return (
    <>
      {popup}
      {filesAreUploading && <Spinner />}
      <div className="mx-auto container">
        <div className="mb-4">
          <HealthCheckBanner />
        </div>
        <div className="border-solid border-gray-600 border-b pb-2 mb-4 flex">
          <GoogleSitesIcon size={32} />
          <h1 className="text-3xl font-bold pl-2">Google Sites</h1>
        </div>
        <p className="text-sm mb-2">
          For an in-depth guide on how to set up this connector, check out{" "}
          <a
            href="https://docs.danswer.dev/connectors/google-sites"
            target="_blank"
            className="text-blue-500"
          >
            the documentation
          </a>
          .
        </p>

        <div className="mt-4">
          <h2 className="font-bold text-xl mb-2">Upload Files</h2>
          <div className="mx-auto w-full">
            <Formik
              initialValues={{
                base_url: "",
              }}
              validationSchema={Yup.object().shape({
                base_url: Yup.string().required("Base URL is required"),
              })}
              onSubmit={async (values, formikHelpers) => {
                const uploadCreateAndTriggerConnector = async () => {
                  const formData = new FormData();

                  selectedFiles.forEach((file) => {
                    formData.append("files", file);
                  });

                  const response = await fetch(
                    "/api/manage/admin/connector/file/upload",
                    { method: "POST", body: formData }
                  );
                  const responseJson = await response.json();
                  if (!response.ok) {
                    setPopup({
                      message: `Unable to upload files - ${responseJson.detail}`,
                      type: "error",
                    });
                    return;
                  }

                  const filePaths = responseJson.file_paths as string[];
                  const [connectorErrorMsg, connector] =
                    await createConnector<GoogleSitesConfig>({
                      name: `GoogleSitesConnector-${values.base_url}`,
                      source: "google_sites",
                      input_type: "load_state",
                      connector_specific_config: {
                        base_url: values.base_url,
                        zip_path: filePaths[0],
                      },
                      refresh_freq: null,
                      disabled: false,
                    });
                  if (connectorErrorMsg || !connector) {
                    setPopup({
                      message: `Unable to create connector - ${connectorErrorMsg}`,
                      type: "error",
                    });
                    return;
                  }

                  const credentialResponse = await linkCredential(
                    connector.id,
                    0,
                    values.base_url
                  );
                  if (!credentialResponse.ok) {
                    const credentialResponseJson =
                      await credentialResponse.json();
                    setPopup({
                      message: `Unable to link connector to credential - ${credentialResponseJson.detail}`,
                      type: "error",
                    });
                    return;
                  }

                  const runConnectorErrorMsg = await runConnector(
                    connector.id,
                    [0]
                  );
                  if (runConnectorErrorMsg) {
                    setPopup({
                      message: `Unable to run connector - ${runConnectorErrorMsg}`,
                      type: "error",
                    });
                    return;
                  }

                  mutate("/api/manage/admin/connector/indexing-status");
                  setSelectedFiles([]);
                  formikHelpers.resetForm();
                  setPopup({
                    type: "success",
                    message: "Successfully uploaded files!",
                  });
                };

                setFilesAreUploading(true);
                try {
                  await uploadCreateAndTriggerConnector();
                } catch (e) {
                  console.log("Failed to index files: ", e);
                }
                setFilesAreUploading(false);
              }}
            >
              {({ values, isSubmitting }) => (
                <Form className="p-3 border border-gray-600 rounded">
                  <TextFormField
                    name="base_url"
                    label="Base URL:"
                    placeholder={`Base URL of your Google Site e.g. https://sites.google.com/view/your-site`}
                    subtext="This will be used to generate links for each page."
                    autoCompleteDisabled={true}
                  />

                  <p className="mb-1 font-medium">Files:</p>
                  <FileUpload
                    selectedFiles={selectedFiles}
                    setSelectedFiles={setSelectedFiles}
                    message="Upload a zip file containing the HTML of your Google Site"
                  />
                  <button
                    className={
                      "bg-slate-500 hover:bg-slate-700 text-white " +
                      "font-bold py-2 px-4 rounded focus:outline-none " +
                      "focus:shadow-outline w-full mx-auto mt-4"
                    }
                    type="submit"
                    disabled={
                      selectedFiles.length !== 1 ||
                      !values.base_url ||
                      isSubmitting
                    }
                  >
                    Upload!
                  </button>
                </Form>
              )}
            </Formik>
          </div>
        </div>

        <h2 className="font-bold mb-2 mt-6 ml-auto mr-auto">
          Existing Google Site Connectors
        </h2>
        {isConnectorIndexingStatusesLoading ? (
          <LoadingAnimation text="Loading" />
        ) : isConnectorIndexingStatusesError || !connectorIndexingStatuses ? (
          <div>Error loading indexing history</div>
        ) : googleSitesIndexingStatuses.length > 0 ? (
          <SingleUseConnectorsTable<GoogleSitesConfig, {}>
            connectorIndexingStatuses={googleSitesIndexingStatuses}
            specialColumns={[
              {
                header: "Base URL",
                key: "base_url",
                getValue: (ccPairStatus) => {
                  const connectorConfig =
                    ccPairStatus.connector.connector_specific_config;
                  return (
                    <a
                      className="text-blue-500"
                      href={connectorConfig.base_url}
                    >
                      {connectorConfig.base_url}
                    </a>
                  );
                },
              },
            ]}
            onUpdate={() =>
              mutate("/api/manage/admin/connector/indexing-status")
            }
          />
        ) : (
          <p className="text-sm">No indexed Google Sites found</p>
        )}
      </div>
    </>
  );
}
@@ -22,6 +22,7 @@ import {
   HubSpotIcon,
   BookmarkIcon,
   CPUIcon,
+  GoogleSitesIcon,
 } from "@/components/icons/icons";
 import { getAuthDisabledSS, getCurrentUserSS } from "@/lib/userSS";
 import { redirect } from "next/navigation";
@@ -173,6 +174,15 @@ export async function Layout({ children }: { children: React.ReactNode }) {
             ),
             link: "/admin/connectors/zulip",
           },
+          {
+            name: (
+              <div className="flex">
+                <GoogleSitesIcon size={16} />
+                <div className="ml-1">Google Sites</div>
+              </div>
+            ),
+            link: "/admin/connectors/google-sites",
+          },
           {
             name: (
               <div className="flex">
@@ -1,16 +1,17 @@
-// components/FileUpload.tsx
-import { ChangeEvent, FC, useState } from "react";
+import { FC, useState } from "react";
 import React from "react";
 import Dropzone from "react-dropzone";

 interface FileUploadProps {
   selectedFiles: File[];
   setSelectedFiles: (files: File[]) => void;
+  message?: string;
 }

 export const FileUpload: FC<FileUploadProps> = ({
   selectedFiles,
   setSelectedFiles,
+  message,
 }) => {
   const [dragActive, setDragActive] = useState(false);
@@ -35,7 +36,10 @@ export const FileUpload: FC<FileUploadProps> = ({
           }
         >
           <input {...getInputProps()} />
-          <b>Drag and drop some files here, or click to select files</b>
+          <b>
+            {message ||
+              "Drag and drop some files here, or click to select files"}
+          </b>
         </div>
       </section>
     )}
@@ -1,10 +1,4 @@
-import {
-  Connector,
-  ConnectorIndexingStatus,
-  Credential,
-  DeletionAttemptSnapshot,
-  ValidStatuses,
-} from "@/lib/types";
+import { DeletionAttemptSnapshot, ValidStatuses } from "@/lib/types";
 import { BasicTable } from "@/components/admin/connectors/BasicTable";
 import { Popup } from "@/components/admin/connectors/Popup";
 import { useState } from "react";
@@ -64,17 +58,19 @@ export function SingleUseConnectorsTable<
   const connectorIncludesCredential =
     getCredential !== undefined && onCredentialLink !== undefined;

-  const columns = [
-    {
-      header: "Name",
-      key: "name",
-    },
-    ...(specialColumns ?? []),
-    {
-      header: "Status",
-      key: "status",
-    },
-  ];
+  const columns = [];
+
+  if (includeName) {
+    columns.push({
+      header: "Name",
+      key: "name",
+    });
+  }
+  columns.push(...(specialColumns ?? []));
+  columns.push({
+    header: "Status",
+    key: "status",
+  });
   if (connectorIncludesCredential) {
     columns.push({
       header: "Credential",
@@ -43,6 +43,7 @@ import gongIcon from "../../../public/Gong.png";
 import zulipIcon from "../../../public/Zulip.png";
 import linearIcon from "../../../public/Linear.png";
 import hubSpotIcon from "../../../public/HubSpot.png";
+import googleSitesIcon from "../../../public/GoogleSites.png";

 interface IconProps {
   size?: number;
@@ -450,3 +451,17 @@ export const HubSpotIcon = ({
     </div>
   );
 };
+
+export const GoogleSitesIcon = ({
+  size = 16,
+  className = defaultTailwindCSS,
+}: IconProps) => {
+  return (
+    <div
+      style={{ width: `${size}px`, height: `${size}px` }}
+      className={`w-[${size}px] h-[${size}px] ` + className}
+    >
+      <Image src={googleSitesIcon} alt="Logo" width="96" height="96" />
+    </div>
+  );
+};
@@ -29,6 +29,7 @@ const sources: Source[] = [
   { displayName: "Zulip", internalName: "zulip" },
   { displayName: "Linear", internalName: "linear" },
   { displayName: "HubSpot", internalName: "hubspot" },
+  { displayName: "Google Sites", internalName: "google_sites" },
 ];

 interface SourceSelectorProps {
@@ -16,6 +16,7 @@ import {
   SlackIcon,
   ZulipIcon,
   HubSpotIcon,
+  GoogleSitesIcon,
 } from "./icons/icons";

 interface SourceMetadata {
@@ -122,6 +123,12 @@ export const getSourceMetadata = (sourceType: ValidSources): SourceMetadata => {
         displayName: "HubSpot",
         adminPageLink: "/admin/connectors/hubspot",
       };
+    case "google_sites":
+      return {
+        icon: GoogleSitesIcon,
+        displayName: "Google Sites",
+        adminPageLink: "/admin/connectors/google-sites",
+      };
     default:
       throw new Error("Invalid source type");
   }
@@ -23,7 +23,8 @@ export type ValidSources =
   | "zulip"
   | "linear"
   | "hubspot"
-  | "file";
+  | "file"
+  | "google_sites";
 export type ValidInputTypes = "load_state" | "poll" | "event";
 export type ValidStatuses =
   | "success"
@@ -114,6 +115,11 @@ export interface NotionConfig {}

 export interface HubSpotConfig {}

+export interface GoogleSitesConfig {
+  zip_path: string;
+  base_url: string;
+}
+
 export interface IndexAttemptSnapshot {
   status: ValidStatuses | null;
   num_docs_indexed: number;