File connector (#93)

* Initial backend changes for file connector

* Add another background job to clean up files

* UI + tweaks for backend
Chris Weaver 2023-06-09 21:28:50 -07:00 committed by GitHub
parent f10ece4411
commit f20563c9bc
32 changed files with 774 additions and 38 deletions

@ -3,6 +3,7 @@ FROM python:3.11-slim-bullseye
RUN apt-get update \
&& apt-get install -y git cmake pkg-config libprotobuf-c-dev protobuf-compiler \
libprotobuf-dev libgoogle-perftools-dev libpq-dev build-essential cron curl \
supervisor \
&& rm -rf /var/lib/apt/lists/*
COPY ./requirements/default.txt /tmp/requirements.txt
@ -12,6 +13,7 @@ RUN playwright install-deps
WORKDIR /app
COPY ./danswer /app/danswer
COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf
ENV PYTHONPATH /app
CMD ["python3", "danswer/background/update.py"]
CMD ["/usr/bin/supervisord"]

@ -0,0 +1,6 @@
from danswer.background.utils import interval_run_job
from danswer.connectors.file.utils import clean_old_temp_files
if __name__ == "__main__":
interval_run_job(clean_old_temp_files, 30 * 60) # run every 30 minutes

@ -0,0 +1,11 @@
#!/bin/bash
python danswer/background/update.py &
python danswer/background/file_deletion.py &
# Wait for any process to exit
wait -n
# Exit with status of process that exited first
exit $?

@ -10,7 +10,7 @@ from danswer.db.credentials import backend_update_credential_json
from danswer.db.engine import build_engine
from danswer.db.engine import get_db_current_time
from danswer.db.index_attempt import create_index_attempt
from danswer.db.index_attempt import get_incomplete_index_attempts
from danswer.db.index_attempt import get_inprogress_index_attempts
from danswer.db.index_attempt import get_last_finished_attempt
from danswer.db.index_attempt import get_not_started_index_attempts
from danswer.db.index_attempt import mark_attempt_failed
@ -42,7 +42,7 @@ def should_create_new_indexing(
def create_indexing_jobs(db_session: Session) -> None:
connectors = fetch_connectors(db_session, disabled_status=False)
for connector in connectors:
in_progress_indexing_attempts = get_incomplete_index_attempts(
in_progress_indexing_attempts = get_inprogress_index_attempts(
connector.id, db_session
)
if in_progress_indexing_attempts:
@ -50,6 +50,9 @@ def create_indexing_jobs(db_session: Session) -> None:
# Currently single threaded so any still in-progress must have errored
for attempt in in_progress_indexing_attempts:
logger.warning(
f"Marking in-progress attempt 'connector: {attempt.connector_id}, credential: {attempt.credential_id}' as failed"
)
mark_attempt_failed(attempt, db_session)
last_finished_indexing_attempt = get_last_finished_attempt(

@ -0,0 +1,21 @@
import time
from collections.abc import Callable
from typing import Any
from danswer.utils.logging import setup_logger
logger = setup_logger()
def interval_run_job(job: Callable[[], Any], delay: int | float) -> None:
while True:
start = time.time()
logger.info(f"Running '{job.__name__}', current time: {time.ctime(start)}")
try:
job()
except Exception as e:
logger.exception(f"Failed to run update due to {e}")
sleep_time = delay - (time.time() - start)
if sleep_time > 0:
time.sleep(sleep_time)

@ -83,7 +83,9 @@ POSTGRES_DB = os.environ.get("POSTGRES_DB", "postgres")
# Connector Configs
#####
GOOGLE_DRIVE_INCLUDE_SHARED = False
FILE_CONNECTOR_TMP_STORAGE_PATH = os.environ.get(
"FILE_CONNECTOR_TMP_STORAGE_PATH", "/home/file_connector_storage"
)
#####
# Query Configs

@ -22,3 +22,4 @@ class DocumentSource(str, Enum):
GOOGLE_DRIVE = "google_drive"
GITHUB = "github"
CONFLUENCE = "confluence"
FILE = "file"

@ -3,6 +3,7 @@ from typing import Type
from danswer.configs.constants import DocumentSource
from danswer.connectors.confluence.connector import ConfluenceConnector
from danswer.connectors.file.connector import LocalFileConnector
from danswer.connectors.github.connector import GithubConnector
from danswer.connectors.google_drive.connector import GoogleDriveConnector
from danswer.connectors.interfaces import BaseConnector
@ -27,6 +28,7 @@ def identify_connector_class(
) -> Type[BaseConnector]:
connector_map = {
DocumentSource.WEB: WebConnector,
DocumentSource.FILE: LocalFileConnector,
DocumentSource.SLACK: {
InputType.LOAD_STATE: SlackLoadConnector,
InputType.POLL: SlackPollConnector,

@ -0,0 +1,103 @@
import json
import os
import zipfile
from collections.abc import Generator
from enum import Enum
from pathlib import Path
from typing import Any
from typing import IO
from danswer.configs.app_configs import INDEX_BATCH_SIZE
from danswer.configs.constants import DocumentSource
from danswer.connectors.file.utils import check_file_ext_is_valid
from danswer.connectors.file.utils import get_file_ext
from danswer.connectors.interfaces import GenerateDocumentsOutput
from danswer.connectors.interfaces import LoadConnector
from danswer.connectors.models import Document
from danswer.connectors.models import Section
from danswer.utils.logging import setup_logger
logger = setup_logger()
_METADATA_FLAG = "#DANSWER_METADATA="
def _get_files_from_zip(
zip_location: str | Path,
) -> Generator[tuple[str, IO[Any]], None, None]:
with zipfile.ZipFile(zip_location, "r") as zip_file:
for file_name in zip_file.namelist():
with zip_file.open(file_name, "r") as file:
yield os.path.basename(file_name), file
def _open_files_at_location(
file_path: str | Path,
) -> Generator[tuple[str, IO[Any]], Any, None]:
extension = get_file_ext(file_path)
if extension == ".zip":
yield from _get_files_from_zip(file_path)
elif extension == ".txt":
with open(file_path, "r") as file:
yield os.path.basename(file_path), file
else:
logger.warning(f"Skipping file '{file_path}' with extension '{extension}'")
def _process_file(file_name: str, file: IO[Any]) -> list[Document]:
extension = get_file_ext(file_name)
if not check_file_ext_is_valid(extension):
logger.warning(f"Skipping file '{file_name}' with extension '{extension}'")
return []
metadata = {}
file_content_raw = ""
for ind, line in enumerate(file):
if isinstance(line, bytes):
line = line.decode("utf-8")
line = str(line)
if ind == 0 and line.startswith(_METADATA_FLAG):
metadata = json.loads(line.replace(_METADATA_FLAG, "", 1).strip())
else:
file_content_raw += line
return [
Document(
id=file_name,
sections=[Section(link=metadata.get("link", ""), text=file_content_raw)],
source=DocumentSource.FILE,
semantic_identifier=file_name,
metadata={},
)
]
class LocalFileConnector(LoadConnector):
def __init__(
self,
file_locations: list[Path | str],
batch_size: int = INDEX_BATCH_SIZE,
) -> None:
self.file_locations = [Path(file_location) for file_location in file_locations]
self.batch_size = batch_size
def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
pass
def load_from_state(self) -> GenerateDocumentsOutput:
documents: list[Document] = []
for file_location in self.file_locations:
files = _open_files_at_location(file_location)
for file_name, file in files:
documents.extend(_process_file(file_name, file))
if len(documents) >= self.batch_size:
yield documents
documents = []
if documents:
yield documents
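
For illustration, a minimal sketch of how the new connector and the #DANSWER_METADATA= flag fit together (the sample path and link below are made up; the classes and the metadata flag come from the connector code above):

from pathlib import Path
from danswer.connectors.file.connector import LocalFileConnector

# hypothetical sample file; the first line optionally carries metadata as JSON
sample = Path("/tmp/sample.txt")
sample.write_text(
    '#DANSWER_METADATA={"link": "https://example.com/docs/sample"}\n'
    "Body text that will be indexed by Danswer.\n"
)

connector = LocalFileConnector(file_locations=[sample])
for batch in connector.load_from_state():
    for doc in batch:
        # the section link comes from the metadata line, the text from the rest of the file
        print(doc.semantic_identifier, doc.sections[0].link)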

@ -0,0 +1,65 @@
import os
import shutil
import time
import uuid
from pathlib import Path
from typing import Any
from typing import IO
from danswer.configs.app_configs import FILE_CONNECTOR_TMP_STORAGE_PATH
_FILE_AGE_CLEANUP_THRESHOLD_HOURS = 24 * 7 # 1 week
_VALID_FILE_EXTENSIONS = [".txt", ".zip"]
def get_file_ext(file_path_or_name: str | Path) -> str:
_, extension = os.path.splitext(file_path_or_name)
return extension
def check_file_ext_is_valid(ext: str) -> bool:
return ext in _VALID_FILE_EXTENSIONS
def write_temp_files(
files: list[tuple[str, IO[Any]]],
base_path: Path | str = FILE_CONNECTOR_TMP_STORAGE_PATH,
) -> list[str]:
"""Writes temporary files to disk and returns their paths
NOTE: need to pass in (file_name, file) tuples since the SpooledTemporaryFile
exposed by FastAPI's `UploadFile` class does not include a name.
"""
file_location = Path(base_path) / str(uuid.uuid4())
os.makedirs(file_location, exist_ok=True)
file_paths: list[str] = []
for file_name, file in files:
extension = get_file_ext(file_name)
if not check_file_ext_is_valid(extension):
raise ValueError(
f"Invalid file extension for file: '{file_name}'. Must be one of {_VALID_FILE_EXTENSIONS}"
)
file_path = file_location / file_name
with open(file_path, "wb") as buffer:
# copy file content from uploaded file to the newly created file
shutil.copyfileobj(file, buffer)
file_paths.append(str(file_path.absolute()))
return file_paths
def file_age_in_hours(filepath: str | Path) -> float:
return (time.time() - os.path.getmtime(filepath)) / (60 * 60)
def clean_old_temp_files(
age_threshold_in_hours: float | int = _FILE_AGE_CLEANUP_THRESHOLD_HOURS,
base_path: Path | str = FILE_CONNECTOR_TMP_STORAGE_PATH,
) -> None:
os.makedirs(base_path, exist_ok=True)
for file in os.listdir(base_path):
full_path = Path(base_path) / file
if file_age_in_hours(full_path) > age_threshold_in_hours:
os.remove(full_path)
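
For reference, a quick sketch of write_temp_files usage (the in-memory file and base_path below are placeholders; the (file_name, file) tuple shape mirrors what the upload endpoint passes in):

import io
from danswer.connectors.file.utils import write_temp_files

fake_upload = io.BytesIO(b"hello world\n")
paths = write_temp_files(
    [("notes.txt", fake_upload)],
    base_path="/tmp/file_connector_test",  # placeholder; defaults to FILE_CONNECTOR_TMP_STORAGE_PATH
)
print(paths)  # e.g. ["/tmp/file_connector_test/<uuid>/notes.txt"]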

@ -25,16 +25,14 @@ def create_index_attempt(
return new_attempt.id
def get_incomplete_index_attempts(
def get_inprogress_index_attempts(
connector_id: int | None,
db_session: Session,
) -> list[IndexAttempt]:
stmt = select(IndexAttempt)
if connector_id is not None:
stmt = stmt.where(IndexAttempt.connector_id == connector_id)
stmt = stmt.where(
IndexAttempt.status.notin_([IndexingStatus.SUCCESS, IndexingStatus.FAILED])
)
stmt = stmt.where(IndexAttempt.status == IndexingStatus.IN_PROGRESS)
incomplete_attempts = db_session.scalars(stmt)
return list(incomplete_attempts.all())

@ -7,6 +7,7 @@ from danswer.auth.users import current_user
from danswer.configs.app_configs import MASK_CREDENTIAL_PREFIX
from danswer.configs.constants import DocumentSource
from danswer.configs.constants import OPENAI_API_KEY_STORAGE_KEY
from danswer.connectors.file.utils import write_temp_files
from danswer.connectors.google_drive.connector_auth import DB_CREDENTIALS_DICT_KEY
from danswer.connectors.google_drive.connector_auth import get_auth_url
from danswer.connectors.google_drive.connector_auth import get_drive_tokens
@ -51,6 +52,7 @@ from danswer.server.models import ConnectorIndexingStatus
from danswer.server.models import ConnectorSnapshot
from danswer.server.models import CredentialBase
from danswer.server.models import CredentialSnapshot
from danswer.server.models import FileUploadResponse
from danswer.server.models import GDriveCallback
from danswer.server.models import GoogleAppCredentials
from danswer.server.models import IndexAttemptSnapshot
@ -65,6 +67,7 @@ from fastapi import Depends
from fastapi import HTTPException
from fastapi import Request
from fastapi import Response
from fastapi import UploadFile
from fastapi_users.db import SQLAlchemyUserDatabase
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import Session
@ -153,6 +156,22 @@ def admin_google_drive_auth(
return AuthUrl(auth_url=get_auth_url(credential_id=int(credential_id)))
@router.post("/admin/connector/file/upload")
def upload_files(
files: list[UploadFile], _: User = Depends(current_admin_user)
) -> FileUploadResponse:
for file in files:
if not file.filename:
raise HTTPException(status_code=400, detail="File name cannot be empty")
try:
file_paths = write_temp_files(
[(cast(str, file.filename), file.file) for file in files]
)
except ValueError as e:
raise HTTPException(status_code=400, detail=str(e))
return FileUploadResponse(file_paths=file_paths)
@router.get("/admin/latest-index-attempt")
def list_all_index_attempts(
_: User = Depends(current_admin_user),
@ -344,9 +363,9 @@ def connector_run_once(
run_info.connector_id, db_session
)
except ValueError:
return StatusResponse(
success=False,
message=f"Connector by id {connector_id} does not exist.",
raise HTTPException(
status_code=404,
detail=f"Connector by id {connector_id} does not exist.",
)
if not specified_credential_ids:
@ -355,15 +374,15 @@ def connector_run_once(
if set(specified_credential_ids).issubset(set(possible_credential_ids)):
credential_ids = specified_credential_ids
else:
return StatusResponse(
success=False,
message=f"Not all specified credentials are associated with connector",
raise HTTPException(
status_code=400,
detail="Not all specified credentials are associated with connector",
)
if not credential_ids:
return StatusResponse(
success=False,
message=f"Connector has no valid credentials, cannot create index attempts.",
raise HTTPException(
status_code=400,
detail="Connector has no valid credentials, cannot create index attempts.",
)
index_attempt_ids = [

@ -49,6 +49,10 @@ class GoogleAppCredentials(BaseModel):
web: GoogleAppWebCredentials
class FileUploadResponse(BaseModel):
file_paths: list[str]
class HealthCheckResponse(BaseModel):
status: Literal["ok"]
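
A hedged sketch of exercising the new upload endpoint with `requests` (already in the backend requirements); the host/port and the /api/manage prefix are assumptions based on the frontend fetch call, and auth is omitted:

import requests

with open("notes.txt", "rb") as f:
    resp = requests.post(
        "http://localhost:8080/api/manage/admin/connector/file/upload",  # host/prefix assumed
        files=[("files", ("notes.txt", f, "text/plain"))],
    )
resp.raise_for_status()
print(resp.json()["file_paths"])  # FileUploadResponse.file_paths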

@ -22,6 +22,7 @@ pydantic==1.10.7
PyGithub==1.58.2
PyPDF2==3.0.1
pytest-playwright==0.3.2
python-multipart==0.0.6
qdrant-client==1.2.0
requests==2.28.2
rfc3986==1.5.0

backend/supervisord.conf

@ -0,0 +1,16 @@
[supervisord]
nodaemon=true
logfile=/dev/null
logfile_maxbytes=0
[program:indexing]
command=python danswer/background/update.py
stdout_logfile=/var/log/supervisor/update.log
redirect_stderr=true
autorestart=true
[program:file_deletion]
command=python danswer/background/file_deletion.py
stdout_logfile=/var/log/supervisor/file_deletion.log
redirect_stderr=true
autorestart=true

@ -21,6 +21,7 @@ services:
- DISABLE_AUTH=True
volumes:
- local_dynamic_storage:/home/storage
- file_connector_tmp_storage:/home/file_connector_storage
background:
build:
context: ../backend
@ -34,8 +35,11 @@ services:
environment:
- POSTGRES_HOST=relational_db
- QDRANT_HOST=vector_db
- TYPESENSE_HOST=search_engine
- TYPESENSE_API_KEY=${TYPESENSE_API_KEY:-local_dev_typesense}
volumes:
- local_dynamic_storage:/home/storage
- file_connector_tmp_storage:/home/file_connector_storage
web_server:
build:
context: ../web
@ -99,6 +103,7 @@ services:
&& while :; do sleep 6h & wait $${!}; nginx -s reload; done & nginx -g \"daemon off;\""
volumes:
local_dynamic_storage:
file_connector_tmp_storage: # used to store files uploaded by the user temporarily while we are indexing them
db_volume:
qdrant_volume:
typesense_volume:

@ -18,6 +18,7 @@ services:
- TYPESENSE_API_KEY=${TYPESENSE_API_KEY:-local_dev_typesense}
volumes:
- local_dynamic_storage:/home/storage
- file_connector_tmp_storage:/home/file_connector_storage
background:
build:
context: ../backend
@ -33,6 +34,7 @@ services:
- QDRANT_HOST=vector_db
volumes:
- local_dynamic_storage:/home/storage
- file_connector_tmp_storage:/home/file_connector_storage
web_server:
build:
context: ../web
@ -98,6 +100,7 @@ services:
entrypoint: "/bin/sh -c 'trap exit TERM; while :; do certbot renew; sleep 12h & wait $${!}; done;'"
volumes:
local_dynamic_storage:
file_connector_tmp_storage: # used to store files uploaded by the user temporarily while we are indexing them
db_volume:
qdrant_volume:
typesense_volume:

web/package-lock.json

@ -22,6 +22,7 @@
"postcss": "^8.4.23",
"react": "^18.2.0",
"react-dom": "^18.2.0",
"react-dropzone": "^14.2.3",
"react-icons": "^4.8.0",
"swr": "^2.1.5",
"tailwindcss": "^3.3.1",
@ -713,6 +714,14 @@
"resolved": "https://registry.npmjs.org/ast-types-flow/-/ast-types-flow-0.0.7.tgz",
"integrity": "sha512-eBvWn1lvIApYMhzQMsu9ciLfkBY499mFZlNqG+/9WR7PVlroQw0vG30cOQQbaKz3sCEc44TAOu2ykzqXSNnwag=="
},
"node_modules/attr-accept": {
"version": "2.2.2",
"resolved": "https://registry.npmjs.org/attr-accept/-/attr-accept-2.2.2.tgz",
"integrity": "sha512-7prDjvt9HmqiZ0cl5CRjtS84sEyhsHP2coDkaZKRKVfCDo9s7iw7ChVmar78Gu9pC4SoR/28wFu/G5JJhTnqEg==",
"engines": {
"node": ">=4"
}
},
"node_modules/autoprefixer": {
"version": "10.4.14",
"resolved": "https://registry.npmjs.org/autoprefixer/-/autoprefixer-10.4.14.tgz",
@ -1744,6 +1753,17 @@
"node": "^10.12.0 || >=12.0.0"
}
},
"node_modules/file-selector": {
"version": "0.6.0",
"resolved": "https://registry.npmjs.org/file-selector/-/file-selector-0.6.0.tgz",
"integrity": "sha512-QlZ5yJC0VxHxQQsQhXvBaC7VRJ2uaxTf+Tfpu4Z/OcVQJVpZO+DGU0rkoVW5ce2SccxugvpBJoMvUs59iILYdw==",
"dependencies": {
"tslib": "^2.4.0"
},
"engines": {
"node": ">= 12"
}
},
"node_modules/fill-range": {
"version": "7.0.1",
"resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.0.1.tgz",
@ -3267,6 +3287,22 @@
"react": "^18.2.0"
}
},
"node_modules/react-dropzone": {
"version": "14.2.3",
"resolved": "https://registry.npmjs.org/react-dropzone/-/react-dropzone-14.2.3.tgz",
"integrity": "sha512-O3om8I+PkFKbxCukfIR3QAGftYXDZfOE2N1mr/7qebQJHs7U+/RSL/9xomJNpRg9kM5h9soQSdf0Gc7OHF5Fug==",
"dependencies": {
"attr-accept": "^2.2.2",
"file-selector": "^0.6.0",
"prop-types": "^15.8.1"
},
"engines": {
"node": ">= 10.13"
},
"peerDependencies": {
"react": ">= 16.8 || 18.0.0"
}
},
"node_modules/react-fast-compare": {
"version": "2.0.4",
"resolved": "https://registry.npmjs.org/react-fast-compare/-/react-fast-compare-2.0.4.tgz",
@ -4562,6 +4598,11 @@
"resolved": "https://registry.npmjs.org/ast-types-flow/-/ast-types-flow-0.0.7.tgz",
"integrity": "sha512-eBvWn1lvIApYMhzQMsu9ciLfkBY499mFZlNqG+/9WR7PVlroQw0vG30cOQQbaKz3sCEc44TAOu2ykzqXSNnwag=="
},
"attr-accept": {
"version": "2.2.2",
"resolved": "https://registry.npmjs.org/attr-accept/-/attr-accept-2.2.2.tgz",
"integrity": "sha512-7prDjvt9HmqiZ0cl5CRjtS84sEyhsHP2coDkaZKRKVfCDo9s7iw7ChVmar78Gu9pC4SoR/28wFu/G5JJhTnqEg=="
},
"autoprefixer": {
"version": "10.4.14",
"resolved": "https://registry.npmjs.org/autoprefixer/-/autoprefixer-10.4.14.tgz",
@ -5311,6 +5352,14 @@
"flat-cache": "^3.0.4"
}
},
"file-selector": {
"version": "0.6.0",
"resolved": "https://registry.npmjs.org/file-selector/-/file-selector-0.6.0.tgz",
"integrity": "sha512-QlZ5yJC0VxHxQQsQhXvBaC7VRJ2uaxTf+Tfpu4Z/OcVQJVpZO+DGU0rkoVW5ce2SccxugvpBJoMvUs59iILYdw==",
"requires": {
"tslib": "^2.4.0"
}
},
"fill-range": {
"version": "7.0.1",
"resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.0.1.tgz",
@ -6315,6 +6364,16 @@
"scheduler": "^0.23.0"
}
},
"react-dropzone": {
"version": "14.2.3",
"resolved": "https://registry.npmjs.org/react-dropzone/-/react-dropzone-14.2.3.tgz",
"integrity": "sha512-O3om8I+PkFKbxCukfIR3QAGftYXDZfOE2N1mr/7qebQJHs7U+/RSL/9xomJNpRg9kM5h9soQSdf0Gc7OHF5Fug==",
"requires": {
"attr-accept": "^2.2.2",
"file-selector": "^0.6.0",
"prop-types": "^15.8.1"
}
},
"react-fast-compare": {
"version": "2.0.4",
"resolved": "https://registry.npmjs.org/react-fast-compare/-/react-fast-compare-2.0.4.tgz",

@ -23,6 +23,7 @@
"postcss": "^8.4.23",
"react": "^18.2.0",
"react-dom": "^18.2.0",
"react-dropzone": "^14.2.3",
"react-icons": "^4.8.0",
"swr": "^2.1.5",
"tailwindcss": "^3.3.1",

@ -0,0 +1,58 @@
// components/FileUpload.tsx
import { FC, useState } from "react";
import React from "react";
import Dropzone from "react-dropzone";
interface FileUploadProps {
selectedFiles: File[];
setSelectedFiles: (files: File[]) => void;
}
export const FileUpload: FC<FileUploadProps> = ({
selectedFiles,
setSelectedFiles,
}) => {
const [dragActive, setDragActive] = useState(false);
return (
<div>
<Dropzone
onDrop={(acceptedFiles) => {
setSelectedFiles(acceptedFiles);
setDragActive(false);
}}
onDragLeave={() => setDragActive(false)}
onDragEnter={() => setDragActive(true)}
>
{({ getRootProps, getInputProps }) => (
<section>
<div
{...getRootProps()}
className={
"flex flex-col items-center w-full px-4 py-12 rounded " +
"shadow-lg tracking-wide border border-gray-700 cursor-pointer" +
(dragActive ? " border-blue-500" : "")
}
>
<input {...getInputProps()} />
<b>Drag and drop some files here, or click to select files</b>
</div>
</section>
)}
</Dropzone>
{selectedFiles.length > 0 && (
<div className="mt-4">
<h2 className="font-bold">Selected Files</h2>
<ul>
{selectedFiles.map((file) => (
<li key={file.name} className="flex">
<p className="text-sm mr-2">{file.name}</p>
</li>
))}
</ul>
</div>
)}
</div>
);
};

@ -0,0 +1,262 @@
"use client";
import useSWR, { useSWRConfig } from "swr";
import { FileIcon } from "@/components/icons/icons";
import { fetcher } from "@/lib/fetcher";
import { HealthCheckBanner } from "@/components/health/healthcheck";
import { ConnectorIndexingStatus, FileConfig } from "@/lib/types";
import { linkCredential } from "@/lib/credential";
import { FileUpload } from "./FileUpload";
import { useState } from "react";
import { Button } from "@/components/Button";
import { Popup, PopupSpec } from "@/components/admin/connectors/Popup";
import { createConnector, runConnector } from "@/lib/connector";
import { BasicTable } from "@/components/admin/connectors/BasicTable";
import { CheckCircle, XCircle } from "@phosphor-icons/react";
import { Spinner } from "@/components/Spinner";
const COLUMNS = [
{ header: "File names", key: "fileNames" },
{ header: "Status", key: "status" },
];
const getNameFromPath = (path: string) => {
const pathParts = path.split("/");
return pathParts[pathParts.length - 1];
};
export default function File() {
const [selectedFiles, setSelectedFiles] = useState<File[]>([]);
const [filesAreUploading, setFilesAreUploading] = useState<boolean>(false);
const [popup, setPopup] = useState<{
message: string;
type: "success" | "error";
} | null>(null);
const setPopupWithExpiration = (popupSpec: PopupSpec | null) => {
setPopup(popupSpec);
setTimeout(() => {
setPopup(null);
}, 4000);
};
const { mutate } = useSWRConfig();
const { data: connectorIndexingStatuses } = useSWR<
ConnectorIndexingStatus<any>[]
>("/api/manage/admin/connector/indexing-status", fetcher);
const fileIndexingStatuses: ConnectorIndexingStatus<FileConfig>[] =
connectorIndexingStatuses?.filter(
(connectorIndexingStatus) =>
connectorIndexingStatus.connector.source === "file"
) ?? [];
const inProgressFileIndexingStatuses = fileIndexingStatuses.filter(
(connectorIndexingStatus) =>
connectorIndexingStatus.last_status === "in_progress" ||
connectorIndexingStatus.last_status === "not_started"
);
const successfulFileIndexingStatuses = fileIndexingStatuses.filter(
(connectorIndexingStatus) =>
connectorIndexingStatus.last_status === "success"
);
const failedFileIndexingStatuses = fileIndexingStatuses.filter(
(connectorIndexingStatus) =>
connectorIndexingStatus.last_status === "failed"
);
return (
<div className="mx-auto container">
<div className="mb-4">
<HealthCheckBanner />
</div>
<div className="border-solid border-gray-600 border-b pb-2 mb-4 flex">
<FileIcon size="32" />
<h1 className="text-3xl font-bold pl-2">File</h1>
</div>
{popup && <Popup message={popup.message} type={popup.type} />}
{filesAreUploading && <Spinner />}
<h2 className="font-bold mb-2 mt-6 ml-auto mr-auto">Upload Files</h2>
<p className="text-sm mb-2">
Specify files below, click the <b>Upload</b> button, and the contents of
these files will be searchable via Danswer!
</p>
<div className="flex">
<div className="mx-auto max-w-3xl w-full">
<FileUpload
selectedFiles={selectedFiles}
setSelectedFiles={setSelectedFiles}
/>
<Button
className="mt-4 w-48"
fullWidth
disabled={selectedFiles.length === 0}
onClick={async () => {
const uploadCreateAndTriggerConnector = async () => {
const formData = new FormData();
selectedFiles.forEach((file) => {
formData.append("files", file);
});
const response = await fetch(
"/api/manage/admin/connector/file/upload",
{ method: "POST", body: formData }
);
const responseJson = await response.json();
if (!response.ok) {
setPopupWithExpiration({
message: `Unable to upload files - ${responseJson.detail}`,
type: "error",
});
return;
}
const filePaths = responseJson.file_paths as string[];
const [connectorErrorMsg, connector] =
await createConnector<FileConfig>({
name: "FileConnector-" + Date.now(),
source: "file",
input_type: "load_state",
connector_specific_config: {
file_locations: filePaths,
},
refresh_freq: null,
disabled: false,
});
if (connectorErrorMsg || !connector) {
setPopupWithExpiration({
message: `Unable to create connector - ${connectorErrorMsg}`,
type: "error",
});
return;
}
const credentialResponse = await linkCredential(
connector.id,
0
);
if (credentialResponse.detail) {
setPopupWithExpiration({
message: `Unable to link connector to credential - ${credentialResponse.detail}`,
type: "error",
});
return;
}
const runConnectorErrorMsg = await runConnector(connector.id, [
0,
]);
if (runConnectorErrorMsg) {
setPopupWithExpiration({
message: `Unable to run connector - ${runConnectorErrorMsg}`,
type: "error",
});
return;
}
mutate("/api/manage/admin/connector/indexing-status");
setSelectedFiles([]);
setPopupWithExpiration({
type: "success",
message: "Successfully uploaded files!",
});
};
setFilesAreUploading(true);
try {
await uploadCreateAndTriggerConnector();
} catch (e) {
console.log("Failed to index filels: ", e);
}
setFilesAreUploading(false);
}}
>
Upload!
</Button>
</div>
</div>
{inProgressFileIndexingStatuses.length > 0 && (
<>
<h2 className="font-bold mb-2 mt-6 ml-auto mr-auto">
In Progress File Indexing
</h2>
<BasicTable
columns={COLUMNS}
data={inProgressFileIndexingStatuses.map(
(connectorIndexingStatus) => {
return {
fileNames:
connectorIndexingStatus.connector.connector_specific_config.file_locations
.map(getNameFromPath)
.join(", "),
status: "In Progress",
};
}
)}
/>
</>
)}
{successfulFileIndexingStatuses.length > 0 && (
<>
<h2 className="font-bold mb-2 mt-6 ml-auto mr-auto">
Successful File Indexing
</h2>
<BasicTable
columns={COLUMNS}
data={successfulFileIndexingStatuses.map(
(connectorIndexingStatus) => {
return {
fileNames:
connectorIndexingStatus.connector.connector_specific_config.file_locations
.map(getNameFromPath)
.join(", "),
status: (
<div className="text-emerald-600 flex">
<CheckCircle className="my-auto mr-1" size="18" /> Success
</div>
),
};
}
)}
/>
</>
)}
{failedFileIndexingStatuses.length > 0 && (
<>
<h2 className="font-bold mb-2 mt-6 ml-auto mr-auto">
Failed File Indexing
</h2>
<p className="text-sm mb-3">
The following files failed to be indexed. Please contact an
administrator to resolve this issue.
</p>
<BasicTable
columns={COLUMNS}
data={failedFileIndexingStatuses.map((connectorIndexingStatus) => {
return {
fileNames:
connectorIndexingStatus.connector.connector_specific_config.file_locations
.map(getNameFromPath)
.join(", "),
status: (
<div className="text-red-600 flex">
<XCircle className="my-auto mr-1" size="18" /> Failed
</div>
),
};
})}
/>
</>
)}
</div>
);
}

@ -8,6 +8,7 @@ import {
SlackIcon,
KeyIcon,
ConfluenceIcon,
FileIcon,
} from "@/components/icons/icons";
import { DISABLE_AUTH } from "@/lib/constants";
import { getCurrentUserSS } from "@/lib/userSS";
@ -62,15 +63,6 @@ export default async function AdminLayout({
),
link: "/admin/connectors/slack",
},
{
name: (
<div className="flex">
<GlobeIcon size="16" />
<div className="ml-1">Web</div>
</div>
),
link: "/admin/connectors/web",
},
{
name: (
<div className="flex">
@ -98,6 +90,24 @@ export default async function AdminLayout({
),
link: "/admin/connectors/confluence",
},
{
name: (
<div className="flex">
<GlobeIcon size="16" />
<div className="ml-1">Web</div>
</div>
),
link: "/admin/connectors/web",
},
{
name: (
<div className="flex">
<FileIcon size="16" />
<div className="ml-1">File</div>
</div>
),
link: "/admin/connectors/file",
},
],
},
{

@ -3,6 +3,7 @@ interface Props {
children: JSX.Element | string;
disabled?: boolean;
fullWidth?: boolean;
className?: string;
}
export const Button = ({
@ -10,6 +11,7 @@ export const Button = ({
children,
disabled = false,
fullWidth = false,
className = "",
}: Props) => {
return (
<button
@ -17,9 +19,11 @@ export const Button = ({
"group relative " +
(fullWidth ? "w-full " : "") +
"py-1 px-2 border border-transparent text-sm " +
"font-medium rounded-md text-white bg-red-800 " +
"hover:bg-red-900 focus:outline-none focus:ring-2 " +
"focus:ring-offset-2 focus:ring-red-500 mx-auto"
"font-medium rounded-md text-white " +
"focus:outline-none focus:ring-2 " +
"focus:ring-offset-2 focus:ring-red-500 mx-auto " +
(disabled ? "bg-gray-700 " : "bg-red-800 hover:bg-red-900 ") +
className
}
onClick={onClick}
disabled={disabled}

@ -0,0 +1,9 @@
import "./spinner.css";
export const Spinner = () => {
return (
<div className="fixed top-0 left-0 z-50 w-screen h-screen bg-black bg-opacity-50 flex items-center justify-center">
<div className="loader ease-linear rounded-full border-8 border-t-8 border-gray-200 h-8 w-8"></div>
</div>
);
};

@ -11,7 +11,7 @@ import {
Plug,
} from "@phosphor-icons/react";
import { SiConfluence, SiGithub, SiGoogledrive, SiSlack } from "react-icons/si";
import { FaGlobe } from "react-icons/fa";
import { FaFile, FaGlobe } from "react-icons/fa";
interface IconProps {
size?: string;
@ -76,6 +76,13 @@ export const GlobeIcon = ({
return <FaGlobe size={size} className={className} />;
};
export const FileIcon = ({
size = "16",
className = defaultTailwindCSS,
}: IconProps) => {
return <FaFile size={size} className={className} />;
};
export const SlackIcon = ({
size = "16",
className = defaultTailwindCSS,

@ -10,6 +10,7 @@ const sources: Source[] = [
{ displayName: "Confluence", internalName: "confluence" },
{ displayName: "Github PRs", internalName: "github" },
{ displayName: "Web", internalName: "web" },
{ displayName: "File", internalName: "file" },
];
interface SourceSelectorProps {

@ -131,7 +131,10 @@ export const SearchResultsDisplay: React.FC<SearchResultsDisplayProps> = ({
className="text-sm border-b border-gray-800 mb-3"
>
<a
className="rounded-lg flex font-bold"
className={
"rounded-lg flex font-bold " +
(doc.link ? "" : "pointer-events-none")
}
href={doc.link}
target="_blank"
rel="noopener noreferrer"

@ -1,6 +1,7 @@
import { ValidSources } from "@/lib/types";
import {
ConfluenceIcon,
FileIcon,
GithubIcon,
GlobeIcon,
GoogleDriveIcon,
@ -21,6 +22,12 @@ export const getSourceMetadata = (sourceType: ValidSources): SourceMetadata => {
displayName: "Web",
adminPageLink: "/admin/connectors/web",
};
case "file":
return {
icon: FileIcon,
displayName: "File",
adminPageLink: "/admin/connectors/file",
};
case "slack":
return {
icon: SlackIcon,

@ -0,0 +1,23 @@
.loader {
border-top-color: #2876aa;
-webkit-animation: spinner 1.5s linear infinite;
animation: spinner 1.5s linear infinite;
}
@-webkit-keyframes spinner {
0% {
-webkit-transform: rotate(0deg);
}
100% {
-webkit-transform: rotate(360deg);
}
}
@keyframes spinner {
0% {
transform: rotate(0deg);
}
100% {
transform: rotate(360deg);
}
}

@ -1,8 +1,18 @@
import { Connector, ConnectorBase } from "./types";
async function handleResponse(
response: Response
): Promise<[string | null, any]> {
const responseJson = await response.json();
if (response.ok) {
return [null, responseJson];
}
return [responseJson.detail, null];
}
export async function createConnector<T>(
connector: ConnectorBase<T>
): Promise<Connector<T>> {
): Promise<[string | null, Connector<T> | null]> {
const response = await fetch(`/api/manage/admin/connector`, {
method: "POST",
headers: {
@ -10,7 +20,7 @@ export async function createConnector<T>(
},
body: JSON.stringify(connector),
});
return response.json();
return handleResponse(response);
}
export async function updateConnector<T>(
@ -23,7 +33,7 @@ export async function updateConnector<T>(
},
body: JSON.stringify(connector),
});
return response.json();
return await response.json();
}
export async function deleteConnector<T>(
@ -35,5 +45,20 @@ export async function deleteConnector<T>(
"Content-Type": "application/json",
},
});
return response.json();
return await response.json();
}
export async function runConnector(
connectorId: number,
credentialIds: number[] | null = null
): Promise<string | null> {
const response = await fetch("/api/manage/admin/connector/run-once", {
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify({ connector_id: connectorId, credentialIds }),
});
if (!response.ok) {
return (await response.json()).detail;
}
return null;
}

@ -8,7 +8,7 @@ export async function deleteCredential<T>(credentialId: number) {
return response.json();
}
export async function linkCredential<T>(
export async function linkCredential(
connectorId: number,
credentialId: number
) {

@ -12,7 +12,8 @@ export type ValidSources =
| "github"
| "slack"
| "google_drive"
| "confluence";
| "confluence"
| "file";
export type ValidInputTypes = "load_state" | "poll" | "event";
// CONNECTORS
@ -21,7 +22,7 @@ export interface ConnectorBase<T> {
input_type: ValidInputTypes;
source: ValidSources;
connector_specific_config: T;
refresh_freq: number;
refresh_freq: number | null;
disabled: boolean;
}
@ -49,6 +50,10 @@ export interface SlackConfig {
workspace: string;
}
export interface FileConfig {
file_locations: string[];
}
export interface ConnectorIndexingStatus<T> {
connector: Connector<T>;
public_doc: boolean;