From f20563c9bcc7b8ba5116cf4facbcc7a20c81d1d0 Mon Sep 17 00:00:00 2001
From: Chris Weaver <25087905+Weves@users.noreply.github.com>
Date: Fri, 9 Jun 2023 21:28:50 -0700
Subject: [PATCH] File connector (#93)

* Initial backend changes for file connector

* Add another background job to clean up files

* UI + tweaks for backend
---
 backend/Dockerfile.background                    |   4 +-
 backend/danswer/background/file_deletion.py      |   6 +
 backend/danswer/background/run_all.sh            |  11 +
 backend/danswer/background/update.py             |   7 +-
 backend/danswer/background/utils.py              |  21 ++
 backend/danswer/configs/app_configs.py           |   4 +-
 backend/danswer/configs/constants.py             |   1 +
 backend/danswer/connectors/factory.py            |   2 +
 backend/danswer/connectors/file/connector.py     | 103 +++++++
 backend/danswer/connectors/file/utils.py         |  65 +++++
 backend/danswer/db/index_attempt.py              |   6 +-
 backend/danswer/server/manage.py                 |  37 ++-
 backend/danswer/server/models.py                 |   4 +
 backend/requirements/default.txt                 |   1 +
 backend/supervisord.conf                         |  16 ++
 deployment/docker-compose.dev.yml                |   5 +
 deployment/docker-compose.prod.yml               |   3 +
 web/package-lock.json                            |  59 ++++
 web/package.json                                 |   1 +
 .../app/admin/connectors/file/FileUpload.tsx     |  58 ++++
 web/src/app/admin/connectors/file/page.tsx       | 262 ++++++++++++++++++
 web/src/app/admin/layout.tsx                     |  28 +-
 web/src/components/Button.tsx                    |  10 +-
 web/src/components/Spinner.tsx                   |   9 +
 web/src/components/icons/icons.tsx               |   9 +-
 web/src/components/search/Filters.tsx            |   1 +
 .../search/SearchResultsDisplay.tsx              |   5 +-
 web/src/components/source.tsx                    |   7 +
 web/src/components/spinner.css                   |  23 ++
 web/src/lib/connector.ts                         |  33 ++-
 web/src/lib/credential.ts                        |   2 +-
 web/src/lib/types.ts                             |   9 +-
 32 files changed, 774 insertions(+), 38 deletions(-)
 create mode 100644 backend/danswer/background/file_deletion.py
 create mode 100644 backend/danswer/background/run_all.sh
 create mode 100644 backend/danswer/background/utils.py
 create mode 100644 backend/danswer/connectors/file/connector.py
 create mode 100644 backend/danswer/connectors/file/utils.py
 create mode 100644 backend/supervisord.conf
 create mode 100644 web/src/app/admin/connectors/file/FileUpload.tsx
 create mode 100644 web/src/app/admin/connectors/file/page.tsx
 create mode 100644 web/src/components/Spinner.tsx
 create mode 100644 web/src/components/spinner.css

diff --git a/backend/Dockerfile.background b/backend/Dockerfile.background
index ebd28293a..5cc26d903 100644
--- a/backend/Dockerfile.background
+++ b/backend/Dockerfile.background
@@ -3,6 +3,7 @@ FROM python:3.11-slim-bullseye
 RUN apt-get update \
     && apt-get install -y git cmake pkg-config libprotobuf-c-dev protobuf-compiler \
     libprotobuf-dev libgoogle-perftools-dev libpq-dev build-essential cron curl \
+    supervisor \
     && rm -rf /var/lib/apt/lists/*
 
 COPY ./requirements/default.txt /tmp/requirements.txt
@@ -12,6 +13,7 @@ RUN playwright install-deps
 
 WORKDIR /app
 COPY ./danswer /app/danswer
+COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf
 
 ENV PYTHONPATH /app
-CMD ["python3", "danswer/background/update.py"]
+CMD ["/usr/bin/supervisord"]

diff --git a/backend/danswer/background/file_deletion.py b/backend/danswer/background/file_deletion.py
new file mode 100644
index 000000000..fe050d940
--- /dev/null
+++ b/backend/danswer/background/file_deletion.py
@@ -0,0 +1,6 @@
+from danswer.background.utils import interval_run_job
+from danswer.connectors.file.utils import clean_old_temp_files
+
+
+if __name__ == "__main__":
+    interval_run_job(clean_old_temp_files, 30 * 60)  # run every 30 minutes

diff --git a/backend/danswer/background/run_all.sh b/backend/danswer/background/run_all.sh
new file mode 100644
index 000000000..67728604b
--- /dev/null
+++ b/backend/danswer/background/run_all.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+
+python danswer/background/update.py &
+
+python danswer/background/file_deletion.py &
+
+# Wait for any process to exit
+wait -n
+
+# Exit with status of process that exited first
+exit $?

diff --git a/backend/danswer/background/update.py b/backend/danswer/background/update.py
index 23f2d4bf1..fc42f1dc7 100755
--- a/backend/danswer/background/update.py
+++ b/backend/danswer/background/update.py
@@ -10,7 +10,7 @@ from danswer.db.credentials import backend_update_credential_json
 from danswer.db.engine import build_engine
 from danswer.db.engine import get_db_current_time
 from danswer.db.index_attempt import create_index_attempt
-from danswer.db.index_attempt import get_incomplete_index_attempts
+from danswer.db.index_attempt import get_inprogress_index_attempts
 from danswer.db.index_attempt import get_last_finished_attempt
 from danswer.db.index_attempt import get_not_started_index_attempts
 from danswer.db.index_attempt import mark_attempt_failed
@@ -42,7 +42,7 @@ def should_create_new_indexing(
 def create_indexing_jobs(db_session: Session) -> None:
     connectors = fetch_connectors(db_session, disabled_status=False)
     for connector in connectors:
-        in_progress_indexing_attempts = get_incomplete_index_attempts(
+        in_progress_indexing_attempts = get_inprogress_index_attempts(
             connector.id, db_session
         )
         if in_progress_indexing_attempts:
@@ -50,6 +50,9 @@ def create_indexing_jobs(db_session: Session) -> None:
 
         # Currently single threaded so any still in-progress must have errored
         for attempt in in_progress_indexing_attempts:
+            logger.warning(
+                f"Marking in-progress attempt 'connector: {attempt.connector_id}, credential: {attempt.credential_id}' as failed"
+            )
             mark_attempt_failed(attempt, db_session)
 
         last_finished_indexing_attempt = get_last_finished_attempt(

diff --git a/backend/danswer/background/utils.py b/backend/danswer/background/utils.py
new file mode 100644
index 000000000..9a3079b32
--- /dev/null
+++ b/backend/danswer/background/utils.py
@@ -0,0 +1,21 @@
+import time
+from collections.abc import Callable
+from typing import Any
+
+from danswer.utils.logging import setup_logger
+
+
+logger = setup_logger()
+
+
+def interval_run_job(job: Callable[[], Any], delay: int | float) -> None:
+    while True:
+        start = time.time()
+        logger.info(f"Running '{job.__name__}', current time: {time.ctime(start)}")
+        try:
+            job()
+        except Exception as e:
+            logger.exception(f"Failed to run job '{job.__name__}' due to {e}")
+        sleep_time = delay - (time.time() - start)
+        if sleep_time > 0:
+            time.sleep(sleep_time)

diff --git a/backend/danswer/configs/app_configs.py b/backend/danswer/configs/app_configs.py
index 21cee4b75..68513f2e8 100644
--- a/backend/danswer/configs/app_configs.py
+++ b/backend/danswer/configs/app_configs.py
@@ -83,7 +83,9 @@ POSTGRES_DB = os.environ.get("POSTGRES_DB", "postgres")
 # Connector Configs
 #####
 GOOGLE_DRIVE_INCLUDE_SHARED = False
-
+FILE_CONNECTOR_TMP_STORAGE_PATH = os.environ.get(
+    "FILE_CONNECTOR_TMP_STORAGE_PATH", "/home/file_connector_storage"
+)
 
 #####
 # Query Configs

diff --git a/backend/danswer/configs/constants.py b/backend/danswer/configs/constants.py
index 0c448ea1f..60540be55 100644
--- a/backend/danswer/configs/constants.py
+++ b/backend/danswer/configs/constants.py
@@ -22,3 +22,4 @@ class DocumentSource(str, Enum):
     GOOGLE_DRIVE = "google_drive"
     GITHUB = "github"
     CONFLUENCE = "confluence"
+    FILE = "file"
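A note on the pattern above: `interval_run_job` is the one scheduling primitive both background entry points share. `update.py` and `file_deletion.py` each hand it a zero-argument callable plus a period in seconds, and the loop absorbs exceptions so a single failing run never kills the process. A minimal sketch of wiring up another job the same way (the job name and 10-minute cadence here are illustrative, not part of this patch):

    # sketch only - a hypothetical third background job using the same loop
    from danswer.background.utils import interval_run_job


    def report_heartbeat() -> None:
        # A real job would do its unit of work here; exceptions are caught
        # and logged by interval_run_job, so the loop survives failures.
        print("background worker is alive")


    if __name__ == "__main__":
        # The loop sleeps for the remainder of the interval after each run,
        # so the cadence stays roughly fixed even when the job is slow.
        interval_run_job(report_heartbeat, 10 * 60)  # every 10 minutes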
diff --git a/backend/danswer/connectors/factory.py b/backend/danswer/connectors/factory.py
index 447555a0a..f6d30a5db 100644
--- a/backend/danswer/connectors/factory.py
+++ b/backend/danswer/connectors/factory.py
@@ -3,6 +3,7 @@ from typing import Type
 
 from danswer.configs.constants import DocumentSource
 from danswer.connectors.confluence.connector import ConfluenceConnector
+from danswer.connectors.file.connector import LocalFileConnector
 from danswer.connectors.github.connector import GithubConnector
 from danswer.connectors.google_drive.connector import GoogleDriveConnector
 from danswer.connectors.interfaces import BaseConnector
@@ -27,6 +28,7 @@ def identify_connector_class(
 ) -> Type[BaseConnector]:
     connector_map = {
         DocumentSource.WEB: WebConnector,
+        DocumentSource.FILE: LocalFileConnector,
         DocumentSource.SLACK: {
             InputType.LOAD_STATE: SlackLoadConnector,
             InputType.POLL: SlackPollConnector,

diff --git a/backend/danswer/connectors/file/connector.py b/backend/danswer/connectors/file/connector.py
new file mode 100644
index 000000000..bc8bd1c5a
--- /dev/null
+++ b/backend/danswer/connectors/file/connector.py
@@ -0,0 +1,103 @@
+import json
+import os
+import zipfile
+from collections.abc import Generator
+from enum import Enum
+from pathlib import Path
+from typing import Any
+from typing import IO
+
+from danswer.configs.app_configs import INDEX_BATCH_SIZE
+from danswer.configs.constants import DocumentSource
+from danswer.connectors.file.utils import check_file_ext_is_valid
+from danswer.connectors.file.utils import get_file_ext
+from danswer.connectors.interfaces import GenerateDocumentsOutput
+from danswer.connectors.interfaces import LoadConnector
+from danswer.connectors.models import Document
+from danswer.connectors.models import Section
+from danswer.utils.logging import setup_logger
+
+
+logger = setup_logger()
+
+_METADATA_FLAG = "#DANSWER_METADATA="
+
+
+def _get_files_from_zip(
+    zip_location: str | Path,
+) -> Generator[tuple[str, IO[Any]], None, None]:
+    with zipfile.ZipFile(zip_location, "r") as zip_file:
+        for file_name in zip_file.namelist():
+            with zip_file.open(file_name, "r") as file:
+                yield os.path.basename(file_name), file
+
+
+def _open_files_at_location(
+    file_path: str | Path,
+) -> Generator[tuple[str, IO[Any]], Any, None]:
+    extension = get_file_ext(file_path)
+
+    if extension == ".zip":
+        yield from _get_files_from_zip(file_path)
+    elif extension == ".txt":
+        with open(file_path, "r") as file:
+            yield os.path.basename(file_path), file
+    else:
+        logger.warning(f"Skipping file '{file_path}' with extension '{extension}'")
+
+
+def _process_file(file_name: str, file: IO[Any]) -> list[Document]:
+    extension = get_file_ext(file_name)
+    if not check_file_ext_is_valid(extension):
+        logger.warning(f"Skipping file '{file_name}' with extension '{extension}'")
+        return []
+
+    metadata = {}
+    file_content_raw = ""
+    for ind, line in enumerate(file):
+        if isinstance(line, bytes):
+            line = line.decode("utf-8")
+        line = str(line)
+
+        if ind == 0 and line.startswith(_METADATA_FLAG):
+            metadata = json.loads(line.replace(_METADATA_FLAG, "", 1).strip())
+        else:
+            file_content_raw += line
+
+    return [
+        Document(
+            id=file_name,
+            sections=[Section(link=metadata.get("link", ""), text=file_content_raw)],
+            source=DocumentSource.FILE,
+            semantic_identifier=file_name,
+            metadata={},
+        )
+    ]
+
+
+class LocalFileConnector(LoadConnector):
+    def __init__(
+        self,
+        file_locations: list[Path | str],
+        batch_size: int = INDEX_BATCH_SIZE,
+    ) -> None:
+        self.file_locations = [Path(file_location) for file_location in file_locations]
+        self.batch_size = batch_size
+
+    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
+        pass
+
+    def load_from_state(self) -> GenerateDocumentsOutput:
+        documents: list[Document] = []
+        for file_location in self.file_locations:
+            files = _open_files_at_location(file_location)
+
+            for file_name, file in files:
+                documents.extend(_process_file(file_name, file))
+
+                if len(documents) >= self.batch_size:
+                    yield documents
+                    documents = []
+
+        if documents:
+            yield documents
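The `#DANSWER_METADATA=` convention above deserves a concrete illustration: if the first line of an uploaded file starts with that flag, the remainder of the line is parsed as JSON and its `link` field becomes the `Section` link, while every subsequent line is accumulated into the document text. A small sketch of that parsing (the file contents and URL are invented for illustration):

    # sketch only - demonstrates the first-line metadata convention
    import json

    sample = (
        '#DANSWER_METADATA={"link": "https://example.com/docs/setup"}\n'
        "Step 1: install the dependencies.\n"
        "Step 2: run the server.\n"
    )

    first_line, _, body = sample.partition("\n")
    metadata = {}
    if first_line.startswith("#DANSWER_METADATA="):
        # same replace-then-parse logic as _process_file above
        metadata = json.loads(first_line.replace("#DANSWER_METADATA=", "", 1).strip())

    print(metadata.get("link", ""))  # https://example.com/docs/setup
    print(body)                      # the searchable document text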
diff --git a/backend/danswer/connectors/file/utils.py b/backend/danswer/connectors/file/utils.py
new file mode 100644
index 000000000..df5a2e1d9
--- /dev/null
+++ b/backend/danswer/connectors/file/utils.py
@@ -0,0 +1,65 @@
+import os
+import shutil
+import time
+import uuid
+from pathlib import Path
+from typing import Any
+from typing import IO
+
+from danswer.configs.app_configs import FILE_CONNECTOR_TMP_STORAGE_PATH
+
+_FILE_AGE_CLEANUP_THRESHOLD_HOURS = 24 * 7  # 1 week
+_VALID_FILE_EXTENSIONS = [".txt", ".zip"]
+
+
+def get_file_ext(file_path_or_name: str | Path) -> str:
+    _, extension = os.path.splitext(file_path_or_name)
+    return extension
+
+
+def check_file_ext_is_valid(ext: str) -> bool:
+    return ext in _VALID_FILE_EXTENSIONS
+
+
+def write_temp_files(
+    files: list[tuple[str, IO[Any]]],
+    base_path: Path | str = FILE_CONNECTOR_TMP_STORAGE_PATH,
+) -> list[str]:
+    """Writes temporary files to disk and returns their paths
+
+    NOTE: need to pass in (file_name, File) tuples since the SpooledTemporaryFile
+    exposed by FastAPI's `UploadFile` class does not include a name.
+    """
+    file_location = Path(base_path) / str(uuid.uuid4())
+    os.makedirs(file_location, exist_ok=True)
+
+    file_paths: list[str] = []
+    for file_name, file in files:
+        extension = get_file_ext(file_name)
+        if not check_file_ext_is_valid(extension):
+            raise ValueError(
+                f"Invalid file extension for file: '{file_name}'. Must be one of {_VALID_FILE_EXTENSIONS}"
+            )
+
+        file_path = file_location / file_name
+        with open(file_path, "wb") as buffer:
+            # copy file content from uploaded file to the newly created file
+            shutil.copyfileobj(file, buffer)
+
+        file_paths.append(str(file_path.absolute()))
+
+    return file_paths
+
+
+def file_age_in_hours(filepath: str | Path) -> float:
+    return (time.time() - os.path.getmtime(filepath)) / (60 * 60)
+
+
+def clean_old_temp_files(
+    age_threshold_in_hours: float | int = _FILE_AGE_CLEANUP_THRESHOLD_HOURS,
+    base_path: Path | str = FILE_CONNECTOR_TMP_STORAGE_PATH,
+) -> None:
+    os.makedirs(base_path, exist_ok=True)
+    for file in os.listdir(base_path):
+        # os.listdir returns names relative to base_path, so join before
+        # stat-ing; entries are the uuid directories from write_temp_files
+        full_path = Path(base_path) / file
+        if file_age_in_hours(full_path) > age_threshold_in_hours:
+            if full_path.is_dir():
+                shutil.rmtree(full_path)
+            else:
+                os.remove(full_path)
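For a feel of the lifecycle these helpers implement: `write_temp_files` drops each upload batch into a fresh UUID-named directory under `FILE_CONNECTOR_TMP_STORAGE_PATH` and returns absolute paths (which end up in the connector's `file_locations` config), while `clean_old_temp_files` is what the new `file_deletion` job runs every 30 minutes to purge anything older than a week. A quick sketch of the round trip (the override path and file name are made up for illustration):

    # sketch only - exercising the temp-file helpers
    import io

    from danswer.connectors.file.utils import clean_old_temp_files, write_temp_files

    paths = write_temp_files(
        [("notes.txt", io.BytesIO(b"#DANSWER_METADATA={}\nhello world\n"))],
        base_path="/tmp/file_connector_demo",  # hypothetical override of the default
    )
    print(paths)  # e.g. ['/tmp/file_connector_demo/<uuid>/notes.txt']

    # the file_deletion job calls this with the defaults; a threshold of 0
    # here just forces an immediate cleanup for demonstration purposes
    clean_old_temp_files(age_threshold_in_hours=0, base_path="/tmp/file_connector_demo")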
diff --git a/backend/danswer/db/index_attempt.py b/backend/danswer/db/index_attempt.py
index 81ed5c565..03726087a 100644
--- a/backend/danswer/db/index_attempt.py
+++ b/backend/danswer/db/index_attempt.py
@@ -25,16 +25,14 @@ def create_index_attempt(
     return new_attempt.id
 
 
-def get_incomplete_index_attempts(
+def get_inprogress_index_attempts(
     connector_id: int | None,
     db_session: Session,
 ) -> list[IndexAttempt]:
     stmt = select(IndexAttempt)
     if connector_id is not None:
         stmt = stmt.where(IndexAttempt.connector_id == connector_id)
-    stmt = stmt.where(
-        IndexAttempt.status.notin_([IndexingStatus.SUCCESS, IndexingStatus.FAILED])
-    )
+    stmt = stmt.where(IndexAttempt.status == IndexingStatus.IN_PROGRESS)
 
     incomplete_attempts = db_session.scalars(stmt)
     return list(incomplete_attempts.all())

diff --git a/backend/danswer/server/manage.py b/backend/danswer/server/manage.py
index 03188246d..d4548d3ee 100644
--- a/backend/danswer/server/manage.py
+++ b/backend/danswer/server/manage.py
@@ -7,6 +7,7 @@ from danswer.auth.users import current_user
 from danswer.configs.app_configs import MASK_CREDENTIAL_PREFIX
 from danswer.configs.constants import DocumentSource
 from danswer.configs.constants import OPENAI_API_KEY_STORAGE_KEY
+from danswer.connectors.file.utils import write_temp_files
 from danswer.connectors.google_drive.connector_auth import DB_CREDENTIALS_DICT_KEY
 from danswer.connectors.google_drive.connector_auth import get_auth_url
 from danswer.connectors.google_drive.connector_auth import get_drive_tokens
@@ -51,6 +52,7 @@ from danswer.server.models import ConnectorIndexingStatus
 from danswer.server.models import ConnectorSnapshot
 from danswer.server.models import CredentialBase
 from danswer.server.models import CredentialSnapshot
+from danswer.server.models import FileUploadResponse
 from danswer.server.models import GDriveCallback
 from danswer.server.models import GoogleAppCredentials
 from danswer.server.models import IndexAttemptSnapshot
@@ -65,6 +67,7 @@ from fastapi import Depends
 from fastapi import HTTPException
 from fastapi import Request
 from fastapi import Response
+from fastapi import UploadFile
 from fastapi_users.db import SQLAlchemyUserDatabase
 from sqlalchemy.ext.asyncio import AsyncSession
 from sqlalchemy.orm import Session
@@ -153,6 +156,22 @@ def admin_google_drive_auth(
     return AuthUrl(auth_url=get_auth_url(credential_id=int(credential_id)))
 
 
+@router.post("/admin/connector/file/upload")
+def upload_files(
+    files: list[UploadFile], _: User = Depends(current_admin_user)
+) -> FileUploadResponse:
+    for file in files:
+        if not file.filename:
+            raise HTTPException(status_code=400, detail="File name cannot be empty")
+    try:
+        file_paths = write_temp_files(
+            [(cast(str, file.filename), file.file) for file in files]
+        )
+    except ValueError as e:
+        raise HTTPException(status_code=400, detail=str(e))
+    return FileUploadResponse(file_paths=file_paths)
+
+
 @router.get("/admin/latest-index-attempt")
 def list_all_index_attempts(
     _: User = Depends(current_admin_user),
@@ -344,9 +363,9 @@ def connector_run_once(
             run_info.connector_id, db_session
         )
     except ValueError:
-        return StatusResponse(
-            success=False,
-            message=f"Connector by id {connector_id} does not exist.",
+        raise HTTPException(
+            status_code=404,
+            detail=f"Connector by id {connector_id} does not exist.",
         )
 
     if not specified_credential_ids:
@@ -355,15 +374,15 @@ def connector_run_once(
         if set(specified_credential_ids).issubset(set(possible_credential_ids)):
             credential_ids = specified_credential_ids
         else:
-            return StatusResponse(
-                success=False,
-                message=f"Not all specified credentials are associated with connector",
+            raise HTTPException(
+                status_code=400,
+                detail="Not all specified credentials are associated with connector",
             )
 
     if not credential_ids:
-        return StatusResponse(
-            success=False,
-            message=f"Connector has no valid credentials, cannot create index attempts.",
+        raise HTTPException(
+            status_code=400,
+            detail="Connector has no valid credentials, cannot create index attempts.",
         )
 
     index_attempt_ids = [
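The new upload route accepts standard multipart form data and returns the temp paths that a file connector's `connector_specific_config.file_locations` should point at. A hedged sketch of exercising it from a script, assuming the deployment exposes the manage router under the same `/api/manage` prefix the indexing-status endpoint uses, and that auth is disabled or handled separately (host and port are assumptions too):

    # sketch only - host, port, and prefix are assumptions about the deployment
    import requests

    with open("notes.txt", "rb") as f:
        resp = requests.post(
            "http://localhost:3000/api/manage/admin/connector/file/upload",
            files=[("files", ("notes.txt", f, "text/plain"))],
        )
    resp.raise_for_status()
    print(resp.json()["file_paths"])  # feed these into connector_specific_config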
diff --git a/backend/danswer/server/models.py b/backend/danswer/server/models.py
index 3e19bd49e..265c5800a 100644
--- a/backend/danswer/server/models.py
+++ b/backend/danswer/server/models.py
@@ -49,6 +49,10 @@ class GoogleAppCredentials(BaseModel):
     web: GoogleAppWebCredentials
 
 
+class FileUploadResponse(BaseModel):
+    file_paths: list[str]
+
+
 class HealthCheckResponse(BaseModel):
     status: Literal["ok"]

diff --git a/backend/requirements/default.txt b/backend/requirements/default.txt
index f71e542ec..9896af788 100644
--- a/backend/requirements/default.txt
+++ b/backend/requirements/default.txt
@@ -22,6 +22,7 @@ pydantic==1.10.7
 PyGithub==1.58.2
 PyPDF2==3.0.1
 pytest-playwright==0.3.2
+python-multipart==0.0.6
 qdrant-client==1.2.0
 requests==2.28.2
 rfc3986==1.5.0

diff --git a/backend/supervisord.conf b/backend/supervisord.conf
new file mode 100644
index 000000000..0ae893fe0
--- /dev/null
+++ b/backend/supervisord.conf
@@ -0,0 +1,16 @@
+[supervisord]
+nodaemon=true
+logfile=/dev/null
+logfile_maxbytes=0
+
+[program:indexing]
+command=python danswer/background/update.py
+stdout_logfile=/var/log/supervisor/update.log
+redirect_stderr=true
+autorestart=true
+
+[program:file_deletion]
+command=python danswer/background/file_deletion.py
+stdout_logfile=/var/log/supervisor/file_deletion.log
+redirect_stderr=true
+autorestart=true

diff --git a/deployment/docker-compose.dev.yml b/deployment/docker-compose.dev.yml
index 123eb878e..c48e5cfd9 100644
--- a/deployment/docker-compose.dev.yml
+++ b/deployment/docker-compose.dev.yml
@@ -21,6 +21,7 @@ services:
       - DISABLE_AUTH=True
     volumes:
       - local_dynamic_storage:/home/storage
+      - file_connector_tmp_storage:/home/file_connector_storage
   background:
     build:
       context: ../backend
@@ -34,8 +35,11 @@ services:
     environment:
       - POSTGRES_HOST=relational_db
       - QDRANT_HOST=vector_db
+      - TYPESENSE_HOST=search_engine
+      - TYPESENSE_API_KEY=${TYPESENSE_API_KEY:-local_dev_typesense}
     volumes:
       - local_dynamic_storage:/home/storage
+      - file_connector_tmp_storage:/home/file_connector_storage
   web_server:
     build:
       context: ../web
@@ -99,6 +103,7 @@ services:
           && while :; do sleep 6h & wait $${!}; nginx -s reload; done & nginx -g \"daemon off;\""
 volumes:
   local_dynamic_storage:
+  file_connector_tmp_storage:  # used to store files uploaded by the user
temporarily while we are indexing them db_volume: qdrant_volume: typesense_volume: diff --git a/deployment/docker-compose.prod.yml b/deployment/docker-compose.prod.yml index 1c499c33e..41ddf5383 100644 --- a/deployment/docker-compose.prod.yml +++ b/deployment/docker-compose.prod.yml @@ -18,6 +18,7 @@ services: - TYPESENSE_API_KEY=${TYPESENSE_API_KEY:-local_dev_typesense} volumes: - local_dynamic_storage:/home/storage + - file_connector_tmp_storage:/home/file_connector_storage background: build: context: ../backend @@ -33,6 +34,7 @@ services: - QDRANT_HOST=vector_db volumes: - local_dynamic_storage:/home/storage + - file_connector_tmp_storage:/home/file_connector_storage web_server: build: context: ../web @@ -98,6 +100,7 @@ services: entrypoint: "/bin/sh -c 'trap exit TERM; while :; do certbot renew; sleep 12h & wait $${!}; done;'" volumes: local_dynamic_storage: + file_connector_tmp_storage: # used to store files uploaded by the user temporarily while we are indexing them db_volume: qdrant_volume: typesense_volume: diff --git a/web/package-lock.json b/web/package-lock.json index f3e18d026..50265188b 100644 --- a/web/package-lock.json +++ b/web/package-lock.json @@ -22,6 +22,7 @@ "postcss": "^8.4.23", "react": "^18.2.0", "react-dom": "^18.2.0", + "react-dropzone": "^14.2.3", "react-icons": "^4.8.0", "swr": "^2.1.5", "tailwindcss": "^3.3.1", @@ -713,6 +714,14 @@ "resolved": "https://registry.npmjs.org/ast-types-flow/-/ast-types-flow-0.0.7.tgz", "integrity": "sha512-eBvWn1lvIApYMhzQMsu9ciLfkBY499mFZlNqG+/9WR7PVlroQw0vG30cOQQbaKz3sCEc44TAOu2ykzqXSNnwag==" }, + "node_modules/attr-accept": { + "version": "2.2.2", + "resolved": "https://registry.npmjs.org/attr-accept/-/attr-accept-2.2.2.tgz", + "integrity": "sha512-7prDjvt9HmqiZ0cl5CRjtS84sEyhsHP2coDkaZKRKVfCDo9s7iw7ChVmar78Gu9pC4SoR/28wFu/G5JJhTnqEg==", + "engines": { + "node": ">=4" + } + }, "node_modules/autoprefixer": { "version": "10.4.14", "resolved": "https://registry.npmjs.org/autoprefixer/-/autoprefixer-10.4.14.tgz", @@ -1744,6 +1753,17 @@ "node": "^10.12.0 || >=12.0.0" } }, + "node_modules/file-selector": { + "version": "0.6.0", + "resolved": "https://registry.npmjs.org/file-selector/-/file-selector-0.6.0.tgz", + "integrity": "sha512-QlZ5yJC0VxHxQQsQhXvBaC7VRJ2uaxTf+Tfpu4Z/OcVQJVpZO+DGU0rkoVW5ce2SccxugvpBJoMvUs59iILYdw==", + "dependencies": { + "tslib": "^2.4.0" + }, + "engines": { + "node": ">= 12" + } + }, "node_modules/fill-range": { "version": "7.0.1", "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.0.1.tgz", @@ -3267,6 +3287,22 @@ "react": "^18.2.0" } }, + "node_modules/react-dropzone": { + "version": "14.2.3", + "resolved": "https://registry.npmjs.org/react-dropzone/-/react-dropzone-14.2.3.tgz", + "integrity": "sha512-O3om8I+PkFKbxCukfIR3QAGftYXDZfOE2N1mr/7qebQJHs7U+/RSL/9xomJNpRg9kM5h9soQSdf0Gc7OHF5Fug==", + "dependencies": { + "attr-accept": "^2.2.2", + "file-selector": "^0.6.0", + "prop-types": "^15.8.1" + }, + "engines": { + "node": ">= 10.13" + }, + "peerDependencies": { + "react": ">= 16.8 || 18.0.0" + } + }, "node_modules/react-fast-compare": { "version": "2.0.4", "resolved": "https://registry.npmjs.org/react-fast-compare/-/react-fast-compare-2.0.4.tgz", @@ -4562,6 +4598,11 @@ "resolved": "https://registry.npmjs.org/ast-types-flow/-/ast-types-flow-0.0.7.tgz", "integrity": "sha512-eBvWn1lvIApYMhzQMsu9ciLfkBY499mFZlNqG+/9WR7PVlroQw0vG30cOQQbaKz3sCEc44TAOu2ykzqXSNnwag==" }, + "attr-accept": { + "version": "2.2.2", + "resolved": "https://registry.npmjs.org/attr-accept/-/attr-accept-2.2.2.tgz", + "integrity": 
"sha512-7prDjvt9HmqiZ0cl5CRjtS84sEyhsHP2coDkaZKRKVfCDo9s7iw7ChVmar78Gu9pC4SoR/28wFu/G5JJhTnqEg==" + }, "autoprefixer": { "version": "10.4.14", "resolved": "https://registry.npmjs.org/autoprefixer/-/autoprefixer-10.4.14.tgz", @@ -5311,6 +5352,14 @@ "flat-cache": "^3.0.4" } }, + "file-selector": { + "version": "0.6.0", + "resolved": "https://registry.npmjs.org/file-selector/-/file-selector-0.6.0.tgz", + "integrity": "sha512-QlZ5yJC0VxHxQQsQhXvBaC7VRJ2uaxTf+Tfpu4Z/OcVQJVpZO+DGU0rkoVW5ce2SccxugvpBJoMvUs59iILYdw==", + "requires": { + "tslib": "^2.4.0" + } + }, "fill-range": { "version": "7.0.1", "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.0.1.tgz", @@ -6315,6 +6364,16 @@ "scheduler": "^0.23.0" } }, + "react-dropzone": { + "version": "14.2.3", + "resolved": "https://registry.npmjs.org/react-dropzone/-/react-dropzone-14.2.3.tgz", + "integrity": "sha512-O3om8I+PkFKbxCukfIR3QAGftYXDZfOE2N1mr/7qebQJHs7U+/RSL/9xomJNpRg9kM5h9soQSdf0Gc7OHF5Fug==", + "requires": { + "attr-accept": "^2.2.2", + "file-selector": "^0.6.0", + "prop-types": "^15.8.1" + } + }, "react-fast-compare": { "version": "2.0.4", "resolved": "https://registry.npmjs.org/react-fast-compare/-/react-fast-compare-2.0.4.tgz", diff --git a/web/package.json b/web/package.json index d215334a1..099897503 100644 --- a/web/package.json +++ b/web/package.json @@ -23,6 +23,7 @@ "postcss": "^8.4.23", "react": "^18.2.0", "react-dom": "^18.2.0", + "react-dropzone": "^14.2.3", "react-icons": "^4.8.0", "swr": "^2.1.5", "tailwindcss": "^3.3.1", diff --git a/web/src/app/admin/connectors/file/FileUpload.tsx b/web/src/app/admin/connectors/file/FileUpload.tsx new file mode 100644 index 000000000..1d1f9fbd7 --- /dev/null +++ b/web/src/app/admin/connectors/file/FileUpload.tsx @@ -0,0 +1,58 @@ +// components/FileUpload.tsx +import { ChangeEvent, FC, useState } from "react"; +import React from "react"; +import Dropzone from "react-dropzone"; + +interface FileUploadProps { + selectedFiles: File[]; + setSelectedFiles: (files: File[]) => void; +} + +export const FileUpload: FC = ({ + selectedFiles, + setSelectedFiles, +}) => { + const [dragActive, setDragActive] = useState(false); + + return ( +
+    <div>
+      <Dropzone
+        onDrop={(acceptedFiles) => {
+          setSelectedFiles(acceptedFiles);
+          setDragActive(false);
+        }}
+        onDragLeave={() => setDragActive(false)}
+        onDragEnter={() => setDragActive(true)}
+      >
+        {({ getRootProps, getInputProps }) => (
+          <section>
+            <div {...getRootProps()}>
+              <input {...getInputProps()} />
+              <b>
+                Drag and drop some files here, or click to select files
+              </b>
+            </div>
+          </section>
+        )}
+      </Dropzone>
+
+      {selectedFiles.length > 0 && (
+        <div>
+          <b>Selected Files</b>
+          <ul>
+            {selectedFiles.map((file) => (
+              <li key={file.name}>
+                <p>{file.name}</p>
+              </li>
+            ))}
+          </ul>
+        </div>
+      )}
+    </div>
+  );
+};

diff --git a/web/src/app/admin/connectors/file/page.tsx b/web/src/app/admin/connectors/file/page.tsx
new file mode 100644
index 000000000..d88cdb87b
--- /dev/null
+++ b/web/src/app/admin/connectors/file/page.tsx
@@ -0,0 +1,262 @@
+"use client";
+
+import useSWR, { useSWRConfig } from "swr";
+
+import { FileIcon } from "@/components/icons/icons";
+import { fetcher } from "@/lib/fetcher";
+import { HealthCheckBanner } from "@/components/health/healthcheck";
+import { ConnectorIndexingStatus, FileConfig } from "@/lib/types";
+import { linkCredential } from "@/lib/credential";
+import { FileUpload } from "./FileUpload";
+import { useState } from "react";
+import { Button } from "@/components/Button";
+import { Popup, PopupSpec } from "@/components/admin/connectors/Popup";
+import { createConnector, runConnector } from "@/lib/connector";
+import { BasicTable } from "@/components/admin/connectors/BasicTable";
+import { CheckCircle, XCircle } from "@phosphor-icons/react";
+import { Spinner } from "@/components/Spinner";
+
+const COLUMNS = [
+  { header: "File names", key: "fileNames" },
+  { header: "Status", key: "status" },
+];
+
+const getNameFromPath = (path: string) => {
+  const pathParts = path.split("/");
+  return pathParts[pathParts.length - 1];
+};
+
+export default function File() {
+  const [selectedFiles, setSelectedFiles] = useState<File[]>([]);
+  const [filesAreUploading, setFilesAreUploading] = useState(false);
+  const [popup, setPopup] = useState<{
+    message: string;
+    type: "success" | "error";
+  } | null>(null);
+  const setPopupWithExpiration = (popupSpec: PopupSpec | null) => {
+    setPopup(popupSpec);
+    setTimeout(() => {
+      setPopup(null);
+    }, 4000);
+  };
+
+  const { mutate } = useSWRConfig();
+
+  const { data: connectorIndexingStatuses } = useSWR<
+    ConnectorIndexingStatus<FileConfig>[]
+  >("/api/manage/admin/connector/indexing-status", fetcher);
+
+  const fileIndexingStatuses: ConnectorIndexingStatus<FileConfig>[] =
+    connectorIndexingStatuses?.filter(
+      (connectorIndexingStatus) =>
+        connectorIndexingStatus.connector.source === "file"
+    ) ?? [];
+
+  const inProgressFileIndexingStatuses =
+    fileIndexingStatuses.filter(
+      (connectorIndexingStatus) =>
+        connectorIndexingStatus.last_status === "in_progress" ||
+        connectorIndexingStatus.last_status === "not_started"
+    ) ?? [];
+
+  const successfulFileIndexingStatuses = fileIndexingStatuses.filter(
+    (connectorIndexingStatus) =>
+      connectorIndexingStatus.last_status === "success"
+  );
+
+  const failedFileIndexingStatuses = fileIndexingStatuses.filter(
+    (connectorIndexingStatus) =>
+      connectorIndexingStatus.last_status === "failed"
+  );
+
+  return (
+    <div className="mx-auto container">
+      <div>
+        <HealthCheckBanner />
+      </div>
+      <div className="flex">
+        <FileIcon size={32} />
+        <h1 className="text-3xl font-bold pl-2">File</h1>
+      </div>
+      {popup && <Popup message={popup.message} type={popup.type} />}
+      {filesAreUploading && <Spinner />}
+      <h2 className="text-xl font-bold mb-2 mt-6">Upload Files</h2>
+      <p className="mb-2">
+        Specify files below, click the Upload button, and the contents of
+        these files will be searchable via Danswer!
+      </p>
+      <div className="mb-4">
+        <FileUpload
+          selectedFiles={selectedFiles}
+          setSelectedFiles={setSelectedFiles}
+        />
+      </div>
+      <div className="flex">
+        <Button
+          onClick={async () => {
+            setFilesAreUploading(true);
+            const formData = new FormData();
+            selectedFiles.forEach((file) => {
+              formData.append("files", file);
+            });
+            const response = await fetch(
+              "/api/manage/admin/connector/file/upload",
+              { method: "POST", body: formData }
+            );
+            const responseJson = await response.json();
+            if (!response.ok) {
+              setPopupWithExpiration({
+                message: `Unable to upload files - ${responseJson.detail}`,
+                type: "error",
+              });
+              setFilesAreUploading(false);
+              return;
+            }
+
+            const filePaths = responseJson.file_paths as string[];
+            const connector = await createConnector<FileConfig>({
+              name: `FileConnector-${Date.now()}`,
+              source: "file",
+              input_type: "load_state",
+              connector_specific_config: { file_locations: filePaths },
+              refresh_freq: null,
+              disabled: false,
+            });
+            await linkCredential(connector.id, 0);
+            await runConnector(connector.id, [0]);
+
+            mutate("/api/manage/admin/connector/indexing-status");
+            setSelectedFiles([]);
+            setPopupWithExpiration({
+              message: "Successfully uploaded files!",
+              type: "success",
+            });
+            setFilesAreUploading(false);
+          }}
+        >
+          Upload!
+        </Button>
+      </div>
+
+      {inProgressFileIndexingStatuses.length > 0 && (
+        <>
+          <h2 className="text-xl font-bold mb-2 mt-6">
+            In Progress File Indexing
+          </h2>
+          <BasicTable
+            columns={COLUMNS}
+            data={inProgressFileIndexingStatuses.map(
+              (connectorIndexingStatus) => {
+                return {
+                  fileNames:
+                    connectorIndexingStatus.connector.connector_specific_config.file_locations
+                      .map(getNameFromPath)
+                      .join(", "),
+                  status: "In Progress",
+                };
+              }
+            )}
+          />
+        </>
+      )}
+
+      {successfulFileIndexingStatuses.length > 0 && (
+        <>
+          <h2 className="text-xl font-bold mb-2 mt-6">
+            Successful File Indexing
+          </h2>
+          <BasicTable
+            columns={COLUMNS}
+            data={successfulFileIndexingStatuses.map(
+              (connectorIndexingStatus) => {
+                return {
+                  fileNames:
+                    connectorIndexingStatus.connector.connector_specific_config.file_locations
+                      .map(getNameFromPath)
+                      .join(", "),
+                  status: (
+                    <div className="text-emerald-600 flex">
+                      <CheckCircle className="my-auto mr-1" size="18" />
+                      Success
+                    </div>
+                  ),
+                };
+              }
+            )}
+          />
+        </>
+      )}
+
+      {failedFileIndexingStatuses.length > 0 && (
+        <>
+          <h2 className="text-xl font-bold mb-2 mt-6">
+            Failed File Indexing
+          </h2>
+          <p className="mb-2">
+            The following files failed to be indexed. Please contact an
+            administrator to resolve this issue.
+          </p>
+          <BasicTable
+            columns={COLUMNS}
+            data={failedFileIndexingStatuses.map((connectorIndexingStatus) => {
+              return {
+                fileNames:
+                  connectorIndexingStatus.connector.connector_specific_config.file_locations
+                    .map(getNameFromPath)
+                    .join(", "),
+                status: (
+                  <div className="text-red-600 flex">
+                    <XCircle className="my-auto mr-1" size="18" />
+                    Failed
+                  </div>
+                ),
+              };
+            })}
+          />
+        </>
+      )}
+    </div>
+  );
+}

diff --git a/web/src/app/admin/layout.tsx b/web/src/app/admin/layout.tsx
index daf7a81c8..658665a7a 100644
--- a/web/src/app/admin/layout.tsx
+++ b/web/src/app/admin/layout.tsx
@@ -8,6 +8,7 @@ import {
   SlackIcon,
   KeyIcon,
   ConfluenceIcon,
+  FileIcon,
 } from "@/components/icons/icons";
 import { DISABLE_AUTH } from "@/lib/constants";
 import { getCurrentUserSS } from "@/lib/userSS";
@@ -62,15 +63,6 @@ export default async function AdminLayout({
         ),
         link: "/admin/connectors/slack",
       },
-      {
-        name: (
-          <div className="flex">
-            <GlobeIcon size={18} />
-            <div className="ml-1">Web</div>
-          </div>
-        ),
-        link: "/admin/connectors/web",
-      },
       {
         name: (
@@ -98,6 +90,24 @@ export default async function AdminLayout({
         ),
         link: "/admin/connectors/confluence",
       },
+      {
+        name: (
+          <div className="flex">
+            <GlobeIcon size={18} />
+            <div className="ml-1">Web</div>
+          </div>
+        ),
+        link: "/admin/connectors/web",
+      },
+      {
+        name: (
+          <div className="flex">
+            <FileIcon size={18} />
+            <div className="ml-1">File</div>
+          </div>
+        ),
+        link: "/admin/connectors/file",
+      },
     ],
   },
   {

diff --git a/web/src/components/Button.tsx b/web/src/components/Button.tsx
index 1480d911d..80d842b39 100644
--- a/web/src/components/Button.tsx
+++ b/web/src/components/Button.tsx
@@ -3,6 +3,7 @@ interface Props {
   children: JSX.Element | string;
   disabled?: boolean;
   fullWidth?: boolean;
+  className?: string;
 }
 
 export const Button = ({
@@ -10,6 +11,7 @@ export const Button = ({
   children,
   disabled = false,
   fullWidth = false,
+  className = "",
 }: Props) => {
   return (