Standardize connectors + permissioning + new frontend for admin pages + small fixes / improvements (#75)

Introduces permissioning, standardizes onboarding for connectors, re-makes the data model for connectors / credentials / index-attempts, makes all environment variables optional, and includes a number of small fixes and improvements.

Co-authored-by: Weves <chrisweaver101@gmail.com>
This commit is contained in:
Yuhong Sun 2023-05-30 19:59:57 -07:00 committed by GitHub
parent b05bf963bf
commit 6891e4f198
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
76 changed files with 6760 additions and 1468 deletions

View File

@ -0,0 +1,172 @@
"""Permission Framework
Revision ID: 27c6ecc08586
Revises: 2666d766cb9b
Create Date: 2023-05-24 18:45:17.244495
"""
import fastapi_users_db_sqlalchemy
import sqlalchemy as sa
from alembic import op
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = "27c6ecc08586"
down_revision = "2666d766cb9b"
branch_labels = None
depends_on = None
def upgrade() -> None:
    """Create the connector/credential data model.

    Adds:
      * ``connector`` -- one row per configured data source.
      * ``credential`` -- per-user secrets stored as JSONB, optionally public.
      * ``connector_credential_association`` -- many-to-many join table.

    Re-points ``index_attempt`` at the new tables via nullable FK columns and
    drops the per-attempt columns (source, input_type,
    connector_specific_config) that now live on ``connector``.
    """
    op.create_table(
        "connector",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("name", sa.String(), nullable=False),
        sa.Column(
            "source",
            # native_enum=False stores the value as VARCHAR + CHECK constraint,
            # so adding a new DocumentSource later does not need a DB enum
            # migration.
            sa.Enum(
                "SLACK",
                "WEB",
                "GOOGLE_DRIVE",
                "GITHUB",
                "CONFLUENCE",
                name="documentsource",
                native_enum=False,
            ),
            nullable=False,
        ),
        sa.Column(
            "input_type",
            sa.Enum(
                "LOAD_STATE",
                "POLL",
                "EVENT",
                name="inputtype",
                native_enum=False,
            ),
            nullable=True,
        ),
        sa.Column(
            "connector_specific_config",
            postgresql.JSONB(astext_type=sa.Text()),
            nullable=False,
        ),
        # Seconds between automatic re-index runs; NULL = no periodic refresh.
        sa.Column("refresh_freq", sa.Integer(), nullable=True),
        sa.Column(
            "time_created",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.Column(
            "time_updated",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.Column("disabled", sa.Boolean(), nullable=False),
        sa.PrimaryKeyConstraint("id"),
    )
    op.create_table(
        "credential",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column(
            "credential_json",
            postgresql.JSONB(astext_type=sa.Text()),
            nullable=True,
        ),
        # Owning user; nullable -- presumably so credentials can exist when
        # auth is disabled. TODO confirm against application logic.
        sa.Column(
            "user_id",
            fastapi_users_db_sqlalchemy.generics.GUID(),
            nullable=True,
        ),
        sa.Column("public_doc", sa.Boolean(), nullable=False),
        sa.Column(
            "time_created",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.Column(
            "time_updated",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.ForeignKeyConstraint(
            ["user_id"],
            ["user.id"],
        ),
        sa.PrimaryKeyConstraint("id"),
    )
    op.create_table(
        "connector_credential_association",
        sa.Column("connector_id", sa.Integer(), nullable=False),
        sa.Column("credential_id", sa.Integer(), nullable=False),
        sa.ForeignKeyConstraint(
            ["connector_id"],
            ["connector.id"],
        ),
        sa.ForeignKeyConstraint(
            ["credential_id"],
            ["credential.id"],
        ),
        sa.PrimaryKeyConstraint("connector_id", "credential_id"),
    )
    # Nullable FKs: pre-existing index_attempt rows have no connector yet.
    op.add_column(
        "index_attempt",
        sa.Column("connector_id", sa.Integer(), nullable=True),
    )
    op.add_column(
        "index_attempt",
        sa.Column("credential_id", sa.Integer(), nullable=True),
    )
    op.create_foreign_key(
        "fk_index_attempt_credential_id",
        "index_attempt",
        "credential",
        ["credential_id"],
        ["id"],
    )
    op.create_foreign_key(
        "fk_index_attempt_connector_id",
        "index_attempt",
        "connector",
        ["connector_id"],
        ["id"],
    )
    # These now live on connector rows rather than on each attempt.
    op.drop_column("index_attempt", "connector_specific_config")
    op.drop_column("index_attempt", "source")
    op.drop_column("index_attempt", "input_type")
def downgrade() -> None:
    """Revert to the pre-connector schema.

    WARNING: destructive -- ``index_attempt`` is truncated first because the
    re-added NOT NULL columns (input_type, source, connector_specific_config)
    cannot be back-filled for existing rows.
    """
    op.execute("TRUNCATE TABLE index_attempt")
    op.add_column(
        "index_attempt",
        sa.Column("input_type", sa.VARCHAR(), autoincrement=False, nullable=False),
    )
    op.add_column(
        "index_attempt",
        sa.Column("source", sa.VARCHAR(), autoincrement=False, nullable=False),
    )
    op.add_column(
        "index_attempt",
        sa.Column(
            "connector_specific_config",
            postgresql.JSONB(astext_type=sa.Text()),
            autoincrement=False,
            nullable=False,
        ),
    )
    # Drop the FKs before dropping the columns they constrain.
    op.drop_constraint(
        "fk_index_attempt_credential_id", "index_attempt", type_="foreignkey"
    )
    op.drop_constraint(
        "fk_index_attempt_connector_id", "index_attempt", type_="foreignkey"
    )
    op.drop_column("index_attempt", "credential_id")
    op.drop_column("index_attempt", "connector_id")
    # Join table first, then the tables it references.
    op.drop_table("connector_credential_association")
    op.drop_table("credential")
    op.drop_table("connector")

View File

@ -1,3 +1,4 @@
import contextlib
import smtplib
import uuid
from collections.abc import AsyncGenerator
@ -22,6 +23,7 @@ from danswer.configs.app_configs import WEB_DOMAIN
from danswer.db.auth import get_access_token_db
from danswer.db.auth import get_user_count
from danswer.db.auth import get_user_db
from danswer.db.engine import get_async_session
from danswer.db.models import AccessToken
from danswer.db.models import User
from danswer.utils.logging import setup_logger
@ -40,9 +42,13 @@ from fastapi_users.authentication.strategy.db import AccessTokenDatabase
from fastapi_users.authentication.strategy.db import DatabaseStrategy
from fastapi_users.db import SQLAlchemyUserDatabase
from httpx_oauth.clients.google import GoogleOAuth2
from pydantic import EmailStr
logger = setup_logger()
FAKE_USER_EMAIL = "fakeuser@fakedanswermail.com"
FAKE_USER_PASS = "foobar"
def send_user_verification_email(user_email: str, token: str) -> None:
msg = MIMEMultipart()
@ -141,14 +147,44 @@ google_oauth_client = GoogleOAuth2(GOOGLE_OAUTH_CLIENT_ID, GOOGLE_OAUTH_CLIENT_S
fastapi_users = FastAPIUsers[User, uuid.UUID](get_user_manager, [auth_backend])
# Currently unused, maybe useful later
async def create_get_fake_user() -> User:
    """Fetch-or-create the placeholder user used when auth is turned off.

    Runs outside a request, so the FastAPI dependency generators (session ->
    user_db -> user_manager) are entered by hand. Returns the existing fake
    user if one exists, otherwise creates it with the hard-coded
    FAKE_USER_EMAIL / FAKE_USER_PASS.
    """
    # asynccontextmanager turns the dependency async-generators into context
    # managers we can drive manually.
    get_async_session_context = contextlib.asynccontextmanager(
        get_async_session
    )  # type:ignore
    get_user_db_context = contextlib.asynccontextmanager(get_user_db)
    get_user_manager_context = contextlib.asynccontextmanager(get_user_manager)
    logger.info("Creating fake user due to Auth being turned off")
    async with get_async_session_context() as session:
        async with get_user_db_context(session) as user_db:
            async with get_user_manager_context(user_db) as user_manager:
                user = await user_manager.get_by_email(FAKE_USER_EMAIL)
                if user:
                    return user
                user = await user_manager.create(
                    UserCreate(email=EmailStr(FAKE_USER_EMAIL), password=FAKE_USER_PASS)
                )
                logger.info("Created fake user.")
                return user
current_active_user = fastapi_users.current_user(
active=True, verified=REQUIRE_EMAIL_VERIFICATION, optional=DISABLE_AUTH
)
def current_admin_user(user: User = Depends(current_active_user)) -> User | None:
async def current_user(user: User = Depends(current_active_user)) -> User | None:
    """Resolve the requesting user; yields None when auth is disabled."""
    return None if DISABLE_AUTH else user
async def current_admin_user(user: User = Depends(current_user)) -> User | None:
if DISABLE_AUTH:
return None
if not user or not hasattr(user, "role") or user.role != UserRole.ADMIN:
raise HTTPException(
status_code=status.HTTP_403_FORBIDDEN,

View File

@ -1,131 +1,146 @@
import time
from typing import cast
from danswer.configs.constants import DocumentSource
from danswer.connectors.factory import build_load_connector
from danswer.connectors.factory import instantiate_connector
from danswer.connectors.interfaces import LoadConnector
from danswer.connectors.interfaces import PollConnector
from danswer.connectors.models import InputType
from danswer.connectors.slack.config import get_pull_frequency
from danswer.connectors.slack.connector import SlackConnector
from danswer.db.index_attempt import fetch_index_attempts
from danswer.db.index_attempt import insert_index_attempt
from danswer.db.index_attempt import update_index_attempt
from danswer.db.connector import disable_connector
from danswer.db.connector import fetch_connectors
from danswer.db.credentials import backend_update_credential_json
from danswer.db.engine import build_engine
from danswer.db.engine import get_db_current_time
from danswer.db.index_attempt import create_index_attempt
from danswer.db.index_attempt import get_incomplete_index_attempts
from danswer.db.index_attempt import get_last_finished_attempt
from danswer.db.index_attempt import get_not_started_index_attempts
from danswer.db.index_attempt import mark_attempt_failed
from danswer.db.index_attempt import mark_attempt_in_progress
from danswer.db.index_attempt import mark_attempt_succeeded
from danswer.db.models import Connector
from danswer.db.models import IndexAttempt
from danswer.db.models import IndexingStatus
from danswer.dynamic_configs import get_dynamic_config_store
from danswer.dynamic_configs.interface import ConfigNotFoundError
from danswer.utils.indexing_pipeline import build_indexing_pipeline
from danswer.utils.logging import setup_logger
from sqlalchemy.orm import Session
logger = setup_logger()
LAST_POLL_KEY_TEMPLATE = "last_poll_{}"
def should_create_new_indexing(
    connector: Connector, last_index: IndexAttempt | None, db_session: Session
) -> bool:
    """Decide whether a connector is due for another indexing run.

    A run is due when the connector has a refresh frequency configured and
    either no attempt has finished yet, or at least ``refresh_freq`` seconds
    (measured by the database clock) have passed since the last attempt's
    ``time_updated``.
    """
    refresh_freq = connector.refresh_freq
    if refresh_freq is None:
        # No periodic refresh configured for this connector.
        return False
    if not last_index:
        # Never indexed before -> always kick off a first run.
        return True
    # Compare against the DB clock, not the app server clock.
    # NOTE(review): anchored on time_updated; time_created may be better.
    elapsed = get_db_current_time(db_session) - last_index.time_updated
    return elapsed.total_seconds() >= refresh_freq
def _check_should_run(current_time: int, last_pull: int, pull_frequency: int) -> bool:
return current_time - last_pull > pull_frequency * 60
def create_indexing_jobs(db_session: Session) -> None:
connectors = fetch_connectors(db_session, disabled_status=False)
for connector in connectors:
in_progress_indexing_attempts = get_incomplete_index_attempts(
connector.id, db_session
)
# Currently single threaded so any still in-progress must have errored
for attempt in in_progress_indexing_attempts:
mark_attempt_failed(attempt, db_session)
def run_update() -> None:
logger.info("Running update")
# TODO (chris): implement a more generic way to run updates
# so we don't need to edit this file for future connectors
dynamic_config_store = get_dynamic_config_store()
indexing_pipeline = build_indexing_pipeline()
current_time = int(time.time())
# Slack
# TODO (chris): make Slack use the same approach as other connectors /
# make other connectors periodic
try:
pull_frequency = get_pull_frequency()
except ConfigNotFoundError:
pull_frequency = 0
if pull_frequency:
last_slack_pull_key = LAST_POLL_KEY_TEMPLATE.format(SlackConnector.__name__)
try:
last_pull = cast(int, dynamic_config_store.load(last_slack_pull_key))
except ConfigNotFoundError:
last_pull = None
if last_pull is None or _check_should_run(
current_time, last_pull, pull_frequency
last_finished_indexing_attempt = get_last_finished_attempt(
connector.id, db_session
)
if not should_create_new_indexing(
connector, last_finished_indexing_attempt, db_session
):
# TODO (chris): go back to only fetching messages that have changed
# since the last pull. Not supported for now due to how we compute the
# number of documents indexed for the admin dashboard (only look at latest)
logger.info("Scheduling periodic slack pull")
insert_index_attempt(
IndexAttempt(
source=DocumentSource.SLACK,
input_type=InputType.POLL,
status=IndexingStatus.NOT_STARTED,
connector_specific_config={},
)
)
# not 100% accurate, but the inaccuracy will result in more
# frequent pulling rather than less frequent, which is fine
# for now
dynamic_config_store.store(last_slack_pull_key, current_time)
continue
# TODO (chris): make this more efficient / in a single transaction to
# prevent race conditions across multiple background jobs. For now,
# this assumes we only ever run a single background job at a time
not_started_index_attempts = fetch_index_attempts(
input_types=[InputType.LOAD_STATE, InputType.POLL],
statuses=[IndexingStatus.NOT_STARTED],
)
for not_started_index_attempt in not_started_index_attempts:
for association in connector.credentials:
credential = association.credential
create_index_attempt(connector.id, credential.id, db_session)
def run_indexing_jobs(last_run_time: float, db_session: Session) -> None:
indexing_pipeline = build_indexing_pipeline()
new_indexing_attempts = get_not_started_index_attempts(db_session)
logger.info(f"Found {len(new_indexing_attempts)} new indexing tasks.")
for attempt in new_indexing_attempts:
logger.info(
"Attempting to index with IndexAttempt id: "
f"{not_started_index_attempt.id}, source: "
f"{not_started_index_attempt.source}, input_type: "
f"{not_started_index_attempt.input_type}, and connector_specific_config: "
f"{not_started_index_attempt.connector_specific_config}"
)
update_index_attempt(
index_attempt_id=not_started_index_attempt.id,
new_status=IndexingStatus.IN_PROGRESS,
f"Starting new indexing attempt for connector: '{attempt.connector.name}', "
f"with config: '{attempt.connector.connector_specific_config}', and "
f" with credentials: '{[c.credential_id for c in attempt.connector.credentials]}'"
)
mark_attempt_in_progress(attempt, db_session)
db_connector = attempt.connector
db_credential = attempt.credential
task = db_connector.input_type
error_msg = None
try:
# TODO (chris): spawn processes to parallelize / take advantage of
# multiple cores + implement retries
connector = build_load_connector(
source=not_started_index_attempt.source,
connector_specific_config=not_started_index_attempt.connector_specific_config,
runnable_connector, new_credential_json = instantiate_connector(
db_connector.source,
task,
db_connector.connector_specific_config,
db_credential.credential_json,
)
if new_credential_json is not None:
backend_update_credential_json(
db_credential, new_credential_json, db_session
)
except Exception as e:
logger.exception(f"Unable to instantiate connector due to {e}")
disable_connector(db_connector.id, db_session)
continue
try:
if task == InputType.LOAD_STATE:
assert isinstance(runnable_connector, LoadConnector)
doc_batch_generator = runnable_connector.load_from_state()
elif task == InputType.POLL:
assert isinstance(runnable_connector, PollConnector)
doc_batch_generator = runnable_connector.poll_source(
last_run_time, time.time()
)
else:
# Event types cannot be handled by a background type, leave these untouched
continue
document_ids: list[str] = []
for doc_batch in connector.load_from_state():
indexing_pipeline(doc_batch)
for doc_batch in doc_batch_generator:
# TODO introduce permissioning here
index_user_id = (
None if db_credential.public_doc else db_credential.user_id
)
indexing_pipeline(documents=doc_batch, user_id=index_user_id)
document_ids.extend([doc.id for doc in doc_batch])
mark_attempt_succeeded(attempt, document_ids, db_session)
except Exception as e:
logger.exception(
"Failed to index for source %s with config %s due to: %s",
not_started_index_attempt.source,
not_started_index_attempt.connector_specific_config,
e,
)
error_msg = str(e)
update_index_attempt(
index_attempt_id=not_started_index_attempt.id,
new_status=IndexingStatus.FAILED if error_msg else IndexingStatus.SUCCESS,
document_ids=document_ids if not error_msg else None,
error_msg=error_msg,
)
logger.info("Finished update")
logger.exception(f"Indexing job with id {attempt.id} failed due to {e}")
mark_attempt_failed(attempt, db_session, failure_reason=str(e))
def update_loop(delay: int = 60) -> None:
def update_loop(delay: int = 10) -> None:
last_run_time = 0.0
while True:
start = time.time()
logger.info(f"Running update, current time: {time.ctime(start)}")
try:
run_update()
except Exception:
logger.exception("Failed to run update")
with Session(
build_engine(), future=True, expire_on_commit=False
) as db_session:
create_indexing_jobs(db_session)
# TODO failed poll jobs won't recover data from failed runs, should fix
run_indexing_jobs(last_run_time, db_session)
except Exception as e:
logger.exception(f"Failed to run update due to {e}")
sleep_time = delay - (time.time() - start)
if sleep_time > 0:
time.sleep(sleep_time)

View File

@ -44,6 +44,9 @@ VALID_EMAIL_DOMAIN = os.environ.get("VALID_EMAIL_DOMAIN", "")
ENABLE_OAUTH = os.environ.get("ENABLE_OAUTH", "").lower() != "false"
GOOGLE_OAUTH_CLIENT_ID = os.environ.get("GOOGLE_OAUTH_CLIENT_ID", "")
GOOGLE_OAUTH_CLIENT_SECRET = os.environ.get("GOOGLE_OAUTH_CLIENT_SECRET", "")
MASK_CREDENTIAL_PREFIX = (
os.environ.get("MASK_CREDENTIAL_PREFIX", "True").lower() != "false"
)
#####
@ -72,21 +75,8 @@ POSTGRES_DB = os.environ.get("POSTGRES_DB", "postgres")
#####
# Connector Configs
#####
GOOGLE_DRIVE_CREDENTIAL_JSON = os.environ.get(
"GOOGLE_DRIVE_CREDENTIAL_JSON", "/home/storage/google_drive_creds.json"
)
GOOGLE_DRIVE_TOKENS_JSON = os.environ.get(
"GOOGLE_DRIVE_TOKENS_JSON", "/home/storage/google_drive_tokens.json"
)
GOOGLE_DRIVE_INCLUDE_SHARED = False
GITHUB_ACCESS_TOKEN = os.environ.get("GITHUB_ACCESS_TOKEN", "")
# example: username@companyemail.com
CONFLUENCE_USERNAME = os.environ.get("CONFLUENCE_USERNAME", "")
# https://id.atlassian.com/manage-profile/security/api-tokens
CONFLUENCE_ACCESS_TOKEN = os.environ.get("CONFLUENCE_ACCESS_TOKEN", "")
#####
# Query Configs

View File

@ -11,9 +11,9 @@ SEMANTIC_IDENTIFIER = "semantic_identifier"
SECTION_CONTINUATION = "section_continuation"
ALLOWED_USERS = "allowed_users"
ALLOWED_GROUPS = "allowed_groups"
NO_AUTH_USER = "FooBarUser" # TODO rework this temporary solution
OPENAI_API_KEY_STORAGE_KEY = "openai_api_key"
HTML_SEPARATOR = "\n"
PUBLIC_DOC_PAT = "PUBLIC"
class DocumentSource(str, Enum):

View File

@ -21,5 +21,5 @@ BATCH_SIZE_ENCODE_CHUNKS = 8
# QA Model API Configs
# https://platform.openai.com/docs/models/model-endpoint-compatibility
INTERNAL_MODEL_VERSION = os.environ.get("INTERNAL_MODEL", "openai-chat-completion")
OPENAI_MODEL_VERSION = os.environ.get("OPENAI_MODEL_VERSION", "gpt-4")
OPENAI_MODEL_VERSION = os.environ.get("OPENAI_MODEL_VERSION", "gpt-3.5-turbo")
OPENAI_MAX_OUTPUT_TOKENS = 512

View File

@ -4,11 +4,10 @@ from urllib.parse import urlparse
from atlassian import Confluence # type:ignore
from bs4 import BeautifulSoup
from danswer.configs.app_configs import CONFLUENCE_ACCESS_TOKEN
from danswer.configs.app_configs import CONFLUENCE_USERNAME
from danswer.configs.app_configs import INDEX_BATCH_SIZE
from danswer.configs.constants import DocumentSource
from danswer.configs.constants import HTML_SEPARATOR
from danswer.connectors.interfaces import GenerateDocumentsOutput
from danswer.connectors.interfaces import LoadConnector
from danswer.connectors.models import Document
from danswer.connectors.models import Section
@ -41,6 +40,28 @@ def extract_confluence_keys_from_url(wiki_url: str) -> tuple[str, str]:
return wiki_base, space
def _comment_dfs(
    comments_str: str,
    comment_pages: Generator[dict[str, Any], None, None],
    confluence_client: Confluence,
) -> str:
    """Depth-first walk of a Confluence comment tree, appending each
    comment's plain text to ``comments_str`` and returning the result."""
    for page in comment_pages:
        raw_html = page["body"]["storage"]["value"]
        text = BeautifulSoup(raw_html, "html.parser").get_text(HTML_SEPARATOR)
        comments_str += "\nComment:\n" + text
        # Recurse into replies to this comment.
        children = confluence_client.get_page_child_by_type(
            page["id"],
            type="comment",
            start=None,
            limit=None,
            expand="body.storage.value",
        )
        comments_str = _comment_dfs(comments_str, children, confluence_client)
    return comments_str
class ConfluenceConnector(LoadConnector):
def __init__(
self,
@ -49,31 +70,25 @@ class ConfluenceConnector(LoadConnector):
) -> None:
self.batch_size = batch_size
self.wiki_base, self.space = extract_confluence_keys_from_url(wiki_page_url)
self.confluence_client: Confluence | None = None
def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
    """Build the Confluence client from per-credential secrets.

    Expects ``confluence_username`` and ``confluence_access_token`` keys.
    Fixes the duplicated keyword arguments (stale module-level
    CONFLUENCE_USERNAME / CONFLUENCE_ACCESS_TOKEN kwargs were passed
    alongside the credential-derived ones, which is a SyntaxError and
    bypassed the per-credential model).

    Returns:
        None -- Confluence credentials never need to be rewritten.
    """
    username = credentials["confluence_username"]
    access_token = credentials["confluence_access_token"]
    self.confluence_client = Confluence(
        url=self.wiki_base,
        username=username,
        password=access_token,
        cloud=True,
    )
    return None
def _comment_dfs(
self, comments_str: str, comment_pages: Generator[dict[str, Any], None, None]
) -> str:
for comment_page in comment_pages:
comment_html = comment_page["body"]["storage"]["value"]
soup = BeautifulSoup(comment_html, "html.parser")
comments_str += "\nComment:\n" + soup.get_text(HTML_SEPARATOR)
child_comment_pages = self.confluence_client.get_page_child_by_type(
comment_page["id"],
type="comment",
start=None,
limit=None,
expand="body.storage.value",
def load_from_state(self) -> GenerateDocumentsOutput:
if self.confluence_client is None:
raise PermissionError(
"Confluence Client is not set up, was load_credentials called?"
)
comments_str = self._comment_dfs(comments_str, child_comment_pages)
return comments_str
def load_from_state(self) -> Generator[list[Document], None, None]:
start_ind = 0
while True:
doc_batch: list[Document] = []
@ -96,7 +111,7 @@ class ConfluenceConnector(LoadConnector):
limit=None,
expand="body.storage.value",
)
comments_text = self._comment_dfs("", comment_pages)
comments_text = _comment_dfs("", comment_pages, self.confluence_client)
page_text += comments_text
page_url = self.wiki_base + page["_links"]["webui"]

View File

@ -1,6 +1,5 @@
import time
from collections.abc import Generator
from typing import Any
from typing import Type
from danswer.configs.constants import DocumentSource
from danswer.connectors.confluence.connector import ConfluenceConnector
@ -10,9 +9,9 @@ from danswer.connectors.interfaces import BaseConnector
from danswer.connectors.interfaces import EventConnector
from danswer.connectors.interfaces import LoadConnector
from danswer.connectors.interfaces import PollConnector
from danswer.connectors.models import Document
from danswer.connectors.models import InputType
from danswer.connectors.slack.connector import SlackConnector
from danswer.connectors.slack.connector import SlackLoadConnector
from danswer.connectors.slack.connector import SlackPollConnector
from danswer.connectors.web.connector import WebConnector
_NUM_SECONDS_IN_DAY = 86400
@ -22,30 +21,35 @@ class ConnectorMissingException(Exception):
pass
def build_connector(
def identify_connector_class(
source: DocumentSource,
input_type: InputType,
connector_specific_config: dict[str, Any],
) -> BaseConnector:
if source == DocumentSource.SLACK:
connector: BaseConnector = SlackConnector(**connector_specific_config)
elif source == DocumentSource.GOOGLE_DRIVE:
connector = GoogleDriveConnector(**connector_specific_config)
elif source == DocumentSource.GITHUB:
connector = GithubConnector(**connector_specific_config)
elif source == DocumentSource.WEB:
connector = WebConnector(**connector_specific_config)
elif source == DocumentSource.CONFLUENCE:
connector = ConfluenceConnector(**connector_specific_config)
) -> Type[BaseConnector]:
connector_map = {
DocumentSource.WEB: WebConnector,
DocumentSource.SLACK: {
InputType.LOAD_STATE: SlackLoadConnector,
InputType.POLL: SlackPollConnector,
},
DocumentSource.GITHUB: GithubConnector,
DocumentSource.GOOGLE_DRIVE: GoogleDriveConnector,
DocumentSource.CONFLUENCE: ConfluenceConnector,
}
connector_by_source = connector_map.get(source, {})
if isinstance(connector_by_source, dict):
connector = connector_by_source.get(input_type)
else:
connector = connector_by_source
if connector is None:
raise ConnectorMissingException(f"Connector not found for source={source}")
if any(
[
input_type == InputType.LOAD_STATE
and not isinstance(connector, LoadConnector),
input_type == InputType.POLL and not isinstance(connector, PollConnector),
input_type == InputType.EVENT and not isinstance(connector, EventConnector),
and not issubclass(connector, LoadConnector),
input_type == InputType.POLL and not issubclass(connector, PollConnector),
input_type == InputType.EVENT and not issubclass(connector, EventConnector),
]
):
raise ConnectorMissingException(
@ -55,25 +59,14 @@ def build_connector(
return connector
# TODO this is some jank, rework at some point
def _poll_to_load_connector(range_pull_connector: PollConnector) -> LoadConnector:
class _Connector(LoadConnector):
def __init__(self) -> None:
self._connector = range_pull_connector
def instantiate_connector(
source: DocumentSource,
input_type: InputType,
connector_specific_config: dict[str, Any],
credentials: dict[str, Any],
) -> tuple[BaseConnector, dict[str, Any] | None]:
connector_class = identify_connector_class(source, input_type)
connector = connector_class(**connector_specific_config)
new_credentials = connector.load_credentials(credentials)
def load_from_state(self) -> Generator[list[Document], None, None]:
# adding some buffer to make sure we get all documents
return self._connector.poll_source(0, time.time() + _NUM_SECONDS_IN_DAY)
return _Connector()
# TODO this is some jank, rework at some point
def build_load_connector(
source: DocumentSource, connector_specific_config: dict[str, Any]
) -> LoadConnector:
connector = build_connector(source, InputType.LOAD_STATE, connector_specific_config)
if isinstance(connector, PollConnector):
return _poll_to_load_connector(connector)
assert isinstance(connector, LoadConnector)
return connector
return connector, new_credentials

View File

@ -1,9 +1,10 @@
import itertools
from collections.abc import Generator
from typing import Any
from danswer.configs.app_configs import GITHUB_ACCESS_TOKEN
from danswer.configs.app_configs import INDEX_BATCH_SIZE
from danswer.configs.constants import DocumentSource
from danswer.connectors.interfaces import GenerateDocumentsOutput
from danswer.connectors.interfaces import LoadConnector
from danswer.connectors.models import Document
from danswer.connectors.models import Section
@ -15,8 +16,6 @@ from github.PullRequest import PullRequest
logger = setup_logger()
github_client = Github(GITHUB_ACCESS_TOKEN)
def get_pr_batches(
pull_requests: PaginatedList, batch_size: int
@ -41,9 +40,18 @@ class GithubConnector(LoadConnector):
self.repo_name = repo_name
self.batch_size = batch_size
self.state_filter = state_filter
self.github_client: Github | None = None
def load_from_state(self) -> Generator[list[Document], None, None]:
repo = github_client.get_repo(f"{self.repo_owner}/{self.repo_name}")
def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
    """Create the GitHub client from the stored access token.

    Returns None: GitHub tokens are never rewritten by the connector.
    """
    access_token = credentials["github_access_token"]
    self.github_client = Github(access_token)
    return None
def load_from_state(self) -> GenerateDocumentsOutput:
if self.github_client is None:
raise PermissionError(
"Github Client is not set up, was load_credentials called?"
)
repo = self.github_client.get_repo(f"{self.repo_owner}/{self.repo_name}")
pull_requests = repo.get_pulls(state=self.state_filter)
for pr_batch in get_pr_batches(pull_requests, self.batch_size):
doc_batch = []

View File

@ -1,10 +1,13 @@
import io
from collections.abc import Generator
from typing import Any
from danswer.configs.app_configs import GOOGLE_DRIVE_INCLUDE_SHARED
from danswer.configs.app_configs import INDEX_BATCH_SIZE
from danswer.configs.constants import DocumentSource
from danswer.connectors.google_drive.connector_auth import DB_CREDENTIALS_DICT_KEY
from danswer.connectors.google_drive.connector_auth import get_drive_tokens
from danswer.connectors.interfaces import GenerateDocumentsOutput
from danswer.connectors.interfaces import LoadConnector
from danswer.connectors.models import Document
from danswer.connectors.models import Section
@ -89,12 +92,23 @@ class GoogleDriveConnector(LoadConnector):
) -> None:
self.batch_size = batch_size
self.include_shared = include_shared
self.creds = get_drive_tokens()
self.creds: Credentials | None = None
if not self.creds:
def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
    """Validate (and possibly refresh) the stored Google Drive tokens.

    Raises:
        PermissionError: when the stored token cannot produce valid creds.

    Returns:
        An updated credential dict when the token was refreshed (so the
        caller can persist it), otherwise None.
    """
    stored_token_json = credentials[DB_CREDENTIALS_DICT_KEY]
    creds = get_drive_tokens(token_json_str=stored_token_json)
    if creds is None:
        raise PermissionError("Unable to access Google Drive.")
    self.creds = creds
    refreshed_token_json = creds.to_json()
    if refreshed_token_json == stored_token_json:
        return None
    # Token was refreshed -- hand the new JSON back for persistence.
    return {DB_CREDENTIALS_DICT_KEY: refreshed_token_json}
def load_from_state(self) -> GenerateDocumentsOutput:
if self.creds is None:
raise PermissionError("Not logged into Google Drive")
def load_from_state(self) -> Generator[list[Document], None, None]:
service = discovery.build("drive", "v3", credentials=self.creds)
for files_batch in get_file_batches(
service, self.include_shared, self.batch_size

View File

@ -1,52 +1,41 @@
import os
from typing import Any
import json
from typing import cast
from urllib.parse import parse_qs
from urllib.parse import ParseResult
from urllib.parse import urlparse
from danswer.configs.app_configs import GOOGLE_DRIVE_CREDENTIAL_JSON
from danswer.configs.app_configs import GOOGLE_DRIVE_TOKENS_JSON
from danswer.configs.app_configs import WEB_DOMAIN
from danswer.db.credentials import update_credential_json
from danswer.db.models import User
from danswer.dynamic_configs import get_dynamic_config_store
from danswer.server.models import GoogleAppCredentials
from danswer.utils.logging import setup_logger
from google.auth.transport.requests import Request # type: ignore
from google.oauth2.credentials import Credentials # type: ignore
from google_auth_oauthlib.flow import InstalledAppFlow # type: ignore
from sqlalchemy.orm import Session
logger = setup_logger()
DB_CREDENTIALS_DICT_KEY = "google_drive_tokens"
CRED_KEY = "credential_id_{}"
GOOGLE_DRIVE_CRED_KEY = "google_drive_app_credential"
SCOPES = ["https://www.googleapis.com/auth/drive.readonly"]
FRONTEND_GOOGLE_DRIVE_REDIRECT = (
f"{WEB_DOMAIN}/admin/connectors/google-drive/auth/callback"
)
def backend_get_credentials() -> Credentials:
"""This approach does not work for production builds as it requires
a browser to be opened. It is used for local development only."""
creds = None
if os.path.exists(GOOGLE_DRIVE_TOKENS_JSON):
creds = Credentials.from_authorized_user_file(GOOGLE_DRIVE_TOKENS_JSON, SCOPES)
if not creds or not creds.valid:
if creds and creds.expired and creds.refresh_token:
creds.refresh(Request())
else:
flow = InstalledAppFlow.from_client_secrets_file(
GOOGLE_DRIVE_CREDENTIAL_JSON, SCOPES
)
creds = flow.run_local_server()
with open(GOOGLE_DRIVE_TOKENS_JSON, "w") as token_file:
token_file.write(creds.to_json())
return creds
def _build_frontend_google_drive_redirect() -> str:
return f"{WEB_DOMAIN}/admin/connectors/google-drive/auth/callback"
def get_drive_tokens(token_path: str = GOOGLE_DRIVE_TOKENS_JSON) -> Any:
if not os.path.exists(token_path):
def get_drive_tokens(
*, creds: Credentials | None = None, token_json_str: str | None = None
) -> Credentials | None:
if creds is None and token_json_str is None:
return None
creds = Credentials.from_authorized_user_file(token_path, SCOPES)
if token_json_str is not None:
creds_json = json.loads(token_json_str)
creds = Credentials.from_authorized_user_info(creds_json, SCOPES)
if not creds:
return None
@ -57,8 +46,6 @@ def get_drive_tokens(token_path: str = GOOGLE_DRIVE_TOKENS_JSON) -> Any:
try:
creds.refresh(Request())
if creds.valid:
with open(token_path, "w") as token_file:
token_file.write(creds.to_json())
return creds
except Exception as e:
logger.exception(f"Failed to refresh google drive access token due to: {e}")
@ -66,8 +53,8 @@ def get_drive_tokens(token_path: str = GOOGLE_DRIVE_TOKENS_JSON) -> Any:
return None
def verify_csrf(user_id: str, state: str) -> None:
csrf = get_dynamic_config_store().load(user_id)
def verify_csrf(credential_id: int, state: str) -> None:
csrf = get_dynamic_config_store().load(CRED_KEY.format(str(credential_id)))
if csrf != state:
raise PermissionError(
"State from Google Drive Connector callback does not match expected"
@ -75,37 +62,50 @@ def verify_csrf(user_id: str, state: str) -> None:
def get_auth_url(
user_id: str, credentials_file: str = GOOGLE_DRIVE_CREDENTIAL_JSON
credential_id: int,
) -> str:
flow = InstalledAppFlow.from_client_secrets_file(
credentials_file,
creds_str = str(get_dynamic_config_store().load(GOOGLE_DRIVE_CRED_KEY))
credential_json = json.loads(creds_str)
flow = InstalledAppFlow.from_client_config(
credential_json,
scopes=SCOPES,
redirect_uri=FRONTEND_GOOGLE_DRIVE_REDIRECT,
redirect_uri=_build_frontend_google_drive_redirect(),
)
auth_url, _ = flow.authorization_url(prompt="consent")
parsed_url = urlparse(auth_url)
parsed_url = cast(ParseResult, urlparse(auth_url))
params = parse_qs(parsed_url.query)
get_dynamic_config_store().store(user_id, params.get("state", [None])[0]) # type: ignore
get_dynamic_config_store().store(CRED_KEY.format(credential_id), params.get("state", [None])[0]) # type: ignore
return str(auth_url)
def save_access_tokens(
def update_credential_access_tokens(
auth_code: str,
token_path: str = GOOGLE_DRIVE_TOKENS_JSON,
credentials_file: str = GOOGLE_DRIVE_CREDENTIAL_JSON,
) -> Any:
flow = InstalledAppFlow.from_client_secrets_file(
credentials_file, scopes=SCOPES, redirect_uri=FRONTEND_GOOGLE_DRIVE_REDIRECT
credential_id: int,
user: User,
db_session: Session,
) -> Credentials | None:
app_credentials = get_google_app_cred()
flow = InstalledAppFlow.from_client_config(
app_credentials.dict(),
scopes=SCOPES,
redirect_uri=_build_frontend_google_drive_redirect(),
)
flow.fetch_token(code=auth_code)
creds = flow.credentials
token_json_str = creds.to_json()
new_creds_dict = {DB_CREDENTIALS_DICT_KEY: token_json_str}
os.makedirs(os.path.dirname(token_path), exist_ok=True)
with open(token_path, "w+") as token_file:
token_file.write(creds.to_json())
if not get_drive_tokens(token_path):
raise PermissionError("Not able to access Google Drive.")
if not update_credential_json(credential_id, new_creds_dict, user, db_session):
return None
return creds
def get_google_app_cred() -> GoogleAppCredentials:
    """Load the Google OAuth app credentials stored in the dynamic config store."""
    raw_creds = get_dynamic_config_store().load(GOOGLE_DRIVE_CRED_KEY)
    return GoogleAppCredentials(**json.loads(str(raw_creds)))
def upsert_google_app_cred(app_credentials: GoogleAppCredentials) -> None:
    """Create or overwrite the stored Google OAuth app credentials."""
    config_store = get_dynamic_config_store()
    config_store.store(GOOGLE_DRIVE_CRED_KEY, app_credentials.json())

View File

@ -7,16 +7,19 @@ from danswer.connectors.models import Document
SecondsSinceUnixEpoch = float
GenerateDocumentsOutput = Generator[list[Document], None, None]
class BaseConnector(abc.ABC):
# Reserved for future shared uses
pass
@abc.abstractmethod
def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
raise NotImplementedError
# Large set update or reindex, generally pulling a complete state or from a savestate file
class LoadConnector(BaseConnector):
@abc.abstractmethod
def load_from_state(self) -> Generator[list[Document], None, None]:
def load_from_state(self) -> GenerateDocumentsOutput:
raise NotImplementedError
@ -25,12 +28,12 @@ class PollConnector(BaseConnector):
@abc.abstractmethod
def poll_source(
self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
) -> Generator[list[Document], None, None]:
) -> GenerateDocumentsOutput:
raise NotImplementedError
# Event driven
class EventConnector(BaseConnector):
@abc.abstractmethod
def handle_event(self, event: Any) -> Generator[list[Document], None, None]:
def handle_event(self, event: Any) -> GenerateDocumentsOutput:
raise NotImplementedError

View File

@ -21,10 +21,6 @@ class Document:
metadata: dict[str, Any] | None
def get_raw_document_text(document: Document) -> str:
    """Concatenate the text of every section of the document, blank-line separated."""
    section_texts = (section.text for section in document.sections)
    return "\n\n".join(section_texts)
class InputType(str, Enum):
LOAD_STATE = "load_state" # e.g. loading a current full state or a save state, such as from a file
POLL = "poll" # e.g. calling an API to get all documents in the last hour

View File

@ -1,32 +0,0 @@
from danswer.dynamic_configs import get_dynamic_config_store
from pydantic import BaseModel
SLACK_CONFIG_KEY = "slack_connector_config"
class SlackConfig(BaseModel):
    """Slack connector configuration persisted in the dynamic config store."""

    slack_bot_token: str  # bot token used to authenticate the Slack WebClient
    workspace_id: str  # workspace subdomain, used when building message links
    pull_frequency: int = 0  # in minutes, 0 => no pulling
def get_slack_config() -> SlackConfig:
    """Read and validate the persisted Slack connector configuration."""
    raw_config = get_dynamic_config_store().load(SLACK_CONFIG_KEY)
    return SlackConfig.parse_obj(raw_config)
def get_slack_bot_token() -> str:
    """Convenience accessor for the stored Slack bot token."""
    config = get_slack_config()
    return config.slack_bot_token
def get_workspace_id() -> str:
    """Convenience accessor for the stored Slack workspace id."""
    config = get_slack_config()
    return config.workspace_id
def get_pull_frequency() -> int:
    """Convenience accessor for the pull frequency in minutes (0 => no pulling)."""
    config = get_slack_config()
    return config.pull_frequency
def update_slack_config(slack_config: SlackConfig) -> None:
    """Persist a new Slack configuration, replacing any existing one."""
    config_store = get_dynamic_config_store()
    config_store.store(SLACK_CONFIG_KEY, slack_config.dict())

View File

@ -2,20 +2,18 @@ import json
import os
import time
from collections.abc import Callable
from collections.abc import Generator
from pathlib import Path
from typing import Any
from typing import cast
from typing import List
from danswer.configs.app_configs import INDEX_BATCH_SIZE
from danswer.configs.constants import DocumentSource
from danswer.connectors.interfaces import GenerateDocumentsOutput
from danswer.connectors.interfaces import LoadConnector
from danswer.connectors.interfaces import PollConnector
from danswer.connectors.interfaces import SecondsSinceUnixEpoch
from danswer.connectors.models import Document
from danswer.connectors.models import Section
from danswer.connectors.slack.utils import get_client
from danswer.connectors.slack.utils import get_message_link
from danswer.utils.logging import setup_logger
from slack_sdk import WebClient
@ -139,13 +137,15 @@ def get_thread(client: WebClient, channel_id: str, thread_id: str) -> ThreadType
return threads
def thread_to_doc(channel: ChannelType, thread: ThreadType) -> Document:
def thread_to_doc(workspace: str, channel: ChannelType, thread: ThreadType) -> Document:
channel_id = channel["id"]
return Document(
id=f"{channel_id}__{thread[0]['ts']}",
sections=[
Section(
link=get_message_link(m, channel_id=channel_id),
link=get_message_link(
event=m, workspace=workspace, channel_id=channel_id
),
text=cast(str, m["text"]),
)
for m in thread
@ -162,6 +162,7 @@ def _default_msg_filter(message: MessageType) -> bool:
def get_all_docs(
client: WebClient,
workspace: str,
oldest: str | None = None,
latest: str | None = None,
msg_filter_func: Callable[[MessageType], bool] = _default_msg_filter,
@ -197,71 +198,80 @@ def get_all_docs(
docs: list[Document] = []
for channel_id, threads in channel_id_to_threads.items():
docs.extend(
thread_to_doc(channel=channel_id_to_channel_info[channel_id], thread=thread)
thread_to_doc(
workspace=workspace,
channel=channel_id_to_channel_info[channel_id],
thread=thread,
)
for thread in threads
)
logger.info(f"Pulled {len(docs)} documents from slack")
return docs
def _process_batch_event(
slack_event: dict[str, Any],
channel: dict[str, Any],
matching_doc: Document | None,
workspace: str | None = None,
) -> Document | None:
if (
slack_event["type"] == "message"
and slack_event.get("subtype") != "channel_join"
):
if matching_doc:
class SlackLoadConnector(LoadConnector):
def __init__(
    self, workspace: str, export_path_str: str, batch_size: int = INDEX_BATCH_SIZE
) -> None:
    # workspace: Slack workspace subdomain (used to build message permalinks)
    self.workspace = workspace
    # export_path_str: directory of a Slack export (channels.json + per-channel files)
    self.export_path_str = export_path_str
    self.batch_size = batch_size
def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
    """No credentials are needed to read a local Slack export; warn if any given."""
    if credentials:
        logger.warning("Unexpected credentials provided for Slack Load Connector")
    return None
@staticmethod
def _process_batch_event(
slack_event: dict[str, Any],
channel: dict[str, Any],
matching_doc: Document | None,
workspace: str,
) -> Document | None:
if (
slack_event["type"] == "message"
and slack_event.get("subtype") != "channel_join"
):
if matching_doc:
return Document(
id=matching_doc.id,
sections=matching_doc.sections
+ [
Section(
link=get_message_link(
event=slack_event,
workspace=workspace,
channel_id=channel["id"],
),
text=slack_event["text"],
)
],
source=matching_doc.source,
semantic_identifier=matching_doc.semantic_identifier,
metadata=matching_doc.metadata,
)
return Document(
id=matching_doc.id,
sections=matching_doc.sections
+ [
id=slack_event["ts"],
sections=[
Section(
link=get_message_link(
slack_event, workspace=workspace, channel_id=channel["id"]
event=slack_event,
workspace=workspace,
channel_id=channel["id"],
),
text=slack_event["text"],
)
],
source=matching_doc.source,
semantic_identifier=matching_doc.semantic_identifier,
metadata=matching_doc.metadata,
source=DocumentSource.SLACK,
semantic_identifier=channel["name"],
metadata={},
)
return Document(
id=slack_event["ts"],
sections=[
Section(
link=get_message_link(
slack_event, workspace=workspace, channel_id=channel["id"]
),
text=slack_event["text"],
)
],
source=DocumentSource.SLACK,
semantic_identifier=channel["name"],
metadata={},
)
return None
return None
class SlackConnector(LoadConnector, PollConnector):
def __init__(
self, export_path_str: str | None = None, batch_size: int = INDEX_BATCH_SIZE
) -> None:
self.export_path_str = export_path_str
self.batch_size = batch_size
self.client = get_client()
def load_from_state(self) -> Generator[list[Document], None, None]:
if self.export_path_str is None:
raise ValueError(
"This Slack connector was not set up with a state-export file."
)
def load_from_state(self) -> GenerateDocumentsOutput:
export_path = Path(self.export_path_str)
with open(export_path / "channels.json") as f:
@ -278,12 +288,13 @@ class SlackConnector(LoadConnector, PollConnector):
with open(path) as f:
events = cast(list[dict[str, Any]], json.load(f))
for slack_event in events:
doc = _process_batch_event(
doc = self._process_batch_event(
slack_event=slack_event,
channel=channel_info,
matching_doc=document_batch.get(
slack_event.get("thread_ts", "")
),
workspace=self.workspace,
)
if doc:
document_batch[doc.id] = doc
@ -292,9 +303,33 @@ class SlackConnector(LoadConnector, PollConnector):
yield list(document_batch.values())
class SlackPollConnector(PollConnector):
def __init__(self, workspace: str, batch_size: int = INDEX_BATCH_SIZE) -> None:
    # workspace: Slack workspace subdomain (used to build message permalinks)
    self.workspace = workspace
    self.batch_size = batch_size
    # Client is not constructed until load_credentials supplies the bot token.
    self.client: WebClient | None = None
def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
    """Build the Slack WebClient from the provided bot token.

    Raises KeyError if "slack_bot_token" is missing from `credentials`.
    """
    bot_token = credentials["slack_bot_token"]
    self.client = WebClient(token=bot_token)
    return None
def poll_source(
self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
) -> Generator[List[Document], None, None]:
all_docs = get_all_docs(client=self.client, oldest=str(start), latest=str(end))
) -> GenerateDocumentsOutput:
if self.client is None:
raise PermissionError(
"Slack Client is not set up, was load_credentials called?"
)
all_docs = get_all_docs(
client=self.client,
workspace=self.workspace,
# NOTE: need to impute to `None` instead of using 0.0, since Slack will
# throw an error if we use 0.0 on an account without infinite data
# retention
oldest=str(start) if start else None,
latest=str(end),
)
for i in range(0, len(all_docs), self.batch_size):
yield all_docs[i : i + self.batch_size]

View File

@ -1,22 +1,15 @@
from typing import Any
from typing import cast
from danswer.connectors.slack.config import get_slack_bot_token
from danswer.connectors.slack.config import get_workspace_id
from slack_sdk import WebClient
def get_client() -> WebClient:
    """Build a Slack WebClient from the bot token in the dynamic config store.

    NOTE: the old docstring claimed the token came from the SLACK_BOT_TOKEN
    environment variable; it actually comes from get_slack_bot_token(), which
    reads the stored SlackConfig.
    """
    return WebClient(token=get_slack_bot_token())
def get_message_link(
event: dict[str, Any], workspace: str | None = None, channel_id: str | None = None
event: dict[str, Any], workspace: str, channel_id: str | None = None
) -> str:
channel_id = channel_id or cast(
str, event["channel"]
) # channel must either be present in the event or passed in
message_ts = cast(str, event["ts"])
message_ts_without_dot = message_ts.replace(".", "")
return f"https://{workspace or get_workspace_id()}.slack.com/archives/{channel_id}/p{message_ts_without_dot}"
return (
f"https://{workspace}.slack.com/archives/{channel_id}/p{message_ts_without_dot}"
)

View File

@ -1,5 +1,4 @@
import io
from collections.abc import Generator
from typing import Any
from typing import cast
from urllib.parse import urljoin
@ -10,6 +9,7 @@ from bs4 import BeautifulSoup
from danswer.configs.app_configs import INDEX_BATCH_SIZE
from danswer.configs.constants import DocumentSource
from danswer.configs.constants import HTML_SEPARATOR
from danswer.connectors.interfaces import GenerateDocumentsOutput
from danswer.connectors.interfaces import LoadConnector
from danswer.connectors.models import Document
from danswer.connectors.models import Section
@ -57,10 +57,17 @@ class WebConnector(LoadConnector):
base_url: str,
batch_size: int = INDEX_BATCH_SIZE,
) -> None:
if "://" not in base_url:
base_url = "https://" + base_url
self.base_url = base_url
self.batch_size = batch_size
def load_from_state(self) -> Generator[list[Document], None, None]:
def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
    """The web connector scrapes public pages; warn if credentials are supplied."""
    if credentials:
        logger.warning("Unexpected credentials provided for Web Connector")
    return None
def load_from_state(self) -> GenerateDocumentsOutput:
"""Traverses through all pages found on the website
and converts them into documents"""
visited_links: set[str] = set()

View File

@ -8,11 +8,15 @@ DatastoreFilter = dict[str, str | list[str] | None]
class Datastore:
@abc.abstractmethod
def index(self, chunks: list[EmbeddedIndexChunk]) -> bool:
def index(self, chunks: list[EmbeddedIndexChunk], user_id: int | None) -> bool:
raise NotImplementedError
@abc.abstractmethod
def semantic_retrieval(
self, query: str, filters: list[DatastoreFilter] | None, num_to_retrieve: int
self,
query: str,
user_id: int | None,
filters: list[DatastoreFilter] | None,
num_to_retrieve: int,
) -> list[InferenceChunk]:
raise NotImplementedError

View File

@ -7,6 +7,7 @@ from danswer.configs.constants import BLURB
from danswer.configs.constants import CHUNK_ID
from danswer.configs.constants import CONTENT
from danswer.configs.constants import DOCUMENT_ID
from danswer.configs.constants import PUBLIC_DOC_PAT
from danswer.configs.constants import SECTION_CONTINUATION
from danswer.configs.constants import SEMANTIC_IDENTIFIER
from danswer.configs.constants import SOURCE_LINKS
@ -14,7 +15,9 @@ from danswer.configs.constants import SOURCE_TYPE
from danswer.configs.model_configs import DOC_EMBEDDING_DIM
from danswer.utils.clients import get_qdrant_client
from danswer.utils.logging import setup_logger
from danswer.utils.timing import log_function_time
from qdrant_client import QdrantClient
from qdrant_client.http import models
from qdrant_client.http.exceptions import ResponseHandlingException
from qdrant_client.http.models.models import UpdateResult
from qdrant_client.http.models.models import UpdateStatus
@ -44,6 +47,44 @@ def create_collection(
raise RuntimeError("Could not create Qdrant collection")
@log_function_time()
def get_document_whitelists(
    doc_chunk_id: str, collection_name: str, q_client: QdrantClient
) -> tuple[int, list[str], list[str]]:
    """Fetch the access whitelists stored on an indexed document chunk.

    Returns (number of points retrieved, allowed users, allowed groups);
    (0, [], []) when the chunk id is not present in the collection.

    NOTE(review): only a single id is passed to `retrieve`, so the first
    element is effectively 0 or 1 — confirm callers only test it against 0.
    """
    results = q_client.retrieve(
        collection_name=collection_name,
        ids=[doc_chunk_id],
        with_payload=[ALLOWED_USERS, ALLOWED_GROUPS],
    )
    if len(results) == 0:
        return 0, [], []
    payload = results[0].payload
    if not payload:
        # Every indexed point is expected to carry access lists in its payload.
        raise RuntimeError(
            "Qdrant Index is corrupted, Document found with no access lists."
        )
    return len(results), payload[ALLOWED_USERS], payload[ALLOWED_GROUPS]
@log_function_time()
def delete_doc_chunks(
    document_id: str, collection_name: str, q_client: QdrantClient
) -> None:
    """Delete every point in the collection whose payload DOCUMENT_ID matches.

    Used before re-indexing a document, since the chunk count may shrink and
    stale chunks would otherwise linger.
    """
    q_client.delete(
        collection_name=collection_name,
        points_selector=models.FilterSelector(
            filter=models.Filter(
                must=[
                    models.FieldCondition(
                        key=DOCUMENT_ID,
                        match=models.MatchValue(value=document_id),
                    ),
                ],
            )
        ),
    )
def recreate_collection(
collection_name: str, embedding_dim: int = DOC_EMBEDDING_DIM
) -> None:
@ -63,18 +104,47 @@ def get_uuid_from_chunk(chunk: EmbeddedIndexChunk) -> uuid.UUID:
def index_chunks(
chunks: list[EmbeddedIndexChunk],
user_id: int | None,
collection: str,
client: QdrantClient | None = None,
batch_upsert: bool = True,
) -> bool:
# Public documents will have the PUBLIC string in ALLOWED_USERS
# If credential that kicked this off has no user associated, either Auth is off or the doc is public
user_str = PUBLIC_DOC_PAT if user_id is None else str(user_id)
q_client: QdrantClient = client if client else get_qdrant_client()
point_structs = []
# Maps document id to dict of whitelists for users/groups each containing list of users/groups as strings
doc_user_map: dict[str, dict[str, list[str]]] = {}
for chunk in chunks:
chunk_uuid = str(get_uuid_from_chunk(chunk))
document = chunk.source_document
if document.id not in doc_user_map:
num_doc_chunks, whitelist_users, whitelist_groups = get_document_whitelists(
chunk_uuid, collection, q_client
)
if num_doc_chunks == 0:
doc_user_map[document.id] = {
ALLOWED_USERS: [user_str],
# TODO introduce groups logic here
ALLOWED_GROUPS: whitelist_groups,
}
else:
if user_str not in whitelist_users:
whitelist_users.append(user_str)
# TODO introduce groups logic here
doc_user_map[document.id] = {
ALLOWED_USERS: whitelist_users,
ALLOWED_GROUPS: whitelist_groups,
}
# Need to delete document chunks because number of chunks may decrease
delete_doc_chunks(document.id, collection, q_client)
point_structs.append(
PointStruct(
id=str(get_uuid_from_chunk(chunk)),
id=chunk_uuid,
payload={
DOCUMENT_ID: document.id,
CHUNK_ID: chunk.chunk_id,
@ -84,8 +154,8 @@ def index_chunks(
SOURCE_LINKS: chunk.source_links,
SEMANTIC_IDENTIFIER: document.semantic_identifier,
SECTION_CONTINUATION: chunk.section_continuation,
ALLOWED_USERS: [], # TODO
ALLOWED_GROUPS: [], # TODO
ALLOWED_USERS: doc_user_map[document.id][ALLOWED_USERS],
ALLOWED_GROUPS: doc_user_map[document.id][ALLOWED_GROUPS],
},
vector=chunk.embedding,
)

View File

@ -1,6 +1,8 @@
from danswer.chunking.models import EmbeddedIndexChunk
from danswer.chunking.models import InferenceChunk
from danswer.configs.app_configs import QDRANT_DEFAULT_COLLECTION
from danswer.configs.constants import ALLOWED_USERS
from danswer.configs.constants import PUBLIC_DOC_PAT
from danswer.datastores.interfaces import Datastore
from danswer.datastores.interfaces import DatastoreFilter
from danswer.datastores.qdrant.indexing import index_chunks
@ -23,14 +25,21 @@ class QdrantDatastore(Datastore):
self.collection = collection
self.client = get_qdrant_client()
def index(self, chunks: list[EmbeddedIndexChunk]) -> bool:
def index(self, chunks: list[EmbeddedIndexChunk], user_id: int | None) -> bool:
return index_chunks(
chunks=chunks, collection=self.collection, client=self.client
chunks=chunks,
user_id=user_id,
collection=self.collection,
client=self.client,
)
@log_function_time()
def semantic_retrieval(
self, query: str, filters: list[DatastoreFilter] | None, num_to_retrieve: int
self,
query: str,
user_id: int | None,
filters: list[DatastoreFilter] | None,
num_to_retrieve: int,
) -> list[InferenceChunk]:
query_embedding = get_default_embedding_model().encode(
query
@ -41,6 +50,23 @@ class QdrantDatastore(Datastore):
hits = []
filter_conditions = []
try:
# Permissions filter
if user_id:
filter_conditions.append(
FieldCondition(
key=ALLOWED_USERS,
match=MatchAny(any=[str(user_id), PUBLIC_DOC_PAT]),
)
)
else:
filter_conditions.append(
FieldCondition(
key=ALLOWED_USERS,
match=MatchValue(value=PUBLIC_DOC_PAT),
)
)
# Provided query filters
if filters:
for filter_dict in filters:
valid_filters = {

View File

@ -0,0 +1,293 @@
from typing import cast
from danswer.configs.constants import DocumentSource
from danswer.connectors.models import InputType
from danswer.db.credentials import fetch_credential_by_id
from danswer.db.models import Connector
from danswer.db.models import ConnectorCredentialAssociation
from danswer.db.models import IndexAttempt
from danswer.db.models import User
from danswer.server.models import ConnectorBase
from danswer.server.models import ObjectCreationIdResponse
from danswer.server.models import StatusResponse
from danswer.utils.logging import setup_logger
from fastapi import HTTPException
from sqlalchemy import and_
from sqlalchemy import func
from sqlalchemy import select
from sqlalchemy.orm import aliased
from sqlalchemy.orm import Session
logger = setup_logger()
def fetch_connectors(
    db_session: Session,
    sources: list[DocumentSource] | None = None,
    input_types: list[InputType] | None = None,
    disabled_status: bool | None = None,
) -> list[Connector]:
    """Fetch connectors, optionally filtered by source, input type and disabled flag.

    Each filter is applied only when its argument is not None.
    """
    query = select(Connector)
    if sources is not None:
        query = query.where(Connector.source.in_(sources))
    if input_types is not None:
        query = query.where(Connector.input_type.in_(input_types))
    if disabled_status is not None:
        query = query.where(Connector.disabled == disabled_status)
    return list(db_session.scalars(query).all())
def connector_by_name_exists(connector_name: str, db_session: Session) -> bool:
    """Return True iff a connector with exactly this name already exists."""
    existing = db_session.execute(
        select(Connector).where(Connector.name == connector_name)
    ).scalar_one_or_none()
    return existing is not None
def fetch_connector_by_id(connector_id: int, db_session: Session) -> Connector | None:
    """Look up a connector by primary key; None if it does not exist."""
    lookup = select(Connector).where(Connector.id == connector_id)
    return db_session.execute(lookup).scalar_one_or_none()
def create_connector(
    connector_data: ConnectorBase,
    db_session: Session,
) -> ObjectCreationIdResponse:
    """Persist a new connector and return its generated id.

    Raises:
        ValueError: if a connector with the same name already exists.
    """
    if connector_by_name_exists(connector_data.name, db_session):
        raise ValueError(
            "Connector by this name already exists, duplicate naming not allowed."
        )

    new_connector = Connector(
        name=connector_data.name,
        source=connector_data.source,
        input_type=connector_data.input_type,
        connector_specific_config=connector_data.connector_specific_config,
        refresh_freq=connector_data.refresh_freq,
        disabled=connector_data.disabled,
    )
    db_session.add(new_connector)
    db_session.commit()

    return ObjectCreationIdResponse(id=new_connector.id)
def update_connector(
    connector_id: int,
    connector_data: ConnectorBase,
    db_session: Session,
) -> Connector | None:
    """Overwrite all mutable fields of an existing connector.

    Returns the updated Connector, or None if no connector has this id.

    Raises:
        ValueError: if renaming to a name another connector already uses.
    """
    connector = fetch_connector_by_id(connector_id, db_session)
    if connector is None:
        return None
    # Only check for duplicates when the name is actually changing, so that
    # saving a connector under its own unchanged name is allowed.
    if connector_data.name != connector.name and connector_by_name_exists(
        connector_data.name, db_session
    ):
        raise ValueError(
            "Connector by this name already exists, duplicate naming not allowed."
        )
    connector.name = connector_data.name
    connector.source = connector_data.source
    connector.input_type = connector_data.input_type
    connector.connector_specific_config = connector_data.connector_specific_config
    connector.refresh_freq = connector_data.refresh_freq
    connector.disabled = connector_data.disabled
    db_session.commit()
    return connector
def disable_connector(
    connector_id: int,
    db_session: Session,
) -> StatusResponse[int]:
    """Soft-delete: mark the connector disabled instead of removing the row.

    Preferred over delete_connector because IndexAttempt rows hold a foreign
    key to Connector.

    Raises:
        HTTPException: 404 if no connector with the given id exists.
    """
    connector = fetch_connector_by_id(connector_id, db_session)
    if connector is None:
        raise HTTPException(status_code=404, detail="Connector does not exist")

    connector.disabled = True
    db_session.commit()
    # Fixed message: previously claimed the connector was "deleted", which was
    # misleading — the row is retained and only flagged as disabled.
    return StatusResponse(
        success=True, message="Connector disabled successfully", data=connector_id
    )
def delete_connector(
    connector_id: int,
    db_session: Session,
) -> StatusResponse[int]:
    """Hard-delete a connector row.

    NOTE: currently unused — IndexAttempt holds a foreign key to Connector,
    so disable_connector should be used instead.
    """
    connector = fetch_connector_by_id(connector_id, db_session)
    if connector is None:
        # Idempotent: deleting something already gone is reported as success.
        return StatusResponse(
            success=True, message="Connector was already deleted", data=connector_id
        )

    db_session.delete(connector)
    db_session.commit()
    return StatusResponse(
        success=True, message="Connector deleted successfully", data=connector_id
    )
def get_connector_credential_ids(
    connector_id: int,
    db_session: Session,
) -> list[int]:
    """Return the ids of every credential associated with this connector.

    Raises:
        ValueError: if the connector does not exist.
    """
    connector = fetch_connector_by_id(connector_id, db_session)
    if connector is None:
        raise ValueError(f"Connector by id {connector_id} does not exist")

    return [assoc.credential.id for assoc in connector.credentials]
def add_credential_to_connector(
    connector_id: int,
    credential_id: int,
    user: User,
    db_session: Session,
) -> StatusResponse[int]:
    """Associate a credential with a connector (no-op if already associated).

    Raises:
        HTTPException: 404 if the connector does not exist, or if the
            credential does not exist / is not visible to `user`.
    """
    connector = fetch_connector_by_id(connector_id, db_session)
    credential = fetch_credential_by_id(credential_id, user, db_session)
    if connector is None:
        raise HTTPException(status_code=404, detail="Connector does not exist")
    if credential is None:
        # Fixed status code: was 401 (unauthenticated), which is wrong for a
        # missing/forbidden resource and inconsistent with
        # remove_credential_from_connector, which uses 404 for this condition.
        raise HTTPException(
            status_code=404,
            detail="Credential does not exist or does not belong to user",
        )
    existing_association = (
        db_session.query(ConnectorCredentialAssociation)
        .filter(
            ConnectorCredentialAssociation.connector_id == connector_id,
            ConnectorCredentialAssociation.credential_id == credential_id,
        )
        .one_or_none()
    )
    if existing_association is not None:
        return StatusResponse(
            success=False,
            message=f"Connector already has Credential {credential_id}",
            data=connector_id,
        )
    association = ConnectorCredentialAssociation(
        connector_id=connector_id, credential_id=credential_id
    )
    db_session.add(association)
    db_session.commit()
    return StatusResponse(
        success=True,
        message=f"New Credential {credential_id} added to Connector",
        data=connector_id,
    )
def remove_credential_from_connector(
    connector_id: int,
    credential_id: int,
    user: User,
    db_session: Session,
) -> StatusResponse[int]:
    """Remove the association between a credential and a connector.

    Returns success=False (not an error) when no such association exists.

    Raises:
        HTTPException: 404 if the connector does not exist, or the credential
            does not exist / is not visible to `user`.
    """
    connector = fetch_connector_by_id(connector_id, db_session)
    credential = fetch_credential_by_id(credential_id, user, db_session)
    if connector is None:
        raise HTTPException(status_code=404, detail="Connector does not exist")
    if credential is None:
        raise HTTPException(
            status_code=404,
            detail="Credential does not exist or does not belong to user",
        )
    association = (
        db_session.query(ConnectorCredentialAssociation)
        .filter(
            ConnectorCredentialAssociation.connector_id == connector_id,
            ConnectorCredentialAssociation.credential_id == credential_id,
        )
        .one_or_none()
    )
    if association is not None:
        db_session.delete(association)
        db_session.commit()
        return StatusResponse(
            success=True,
            message=f"Credential {credential_id} removed from Connector",
            data=connector_id,
        )
    return StatusResponse(
        success=False,
        message=f"Connector already does not have Credential {credential_id}",
        data=connector_id,
    )
def fetch_latest_index_attempt_by_connector(
    db_session: Session,
    source: DocumentSource | None = None,
) -> list[IndexAttempt]:
    """For each enabled connector (optionally restricted to one source),
    return its most recently updated IndexAttempt, skipping connectors
    with no attempts.
    """
    source_filter = [source] if source else None
    connectors = fetch_connectors(
        db_session, sources=source_filter, disabled_status=False
    )

    latest_attempts: list[IndexAttempt] = []
    for connector in connectors:
        newest_attempt = (
            db_session.query(IndexAttempt)
            .filter(IndexAttempt.connector_id == connector.id)
            .order_by(IndexAttempt.time_updated.desc())
            .first()
        )
        if newest_attempt is not None:
            latest_attempts.append(newest_attempt)
    return latest_attempts
def fetch_latest_index_attempts_by_status(
    db_session: Session,
) -> list[IndexAttempt]:
    """For every (connector, status) pair, return the most recently updated
    IndexAttempt with that status.
    """
    # Inner query: the newest time_updated per (connector_id, status) group.
    subquery = (
        db_session.query(
            IndexAttempt.connector_id,
            IndexAttempt.status,
            func.max(IndexAttempt.time_updated).label("time_updated"),
        )
        .group_by(IndexAttempt.connector_id)
        .group_by(IndexAttempt.status)
        .subquery()
    )
    alias = aliased(IndexAttempt, subquery)
    # Join back to pick the full IndexAttempt rows matching each group's max.
    query = db_session.query(IndexAttempt).join(
        alias,
        and_(
            IndexAttempt.connector_id == alias.connector_id,
            IndexAttempt.status == alias.status,
            IndexAttempt.time_updated == alias.time_updated,
        ),
    )
    return cast(list[IndexAttempt], query.all())

View File

@ -0,0 +1,157 @@
from typing import Any
from danswer.db.engine import build_engine
from danswer.db.models import Credential
from danswer.db.models import User
from danswer.server.models import CredentialBase
from danswer.server.models import ObjectCreationIdResponse
from danswer.utils.logging import setup_logger
from sqlalchemy import select
from sqlalchemy.orm import Session
from sqlalchemy.sql.expression import or_
logger = setup_logger()
def mask_string(sensitive_str: str) -> str:
    """Obscure a sensitive string for display, keeping only a short suffix.

    The last 4 characters are revealed only when the string is longer than 4
    characters; previously a secret of length <= 4 was appended in full,
    leaking the entire value.
    """
    if len(sensitive_str) <= 4:
        return "****...**"
    return "****...**" + sensitive_str[-4:]
def mask_credential_dict(credential_dict: dict[str, Any]) -> dict[str, str]:
    """Return a copy of the credential dict with every value masked for display.

    Raises:
        ValueError: if any value is not a string (only strings can be masked).
    """
    masked: dict[str, str] = {}
    for key, value in credential_dict.items():
        if not isinstance(value, str):
            raise ValueError(
                "Unable to mask credentials of type other than string, cannot process request."
            )
        masked[key] = mask_string(value)
    return masked
def fetch_credentials(
    user: User | None,
    db_session: Session,
) -> list[Credential]:
    """List credentials visible to `user`: their own plus ownerless ones.

    When no user is given, no visibility filter is applied and every
    credential is returned.
    """
    query = select(Credential)
    if user:
        visibility_filter = or_(
            Credential.user_id == user.id, Credential.user_id.is_(None)
        )
        query = query.where(visibility_filter)
    return list(db_session.scalars(query).all())
def fetch_credential_by_id(
    credential_id: int, user: User | None, db_session: Session
) -> Credential | None:
    """Fetch a credential by id, restricted to ones visible to `user`
    (their own or ownerless) when a user is provided; None if not found.
    """
    query = select(Credential).where(Credential.id == credential_id)
    if user:
        visibility_filter = or_(
            Credential.user_id == user.id, Credential.user_id.is_(None)
        )
        query = query.where(visibility_filter)
    return db_session.execute(query).scalar_one_or_none()
def create_credential(
    credential_data: CredentialBase,
    user: User,
    db_session: Session,
) -> ObjectCreationIdResponse:
    """Persist a new credential owned by `user` and return its generated id."""
    credential = Credential(
        credential_json=credential_data.credential_json,
        # NOTE(review): annotated as User but guarded for falsy — presumably
        # user may be None when auth is disabled; confirm against callers.
        user_id=int(user.id) if user else None,
        public_doc=credential_data.public_doc,
    )
    db_session.add(credential)
    db_session.commit()
    return ObjectCreationIdResponse(id=credential.id)
def update_credential(
    credential_id: int,
    credential_data: CredentialBase,
    user: User,
    db_session: Session,
) -> Credential | None:
    """Overwrite all mutable fields of a credential visible to `user`.

    Returns the updated Credential, or None if it does not exist or is not
    visible to the user.
    """
    credential = fetch_credential_by_id(credential_id, user, db_session)
    if credential is None:
        return None
    credential.credential_json = credential_data.credential_json
    credential.user_id = int(user.id) if user is not None else None
    credential.public_doc = credential_data.public_doc
    db_session.commit()
    return credential
def update_credential_json(
    credential_id: int,
    credential_json: dict[str, Any],
    user: User,
    db_session: Session,
) -> Credential | None:
    """Replace only the stored credential JSON of a credential visible to `user`.

    Returns the updated Credential, or None if it does not exist or is not
    visible to the user.
    """
    # Removed leftover debug logging ('HIIII' plus the raw credential_json) —
    # credential_json contains secrets and must never be written to logs.
    credential = fetch_credential_by_id(credential_id, user, db_session)
    if credential is None:
        return None
    credential.credential_json = credential_json
    db_session.commit()
    return credential
def backend_update_credential_json(
    credential: Credential,
    credential_json: dict[str, Any],
    db_session: Session,
) -> None:
    """Replace a credential's JSON without any ownership/visibility check.

    This should not be used in any flows involving the frontend or users —
    it bypasses the per-user access control in update_credential_json.
    """
    credential.credential_json = credential_json
    db_session.commit()
def delete_credential(
    credential_id: int,
    user: User,
    db_session: Session,
) -> None:
    """Delete a credential visible to `user`.

    Raises:
        ValueError: if the credential does not exist or is not visible to the user.
    """
    credential = fetch_credential_by_id(credential_id, user, db_session)
    if credential is None:
        raise ValueError(
            f"Credential by provided id {credential_id} does not exist or does not belong to user"
        )
    db_session.delete(credential)
    db_session.commit()
def create_initial_public_credential() -> None:
    """Ensure credential id 0 exists as the empty, public credential.

    Connectors that need no auth attach to this sentinel credential. If a
    credential with id 0 already exists but is non-empty or non-public, the
    DB is in an invalid initial state and we fail loudly.
    """
    public_cred_id = 0
    error_msg = (
        "DB is not in a valid initial state."
        "There must exist an empty public credential for data connectors that do not require additional Auth."
    )
    with Session(build_engine(), future=True, expire_on_commit=False) as db_session:
        first_credential = fetch_credential_by_id(public_cred_id, None, db_session)
        if first_credential is not None:
            if (
                first_credential.credential_json != {}
                or first_credential.public_doc is False
            ):
                raise ValueError(error_msg)
            # Sentinel already present and valid — nothing to do.
            return
        credential = Credential(
            id=public_cred_id, credential_json={}, user_id=None, public_doc=True
        )
        db_session.add(credential)
        db_session.commit()

View File

@ -1,21 +1,32 @@
from collections.abc import AsyncGenerator
from collections.abc import Generator
from datetime import datetime
from danswer.configs.app_configs import POSTGRES_DB
from danswer.configs.app_configs import POSTGRES_HOST
from danswer.configs.app_configs import POSTGRES_PASSWORD
from danswer.configs.app_configs import POSTGRES_PORT
from danswer.configs.app_configs import POSTGRES_USER
from sqlalchemy import text
from sqlalchemy.engine import create_engine
from sqlalchemy.engine import Engine
from sqlalchemy.ext.asyncio import AsyncEngine
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.ext.asyncio import create_async_engine
from sqlalchemy.orm import Session
SYNC_DB_API = "psycopg2"
ASYNC_DB_API = "asyncpg"
def get_db_current_time(db_session: Session) -> datetime:
    """Current timestamp as reported by the database (NOW()), not the app host.

    Raises:
        ValueError: if the database returns no value.
    """
    db_now = db_session.execute(text("SELECT NOW()")).scalar()
    if db_now is None:
        raise ValueError("Database did not return a time")
    return db_now
def build_connection_string(
*,
db_api: str = ASYNC_DB_API,
@ -38,6 +49,11 @@ def build_async_engine() -> AsyncEngine:
return create_async_engine(connection_string)
def get_session() -> Generator[Session, None, None]:
    # Yields a SQLAlchemy session scoped to the `with` block, closed when the
    # generator finishes (likely a FastAPI dependency — confirm at call sites).
    with Session(build_engine(), future=True, expire_on_commit=False) as session:
        yield session
async def get_async_session() -> AsyncGenerator[AsyncSession, None]:
async with AsyncSession(
build_async_engine(), future=True, expire_on_commit=False

View File

@ -1,55 +1,88 @@
from danswer.configs.constants import DocumentSource
from danswer.connectors.models import InputType
from danswer.db.engine import build_engine
from danswer.db.models import IndexAttempt
from danswer.db.models import IndexingStatus
from danswer.utils.logging import setup_logger
from sqlalchemy import desc
from sqlalchemy import select
from sqlalchemy.orm import Session
logger = setup_logger()
def insert_index_attempt(index_attempt: IndexAttempt) -> None:
    """Persist a new IndexAttempt row using a short-lived session."""
    logger.info(f"Inserting {index_attempt}")
    with Session(build_engine()) as db_session:
        db_session.add(index_attempt)
        db_session.commit()
def create_index_attempt(
    connector_id: int,
    credential_id: int,
    db_session: Session,
) -> int:
    """Create a NOT_STARTED IndexAttempt for the given connector/credential
    pair and return its newly assigned primary key."""
    attempt = IndexAttempt(
        connector_id=connector_id,
        credential_id=credential_id,
        status=IndexingStatus.NOT_STARTED,
    )
    db_session.add(attempt)
    db_session.commit()
    # id is populated by the flush that happens on commit
    return attempt.id
def fetch_index_attempts(
*,
sources: list[DocumentSource] | None = None,
statuses: list[IndexingStatus] | None = None,
input_types: list[InputType] | None = None,
def get_incomplete_index_attempts(
connector_id: int | None,
db_session: Session,
) -> list[IndexAttempt]:
with Session(build_engine(), future=True, expire_on_commit=False) as session:
stmt = select(IndexAttempt)
if sources:
stmt = stmt.where(IndexAttempt.source.in_(sources))
if statuses:
stmt = stmt.where(IndexAttempt.status.in_(statuses))
if input_types:
stmt = stmt.where(IndexAttempt.input_type.in_(input_types))
results = session.scalars(stmt)
return list(results.all())
stmt = select(IndexAttempt)
if connector_id is not None:
stmt = stmt.where(IndexAttempt.connector_id == connector_id)
stmt = stmt.where(
IndexAttempt.status.notin_([IndexingStatus.SUCCESS, IndexingStatus.FAILED])
)
incomplete_attempts = db_session.scalars(stmt)
return list(incomplete_attempts.all())
def update_index_attempt(
    *,
    index_attempt_id: int,
    new_status: IndexingStatus,
    document_ids: list[str] | None = None,
    error_msg: str | None = None,
) -> bool:
    """Returns `True` if successfully updated, `False` if cannot find matching ID"""
    with Session(build_engine(), future=True, expire_on_commit=False) as db_session:
        attempt = db_session.scalar(
            select(IndexAttempt).where(IndexAttempt.id == index_attempt_id)
        )
        if attempt is None:
            return False
        # NOTE: document_ids / error_msg are written even when left as None,
        # so omitting them clears any previously stored values.
        attempt.status = new_status
        attempt.document_ids = document_ids
        attempt.error_msg = error_msg
        db_session.commit()
        return True
def get_not_started_index_attempts(db_session: Session) -> list[IndexAttempt]:
    """Fetch every IndexAttempt that is still waiting to be picked up."""
    stmt = select(IndexAttempt).where(
        IndexAttempt.status == IndexingStatus.NOT_STARTED
    )
    return list(db_session.scalars(stmt).all())
def mark_attempt_in_progress(
    index_attempt: IndexAttempt,
    db_session: Session,
) -> None:
    """Transition the attempt to IN_PROGRESS and commit immediately."""
    index_attempt.status = IndexingStatus.IN_PROGRESS
    db_session.add(index_attempt)
    db_session.commit()
def mark_attempt_succeeded(
    index_attempt: IndexAttempt,
    docs_indexed: list[str],
    db_session: Session,
) -> None:
    """Record a successful run together with the document IDs it indexed."""
    index_attempt.status = IndexingStatus.SUCCESS
    index_attempt.document_ids = docs_indexed
    db_session.add(index_attempt)
    db_session.commit()
def mark_attempt_failed(
    index_attempt: IndexAttempt, db_session: Session, failure_reason: str = "Unknown"
) -> None:
    """Record a failed run, storing the failure reason in error_msg."""
    index_attempt.status = IndexingStatus.FAILED
    index_attempt.error_msg = failure_reason
    db_session.add(index_attempt)
    db_session.commit()
def get_last_finished_attempt(
    connector_id: int,
    db_session: Session,
) -> IndexAttempt | None:
    """Most recently updated SUCCESS attempt for the connector, if any."""
    stmt = (
        select(IndexAttempt)
        .where(
            IndexAttempt.connector_id == connector_id,
            IndexAttempt.status == IndexingStatus.SUCCESS,
        )
        .order_by(desc(IndexAttempt.time_updated))
    )
    return db_session.scalars(stmt).first()

View File

@ -9,9 +9,12 @@ from danswer.connectors.models import InputType
from fastapi_users.db import SQLAlchemyBaseOAuthAccountTableUUID
from fastapi_users.db import SQLAlchemyBaseUserTableUUID
from fastapi_users_db_sqlalchemy.access_token import SQLAlchemyBaseAccessTokenTableUUID
from sqlalchemy import Boolean
from sqlalchemy import DateTime
from sqlalchemy import Enum
from sqlalchemy import ForeignKey
from sqlalchemy import func
from sqlalchemy import Integer
from sqlalchemy import String
from sqlalchemy.dialects import postgresql
from sqlalchemy.orm import DeclarativeBase
@ -35,12 +38,93 @@ class User(SQLAlchemyBaseUserTableUUID, Base):
role: Mapped[UserRole] = mapped_column(
Enum(UserRole, native_enum=False, default=UserRole.BASIC)
)
credentials: Mapped[List["Credential"]] = relationship(
"Credential", back_populates="user", lazy="joined"
)
class AccessToken(SQLAlchemyBaseAccessTokenTableUUID, Base):
    # Access-token table used by fastapi-users; all columns are inherited
    # from SQLAlchemyBaseAccessTokenTableUUID.
    pass
class ConnectorCredentialAssociation(Base):
    """Connectors and Credentials can have a many-to-many relationship
    I.e. A Confluence Connector may have multiple admin users who can run it with their own credentials
    I.e. An admin user may use the same credential to index multiple Confluence Spaces
    """

    __tablename__ = "connector_credential_association"

    # Composite primary key: one row per (connector, credential) pairing.
    connector_id: Mapped[int] = mapped_column(
        ForeignKey("connector.id"), primary_key=True
    )
    credential_id: Mapped[int] = mapped_column(
        ForeignKey("credential.id"), primary_key=True
    )

    # Navigation back to the two sides of the association.
    connector: Mapped["Connector"] = relationship(
        "Connector", back_populates="credentials"
    )
    credential: Mapped["Credential"] = relationship(
        "Credential", back_populates="connectors"
    )
class Connector(Base):
    """ORM model for a configured data connector: a document source plus its
    source-specific configuration and run schedule."""

    __tablename__ = "connector"

    id: Mapped[int] = mapped_column(primary_key=True)
    name: Mapped[str] = mapped_column(String)
    source: Mapped[DocumentSource] = mapped_column(
        Enum(DocumentSource, native_enum=False)
    )
    # NOTE(review): no Mapped[...] annotation, so nullability follows the
    # mapped_column default (nullable=True per the migration); consider
    # annotating as Mapped[InputType | None] for clarity.
    input_type = mapped_column(Enum(InputType, native_enum=False))
    connector_specific_config: Mapped[dict[str, Any]] = mapped_column(
        postgresql.JSONB()
    )
    # Interval between scheduled runs; None presumably means no automatic
    # refresh — TODO confirm units (seconds?) against the scheduler.
    refresh_freq: Mapped[int | None] = mapped_column(Integer, nullable=True)
    time_created: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now()
    )
    # Bumped automatically on every UPDATE via onupdate=func.now().
    time_updated: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now(), onupdate=func.now()
    )
    disabled: Mapped[bool] = mapped_column(Boolean, default=False)

    # Association rows linking this connector to its credentials; removed
    # together with the connector (delete-orphan cascade).
    credentials: Mapped[List["ConnectorCredentialAssociation"]] = relationship(
        "ConnectorCredentialAssociation",
        back_populates="connector",
        cascade="all, delete-orphan",
    )
    index_attempts: Mapped[List["IndexAttempt"]] = relationship(
        "IndexAttempt", back_populates="connector"
    )
class Credential(Base):
    """ORM model for a stored credential blob, optionally owned by a user."""

    __tablename__ = "credential"

    id: Mapped[int] = mapped_column(primary_key=True)
    # Opaque, source-specific secret material (tokens, keys, ...).
    credential_json: Mapped[dict[str, Any]] = mapped_column(postgresql.JSONB())
    # NOTE(review): annotated int, but the user table's PK comes from
    # fastapi-users' UUID base class — confirm the FK column type matches.
    user_id: Mapped[int | None] = mapped_column(ForeignKey("user.id"), nullable=True)
    # Presumably marks credentials whose indexed documents are public
    # (cf. the initial public credential with empty json) — TODO confirm.
    public_doc: Mapped[bool] = mapped_column(Boolean, default=False)
    time_created: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now()
    )
    time_updated: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now(), onupdate=func.now()
    )

    # Association rows linking this credential to connectors; removed
    # together with the credential (delete-orphan cascade).
    connectors: Mapped[List["ConnectorCredentialAssociation"]] = relationship(
        "ConnectorCredentialAssociation",
        back_populates="credential",
        cascade="all, delete-orphan",
    )
    index_attempts: Mapped[List["IndexAttempt"]] = relationship(
        "IndexAttempt", back_populates="credential"
    )
    user: Mapped[User] = relationship("User", back_populates="credentials")
class IndexingStatus(str, PyEnum):
NOT_STARTED = "not_started"
IN_PROGRESS = "in_progress"
@ -58,22 +142,11 @@ class IndexAttempt(Base):
__tablename__ = "index_attempt"
id: Mapped[int] = mapped_column(primary_key=True)
# would like this to be a single JSONB column with structure described by
# `ConnectorDescriptor`, but this is not easily supported and requires
# some difficult to understand magic
source: Mapped[DocumentSource] = mapped_column(
Enum(DocumentSource, native_enum=False)
connector_id: Mapped[int | None] = mapped_column(
ForeignKey("connector.id"), nullable=True
)
input_type: Mapped[InputType] = mapped_column(Enum(InputType, native_enum=False))
connector_specific_config: Mapped[dict[str, Any]] = mapped_column(
postgresql.JSONB(), nullable=False
)
# TODO (chris): potentially add metadata for the chunker, embedder, and datastore
time_created: Mapped[datetime.datetime] = mapped_column(
DateTime(timezone=True), server_default=func.now()
)
time_updated: Mapped[datetime.datetime] = mapped_column(
DateTime(timezone=True), server_default=func.now(), onupdate=func.now()
credential_id: Mapped[int | None] = mapped_column(
ForeignKey("credential.id"), nullable=True
)
status: Mapped[IndexingStatus] = mapped_column(Enum(IndexingStatus))
document_ids: Mapped[list[str] | None] = mapped_column(
@ -82,16 +155,27 @@ class IndexAttempt(Base):
error_msg: Mapped[str | None] = mapped_column(
String(), default=None
) # only filled if status = "failed"
time_created: Mapped[datetime.datetime] = mapped_column(
DateTime(timezone=True), server_default=func.now()
)
time_updated: Mapped[datetime.datetime] = mapped_column(
DateTime(timezone=True), server_default=func.now(), onupdate=func.now()
)
connector: Mapped[Connector] = relationship(
"Connector", back_populates="index_attempts"
)
credential: Mapped[Credential] = relationship(
"Credential", back_populates="index_attempts"
)
def __repr__(self) -> str:
return (
f"<IndexAttempt(id={self.id!r}, "
f"source={self.source!r}, "
f"input_type={self.input_type!r}, "
f"connector_specific_config={self.connector_specific_config!r}, "
f"time_created={self.time_created!r}, "
f"time_updated={self.time_updated!r}, "
f"connector_id={self.connector_id!r}, "
f"status={self.status!r}, "
f"document_ids={self.document_ids!r}, "
f"error_msg={self.error_msg!r})>"
f"time_created={self.time_created!r}, "
f"time_updated={self.time_updated!r}, "
)

View File

@ -7,7 +7,7 @@ from danswer.direct_qa.question_answer import OpenAICompletionQA
def get_default_backend_qa_model(
internal_model: str = INTERNAL_MODEL_VERSION, **kwargs: dict[str, Any]
internal_model: str = INTERNAL_MODEL_VERSION, **kwargs: Any
) -> QAModel:
if internal_model == "openai-completion":
return OpenAICompletionQA(**kwargs)

View File

@ -45,8 +45,10 @@ from openai.error import Timeout
logger = setup_logger()
def get_openai_api_key():
return OPENAI_API_KEY or get_dynamic_config_store().load(OPENAI_API_KEY_STORAGE_KEY)
def get_openai_api_key() -> str:
return OPENAI_API_KEY or cast(
str, get_dynamic_config_store().load(OPENAI_API_KEY_STORAGE_KEY)
)
def get_json_line(json_dict: dict) -> str:
@ -198,7 +200,7 @@ ModelType = Literal["ChatCompletion", "Completion"]
PromptProcessor = Callable[[str, list[str]], str]
def _build_openai_settings(**kwargs: dict[str, Any]) -> dict[str, Any]:
def _build_openai_settings(**kwargs: Any) -> dict[str, Any]:
"""
Utility to add in some common default values so they don't have to be set every time.
"""
@ -218,7 +220,7 @@ def _handle_openai_exceptions_wrapper(openai_call: F, query: str) -> F:
# if streamed, the call returns a generator
if kwargs.get("stream"):
def _generator():
def _generator() -> Generator[Any, None, None]:
yield from openai_call(*args, **kwargs)
return _generator()

View File

@ -11,6 +11,7 @@ from danswer.configs.app_configs import ENABLE_OAUTH
from danswer.configs.app_configs import SECRET
from danswer.configs.app_configs import WEB_DOMAIN
from danswer.datastores.qdrant.indexing import list_collections
from danswer.db.credentials import create_initial_public_credential
from danswer.server.admin import router as admin_router
from danswer.server.event_loading import router as event_processing_router
from danswer.server.health import router as health_router
@ -35,6 +36,13 @@ def validation_exception_handler(
return JSONResponse(content=content, status_code=422)
def value_error_handler(_: Request, exc: ValueError) -> JSONResponse:
    """Translate uncaught ValueErrors into HTTP 400 responses."""
    content = {"message": str(exc)}
    return JSONResponse(status_code=400, content=content)
def get_application() -> FastAPI:
application = FastAPI(title="Internal Search QA Backend", debug=True, version="0.1")
application.include_router(backend_router)
@ -94,6 +102,8 @@ def get_application() -> FastAPI:
RequestValidationError, validation_exception_handler
)
application.add_exception_handler(ValueError, value_error_handler)
@application.on_event("startup")
def startup_event() -> None:
# To avoid circular imports
@ -112,6 +122,9 @@ def get_application() -> FastAPI:
warm_up_models()
logger.info("Semantic Search models are ready.")
logger.info("Verifying public credential exists.")
create_initial_public_credential()
return application

View File

@ -80,11 +80,12 @@ def semantic_reranking(
@log_function_time()
def retrieve_ranked_documents(
query: str,
user_id: int | None,
filters: list[DatastoreFilter] | None,
datastore: Datastore,
num_hits: int = NUM_RETURNED_HITS,
) -> list[InferenceChunk] | None:
top_chunks = datastore.semantic_retrieval(query, filters, num_hits)
top_chunks = datastore.semantic_retrieval(query, user_id, filters, num_hits)
if not top_chunks:
filters_log_msg = json.dumps(filters, separators=(",", ":")).replace("\n", "")
logger.warning(

View File

@ -1,153 +1,518 @@
from typing import Any
from collections import defaultdict
from typing import cast
from danswer.auth.users import current_admin_user
from danswer.configs.app_configs import MASK_CREDENTIAL_PREFIX
from danswer.configs.constants import DocumentSource
from danswer.configs.constants import NO_AUTH_USER
from danswer.configs.constants import OPENAI_API_KEY_STORAGE_KEY
from danswer.connectors.factory import build_connector
from danswer.connectors.google_drive.connector_auth import DB_CREDENTIALS_DICT_KEY
from danswer.connectors.google_drive.connector_auth import get_auth_url
from danswer.connectors.google_drive.connector_auth import get_drive_tokens
from danswer.connectors.google_drive.connector_auth import save_access_tokens
from danswer.connectors.google_drive.connector_auth import get_google_app_cred
from danswer.connectors.google_drive.connector_auth import (
update_credential_access_tokens,
)
from danswer.connectors.google_drive.connector_auth import upsert_google_app_cred
from danswer.connectors.google_drive.connector_auth import verify_csrf
from danswer.connectors.models import InputType
from danswer.connectors.slack.config import get_slack_config
from danswer.connectors.slack.config import SlackConfig
from danswer.connectors.slack.config import update_slack_config
from danswer.db.index_attempt import fetch_index_attempts
from danswer.db.index_attempt import insert_index_attempt
from danswer.db.connector import add_credential_to_connector
from danswer.db.connector import create_connector
from danswer.db.connector import delete_connector
from danswer.db.connector import fetch_connector_by_id
from danswer.db.connector import fetch_connectors
from danswer.db.connector import fetch_latest_index_attempt_by_connector
from danswer.db.connector import fetch_latest_index_attempts_by_status
from danswer.db.connector import get_connector_credential_ids
from danswer.db.connector import remove_credential_from_connector
from danswer.db.connector import update_connector
from danswer.db.credentials import create_credential
from danswer.db.credentials import delete_credential
from danswer.db.credentials import fetch_credential_by_id
from danswer.db.credentials import fetch_credentials
from danswer.db.credentials import mask_credential_dict
from danswer.db.credentials import update_credential
from danswer.db.engine import get_session
from danswer.db.index_attempt import create_index_attempt
from danswer.db.models import IndexAttempt
from danswer.db.models import IndexingStatus
from danswer.db.models import User
from danswer.direct_qa.key_validation import (
check_openai_api_key_is_valid,
)
from danswer.direct_qa.key_validation import check_openai_api_key_is_valid
from danswer.direct_qa.question_answer import get_openai_api_key
from danswer.dynamic_configs import get_dynamic_config_store
from danswer.dynamic_configs.interface import ConfigNotFoundError
from danswer.server.models import ApiKey
from danswer.server.models import AuthStatus
from danswer.server.models import AuthUrl
from danswer.server.models import ConnectorBase
from danswer.server.models import ConnectorIndexingStatus
from danswer.server.models import ConnectorSnapshot
from danswer.server.models import CredentialBase
from danswer.server.models import CredentialSnapshot
from danswer.server.models import GDriveCallback
from danswer.server.models import IndexAttemptRequest
from danswer.server.models import GoogleAppCredentials
from danswer.server.models import IndexAttemptSnapshot
from danswer.server.models import ListIndexAttemptsResponse
from danswer.server.models import ObjectCreationIdResponse
from danswer.server.models import RunConnectorRequest
from danswer.server.models import StatusResponse
from danswer.utils.logging import setup_logger
from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException
from pydantic import BaseModel
from fastapi import Request
from fastapi import Response
from sqlalchemy.orm import Session
router = APIRouter(prefix="/admin")
logger = setup_logger()
@router.get("/connectors/google-drive/check-auth", response_model=AuthStatus)
def check_drive_tokens(_: User = Depends(current_admin_user)) -> AuthStatus:
tokens = get_drive_tokens()
authenticated = tokens is not None
return AuthStatus(authenticated=authenticated)
@router.get("/connectors/google-drive/authorize", response_model=AuthUrl)
def google_drive_auth(user: User = Depends(current_admin_user)) -> AuthUrl:
user_id = str(user.id) if user else NO_AUTH_USER
return AuthUrl(auth_url=get_auth_url(user_id))
@router.get("/connectors/google-drive/callback", status_code=201)
def google_drive_callback(
callback: GDriveCallback = Depends(), user: User = Depends(current_admin_user)
) -> None:
user_id = str(user.id) if user else NO_AUTH_USER
verify_csrf(user_id, callback.state)
return save_access_tokens(callback.code)
@router.get("/connectors/slack/config", response_model=SlackConfig)
def fetch_slack_config(_: User = Depends(current_admin_user)) -> SlackConfig:
    """Return the stored Slack config, or a blank one if none is saved yet."""
    try:
        return get_slack_config()
    except ConfigNotFoundError:
        # No config stored yet — return empty values instead of erroring.
        return SlackConfig(slack_bot_token="", workspace_id="")
@router.post("/connectors/slack/config")
def modify_slack_config(
    slack_config: SlackConfig, _: User = Depends(current_admin_user)
) -> None:
    # Persist the Slack connector configuration (admin-only).
    update_slack_config(slack_config)
@router.post("/connectors/{source}/index-attempt", status_code=201)
def index(
source: DocumentSource,
index_attempt_request: IndexAttemptRequest,
@router.get("/connector/google-drive/app-credential")
def check_google_app_credentials_exist(
_: User = Depends(current_admin_user),
) -> None:
# validate that the connector specified by the source / input_type combination
# exists AND that the connector_specific_config is valid for that connector type, should be load
build_connector(
source=source,
input_type=index_attempt_request.input_type,
connector_specific_config=index_attempt_request.connector_specific_config,
) -> dict[str, str]:
try:
return {"client_id": get_google_app_cred().web.client_id}
except ConfigNotFoundError as e:
raise HTTPException(status_code=404, detail="Google App Credentials not found")
@router.put("/connector/google-drive/app-credential")
def update_google_app_credentials(
    app_credentials: GoogleAppCredentials, _: User = Depends(current_admin_user)
) -> StatusResponse:
    """Create or replace the Google Drive OAuth app credentials.

    Raises:
        HTTPException(400): if `upsert_google_app_cred` rejects the payload.
    """
    try:
        upsert_google_app_cred(app_credentials)
    except ValueError as e:
        # Chain the original error so the validation failure stays traceable
        # in server logs (the original swallowed the cause).
        raise HTTPException(status_code=400, detail=str(e)) from e
    return StatusResponse(
        success=True, message="Successfully saved Google App Credentials"
    )
# once validated, insert the index attempt into the database where it will
# get picked up by a background job
insert_index_attempt(
index_attempt=IndexAttempt(
source=source,
input_type=index_attempt_request.input_type,
connector_specific_config=index_attempt_request.connector_specific_config,
status=IndexingStatus.NOT_STARTED,
@router.get("/connector/google-drive/check-auth/{credential_id}")
def check_drive_tokens(
    credential_id: int,
    user: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> AuthStatus:
    """Report whether the stored credential holds usable Google Drive tokens."""
    db_credentials = fetch_credential_by_id(credential_id, user, db_session)
    # Missing credential, or credential without the Drive token key -> not authed.
    if db_credentials is None:
        return AuthStatus(authenticated=False)
    if DB_CREDENTIALS_DICT_KEY not in db_credentials.credential_json:
        return AuthStatus(authenticated=False)
    token_json_str = str(db_credentials.credential_json[DB_CREDENTIALS_DICT_KEY])
    authenticated = get_drive_tokens(token_json_str=token_json_str) is not None
    return AuthStatus(authenticated=authenticated)
# Cookie carrying the credential id from /authorize to /callback, where it is
# read back for CSRF verification.
_GOOGLE_DRIVE_CREDENTIAL_ID_COOKIE_NAME = "google_drive_credential_id"


@router.get("/connector/google-drive/authorize/{credential_id}", response_model=AuthUrl)
def google_drive_auth(
    response: Response, credential_id: str, _: User = Depends(current_admin_user)
) -> AuthUrl:
    """Start the Google Drive OAuth flow for the given credential.

    NOTE(review): `credential_id` is typed str and cast with int() below; a
    non-numeric path segment would raise here. Typing it int would let FastAPI
    reject it with a 422 — confirm before changing the public contract.
    """
    # set a cookie that we can read in the callback (used for `verify_csrf`)
    response.set_cookie(
        key=_GOOGLE_DRIVE_CREDENTIAL_ID_COOKIE_NAME,
        value=credential_id,
        httponly=True,
        max_age=600,
    )
    return AuthUrl(auth_url=get_auth_url(int(credential_id)))
@router.get("/connector/google-drive/callback")
def google_drive_callback(
request: Request,
callback: GDriveCallback = Depends(),
user: User = Depends(current_admin_user),
db_session: Session = Depends(get_session),
) -> StatusResponse:
credential_id_cookie = request.cookies.get(_GOOGLE_DRIVE_CREDENTIAL_ID_COOKIE_NAME)
if credential_id_cookie is None or not credential_id_cookie.isdigit():
raise HTTPException(
status_code=401, detail="Request did not pass CSRF verification."
)
)
credential_id = int(credential_id_cookie)
verify_csrf(credential_id, callback.state)
if (
update_credential_access_tokens(callback.code, credential_id, user, db_session)
is None
):
raise HTTPException(
status_code=500, detail="Unable to fetch Google Drive access tokens"
)
return StatusResponse(success=True, message="Updated Google Drive access tokens")
@router.get("/connectors/{source}/index-attempt")
@router.get("/latest-index-attempt", response_model=list[IndexAttemptSnapshot])
def list_all_index_attempts(
_: User = Depends(current_admin_user),
db_session: Session = Depends(get_session),
) -> list[IndexAttemptSnapshot]:
index_attempts = fetch_latest_index_attempt_by_connector(db_session)
return [
IndexAttemptSnapshot(
source=index_attempt.connector.source,
input_type=index_attempt.connector.input_type,
status=index_attempt.status,
connector_specific_config=index_attempt.connector.connector_specific_config,
docs_indexed=0
if not index_attempt.document_ids
else len(index_attempt.document_ids),
time_created=index_attempt.time_created,
time_updated=index_attempt.time_updated,
)
for index_attempt in index_attempts
]
@router.get("/latest-index-attempt/{source}", response_model=list[IndexAttemptSnapshot])
def list_index_attempts(
source: DocumentSource,
_: User = Depends(current_admin_user),
) -> ListIndexAttemptsResponse:
index_attempts = fetch_index_attempts(sources=[source])
return ListIndexAttemptsResponse(
index_attempts=[
IndexAttemptSnapshot(
connector_specific_config=index_attempt.connector_specific_config,
status=index_attempt.status,
source=index_attempt.source,
time_created=index_attempt.time_created,
time_updated=index_attempt.time_updated,
docs_indexed=0
if not index_attempt.document_ids
else len(index_attempt.document_ids),
db_session: Session = Depends(get_session),
) -> list[IndexAttemptSnapshot]:
index_attempts = fetch_latest_index_attempt_by_connector(db_session, source=source)
return [
IndexAttemptSnapshot(
source=index_attempt.connector.source,
input_type=index_attempt.connector.input_type,
status=index_attempt.status,
connector_specific_config=index_attempt.connector.connector_specific_config,
docs_indexed=0
if not index_attempt.document_ids
else len(index_attempt.document_ids),
time_created=index_attempt.time_created,
time_updated=index_attempt.time_updated,
)
for index_attempt in index_attempts
]
@router.get("/connector", response_model=list[ConnectorSnapshot])
def get_connectors(
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> list[ConnectorSnapshot]:
    """List every configured connector as an API-facing snapshot."""
    db_connectors = fetch_connectors(db_session)
    return [
        ConnectorSnapshot.from_connector_db_model(db_connector)
        for db_connector in db_connectors
    ]
@router.get("/connector/indexing-status")
def get_connector_indexing_status(
_: User = Depends(current_admin_user),
db_session: Session = Depends(get_session),
) -> list[ConnectorIndexingStatus]:
connector_id_to_connector = {
connector.id: connector for connector in fetch_connectors(db_session)
}
index_attempts = fetch_latest_index_attempts_by_status(db_session)
connector_to_index_attempts: dict[int, list[IndexAttempt]] = defaultdict(list)
for index_attempt in index_attempts:
# don't consider index attempts where the connector has been deleted
if index_attempt.connector_id:
connector_to_index_attempts[index_attempt.connector_id].append(
index_attempt
)
for index_attempt in index_attempts
indexing_statuses: list[ConnectorIndexingStatus] = []
for connector_id, index_attempts in connector_to_index_attempts.items():
# NOTE: index_attempts is guaranteed to be length > 0
connector = connector_id_to_connector[connector_id]
index_attempts_sorted = sorted(
index_attempts, key=lambda x: x.time_updated, reverse=True
)
successful_index_attempts_sorted = [
index_attempt
for index_attempt in index_attempts_sorted
if index_attempt.status == IndexingStatus.SUCCESS
]
indexing_statuses.append(
ConnectorIndexingStatus(
connector=ConnectorSnapshot.from_connector_db_model(connector),
last_status=index_attempts_sorted[0].status,
last_success=successful_index_attempts_sorted[0].time_updated
if successful_index_attempts_sorted
else None,
docs_indexed=len(successful_index_attempts_sorted[0].document_ids)
if successful_index_attempts_sorted
and successful_index_attempts_sorted[0].document_ids
else 0,
),
)
# add in the connector that haven't started indexing yet
for connector in connector_id_to_connector.values():
if connector.id not in connector_to_index_attempts:
indexing_statuses.append(
ConnectorIndexingStatus(
connector=ConnectorSnapshot.from_connector_db_model(connector),
last_status=IndexingStatus.NOT_STARTED,
last_success=None,
docs_indexed=0,
),
)
return indexing_statuses
@router.get(
    "/connector/{connector_id}",
    response_model=ConnectorSnapshot | StatusResponse[int],
)
def get_connector_by_id(
    connector_id: int,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> ConnectorSnapshot | StatusResponse[int]:
    """Fetch a single connector.

    Raises:
        HTTPException(404): if no connector with the given id exists.
    """
    connector = fetch_connector_by_id(connector_id, db_session)
    if connector is None:
        raise HTTPException(
            status_code=404, detail=f"Connector {connector_id} does not exist"
        )
    # Reuse the shared snapshot builder (already used by GET /connector and
    # the indexing-status endpoint) instead of duplicating the field mapping.
    return ConnectorSnapshot.from_connector_db_model(connector)
@router.get("/connectors/index-attempt")
def list_all_index_attempts(
@router.post("/connector", response_model=ObjectCreationIdResponse)
def create_connector_from_model(
connector_info: ConnectorBase,
_: User = Depends(current_admin_user),
) -> ListIndexAttemptsResponse:
index_attempts = fetch_index_attempts()
return ListIndexAttemptsResponse(
index_attempts=[
IndexAttemptSnapshot(
connector_specific_config=index_attempt.connector_specific_config,
status=index_attempt.status,
source=index_attempt.source,
time_created=index_attempt.time_created,
time_updated=index_attempt.time_updated,
docs_indexed=0
if not index_attempt.document_ids
else len(index_attempt.document_ids),
db_session: Session = Depends(get_session),
) -> ObjectCreationIdResponse:
try:
return create_connector(connector_info, db_session)
except ValueError as e:
raise HTTPException(status_code=400, detail=str(e))
@router.patch(
    "/connector/{connector_id}",
    response_model=ConnectorSnapshot | StatusResponse[int],
)
def update_connector_from_model(
    connector_id: int,
    connector_data: ConnectorBase,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> ConnectorSnapshot | StatusResponse[int]:
    """Apply the supplied fields to an existing connector.

    Raises:
        HTTPException(404): if no connector with the given id exists.
    """
    updated_connector = update_connector(connector_id, connector_data, db_session)
    if updated_connector is None:
        raise HTTPException(
            status_code=404, detail=f"Connector {connector_id} does not exist"
        )
    # Reuse the shared snapshot builder (used by GET /connector) rather than
    # duplicating the field-by-field mapping here.
    return ConnectorSnapshot.from_connector_db_model(updated_connector)
@router.delete("/connector/{connector_id}", response_model=StatusResponse[int])
def delete_connector_by_id(
    connector_id: int,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> StatusResponse[int]:
    # Delegate deletion (and the resulting status payload) to the db layer.
    return delete_connector(connector_id, db_session)
@router.get("/credential", response_model=list[CredentialSnapshot])
def get_credentials(
    user: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> list[CredentialSnapshot]:
    """List credentials visible to the user, masking secrets when configured."""
    snapshots: list[CredentialSnapshot] = []
    for credential in fetch_credentials(user, db_session):
        credential_json = credential.credential_json
        if MASK_CREDENTIAL_PREFIX:
            # Hide secret values before they leave the server.
            credential_json = mask_credential_dict(credential.credential_json)
        snapshots.append(
            CredentialSnapshot(
                id=credential.id,
                credential_json=credential_json,
                user_id=credential.user_id,
                public_doc=credential.public_doc,
                time_created=credential.time_created,
                time_updated=credential.time_updated,
            )
        )
    return snapshots
@router.get(
    "/credential/{credential_id}",
    response_model=CredentialSnapshot | StatusResponse[int],
)
def get_credential_by_id(
    credential_id: int,
    user: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> CredentialSnapshot | StatusResponse[int]:
    """Fetch one credential owned by / visible to the user.

    Raises:
        HTTPException(401): if the credential is missing or not accessible.
    """
    credential = fetch_credential_by_id(credential_id, user, db_session)
    if credential is None:
        raise HTTPException(
            status_code=401,
            detail=f"Credential {credential_id} does not exist or does not belong to user",
        )
    credential_json = credential.credential_json
    if MASK_CREDENTIAL_PREFIX:
        # Hide secret values before they leave the server.
        credential_json = mask_credential_dict(credential.credential_json)
    return CredentialSnapshot(
        id=credential.id,
        credential_json=credential_json,
        user_id=credential.user_id,
        public_doc=credential.public_doc,
        time_created=credential.time_created,
        time_updated=credential.time_updated,
    )
@router.post("/credential", response_model=ObjectCreationIdResponse)
def create_credential_from_model(
    connector_info: CredentialBase,
    user: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> ObjectCreationIdResponse:
    # Create a credential owned by the requesting admin and return its new id.
    # NOTE(review): the body parameter is named `connector_info` but holds a
    # CredentialBase — misleading, though renaming touches the signature, so
    # only flagging it here.
    return create_credential(connector_info, user, db_session)
@router.patch(
    "/credential/{credential_id}",
    response_model=CredentialSnapshot | StatusResponse[int],
)
def update_credential_from_model(
    credential_id: int,
    credential_data: CredentialBase,
    user: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> CredentialSnapshot | StatusResponse[int]:
    """Update a credential owned by the user and return its snapshot.

    Raises:
        HTTPException(401): if the credential is missing or not accessible.
    """
    updated_credential = update_credential(
        credential_id, credential_data, user, db_session
    )
    if updated_credential is None:
        raise HTTPException(
            status_code=401,
            detail=f"Credential {credential_id} does not exist or does not belong to user",
        )
    return CredentialSnapshot(
        id=updated_credential.id,
        # Mask secrets in the response for consistency with the GET
        # endpoints — the original returned them unmasked here.
        credential_json=mask_credential_dict(updated_credential.credential_json)
        if MASK_CREDENTIAL_PREFIX
        else updated_credential.credential_json,
        user_id=updated_credential.user_id,
        public_doc=updated_credential.public_doc,
        time_created=updated_credential.time_created,
        time_updated=updated_credential.time_updated,
    )
@router.delete("/credential/{credential_id}", response_model=StatusResponse[int])
def delete_credential_by_id(
    credential_id: int,
    user: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> StatusResponse[int]:
    """Delete a credential owned by the user and echo the deleted id.

    Return annotation fixed to StatusResponse[int] to match the declared
    response_model (the original annotated bare StatusResponse).
    """
    delete_credential(credential_id, user, db_session)
    return StatusResponse(
        success=True, message="Credential deleted successfully", data=credential_id
    )
@router.put("/connector/{connector_id}/credential/{credential_id}")
def associate_credential_to_connector(
    connector_id: int,
    credential_id: int,
    user: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> StatusResponse[int]:
    # Link an existing credential to a connector (many-to-many association);
    # the db layer builds the status payload.
    return add_credential_to_connector(connector_id, credential_id, user, db_session)
@router.delete("/connector/{connector_id}/credential/{credential_id}")
def dissociate_credential_from_connector(
    connector_id: int,
    credential_id: int,
    user: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> StatusResponse[int]:
    """Detaches a credential from a connector on behalf of `user`."""
    removal_status = remove_credential_from_connector(
        connector_id, credential_id, user, db_session
    )
    return removal_status
@router.post("/connector/run-once")
def connector_run_once(
    run_info: RunConnectorRequest,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> StatusResponse[list[int]]:
    """Kicks off one index attempt per requested credential for a connector.

    When `run_info.credential_ids` is None/empty, every credential associated
    with the connector is used; otherwise the specified ids must all be
    associated with the connector. Returns the created index attempt ids.

    Fix: removed two stray lines (`for index_attempt in index_attempts` / `]`)
    that were left in the body and made the function syntactically invalid;
    also dropped `f` prefixes from strings with no placeholders.
    """
    connector_id = run_info.connector_id
    specified_credential_ids = run_info.credential_ids
    try:
        possible_credential_ids = get_connector_credential_ids(
            run_info.connector_id, db_session
        )
    except ValueError:
        return StatusResponse(
            success=False,
            message=f"Connector by id {connector_id} does not exist.",
        )
    if not specified_credential_ids:
        # default to all credentials associated with the connector
        credential_ids = possible_credential_ids
    else:
        if set(specified_credential_ids).issubset(set(possible_credential_ids)):
            credential_ids = specified_credential_ids
        else:
            return StatusResponse(
                success=False,
                message="Not all specified credentials are associated with connector",
            )
    if not credential_ids:
        return StatusResponse(
            success=False,
            message="Connector has no valid credentials, cannot create index attempts.",
        )
    index_attempt_ids = [
        create_index_attempt(run_info.connector_id, credential_id, db_session)
        for credential_id in credential_ids
    ]
    return StatusResponse(
        success=True,
        message=f"Successfully created {len(index_attempt_ids)} index attempts",
        data=index_attempt_ids,
    )
@ -155,7 +520,6 @@ def list_all_index_attempts(
def validate_existing_openai_api_key(
_: User = Depends(current_admin_user),
) -> None:
is_valid = False
try:
openai_api_key = get_openai_api_key()
is_valid = check_openai_api_key_is_valid(openai_api_key)
@ -168,7 +532,7 @@ def validate_existing_openai_api_key(
raise HTTPException(status_code=400, detail="Invalid API key provided")
@router.get("/openai-api-key")
@router.get("/openai-api-key", response_model=ApiKey)
def get_openai_api_key_from_dynamic_config_store(
_: User = Depends(current_admin_user),
) -> ApiKey:

View File

@ -3,7 +3,6 @@ from typing import Any
from danswer.connectors.slack.connector import get_channel_info
from danswer.connectors.slack.connector import get_thread
from danswer.connectors.slack.connector import thread_to_doc
from danswer.connectors.slack.utils import get_client
from danswer.utils.indexing_pipeline import build_indexing_pipeline
from danswer.utils.logging import setup_logger
from fastapi import APIRouter
@ -25,41 +24,43 @@ class EventHandlingResponse(BaseModel):
challenge: str | None
@router.post("/process_slack_event", response_model=EventHandlingResponse)
def process_slack_event(event: SlackEvent) -> EventHandlingResponse:
logger.info("Recieved slack event: %s", event.dict())
# TODO: just store entry in DB and process in the background, until then this
# won't work cleanly since the slack bot token is not easily accessible
# @router.post("/process_slack_event", response_model=EventHandlingResponse)
# def process_slack_event(event: SlackEvent) -> EventHandlingResponse:
# logger.info("Recieved slack event: %s", event.dict())
if event.type == "url_verification":
return EventHandlingResponse(challenge=event.challenge)
# if event.type == "url_verification":
# return EventHandlingResponse(challenge=event.challenge)
if event.type == "event_callback" and event.event:
try:
# TODO: process in the background as per slack guidelines
message_type = event.event.get("subtype")
if message_type == "message_changed":
message = event.event["message"]
else:
message = event.event
# if event.type == "event_callback" and event.event:
# try:
# # TODO: process in the background as per slack guidelines
# message_type = event.event.get("subtype")
# if message_type == "message_changed":
# message = event.event["message"]
# else:
# message = event.event
channel_id = event.event["channel"]
thread_ts = message.get("thread_ts")
slack_client = get_client()
doc = thread_to_doc(
channel=get_channel_info(client=slack_client, channel_id=channel_id),
thread=get_thread(
client=slack_client, channel_id=channel_id, thread_id=thread_ts
)
if thread_ts
else [message],
)
if doc is None:
logger.info("Message was determined to not be indexable")
return EventHandlingResponse(challenge=None) # @CHRIS is this right?
# channel_id = event.event["channel"]
# thread_ts = message.get("thread_ts")
# slack_client = get_client()
# doc = thread_to_doc(
# channel=get_channel_info(client=slack_client, channel_id=channel_id),
# thread=get_thread(
# client=slack_client, channel_id=channel_id, thread_id=thread_ts
# )
# if thread_ts
# else [message],
# )
# if doc is None:
# logger.info("Message was determined to not be indexable")
# return EventHandlingResponse(challenge=None)
build_indexing_pipeline()([doc])
except Exception:
logger.exception("Failed to process slack message")
return EventHandlingResponse(challenge=None)
# build_indexing_pipeline()([doc])
# except Exception:
# logger.exception("Failed to process slack message")
# return EventHandlingResponse(challenge=None)
logger.error("Unsupported event type: %s", event.type)
return EventHandlingResponse(challenge=None)
# logger.error("Unsupported event type: %s", event.type)
# return EventHandlingResponse(challenge=None)

View File

@ -1,10 +1,10 @@
from danswer.server.models import HealthCheckResponse
from danswer.server.models import StatusResponse
from fastapi import APIRouter
router = APIRouter()
@router.get("/health")
def healthcheck() -> HealthCheckResponse:
return {"status": "ok"}
@router.get("/health", response_model=StatusResponse)
def healthcheck() -> StatusResponse:
return StatusResponse(success=True, message="ok")

View File

@ -1,18 +1,56 @@
from datetime import datetime
from typing import Any
from typing import Generic
from typing import Literal
from typing import Optional
from typing import TYPE_CHECKING
from typing import TypeVar
from danswer.configs.constants import DocumentSource
from danswer.connectors.models import InputType
from danswer.datastores.interfaces import DatastoreFilter
from danswer.db.models import Connector
from danswer.db.models import IndexingStatus
from pydantic import BaseModel
from pydantic.generics import GenericModel
DataT = TypeVar("DataT")
class StatusResponse(GenericModel, Generic[DataT]):
    # Generic envelope returned by most admin/management endpoints: a success
    # flag, an optional human-readable message, and an optional typed payload.
    success: bool
    message: Optional[str] = None
    data: Optional[DataT] = None
class DataRequest(BaseModel):
    # Request body wrapping a single opaque string value.
    data: str
class GoogleAppWebCredentials(BaseModel):
    # Mirrors the "web" section of a Google OAuth client-secrets JSON file.
    client_id: str
    project_id: str
    auth_uri: str
    token_uri: str
    auth_provider_x509_cert_url: str
    client_secret: str
    redirect_uris: list[str]
    javascript_origins: list[str]
class GoogleAppCredentials(BaseModel):
    # Top-level wrapper of the Google OAuth client-secrets JSON ({"web": ...}).
    web: GoogleAppWebCredentials
class HealthCheckResponse(BaseModel):
    # Legacy /health response shape; the endpoint now returns StatusResponse,
    # so this model looks unused — NOTE(review): confirm before removing.
    status: Literal["ok"]
class ObjectCreationIdResponse(BaseModel):
    # Returned by create endpoints: the id of the newly-created object.
    id: int | str
class AuthStatus(BaseModel):
    # Whether the requesting client is authenticated.
    authenticated: bool
@ -62,17 +100,73 @@ class IndexAttemptRequest(BaseModel):
connector_specific_config: dict[str, Any]
class IndexAttemptSnapshot(BaseModel):
connector_specific_config: dict[str, Any]
status: IndexingStatus
class ConnectorBase(BaseModel):
    # Fields shared by connector create/update requests and snapshots.
    name: str
    source: DocumentSource
    input_type: InputType
    # arbitrary per-source settings (e.g. which space / repo / site to index)
    connector_specific_config: dict[str, Any]
    refresh_freq: int | None  # In seconds, None for one time index with no refresh
    disabled: bool
class ConnectorSnapshot(ConnectorBase):
    # API representation of a persisted connector row, including the ids of
    # all credentials currently associated with it.
    id: int
    credential_ids: list[int]
    time_created: datetime
    time_updated: datetime

    @classmethod
    def from_connector_db_model(cls, connector: Connector) -> "ConnectorSnapshot":
        """Converts a Connector ORM object into its API snapshot form."""
        return ConnectorSnapshot(
            id=connector.id,
            name=connector.name,
            source=connector.source,
            input_type=connector.input_type,
            connector_specific_config=connector.connector_specific_config,
            refresh_freq=connector.refresh_freq,
            # connector.credentials holds association rows; pull the credential ids
            credential_ids=[
                association.credential.id for association in connector.credentials
            ],
            time_created=connector.time_created,
            time_updated=connector.time_updated,
            disabled=connector.disabled,
        )
class ConnectorIndexingStatus(BaseModel):
    """Represents the latest indexing status of a connector"""

    connector: ConnectorSnapshot
    last_status: IndexingStatus
    # None presumably means no successful run yet — NOTE(review): confirm
    last_success: datetime | None
    docs_indexed: int
class ListIndexAttemptsResponse(BaseModel):
    # Wrapper for endpoints that return a list of index attempts.
    index_attempts: list[IndexAttemptSnapshot]
class RunConnectorRequest(BaseModel):
    # Body of /connector/run-once; when credential_ids is None or empty, all
    # credentials associated with the connector are used.
    connector_id: int
    credential_ids: list[int] | None
class CredentialBase(BaseModel):
    # Connector secrets (tokens, keys, ...) stored as arbitrary JSON.
    credential_json: dict[str, Any]
    # presumably marks documents indexed via this credential as visible to
    # all users — NOTE(review): confirm against the permissioning logic
    public_doc: bool
class CredentialSnapshot(CredentialBase):
    # API representation of a persisted credential row; credential_json may be
    # masked before being returned (see the credential GET endpoint).
    id: int
    user_id: int | None
    time_created: datetime
    time_updated: datetime
class IndexAttemptSnapshot(BaseModel):
    # API representation of one indexing run of a connector.
    source: DocumentSource
    input_type: InputType
    status: IndexingStatus
    connector_specific_config: dict[str, Any]
    docs_indexed: int
    time_created: datetime
    time_updated: datetime
class ApiKey(BaseModel):

View File

@ -2,8 +2,8 @@ import time
from collections.abc import Generator
from danswer.auth.schemas import UserRole
from danswer.auth.users import current_active_user
from danswer.auth.users import current_admin_user
from danswer.auth.users import current_user
from danswer.configs.app_configs import KEYWORD_MAX_HITS
from danswer.configs.app_configs import NUM_RERANKED_RESULTS
from danswer.configs.app_configs import QA_TIMEOUT
@ -36,7 +36,7 @@ router = APIRouter()
@router.get("/get-user-role", response_model=UserRoleResponse)
async def get_user_role(user: User = Depends(current_active_user)) -> UserRoleResponse:
async def get_user_role(user: User = Depends(current_user)) -> UserRoleResponse:
if user is None:
raise ValueError("Invalid or missing user.")
return UserRoleResponse(role=user.role)
@ -61,7 +61,7 @@ async def promote_admin(
@router.get("/direct-qa", response_model=QAResponse)
def direct_qa(
question: QAQuestion = Depends(), _: User = Depends(current_active_user)
question: QAQuestion = Depends(), user: User = Depends(current_user)
) -> QAResponse:
start_time = time.time()
@ -70,8 +70,9 @@ def direct_qa(
filters = question.filters
logger.info(f"Received semantic query: {query}")
user_id = None if user is None else int(user.id)
ranked_chunks = retrieve_ranked_documents(
query, filters, create_datastore(collection)
query, user_id, filters, create_datastore(collection)
)
if not ranked_chunks:
return QAResponse(answer=None, quotes=None, ranked_documents=None)
@ -102,7 +103,7 @@ def direct_qa(
@router.get("/stream-direct-qa")
def stream_direct_qa(
question: QAQuestion = Depends(), _: User = Depends(current_active_user)
question: QAQuestion = Depends(), user: User = Depends(current_user)
) -> StreamingResponse:
top_documents_key = "top_documents"
@ -112,8 +113,9 @@ def stream_direct_qa(
filters = question.filters
logger.info(f"Received semantic query: {query}")
user_id = None if user is None else int(user.id)
ranked_chunks = retrieve_ranked_documents(
query, filters, create_datastore(collection)
query, user_id, filters, create_datastore(collection)
)
if not ranked_chunks:
yield get_json_line({top_documents_key: None})
@ -151,7 +153,7 @@ def stream_direct_qa(
@router.get("/keyword-search", response_model=KeywordResponse)
def keyword_search(
question: QAQuestion = Depends(), _: User = Depends(current_active_user)
question: QAQuestion = Depends(), _: User = Depends(current_user)
) -> KeywordResponse:
ts_client = TSClient.get_instance()
query = question.query

View File

@ -1,6 +1,8 @@
from collections.abc import Callable
from functools import partial
from itertools import chain
from typing import Any
from typing import Protocol
from danswer.chunking.chunk import Chunker
from danswer.chunking.chunk import DefaultChunker
@ -12,17 +14,26 @@ from danswer.semantic_search.biencoder import DefaultEmbedder
from danswer.semantic_search.type_aliases import Embedder
class IndexingPipelineProtocol(Protocol):
    # Callable signature of a fully-built indexing pipeline: takes raw
    # documents plus the owning user's id (None when there is no user) and
    # returns the chunks that were embedded and indexed.
    def __call__(
        self, documents: list[Document], user_id: int | None
    ) -> list[EmbeddedIndexChunk]:
        ...
def _indexing_pipeline(
    *,
    chunker: Chunker,
    embedder: Embedder,
    datastore: Datastore,
    documents: list[Document],
    user_id: int | None,
) -> list[EmbeddedIndexChunk]:
    """Chunks `documents`, embeds the chunks, and indexes them under `user_id`.

    Returns the embedded chunks that were written to the datastore.

    Fix: removed a stale duplicate `datastore.index(chunks_with_embeddings)`
    call (the pre-user_id form) that would have indexed every chunk twice.
    """
    # TODO: make entire indexing pipeline async to not block the entire process
    # when running on async endpoints
    chunks = list(chain(*[chunker.chunk(document) for document in documents]))
    chunks_with_embeddings = embedder.embed(chunks)
    datastore.index(chunks_with_embeddings, user_id)
    return chunks_with_embeddings
@ -31,7 +42,7 @@ def build_indexing_pipeline(
chunker: Chunker | None = None,
embedder: Embedder | None = None,
datastore: Datastore | None = None,
) -> Callable[[list[Document]], list[EmbeddedIndexChunk]]:
) -> IndexingPipelineProtocol:
"""Builds a pipline which takes in a list of docs and indexes them.
Default uses _ chunker, _ embedder, and qdrant for the datastore"""
@ -44,4 +55,6 @@ def build_indexing_pipeline(
if datastore is None:
datastore = QdrantDatastore()
return partial(_indexing_pipeline, chunker, embedder, datastore)
return partial(
_indexing_pipeline, chunker=chunker, embedder=embedder, datastore=datastore
)

View File

@ -1,156 +0,0 @@
# This file is only for development purposes
import argparse
from itertools import chain
from danswer.chunking.chunk import Chunker
from danswer.chunking.chunk import DefaultChunker
from danswer.configs.app_configs import INDEX_BATCH_SIZE
from danswer.configs.app_configs import QDRANT_DEFAULT_COLLECTION
from danswer.connectors.confluence.connector import ConfluenceConnector
from danswer.connectors.github.connector import GithubConnector
from danswer.connectors.google_drive.connector import GoogleDriveConnector
from danswer.connectors.google_drive.connector_auth import backend_get_credentials
from danswer.connectors.interfaces import LoadConnector
from danswer.connectors.slack.connector import SlackConnector
from danswer.connectors.web.connector import WebConnector
from danswer.datastores.interfaces import Datastore
from danswer.datastores.qdrant.indexing import recreate_collection
from danswer.datastores.qdrant.store import QdrantDatastore
from danswer.semantic_search.biencoder import DefaultEmbedder
from danswer.semantic_search.type_aliases import Embedder
from danswer.utils.logging import setup_logger
logger = setup_logger()
def load_batch(
    doc_loader: LoadConnector,
    chunker: Chunker,
    embedder: Embedder,
    datastore: Datastore,
) -> None:
    """Pulls every batch from `doc_loader`, chunks and embeds it, and indexes
    the result into `datastore`, logging progress along the way."""
    docs_done = 0
    chunks_done = 0
    for batch in doc_loader.load_from_state():
        if not batch:
            logger.warning("No parseable documents found in batch")
            continue
        # running count is logged before this batch is processed
        logger.info(f"Indexed {docs_done} documents")
        batch_chunks = list(chain.from_iterable(chunker.chunk(doc) for doc in batch))
        batch_chunk_count = len(batch_chunks)
        chunks_done += batch_chunk_count
        logger.info(
            f"Document batch yielded {batch_chunk_count} chunks for a total of {chunks_done}"
        )
        datastore.index(embedder.embed(batch_chunks))
        docs_done += len(batch)
    logger.info(f"Finished, indexed a total of {docs_done} documents")
def load_slack_batch(file_path: str, qdrant_collection: str) -> None:
    """Indexes a local Slack export directory into the given Qdrant collection."""
    logger.info("Loading documents from Slack.")
    slack_connector = SlackConnector(
        export_path_str=file_path, batch_size=INDEX_BATCH_SIZE
    )
    load_batch(
        slack_connector,
        DefaultChunker(),
        DefaultEmbedder(),
        QdrantDatastore(collection=qdrant_collection),
    )
def load_web_batch(url: str, qdrant_collection: str) -> None:
    """Crawls the given site and indexes it into the given Qdrant collection."""
    logger.info("Loading documents from web.")
    web_connector = WebConnector(base_url=url, batch_size=INDEX_BATCH_SIZE)
    load_batch(
        web_connector,
        DefaultChunker(),
        DefaultEmbedder(),
        QdrantDatastore(collection=qdrant_collection),
    )
def load_google_drive_batch(qdrant_collection: str) -> None:
    """Indexes Google Drive documents; fetches credentials before loading."""
    logger.info("Loading documents from Google Drive.")
    backend_get_credentials()
    drive_connector = GoogleDriveConnector(batch_size=INDEX_BATCH_SIZE)
    load_batch(
        drive_connector,
        DefaultChunker(),
        DefaultEmbedder(),
        QdrantDatastore(collection=qdrant_collection),
    )
def load_github_batch(owner: str, repo: str, qdrant_collection: str) -> None:
    """Indexes a single GitHub repository into the given Qdrant collection."""
    logger.info("Loading documents from Github.")
    github_connector = GithubConnector(
        repo_owner=owner, repo_name=repo, batch_size=INDEX_BATCH_SIZE
    )
    load_batch(
        github_connector,
        DefaultChunker(),
        DefaultEmbedder(),
        QdrantDatastore(collection=qdrant_collection),
    )
def load_confluence_batch(confluence_wiki_url: str, qdrant_collection: str) -> None:
    """Indexes a Confluence space into the given Qdrant collection."""
    logger.info("Loading documents from Confluence.")
    confluence_connector = ConfluenceConnector(
        confluence_wiki_url, batch_size=INDEX_BATCH_SIZE
    )
    load_batch(
        confluence_connector,
        DefaultChunker(),
        DefaultEmbedder(),
        QdrantDatastore(collection=qdrant_collection),
    )
class BatchLoadingArgs(argparse.Namespace):
    # Typed namespace for the CLI flags defined in the __main__ block below;
    # exists only to give the parse_args result static attribute types.
    website_url: str
    github_owner: str
    github_repo: str
    slack_export_dir: str
    confluence_link: str
    qdrant_collection: str
    rebuild_index: bool
if __name__ == "__main__":
    # Dev-only CLI: optionally rebuild the Qdrant collection, then run a
    # one-off batch index. Only the Confluence loader is active; uncomment
    # the relevant call below to exercise a different connector.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--website-url",
        default="https://docs.github.com/en/actions",
    )
    parser.add_argument(
        "--github-owner",
        default="danswer-ai",
    )
    parser.add_argument(
        "--github-repo",
        default="danswer",
    )
    parser.add_argument(
        "--slack-export-dir",
        default="~/Downloads/test-slack-export",
    )
    # NOTE(review): this flag uses an underscore unlike the dashed flags
    # above — kept as-is for compatibility with existing invocations.
    parser.add_argument(
        "--confluence_link",
        default="https://danswer.atlassian.net/wiki/spaces/fakespace",
    )
    parser.add_argument(
        "--qdrant-collection",
        default=QDRANT_DEFAULT_COLLECTION,
    )
    parser.add_argument(
        "--rebuild-index",
        action="store_true",
        help="Deletes and repopulates the semantic search index",
    )
    # NOTE(review): the class itself (not an instance) is passed as the
    # namespace, so parsed values land on BatchLoadingArgs class attributes.
    args = parser.parse_args(namespace=BatchLoadingArgs)
    if args.rebuild_index:
        recreate_collection(args.qdrant_collection)
    # load_slack_batch(args.slack_export_dir, args.qdrant_collection)
    # load_web_batch(args.website_url, args.qdrant_collection)
    # load_google_drive_batch(args.qdrant_collection)
    # load_github_batch(args.github_owner, args.github_repo, args.qdrant_collection)
    load_confluence_batch(args.confluence_link, args.qdrant_collection)

5
deployment/.env Normal file
View File

@ -0,0 +1,5 @@
# For a local deployment, no additional setup is needed
# Refer to env.dev.template and env.prod.template for additional options
# Setting Auth to false for local setup convenience to avoid setting up a Google OAuth app in GCP.
DISABLE_AUTH=True

View File

@ -1,33 +1,32 @@
This serves as an example for how to deploy everything on a single machine. This is
not optimal, but can get you started easily and cheaply. To run:
1. Set up a `.env` file in this directory with relevant environment variables.
- Use the `env.template` as a reference.
2. SKIP this step if running locally. If you are running this for production and need https do the following:
- Set up a `.env.nginx` file in this directory based on `env.nginx.template`.
- `chmod +x init-letsencrypt.sh` + `./init-letsencrypt.sh` to set up https certificate.
3. Run one of the docker compose commands below depending on your environment:
1. Run one of the docker compose commands below depending on your environment:
- For Local:
- `docker compose -f docker-compose.dev.yml -p danswer-stack up -d --build`
- This will start Web/API servers, Postgres (backend DB), Qdrant (vector DB), and the background indexing job.
- For Prod:
- `docker compose -f docker-compose.prod.yml -p danswer-stack up -d --build`
- This will additionally run certbot and start Nginx.
- Downloading packages/requirements may take 20+ minutes depending on your internet connection.
4. To shut down the deployment run:
2. To shut down the deployment run (use stop to stop containers, down to remove containers):
- For Local:
- `docker compose -f docker-compose.dev.yml -p danswer-stack stop`
- For Prod:
- `docker compose -f docker-compose.prod.yml -p danswer-stack stop`
5. To completely remove Danswer (**WARNING, this will also erase your indexed data and all users**) run:
3. To completely remove Danswer (**WARNING, this will also erase your indexed data and all users**) run:
- For Local:
- `docker compose -f docker-compose.dev.yml -p danswer-stack down`
- For Prod:
- `docker compose -f docker-compose.prod.yml -p danswer-stack down`
- `docker compose -f docker-compose.dev.yml -p danswer-stack down -v`
Additional steps for setting up for Prod:
1. Set up a `.env` file in this directory with relevant environment variables.
- Refer to env.dev.template and env.prod.template
2. Set up https:
- Set up a `.env.nginx` file in this directory based on `env.nginx.template`.
- `chmod +x init-letsencrypt.sh` + `./init-letsencrypt.sh` to set up https certificate.
3. Follow the above steps but replacing dev with prod.

View File

@ -0,0 +1,56 @@
upstream app_server {
# fail_timeout=0 means we always retry an upstream even if it failed
# to return a good HTTP response
# for UNIX domain socket setups
#server unix:/tmp/gunicorn.sock fail_timeout=0;
# for a TCP configuration
# TODO: use gunicorn to manage multiple processes
server api_server:8080 fail_timeout=0;
}
upstream web_server {
server web_server:3000 fail_timeout=0;
}
server {
listen 80;
server_name ${DOMAIN};
location ~ ^/api(.*)$ {
rewrite ^/api(/.*)$ $1 break;
# misc headers
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_set_header X-Forwarded-Host $host;
proxy_set_header Host $host;
# need to use 1.1 to support chunked transfers
proxy_http_version 1.1;
proxy_buffering off;
# we don't want nginx trying to do something clever with
# redirects, we set the Host: header above already.
proxy_redirect off;
proxy_pass http://app_server;
}
location / {
# misc headers
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_set_header X-Forwarded-Host $host;
proxy_set_header Host $host;
proxy_http_version 1.1;
# we don't want nginx trying to do something clever with
# redirects, we set the Host: header above already.
proxy_redirect off;
proxy_pass http://web_server;
}
}

View File

@ -35,7 +35,7 @@ services:
web_server:
build:
context: ../web
dockerfile: Dockerfile.dev
dockerfile: Dockerfile
depends_on:
- api_server
restart: always
@ -43,12 +43,12 @@ services:
- .env
environment:
- INTERNAL_URL=http://api_server:8080
ports:
- "3000:3000"
relational_db:
image: postgres:15.2-alpine
restart: always
# POSTGRES_USER and POSTGRES_PASSWORD should be set in .env file
environment:
POSTGRES_USER: ${POSTGRES_USER:-postgres}
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-password}
env_file:
- .env
ports:
@ -62,6 +62,22 @@ services:
- "6333:6333"
volumes:
- qdrant_volume:/qdrant/storage
nginx:
image: nginx:1.23.4-alpine
restart: always
depends_on:
- api_server
- web_server
environment:
- DOMAIN=localhost
ports:
- "80:80"
- "3000:80" # allow for localhost:3000 usage, since that is the norm
volumes:
- ./data/nginx:/etc/nginx/conf.d
command: >
/bin/sh -c "envsubst '$$\{DOMAIN\}' < /etc/nginx/conf.d/app.conf.template.dev > /etc/nginx/conf.d/app.conf
&& while :; do sleep 6h & wait $${!}; nginx -s reload; done & nginx -g \"daemon off;\""
volumes:
local_dynamic_storage:
db_volume:

View File

@ -33,7 +33,7 @@ services:
web_server:
build:
context: ../web
dockerfile: Dockerfile.prod
dockerfile: Dockerfile
depends_on:
- api_server
restart: always

View File

@ -1,20 +1,12 @@
# Fill in the values and copy the contents of this file to .env in the deployment directory
# Some valid default values are provided where applicable, delete the variables which you don't set values for
# Very basic .env file with options that are easy to change. Allows you to deploy everything on a single machine.
# We don't suggest using these settings for production.
# Insert your OpenAI API key here, currently the only Generative AI endpoint for QA that we support is OpenAI
OPENAI_API_KEY=
# Choose between "openai-chat-completion" and "openai-completion"
INTERNAL_MODEL_VERSION=openai-chat-completion
# Use a valid model for the choice above, consult https://platform.openai.com/docs/models/model-endpoint-compatibility
OPENAI_MODEL_VERSION=gpt-3.5-turbo
# Can leave these as defaults
POSTGRES_USER=postgres
POSTGRES_PASSWORD=password
OPENAI_MODEL_VERSION=gpt-3.5-turbo
# Auth not necessary for local

View File

@ -2,8 +2,8 @@
# Some valid default values are provided where applicable, delete the variables which you don't set values for
# THE SECTION BELOW INCLUDE MUST HAVE CONFIGS
# Insert your OpenAI API key here, currently the only Generative AI endpoint for QA that we support is OpenAI
# If not provided here, UI will prompt on setup
OPENAI_API_KEY=
# Choose between "openai-chat-completion" and "openai-completion"
INTERNAL_MODEL_VERSION=openai-chat-completion
@ -14,13 +14,6 @@ OPENAI_MODEL_VERSION=gpt-4
WEB_DOMAIN=http://localhost:3000
# CONNECTOR CONFIGS (set for the ones you are using, delete the others)
GITHUB_ACCESS_TOKEN=
GOOGLE_DRIVE_CREDENTIAL_JSON=
GOOGLE_DRIVE_TOKENS_JSON=
# BACKEND DB can leave these as defaults
POSTGRES_USER=postgres
POSTGRES_PASSWORD=password
@ -28,7 +21,6 @@ POSTGRES_PASSWORD=password
# AUTH CONFIGS
DISABLE_AUTH=False
# Feel free to remove everything below this line if DISABLE_AUTH=True
# Currently frontend page doesn't have basic auth, use OAuth if user auth is enabled.
ENABLE_OAUTH=True
@ -42,7 +34,7 @@ SECRET=
# How long before user needs to reauthenticate, default to 1 day. (cookie expiration time)
SESSION_EXPIRE_TIME_SECONDS=86400
# Only relevant if using basic auth
# Only relevant if using basic auth (not supported on frontend yet)
REQUIRE_EMAIL_VERIFICATION=True
# The five settings below are only required if REQUIRE_EMAIL_VERIFICATION is True
VALID_EMAIL_DOMAIN=

View File

@ -14,6 +14,8 @@ RUN \
elif [ -f pnpm-lock.yaml ]; then yarn global add pnpm && pnpm i --frozen-lockfile; \
else echo "Lockfile not found." && exit 1; \
fi
# needed for image processing
RUN npm i sharp
# Step 2. Rebuild the source code only when needed

View File

@ -1,34 +0,0 @@
FROM node:18-alpine
WORKDIR /app
# Install dependencies based on the preferred package manager
COPY package.json yarn.lock* package-lock.json* pnpm-lock.yaml* ./
RUN \
if [ -f yarn.lock ]; then yarn --frozen-lockfile; \
elif [ -f package-lock.json ]; then npm ci; \
elif [ -f pnpm-lock.yaml ]; then yarn global add pnpm && pnpm i; \
# Allow install without lockfile, so example works even without Node.js installed locally
else echo "Warning: Lockfile not found. It is recommended to commit lockfiles to version control." && yarn install; \
fi
COPY src ./src
COPY public ./public
COPY next.config.js .
COPY tsconfig.json .
COPY tailwind.config.js .
COPY postcss.config.js .
# Next.js collects completely anonymous telemetry data about general usage. Learn more here: https://nextjs.org/telemetry
# Uncomment the following line to disable telemetry at run time
ENV NEXT_TELEMETRY_DISABLED 1
# Note: Don't expose ports here, Compose will handle that for us
# Start Next.js in development mode based on the preferred package manager
CMD \
if [ -f yarn.lock ]; then yarn dev; \
elif [ -f package-lock.json ]; then npm run dev; \
elif [ -f pnpm-lock.yaml ]; then pnpm dev; \
else yarn dev; \
fi

View File

@ -4,6 +4,19 @@ const nextConfig = {
appDir: true,
},
output: "standalone",
rewrites: async () => {
// In production, something else (nginx in the one box setup) should take
// care of this rewrite. TODO (chris): better support setups where
// web_server and api_server are on different machines.
if (process.env.NODE_ENV === "production") return [];
return [
{
source: "/api/:path*",
destination: "http://127.0.0.1:8080/:path*", // Proxy to Backend
},
];
},
redirects: async () => {
// In production, something else (nginx in the one box setup) should take
// care of this redirect. TODO (chris): better support setups where
@ -12,8 +25,8 @@ const nextConfig = {
return [
{
source: "/api/:path*",
destination: "http://localhost:8080/:path*", // Proxy to Backend
source: "/api/stream-direct-qa:params*",
destination: "http://127.0.0.1:8080/stream-direct-qa:params*", // Proxy to Backend
permanent: true,
},
];

2775
web/package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@ -1,25 +1,147 @@
"use client";
import * as Yup from "yup";
import { IndexForm } from "@/components/admin/connectors/Form";
import { ConfluenceIcon } from "@/components/icons/icons";
import { ConfluenceIcon, TrashIcon } from "@/components/icons/icons";
import { TextFormField } from "@/components/admin/connectors/Field";
import { HealthCheckBanner } from "@/components/health/healthcheck";
import { CredentialForm } from "@/components/admin/connectors/CredentialForm";
import {
ConfluenceCredentialJson,
ConfluenceConfig,
Credential,
ConnectorIndexingStatus,
} from "@/lib/types";
import useSWR, { useSWRConfig } from "swr";
import { fetcher } from "@/lib/fetcher";
import { LoadingAnimation } from "@/components/Loading";
import { deleteCredential, linkCredential } from "@/lib/credential";
import { ConnectorForm } from "@/components/admin/connectors/ConnectorForm";
import { ConnectorsTable } from "@/components/admin/connectors/table/ConnectorsTable";
const Main = () => {
const { mutate } = useSWRConfig();
const {
data: connectorIndexingStatuses,
isLoading: isConnectorIndexingStatusesLoading,
error: isConnectorIndexingStatusesError,
} = useSWR<ConnectorIndexingStatus<any>[]>(
"/api/admin/connector/indexing-status",
fetcher
);
const {
data: credentialsData,
isLoading: isCredentialsLoading,
isValidating: isCredentialsValidating,
error: isCredentialsError,
} = useSWR<Credential<ConfluenceCredentialJson>[]>(
"/api/admin/credential",
fetcher
);
if (
isConnectorIndexingStatusesLoading ||
isCredentialsLoading ||
isCredentialsValidating
) {
return <LoadingAnimation text="Loading" />;
}
if (isConnectorIndexingStatusesError || !connectorIndexingStatuses) {
return <div>Failed to load connectors</div>;
}
if (isCredentialsError || !credentialsData) {
return <div>Failed to load credentials</div>;
}
const confluenceConnectorIndexingStatuses = connectorIndexingStatuses.filter(
(connectorIndexingStatus) =>
connectorIndexingStatus.connector.source === "confluence"
);
const confluenceCredential = credentialsData.filter(
(credential) => credential.credential_json?.confluence_access_token
)[0];
export default function Page() {
return (
<div className="mx-auto">
<div className="mb-4">
<HealthCheckBanner />
</div>
<div className="border-solid border-gray-600 border-b mb-4 pb-2 flex">
<ConfluenceIcon size="32" />
<h1 className="text-3xl font-bold pl-2">Confluence</h1>
</div>
<>
<h2 className="font-bold mb-2 mt-6 ml-auto mr-auto">
Step 1: Provide your Credentials
</h2>
{confluenceCredential ? (
<>
<div className="flex mb-1 text-sm">
{/* <div className="flex">
<p className="my-auto">Existing Username: </p>
<p className="ml-1 italic my-auto max-w-md truncate">
{confluenceCredential.credential_json?.confluence_username}
</p>{" "}
</div> */}
<p className="my-auto">Existing Access Token: </p>
<p className="ml-1 italic my-auto max-w-md truncate">
{confluenceCredential.credential_json?.confluence_access_token}
</p>
<button
className="ml-1 hover:bg-gray-700 rounded-full p-1"
onClick={async () => {
await deleteCredential(confluenceCredential.id);
mutate("/api/admin/credential");
}}
>
<TrashIcon />
</button>
</div>
</>
) : (
<>
<p className="text-sm">
To use the Confluence connector, you must first follow the guide
described{" "}
<a
className="text-blue-500"
href="https://docs.danswer.dev/connectors/slack#setting-up"
>
here
</a>{" "}
to generate an Access Token.
</p>
<div className="border-solid border-gray-600 border rounded-md p-6 mt-2">
<CredentialForm<ConfluenceCredentialJson>
formBody={
<>
<TextFormField name="confluence_username" label="Username:" />
<TextFormField
name="confluence_access_token"
label="Access Token:"
type="password"
/>
</>
}
validationSchema={Yup.object().shape({
confluence_username: Yup.string().required(
"Please enter your username on Confluence"
),
confluence_access_token: Yup.string().required(
"Please enter your Confluence access token"
),
})}
initialValues={{
confluence_username: "",
confluence_access_token: "",
}}
onSubmit={(isSuccess) => {
if (isSuccess) {
mutate("/api/admin/credential");
}
}}
/>
</div>
</>
)}
{/* TODO: make this periodic */}
<h2 className="text-xl font-bold mb-2 mt-6 ml-auto mr-auto">
Request Indexing
<h2 className="font-bold mb-2 mt-6 ml-auto mr-auto">
Step 2: Which spaces do you want to make searchable?
</h2>
<p className="text-sm mb-4">
To use the Confluence connector, you must first follow the guide
@ -34,13 +156,62 @@ export default function Page() {
setup, specify any link to a Confluence page below and click
&quot;Index&quot; to Index. Based on the provided link, we will index
the ENTIRE SPACE, not just the specified page. For example, entering{" "}
<i>https://danswer.atlassian.net/wiki/spaces/SD/overview</i> and
clicking the Index button will index the whole <i>SD</i> Confluence
space.
<i>https://danswer.atlassian.net/wiki/spaces/Engineering/overview</i>{" "}
and clicking the Index button will index the whole <i>Engineering</i>{" "}
Confluence space.
</p>
<div className="border-solid border-gray-600 border rounded-md p-6">
<IndexForm
{confluenceConnectorIndexingStatuses.length > 0 && (
<>
<p className="text-sm mb-2">
We pull the latest pages and comments from each space listed below
every <b>10</b> minutes.
</p>
<div className="mb-2">
<ConnectorsTable<ConfluenceConfig, ConfluenceCredentialJson>
connectorIndexingStatuses={confluenceConnectorIndexingStatuses}
liveCredential={confluenceCredential}
getCredential={(credential) => {
return (
<div>
<p>{credential.credential_json.confluence_access_token}</p>
</div>
);
}}
onCredentialLink={async (connectorId) => {
if (confluenceCredential) {
await linkCredential(connectorId, confluenceCredential.id);
mutate("/api/admin/connector/indexing-status");
}
}}
specialColumns={[
{
header: "Url",
key: "url",
getValue: (connector) => (
<a
className="text-blue-500"
href={connector.connector_specific_config.wiki_page_url}
>
{connector.connector_specific_config.wiki_page_url}
</a>
),
},
]}
onUpdate={() => mutate("/api/admin/connector/indexing-status")}
/>
</div>
</>
)}
<div className="border-solid border-gray-600 border rounded-md p-6 mt-4">
<h2 className="font-bold mb-3">Add a New Space</h2>
<ConnectorForm<ConfluenceConfig>
nameBuilder={(values) =>
`ConfluenceConnector-${values.wiki_page_url}`
}
source="confluence"
inputType="load_state"
formBody={
<>
<TextFormField name="wiki_page_url" label="Confluence URL:" />
@ -48,15 +219,36 @@ export default function Page() {
}
validationSchema={Yup.object().shape({
wiki_page_url: Yup.string().required(
"Please enter any link to your confluence e.g. https://danswer.atlassian.net/wiki/spaces/SD/overview"
"Please enter any link to your confluence e.g. https://danswer.atlassian.net/wiki/spaces/Engineering/overview"
),
})}
initialValues={{
wiki_page_url: "",
}}
onSubmit={(isSuccess) => console.log(isSuccess)}
refreshFreq={10 * 60} // 10 minutes
onSubmit={async (isSuccess, responseJson) => {
if (isSuccess && responseJson) {
await linkCredential(responseJson.id, confluenceCredential.id);
mutate("/api/admin/connector/indexing-status");
}
}}
/>
</div>
</>
);
};
export default function Page() {
return (
<div className="mx-auto container">
<div className="mb-4">
<HealthCheckBanner />
</div>
<div className="border-solid border-gray-600 border-b mb-4 pb-2 flex">
<ConfluenceIcon size="32" />
<h1 className="text-3xl font-bold pl-2">Confluence</h1>
</div>
<Main />
</div>
);
}

View File

@ -1,14 +1,213 @@
"use client";
import * as Yup from "yup";
import { IndexForm } from "@/components/admin/connectors/Form";
import { GithubIcon } from "@/components/icons/icons";
import { GithubIcon, TrashIcon } from "@/components/icons/icons";
import { TextFormField } from "@/components/admin/connectors/Field";
import { HealthCheckBanner } from "@/components/health/healthcheck";
import useSWR, { useSWRConfig } from "swr";
import { fetcher } from "@/lib/fetcher";
import {
GithubConfig,
GithubCredentialJson,
Credential,
ConnectorIndexingStatus,
} from "@/lib/types";
import { ConnectorForm } from "@/components/admin/connectors/ConnectorForm";
import { LoadingAnimation } from "@/components/Loading";
import { CredentialForm } from "@/components/admin/connectors/CredentialForm";
import { deleteCredential, linkCredential } from "@/lib/credential";
import { ConnectorsTable } from "@/components/admin/connectors/table/ConnectorsTable";
// Main setup flow for the Github connector admin page. Step 1 collects the
// Github access token credential; Step 2 lists existing repository
// connectors and lets the admin connect new repositories.
const Main = () => {
  const { mutate } = useSWRConfig();
  const {
    data: connectorIndexingStatuses,
    isLoading: isConnectorIndexingStatusesLoading,
    error: isConnectorIndexingStatusesError,
  } = useSWR<ConnectorIndexingStatus<any>[]>(
    "/api/admin/connector/indexing-status",
    fetcher
  );
  const {
    data: credentialsData,
    isLoading: isCredentialsLoading,
    isValidating: isCredentialsValidating,
    error: isCredentialsError,
  } = useSWR<Credential<GithubCredentialJson>[]>(
    "/api/admin/credential",
    fetcher
  );
  if (
    isConnectorIndexingStatusesLoading ||
    isCredentialsLoading ||
    isCredentialsValidating
  ) {
    return <LoadingAnimation text="Loading" />;
  }
  if (isConnectorIndexingStatusesError || !connectorIndexingStatuses) {
    return <div>Failed to load connectors</div>;
  }
  if (isCredentialsError || !credentialsData) {
    return <div>Failed to load credentials</div>;
  }
  const githubConnectorIndexingStatuses: ConnectorIndexingStatus<GithubConfig>[] =
    connectorIndexingStatuses.filter(
      (connectorIndexingStatus) =>
        connectorIndexingStatus.connector.source === "github"
    );
  // First credential that actually contains a Github access token;
  // undefined when the user has not completed Step 1 yet.
  const githubCredential = credentialsData.filter(
    (credential) => credential.credential_json?.github_access_token
  )[0];
  return (
    <>
      <h2 className="font-bold mb-2 mt-6 ml-auto mr-auto">
        Step 1: Provide your access token
      </h2>
      {githubCredential ? (
        <>
          {" "}
          <div className="flex mb-1 text-sm">
            <p className="my-auto">Existing Access Token: </p>
            <p className="ml-1 italic my-auto">
              {githubCredential.credential_json.github_access_token}
            </p>{" "}
            <button
              className="ml-1 hover:bg-gray-700 rounded-full p-1"
              onClick={async () => {
                await deleteCredential(githubCredential.id);
                mutate("/api/admin/credential");
              }}
            >
              <TrashIcon />
            </button>
          </div>
        </>
      ) : (
        <>
          <p className="text-sm">
            If you don&apos;t have an access token, read the guide{" "}
            <a
              className="text-blue-500"
              href="https://docs.danswer.dev/connectors/github"
            >
              here
            </a>{" "}
            on how to get one from Github.
          </p>
          <div className="border-solid border-gray-600 border rounded-md p-6 mt-2">
            <CredentialForm<GithubCredentialJson>
              formBody={
                <>
                  <TextFormField
                    name="github_access_token"
                    label="Access Token:"
                    type="password"
                  />
                </>
              }
              validationSchema={Yup.object().shape({
                github_access_token: Yup.string().required(
                  "Please enter the access token for Github"
                ),
              })}
              initialValues={{
                github_access_token: "",
              }}
              onSubmit={(isSuccess) => {
                if (isSuccess) {
                  mutate("/api/admin/credential");
                }
              }}
            />
          </div>
        </>
      )}
      <h2 className="font-bold mb-2 mt-6 ml-auto mr-auto">
        Step 2: Which repositories do you want to make searchable?
      </h2>
      {githubConnectorIndexingStatuses.length > 0 && (
        <>
          <p className="text-sm mb-2">
            We pull the latest Pull Requests from each repository listed below
            every <b>10</b> minutes.
          </p>
          <div className="mb-2">
            <ConnectorsTable<GithubConfig, GithubCredentialJson>
              connectorIndexingStatuses={githubConnectorIndexingStatuses}
              liveCredential={githubCredential}
              getCredential={(credential) =>
                credential.credential_json.github_access_token
              }
              onCredentialLink={async (connectorId) => {
                if (githubCredential) {
                  await linkCredential(connectorId, githubCredential.id);
                  mutate("/api/admin/connector/indexing-status");
                }
              }}
              specialColumns={[
                {
                  header: "Repository",
                  key: "repository",
                  getValue: (connector) =>
                    `${connector.connector_specific_config.repo_owner}/${connector.connector_specific_config.repo_name}`,
                },
              ]}
              onUpdate={() => mutate("/api/admin/connector/indexing-status")}
            />
          </div>
        </>
      )}
      <div className="border-solid border-gray-600 border rounded-md p-6 mt-4">
        <h2 className="font-bold mb-3">Connect to a New Repository</h2>
        <ConnectorForm<GithubConfig>
          nameBuilder={(values) =>
            `GithubConnector-${values.repo_owner}/${values.repo_name}`
          }
          source="github"
          inputType="load_state"
          formBody={
            <>
              <TextFormField name="repo_owner" label="Repository Owner:" />
              <TextFormField name="repo_name" label="Repository Name:" />
            </>
          }
          validationSchema={Yup.object().shape({
            repo_owner: Yup.string().required(
              "Please enter the owner of the repository to index e.g. danswer-ai"
            ),
            repo_name: Yup.string().required(
              "Please enter the name of the repository to index e.g. danswer "
            ),
          })}
          initialValues={{
            repo_owner: "",
            repo_name: "",
          }}
          refreshFreq={10 * 60} // 10 minutes
          onSubmit={async (isSuccess, responseJson) => {
            // BUGFIX: guard on githubCredential — this form renders even
            // before a credential exists, and `githubCredential.id` would
            // otherwise throw a TypeError here. Mirrors the guard already
            // present in onCredentialLink above.
            if (isSuccess && responseJson && githubCredential) {
              await linkCredential(responseJson.id, githubCredential.id);
              mutate("/api/admin/connector/indexing-status");
            }
          }}
        />
      </div>
    </>
  );
};
export default function Page() {
return (
<div className="mx-auto">
<div className="container mx-auto">
<div className="mb-4">
<HealthCheckBanner />
</div>
@ -16,35 +215,7 @@ export default function Page() {
<GithubIcon size="32" />
<h1 className="text-3xl font-bold pl-2">Github PRs</h1>
</div>
{/* TODO: make this periodic */}
<h2 className="text-xl font-bold pl-2 mb-2 mt-6 ml-auto mr-auto">
Request Indexing
</h2>
<div className="border-solid border-gray-600 border rounded-md p-6">
<IndexForm
source="github"
formBody={
<>
<TextFormField name="repo_owner" label="Owner of repo:" />
<TextFormField name="repo_name" label="Name of repo:" />
</>
}
validationSchema={Yup.object().shape({
repo_owner: Yup.string().required(
"Please enter the owner of the repo scrape e.g. danswer-ai"
),
repo_name: Yup.string().required(
"Please enter the name of the repo scrape e.g. danswer "
),
})}
initialValues={{
repo_owner: "",
repo_name: "",
}}
onSubmit={(isSuccess) => console.log(isSuccess)}
/>
</div>
<Main />
</div>
);
}

View File

@ -1,16 +1,28 @@
import { getDomain } from "@/lib/redirectSS";
import { buildUrl } from "@/lib/utilsSS";
import { NextRequest, NextResponse } from "next/server";
import { cookies } from "next/headers";
export const GET = async (request: NextRequest) => {
// Wrapper around the FastAPI endpoint /connectors/google-drive/callback,
// which adds back a redirect to the Google Drive admin page.
const url = new URL(buildUrl("/admin/connectors/google-drive/callback"));
const url = new URL(buildUrl("/admin/connector/google-drive/callback"));
url.search = request.nextUrl.search;
const response = await fetch(url.toString());
const response = await fetch(url.toString(), {
headers: {
cookie: cookies()
.getAll()
.map((cookie) => `${cookie.name}=${cookie.value}`)
.join("; "),
},
});
if (!response.ok) {
console.log(
"Error in Google Drive callback:",
(await response.json()).detail
);
return NextResponse.redirect(new URL("/auth/error", getDomain(request)));
}

View File

@ -1,40 +1,120 @@
"use client";
import * as Yup from "yup";
import {
IndexForm,
submitIndexRequest,
} from "@/components/admin/connectors/Form";
import {
ConnectorStatusEnum,
ConnectorStatus,
} from "@/components/admin/connectors/ConnectorStatus";
import { GoogleDriveIcon } from "@/components/icons/icons";
import useSWR from "swr";
import useSWR, { useSWRConfig } from "swr";
import { fetcher } from "@/lib/fetcher";
import { LoadingAnimation } from "@/components/Loading";
import { useRouter } from "next/navigation";
import { Popup } from "@/components/admin/connectors/Popup";
import { Popup, PopupSpec } from "@/components/admin/connectors/Popup";
import { useState } from "react";
import { HealthCheckBanner } from "@/components/health/healthcheck";
import { Button } from "@/components/Button";
import {
Connector,
ConnectorBase,
ConnectorIndexingStatus,
Credential,
GoogleDriveCredentialJson,
} from "@/lib/types";
import { deleteConnector } from "@/lib/connector";
import { StatusRow } from "@/components/admin/connectors/table/ConnectorsTable";
export default function Page() {
// File-upload widget for the Google Drive app-wide credentials.json.
// Reads the selected file client-side with a FileReader, then PUTs its
// raw contents to the backend; reports success/failure via `setPopup`.
const AppCredentialUpload = ({
  setPopup,
}: {
  setPopup: (popupSpec: PopupSpec | null) => void;
}) => {
  // Raw JSON text of the chosen file; undefined until a file has been
  // selected and fully read (the Upload button is disabled until then).
  const [appCredentialJsonStr, setAppCredentialJsonStr] = useState<
    string | undefined
  >();
  return (
    <>
      <input
        className={
          "mr-3 text-sm text-gray-900 border border-gray-300 rounded-lg " +
          "cursor-pointer bg-gray-50 dark:text-gray-400 focus:outline-none " +
          "dark:bg-gray-700 dark:border-gray-600 dark:placeholder-gray-400"
        }
        type="file"
        accept=".json"
        onChange={(event) => {
          if (!event.target.files) {
            return;
          }
          // Read the selected file as text so it can be forwarded to the
          // API verbatim (no client-side JSON parsing/validation here).
          const file = event.target.files[0];
          const reader = new FileReader();
          reader.onload = function (loadEvent) {
            if (!loadEvent?.target?.result) {
              return;
            }
            const fileContents = loadEvent.target.result;
            setAppCredentialJsonStr(fileContents as string);
          };
          reader.readAsText(file);
        }}
      />
      <Button
        disabled={!appCredentialJsonStr}
        onClick={async () => {
          // Upload the raw file contents as the request body.
          const response = await fetch(
            "/api/admin/connector/google-drive/app-credential",
            {
              method: "PUT",
              headers: {
                "Content-Type": "application/json",
              },
              body: appCredentialJsonStr,
            }
          );
          if (response.ok) {
            setPopup({
              message: "Successfully uploaded app credentials",
              type: "success",
            });
          } else {
            setPopup({
              message: `Failed to upload app credentials - ${response.status}`,
              type: "error",
            });
          }
        }}
      >
        Upload
      </Button>
    </>
  );
};
const Main = () => {
const router = useRouter();
const { mutate } = useSWRConfig();
const {
data: isAuthenticatedData,
isLoading: isAuthenticatedLoading,
error: isAuthenticatedError,
} = useSWR<{ authenticated: boolean }>(
"/api/admin/connectors/google-drive/check-auth",
data: appCredentialData,
isLoading: isAppCredentialLoading,
error: isAppCredentialError,
} = useSWR<{ client_id: string }>(
"/api/admin/connector/google-drive/app-credential",
fetcher
);
const {
data: authorizationUrlData,
isLoading: authorizationUrlLoading,
error: authorizationUrlError,
} = useSWR<{ auth_url: string }>(
"/api/admin/connectors/google-drive/authorize",
data: connectorIndexingStatuses,
isLoading: isConnectorIndexingStatusesLoading,
error: isConnectorIndexingStatusesError,
} = useSWR<ConnectorIndexingStatus<any>[]>(
"/api/admin/connector/indexing-status",
fetcher
);
const {
data: credentialsData,
isLoading: isCredentialsLoading,
error: isCredentialsError,
} = useSWR<Credential<GoogleDriveCredentialJson>[]>(
"/api/admin/credential",
fetcher
);
@ -42,9 +122,294 @@ export default function Page() {
message: string;
type: "success" | "error";
} | null>(null);
const setPopupWithExpiration = (popupSpec: PopupSpec | null) => {
setPopup(popupSpec);
setTimeout(() => {
setPopup(null);
}, 4000);
};
const header = (
<div>
if (
isCredentialsLoading ||
isAppCredentialLoading ||
isConnectorIndexingStatusesLoading
) {
return (
<div className="mx-auto">
<LoadingAnimation text="" />
</div>
);
}
if (isCredentialsError || !credentialsData) {
return (
<div className="mx-auto">
<div className="text-red-500">Failed to load credentials.</div>
</div>
);
}
if (isConnectorIndexingStatusesError || !connectorIndexingStatuses) {
return (
<div className="mx-auto">
<div className="text-red-500">Failed to load connectors.</div>
</div>
);
}
if (isAppCredentialError) {
return (
<div className="mx-auto">
<div className="text-red-500">
Error loading Google Drive app credentials. Contact an administrator.
</div>
</div>
);
}
const googleDriveConnectorIndexingStatuses: ConnectorIndexingStatus<{}>[] =
connectorIndexingStatuses.filter(
(connectorIndexingStatus) =>
connectorIndexingStatus.connector.source === "google_drive"
);
const googleDriveConnectorIndexingStatus =
googleDriveConnectorIndexingStatuses[0];
const googleDriveCredential = credentialsData.filter(
(credential) => credential.credential_json?.google_drive_tokens
)[0];
return (
<>
{popup && <Popup message={popup.message} type={popup.type} />}
<h2 className="font-bold mb-2 mt-6 ml-auto mr-auto">
Step 1: Provide your app Credentials
</h2>
<div className="mt-2">
{appCredentialData?.client_id ? (
<div className="text-sm">
<div>
Found existing app credentials with the following{" "}
<b>Client ID:</b>
<p className="italic mt-1">{appCredentialData.client_id}</p>
</div>
<div className="mt-4">
If you want to update these credentials, upload a new
credentials.json file below.
<div className="mt-2">
<AppCredentialUpload
setPopup={(popup) => {
mutate("/api/admin/connector/google-drive/app-credential");
setPopupWithExpiration(popup);
}}
/>
</div>
</div>
</div>
) : (
<>
<p className="text-sm">
Follow the guide{" "}
<a
className="text-blue-500"
target="_blank"
href="https://docs.danswer.dev/connectors/google_drive#authorization"
>
here
</a>{" "}
to setup your google app in your company workspace. Download the
credentials.json, and upload it here.
</p>
<AppCredentialUpload
setPopup={(popup) => {
mutate("/api/admin/connector/google-drive/app-credential");
setPopupWithExpiration(popup);
}}
/>
</>
)}
</div>
<h2 className="font-bold mb-2 mt-6 ml-auto mr-auto">
Step 2: Authenticate with Danswer
</h2>
<div className="text-sm mb-4">
{googleDriveCredential ? (
<p>
<i>Existing credential already setup!</i> If you want to reset that
credential, click the button below to go through the OAuth flow
again.
</p>
) : (
<>
<p>
Next, you must provide credentials via OAuth. This gives us read
access to the docs you have access to in your google drive
account.
</p>
</>
)}
</div>
<Button
onClick={async () => {
const credentialCreationResponse = await fetch(
"/api/admin/credential",
{
method: "POST",
headers: {
"Content-Type": "application/json",
},
body: JSON.stringify({
public_doc: true,
credential_json: {},
}),
}
);
if (!credentialCreationResponse.ok) {
setPopupWithExpiration({
message: `Failed to create credential - ${credentialCreationResponse.status}`,
type: "error",
});
return;
}
const credential =
(await credentialCreationResponse.json()) as Credential<{}>;
const authorizationUrlResponse = await fetch(
`/api/admin/connector/google-drive/authorize/${credential.id}`
);
if (!authorizationUrlResponse.ok) {
setPopupWithExpiration({
message: `Failed to create credential - ${authorizationUrlResponse.status}`,
type: "error",
});
return;
}
const authorizationUrlJson =
(await authorizationUrlResponse.json()) as { auth_url: string };
router.push(authorizationUrlJson.auth_url);
}}
>
Authenticate with Google Drive
</Button>
<h2 className="font-bold mb-2 mt-6 ml-auto mr-auto">
Step 3: Start Indexing!
</h2>
{googleDriveConnectorIndexingStatus ? (
<div>
<div className="text-sm mb-2">
<div className="flex mb-1">
The Google Drive connector is setup!{" "}
<b className="mx-2">Status:</b>{" "}
<StatusRow
connectorIndexingStatus={googleDriveConnectorIndexingStatus}
hasCredentialsIssue={
googleDriveConnectorIndexingStatus.connector.credential_ids
.length === 0
}
setPopup={setPopupWithExpiration}
onUpdate={() => {
mutate("/api/admin/connector/indexing-status");
}}
/>
</div>
<p>
Checkout the{" "}
<a href="/admin/indexing/status" className="text-blue-500">
status page
</a>{" "}
for the latest indexing status. We fetch the latest documents from
Google Drive every <b>10</b> minutes.
</p>
</div>
<Button
onClick={() => {
deleteConnector(
googleDriveConnectorIndexingStatus.connector.id
).then(() => {
setPopupWithExpiration({
message: "Successfully deleted connector!",
type: "success",
});
mutate("/api/admin/connector/indexing-status");
});
}}
>
Delete Connector
</Button>
</div>
) : (
<>
<p className="text-sm mb-2">
Click the button below to create a connector. We will refresh the
latest documents from Google Drive every <b>10</b> minutes.
</p>
<Button
onClick={async () => {
const connectorBase: ConnectorBase<{}> = {
name: "GoogleDriveConnector",
input_type: "load_state",
source: "google_drive",
connector_specific_config: {},
refresh_freq: 60 * 10, // 10 minutes
disabled: false,
};
const connectorCreationResponse = await fetch(
`/api/admin/connector`,
{
method: "POST",
headers: {
"Content-Type": "application/json",
},
body: JSON.stringify(connectorBase),
}
);
if (!connectorCreationResponse.ok) {
setPopupWithExpiration({
message: `Failed to create connector - ${connectorCreationResponse.status}`,
type: "error",
});
return;
}
const connector =
(await connectorCreationResponse.json()) as Connector<{}>;
const credentialLinkResponse = await fetch(
`/api/admin/connector/${connector.id}/credential/${googleDriveCredential.id}`,
{
method: "PUT",
headers: {
"Content-Type": "application/json",
},
}
);
if (!credentialLinkResponse.ok) {
setPopupWithExpiration({
message: `Failed to link connector to credential - ${credentialLinkResponse.status}`,
type: "error",
});
return;
}
setPopupWithExpiration({
message: "Successfully created connector!",
type: "success",
});
mutate("/api/admin/connector/indexing-status");
}}
>
Add
</Button>
</>
)}
</>
);
};
export default function Page() {
return (
<div className="mx-auto container">
<div className="mb-4">
<HealthCheckBanner />
</div>
@ -52,138 +417,8 @@ export default function Page() {
<GoogleDriveIcon size="32" />
<h1 className="text-3xl font-bold pl-2">Google Drive</h1>
</div>
</div>
);
let body = null;
if (isAuthenticatedLoading || authorizationUrlLoading) {
return (
<div className="mx-auto">
{header}
<LoadingAnimation text="" />
</div>
);
}
if (
isAuthenticatedError ||
isAuthenticatedData?.authenticated === undefined
) {
return (
<div className="mx-auto">
{header}
<div className="text-red-500">
Error loading Google Drive authentication status. Contact an
administrator.
</div>
</div>
);
}
if (authorizationUrlError || authorizationUrlData?.auth_url === undefined) {
return (
<div className="mx-auto">
{header}
<div className="text-red-500">
Error loading Google Drive authentication URL. Contact an
administrator.
</div>
</div>
);
}
if (isAuthenticatedData.authenticated) {
return (
<div>
{header}
{popup && <Popup message={popup.message} type={popup.type} />}
{/* TODO: add periodic support */}
<h2 className="text-xl font-bold mb-2 ml-auto mr-auto">
Request Indexing
</h2>
<p className="text-sm mb-2">
Index the all docs in the setup Google Drive account.
</p>
<div className="mt-2 mb-4">
<button
type="submit"
className={
"bg-slate-500 hover:bg-slate-700 text-white " +
"font-bold py-2 px-4 rounded focus:outline-none " +
"focus:shadow-outline w-full max-w-sm mx-auto"
}
onClick={async () => {
const { message, isSuccess } = await submitIndexRequest(
"google_drive",
{}
);
if (isSuccess) {
setPopup({
message,
type: isSuccess ? "success" : "error",
});
setTimeout(() => {
setPopup(null);
}, 3000);
router.push("/admin/indexing/status");
}
}}
>
Index
</button>
</div>
{/* TODO: add ability to add more accounts / switch account */}
<div className="mb-2">
<h2 className="text-xl font-bold mb-2 ml-auto mr-auto">
Re-Authenticate
</h2>
<p className="text-sm mb-4">
If you want to switch Google Drive accounts, you can re-authenticate
below.
</p>
<a
className={
"group relative w-64 " +
"py-2 px-4 border border-transparent text-sm " +
"font-medium rounded-md text-white bg-red-600 " +
"hover:bg-red-700 focus:outline-none focus:ring-2 " +
"focus:ring-offset-2 focus:ring-red-500 mx-auto"
}
href={authorizationUrlData.auth_url}
>
Authenticate with Google Drive
</a>
</div>
</div>
);
}
return (
<div className="mx-auto">
{header}
<div className="flex">
<div className="max-w-2xl mx-auto border p-3 border-gray-700 rounded-md">
<h2 className="text-xl font-bold mb-2 mt-6 ml-auto mr-auto">Setup</h2>
<p className="text-sm mb-4">
To use the Google Drive connector, you must first provide
credentials via OAuth. This gives us read access to the docs in your
google drive account.
</p>
<a
className={
"group relative w-64 flex justify-center " +
"py-2 px-4 border border-transparent text-sm " +
"font-medium rounded-md text-white bg-red-600 " +
"hover:bg-red-700 focus:outline-none focus:ring-2 " +
"focus:ring-offset-2 focus:ring-red-500 mx-auto"
}
href={authorizationUrlData.auth_url}
>
Authenticate with Google Drive
</a>
</div>
</div>
<Main />
</div>
);
}

View File

@ -1,101 +0,0 @@
import React, { useState } from "react";
import { Formik, Form, FormikHelpers } from "formik";
import * as Yup from "yup";
import { Popup } from "../../../../components/admin/connectors/Popup";
import { TextFormField } from "../../../../components/admin/connectors/Field";
import { SlackConfig } from "../../../../components/admin/connectors/types";
// Validation for the Slack connector config form: bot token and
// workspace ID are required; pull frequency (in minutes) is optional.
const validationSchema = Yup.object().shape({
  slack_bot_token: Yup.string().required("Please enter your Slack Bot Token"),
  workspace_id: Yup.string().required("Please enter your Workspace ID"),
  pull_frequency: Yup.number().optional(),
});
/**
 * POSTs the Slack config to the backend and surfaces the outcome via a
 * popup. Resolves to true on a 2xx response, false otherwise. Formik's
 * submitting flag is always reset and the popup is auto-dismissed after
 * 3 seconds, regardless of success or failure.
 */
const handleSubmit = async (
  values: SlackConfig,
  { setSubmitting }: FormikHelpers<SlackConfig>,
  setPopup: (
    popup: { message: string; type: "success" | "error" } | null
  ) => void
) => {
  setSubmitting(true);
  let succeeded = false;
  try {
    const response = await fetch("/api/admin/connectors/slack/config", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify(values),
    });
    if (!response.ok) {
      // Surface the backend's error detail to the admin.
      const errorData = await response.json();
      setPopup({ message: `Error: ${errorData.detail}`, type: "error" });
    } else {
      succeeded = true;
      setPopup({ message: "Success!", type: "success" });
    }
  } catch (error) {
    setPopup({ message: `Error: ${error}`, type: "error" });
  } finally {
    setSubmitting(false);
    setTimeout(() => {
      setPopup(null);
    }, 3000);
  }
  return succeeded;
};
interface Props {
existingSlackConfig: SlackConfig;
onSubmit: (isSuccess: boolean) => void;
}
// Formik-based form for the initial Slack connector configuration.
// Pre-populated with the existing config; on submit it POSTs via
// handleSubmit and forwards the success flag to the `onSubmit` callback.
export const InitialSetupForm: React.FC<Props> = ({
  existingSlackConfig,
  onSubmit,
}) => {
  // Transient success/error popup; cleared automatically by handleSubmit.
  const [popup, setPopup] = useState<{
    message: string;
    type: "success" | "error";
  } | null>(null);
  return (
    <>
      {popup && <Popup message={popup.message} type={popup.type} />}
      <Formik
        initialValues={existingSlackConfig}
        validationSchema={validationSchema}
        onSubmit={(values, formikHelpers) =>
          handleSubmit(values, formikHelpers, setPopup).then((isSuccess) =>
            onSubmit(isSuccess)
          )
        }
      >
        {({ isSubmitting }) => (
          <Form>
            <TextFormField name="slack_bot_token" label="Slack Bot Token:" />
            <TextFormField name="workspace_id" label="Workspace ID:" />
            <TextFormField
              name="pull_frequency"
              label="Pull Frequency (in minutes):"
            />
            <div className="flex">
              <button
                type="submit"
                disabled={isSubmitting}
                className={
                  "mx-auto bg-slate-500 hover:bg-slate-700 text-white font-bold py-2 " +
                  "px-4 max-w-sm rounded focus:outline-none focus:shadow-outline w-full"
                }
              >
                Update
              </button>
            </div>
          </Form>
        )}
      </Formik>
    </>
  );
};

View File

@ -1,65 +1,208 @@
"use client";
import { SlackIcon } from "@/components/icons/icons";
import * as Yup from "yup";
import { SlackIcon, TrashIcon } from "@/components/icons/icons";
import { fetcher } from "@/lib/fetcher";
import useSWR, { useSWRConfig } from "swr";
import { SlackConfig } from "../../../../components/admin/connectors/types";
import { LoadingAnimation } from "@/components/Loading";
import { InitialSetupForm } from "./InitialSetupForm";
import { useRouter } from "next/navigation";
import { HealthCheckBanner } from "@/components/health/healthcheck";
import {
Connector,
SlackConfig,
Credential,
SlackCredentialJson,
ConnectorIndexingStatus,
} from "@/lib/types";
import { deleteCredential, linkCredential } from "@/lib/credential";
import { CredentialForm } from "@/components/admin/connectors/CredentialForm";
import { TextFormField } from "@/components/admin/connectors/Field";
import { ConnectorsTable } from "@/components/admin/connectors/table/ConnectorsTable";
import { ConnectorForm } from "@/components/admin/connectors/ConnectorForm";
const MainSection = () => {
// TODO: add back in once this is ready
// const { data, isLoading, error } = useSWR<ListSlackIndexingResponse>(
// "/api/admin/connectors/web/index-attempt",
// fetcher
// );
const { mutate } = useSWRConfig();
const { data, isLoading, error } = useSWR<SlackConfig>(
"/api/admin/connectors/slack/config",
const {
data: connectorIndexingStatuses,
isLoading: isConnectorIndexingStatusesLoading,
error: isConnectorIndexingStatusesError,
} = useSWR<ConnectorIndexingStatus<any>[]>(
"/api/admin/connector/indexing-status",
fetcher
);
if (isLoading) {
return (
<div className="mt-16">
<LoadingAnimation text="Loading" />
</div>
);
} else if (error || !data) {
return <div>{`Error loading Slack config - ${error}`}</div>;
const {
data: credentialsData,
isLoading: isCredentialsLoading,
isValidating: isCredentialsValidating,
error: isCredentialsError,
} = useSWR<Credential<SlackCredentialJson>[]>(
"/api/admin/credential",
fetcher
);
if (
isConnectorIndexingStatusesLoading ||
isCredentialsLoading ||
isCredentialsValidating
) {
return <LoadingAnimation text="Loading" />;
}
if (isConnectorIndexingStatusesError || !connectorIndexingStatuses) {
return <div>Failed to load connectors</div>;
}
if (isCredentialsError || !credentialsData) {
return <div>Failed to load credentials</div>;
}
const slackConnectorIndexingStatuses: ConnectorIndexingStatus<SlackConfig>[] =
connectorIndexingStatuses.filter(
(connectorIndexingStatus) =>
connectorIndexingStatus.connector.source === "slack"
);
const slackCredential = credentialsData.filter(
(credential) => credential.credential_json?.slack_bot_token
)[0];
return (
<div className="mx-auto">
<h2 className="text-xl font-bold mb-3 ml-auto mr-auto">Config</h2>
<p className="text-sm mb-4">
To use the Slack connector, you must first provide a Slack bot token
corresponding to the Slack App set up in your workspace. For more
details on setting up the Danswer Slack App, see the{" "}
<a
className="text-blue-500"
href="https://docs.danswer.dev/connectors/slack#setting-up"
>
docs
</a>
.
</p>
<div className="border border-gray-700 rounded-md p-3">
<InitialSetupForm
existingSlackConfig={data}
onSubmit={() => mutate("/api/admin/connectors/slack/config")}
<>
<h2 className="font-bold mb-2 mt-6 ml-auto mr-auto">
Step 1: Provide Credentials
</h2>
{slackCredential ? (
<>
<div className="flex mb-1 text-sm">
<p className="my-auto">Existing Slack Bot Token: </p>
<p className="ml-1 italic my-auto">
{slackCredential.credential_json.slack_bot_token}
</p>{" "}
<button
className="ml-1 hover:bg-gray-700 rounded-full p-1"
onClick={async () => {
await deleteCredential(slackCredential.id);
mutate("/api/admin/credential");
}}
>
<TrashIcon />
</button>
</div>
</>
) : (
<>
<p className="text-sm mb-4">
To use the Slack connector, you must first provide a Slack bot token
corresponding to the Slack App set up in your workspace. For more
details on setting up the Danswer Slack App, see the{" "}
<a
className="text-blue-500"
href="https://docs.danswer.dev/connectors/slack#setting-up"
>
docs
</a>
.
</p>
<div className="border-solid border-gray-600 border rounded-md p-6 mt-2">
<CredentialForm<SlackCredentialJson>
formBody={
<>
<TextFormField
name="slack_bot_token"
label="Slack Bot Token:"
type="password"
/>
</>
}
validationSchema={Yup.object().shape({
slack_bot_token: Yup.string().required(
"Please enter your Slack bot token"
),
})}
initialValues={{
slack_bot_token: "",
}}
onSubmit={(isSuccess) => {
if (isSuccess) {
mutate("/api/admin/credential");
}
}}
/>
</div>
</>
)}
<h2 className="font-bold mb-2 mt-6 ml-auto mr-auto">
Step 2: Which workspaces do you want to make searchable?
</h2>
{slackConnectorIndexingStatuses.length > 0 && (
<>
<p className="text-sm mb-2">
We pull the latest messages from each workspace listed below every{" "}
<b>10</b> minutes.
</p>
<div className="mb-2">
<ConnectorsTable
connectorIndexingStatuses={slackConnectorIndexingStatuses}
liveCredential={slackCredential}
getCredential={(credential) =>
credential.credential_json.slack_bot_token
}
specialColumns={[
{
header: "Workspace",
key: "workspace",
getValue: (connector) =>
connector.connector_specific_config.workspace,
},
]}
onUpdate={() => mutate("/api/admin/connector/indexing-status")}
onCredentialLink={async (connectorId) => {
if (slackCredential) {
await linkCredential(connectorId, slackCredential.id);
mutate("/api/admin/connector/indexing-status");
}
}}
/>
</div>
</>
)}
<div className="border-solid border-gray-600 border rounded-md p-6 mt-4">
<h2 className="font-bold mb-3">Connect to a New Workspace</h2>
<ConnectorForm<SlackConfig>
nameBuilder={(values) => `SlackConnector-${values.workspace}`}
source="slack"
inputType="poll"
formBody={
<>
<TextFormField name="workspace" label="Workspace:" />
</>
}
validationSchema={Yup.object().shape({
workspace: Yup.string().required(
"Please enter the workspace to index"
),
})}
initialValues={{
workspace: "",
}}
refreshFreq={10 * 60} // 10 minutes
onSubmit={async (isSuccess, responseJson) => {
if (isSuccess && responseJson) {
await linkCredential(responseJson.id, slackCredential.id);
mutate("/api/admin/connector/indexing-status");
}
}}
/>
</div>
</div>
</>
);
};
export default function Page() {
return (
<div className="mx-auto">
<div className="mx-auto container">
<div className="mb-4">
<HealthCheckBanner />
</div>

View File

@ -1,60 +1,38 @@
"use client";
import useSWR from "swr";
import useSWR, { useSWRConfig } from "swr";
import * as Yup from "yup";
import { BasicTable } from "@/components/admin/connectors/BasicTable";
import { LoadingAnimation } from "@/components/Loading";
import { timeAgo } from "@/lib/time";
import { GlobeIcon } from "@/components/icons/icons";
import { fetcher } from "@/lib/fetcher";
import {
IndexAttempt,
ListIndexingResponse,
} from "../../../../components/admin/connectors/types";
import { IndexForm } from "@/components/admin/connectors/Form";
import { TextFormField } from "@/components/admin/connectors/Field";
import { useRouter } from "next/navigation";
import { HealthCheckBanner } from "@/components/health/healthcheck";
const COLUMNS = [
{ header: "Base URL", key: "url" },
{ header: "Last Indexed", key: "indexed_at" },
{ header: "Docs Indexed", key: "docs_indexed" },
{ header: "Status", key: "status" },
];
import { ConnectorIndexingStatus, WebConfig } from "@/lib/types";
import { ConnectorsTable } from "@/components/admin/connectors/table/ConnectorsTable";
import { ConnectorForm } from "@/components/admin/connectors/ConnectorForm";
import { linkCredential } from "@/lib/credential";
export default function Web() {
const router = useRouter();
const { mutate } = useSWRConfig();
const { data, isLoading, error } = useSWR<ListIndexingResponse>(
"/api/admin/connectors/web/index-attempt",
const {
data: connectorIndexingStatuses,
isLoading: isConnectorIndexingStatusesLoading,
error: isConnectorIndexingStatusesError,
} = useSWR<ConnectorIndexingStatus<any>[]>(
"/api/admin/connector/indexing-status",
fetcher
);
const urlToLatestIndexAttempt = new Map<string, IndexAttempt>();
const urlToLatestIndexSuccess = new Map<string, string>();
data?.index_attempts?.forEach((indexAttempt) => {
const url = indexAttempt.connector_specific_config.base_url;
const latestIndexAttempt = urlToLatestIndexAttempt.get(url);
if (
!latestIndexAttempt ||
indexAttempt.time_created > latestIndexAttempt.time_created
) {
urlToLatestIndexAttempt.set(url, indexAttempt);
}
const latestIndexSuccess = urlToLatestIndexSuccess.get(url);
if (
indexAttempt.status === "success" &&
(!latestIndexSuccess || indexAttempt.time_updated > latestIndexSuccess)
) {
urlToLatestIndexSuccess.set(url, indexAttempt.time_updated);
}
});
const webIndexingStatuses: ConnectorIndexingStatus<WebConfig>[] =
connectorIndexingStatuses?.filter(
(connectorIndexingStatus) =>
connectorIndexingStatus.connector.source === "web"
) ?? [];
return (
<div className="mx-auto">
<div className="mx-auto container">
<div className="mb-4">
<HealthCheckBanner />
</div>
@ -62,59 +40,69 @@ export default function Web() {
<GlobeIcon size="32" />
<h1 className="text-3xl font-bold pl-2">Web</h1>
</div>
<h2 className="text-xl font-bold pl-2 mb-2 mt-6 ml-auto mr-auto">
Request Indexing
<h2 className="font-bold mb-2 mt-6 ml-auto mr-auto">
Step 1: Specify which websites to index
</h2>
<p className="text-sm mb-2">
We re-fetch the latest state of the website once a day.
</p>
<div className="border-solid border-gray-600 border rounded-md p-6">
<IndexForm
<ConnectorForm<WebConfig>
nameBuilder={(values) => `WebConnector-${values.base_url}`}
source="web"
formBody={<TextFormField name="base_url" label="URL to Index:" />}
inputType="load_state"
formBody={
<>
<TextFormField name="base_url" label="URL to Index:" />
</>
}
validationSchema={Yup.object().shape({
base_url: Yup.string().required(
"Please enter the website URL to scrape e.g. https://docs.github.com/en/actions"
"Please enter the website URL to scrape e.g. https://docs.danswer.dev/"
),
})}
initialValues={{ base_url: "" }}
onSubmit={(success) => {
if (success) {
router.push("/admin/indexing/status");
initialValues={{
base_url: "",
}}
refreshFreq={60 * 60 * 24} // 1 day
onSubmit={async (isSuccess, responseJson) => {
if (isSuccess && responseJson) {
// assumes there is a dummy credential with id 0
await linkCredential(responseJson.id, 0);
mutate("/api/admin/connector/indexing-status");
}
}}
/>
</div>
<h2 className="text-xl font-bold pl-2 mb-2 mt-6 ml-auto mr-auto">
Indexing History
<h2 className="font-bold mb-2 mt-6 ml-auto mr-auto">
Already Indexed Websites
</h2>
{isLoading ? (
{isConnectorIndexingStatusesLoading ? (
<LoadingAnimation text="Loading" />
) : error ? (
) : isConnectorIndexingStatusesError || !connectorIndexingStatuses ? (
<div>Error loading indexing history</div>
) : (
<BasicTable
columns={COLUMNS}
data={
urlToLatestIndexAttempt.size > 0
? Array.from(urlToLatestIndexAttempt.values()).map(
(indexAttempt) => {
const url = indexAttempt.connector_specific_config
.base_url as string;
return {
indexed_at:
timeAgo(urlToLatestIndexSuccess.get(url)) || "-",
docs_indexed: indexAttempt.docs_indexed || "-",
url: (
<a className="text-blue-500" target="_blank" href={url}>
{url}
</a>
),
status: indexAttempt.status,
};
}
)
: []
}
) : webIndexingStatuses.length > 0 ? (
<ConnectorsTable<WebConfig, {}>
connectorIndexingStatuses={webIndexingStatuses}
specialColumns={[
{
header: "Base URL",
key: "base_url",
getValue: (connector) => (
<a
className="text-blue-500"
href={connector.connector_specific_config.base_url}
>
{connector.connector_specific_config.base_url}
</a>
),
},
]}
onUpdate={() => mutate("/api/admin/connector/indexing-status")}
/>
) : (
<p className="text-sm">No indexed websites found</p>
)}
</div>
);

View File

@ -1,49 +1,55 @@
"use client";
import useSWR, { useSWRConfig } from "swr";
import useSWR from "swr";
import { BasicTable } from "@/components/admin/connectors/BasicTable";
import { LoadingAnimation } from "@/components/Loading";
import { timeAgo } from "@/lib/time";
import { NotebookIcon } from "@/components/icons/icons";
import { NotebookIcon, XSquareIcon } from "@/components/icons/icons";
import { fetcher } from "@/lib/fetcher";
import {
IndexAttempt,
ListIndexingResponse,
} from "@/components/admin/connectors/types";
import { getSourceMetadata } from "@/components/source";
import { CheckCircle, XCircle } from "@phosphor-icons/react";
import { submitIndexRequest } from "@/components/admin/connectors/Form";
import { useState } from "react";
import { Popup } from "@/components/admin/connectors/Popup";
import { HealthCheckBanner } from "@/components/health/healthcheck";
import { Connector, ConnectorIndexingStatus } from "@/lib/types";
const getModifiedSource = (indexAttempt: IndexAttempt) => {
return indexAttempt.source === "web"
? indexAttempt.source + indexAttempt.connector_specific_config?.base_url
: indexAttempt.source;
};
const getSourceDisplay = (connector: Connector<any>) => {
const sourceMetadata = getSourceMetadata(connector.source);
if (connector.source === "web") {
return (
sourceMetadata.displayName +
(connector.connector_specific_config?.base_url &&
` [${connector.connector_specific_config?.base_url}]`)
);
}
const getLatestIndexAttemptsBySource = (indexAttempts: IndexAttempt[]) => {
const latestIndexAttemptsBySource = new Map<string, IndexAttempt>();
indexAttempts.forEach((indexAttempt) => {
const source = getModifiedSource(indexAttempt);
const existingIndexAttempt = latestIndexAttemptsBySource.get(source);
if (
!existingIndexAttempt ||
indexAttempt.time_updated > existingIndexAttempt.time_updated
) {
latestIndexAttemptsBySource.set(source, indexAttempt);
}
});
return latestIndexAttemptsBySource;
if (connector.source === "github") {
return (
sourceMetadata.displayName +
` [${connector.connector_specific_config?.repo_owner}/${connector.connector_specific_config?.repo_name}]`
);
}
if (connector.source === "confluence") {
return (
sourceMetadata.displayName +
` [${connector.connector_specific_config?.wiki_page_url}]`
);
}
return sourceMetadata.displayName;
};
export default function Status() {
const { mutate } = useSWRConfig();
const { data, isLoading, error } = useSWR<ListIndexingResponse>(
"/api/admin/connectors/index-attempt",
fetcher
const {
data: indexAttemptData,
isLoading: indexAttemptIsLoading,
error: indexAttemptIsError,
} = useSWR<ConnectorIndexingStatus<any>[]>(
"/api/admin/connector/indexing-status",
fetcher,
{ refreshInterval: 30000 } // 30 seconds
);
const [popup, setPopup] = useState<{
@ -51,18 +57,8 @@ export default function Status() {
type: "success" | "error";
} | null>(null);
// TODO: don't retrieve all index attempts, just the latest ones for each source
const latestIndexAttemptsBySource = getLatestIndexAttemptsBySource(
data?.index_attempts || []
);
const latestSuccessfulIndexAttemptsBySource = getLatestIndexAttemptsBySource(
data?.index_attempts?.filter(
(indexAttempt) => indexAttempt.status === "success"
) || []
);
return (
<div className="mx-auto">
<div className="mx-auto container">
{popup && <Popup message={popup.message} type={popup.type} />}
<div className="mb-4">
<HealthCheckBanner />
@ -72,9 +68,9 @@ export default function Status() {
<h1 className="text-3xl font-bold pl-2">Indexing Status</h1>
</div>
{isLoading ? (
{indexAttemptIsLoading ? (
<LoadingAnimation text="Loading" />
) : error ? (
) : indexAttemptIsError || !indexAttemptData ? (
<div>Error loading indexing history</div>
) : (
<BasicTable
@ -83,85 +79,85 @@ export default function Status() {
{ header: "Status", key: "status" },
{ header: "Last Indexed", key: "indexed_at" },
{ header: "Docs Indexed", key: "docs_indexed" },
{ header: "Re-Index", key: "reindex" },
// { header: "Re-Index", key: "reindex" },
]}
data={Array.from(latestIndexAttemptsBySource.values()).map(
(indexAttempt) => {
const sourceMetadata = getSourceMetadata(indexAttempt.source);
const successfulIndexAttempt =
latestSuccessfulIndexAttemptsBySource.get(
getModifiedSource(indexAttempt)
);
let statusDisplay = (
<div className="text-gray-400">In Progress...</div>
data={indexAttemptData.map((connectorIndexingStatus) => {
const sourceMetadata = getSourceMetadata(
connectorIndexingStatus.connector.source
);
let statusDisplay = (
<div className="text-gray-400">In Progress...</div>
);
if (connectorIndexingStatus.connector.disabled) {
statusDisplay = (
<div className="text-red-600 flex">
<XSquareIcon className="my-auto mr-1" size="18" />
Disabled
</div>
);
} else if (connectorIndexingStatus.last_status === "success") {
statusDisplay = (
<div className="text-green-600 flex">
<CheckCircle className="my-auto mr-1" size="18" />
Enabled
</div>
);
} else if (connectorIndexingStatus.last_status === "failed") {
statusDisplay = (
<div className="text-red-600 flex">
<XCircle className="my-auto mr-1" size="18" />
Error
</div>
);
if (indexAttempt.status === "success") {
statusDisplay = (
<div className="text-green-600 flex">
<CheckCircle className="my-auto mr-1" size="18" />
Success
</div>
);
} else if (indexAttempt.status === "failed") {
statusDisplay = (
<div className="text-red-600 flex">
<XCircle className="my-auto mr-1" size="18" />
Error
</div>
);
}
return {
indexed_at:
timeAgo(successfulIndexAttempt?.time_updated) || "-",
docs_indexed: successfulIndexAttempt?.docs_indexed
? `${successfulIndexAttempt?.docs_indexed} documents`
: "-",
connector: (
<a
className="text-blue-500 flex"
href={sourceMetadata.adminPageLink}
>
{sourceMetadata.icon({ size: "20" })}
<div className="ml-1">
{sourceMetadata.displayName}
{indexAttempt.source === "web" &&
indexAttempt.connector_specific_config?.base_url &&
` [${indexAttempt.connector_specific_config?.base_url}]`}
</div>
</a>
),
status: statusDisplay,
reindex: (
<button
className={
"group relative " +
"py-1 px-2 border border-transparent text-sm " +
"font-medium rounded-md text-white bg-red-800 " +
"hover:bg-red-900 focus:outline-none focus:ring-2 " +
"focus:ring-offset-2 focus:ring-red-500 mx-auto"
}
onClick={async () => {
const { message, isSuccess } = await submitIndexRequest(
indexAttempt.source,
indexAttempt.connector_specific_config
);
setPopup({
message,
type: isSuccess ? "success" : "error",
});
setTimeout(() => {
setPopup(null);
}, 3000);
mutate("/api/admin/connectors/index-attempt");
}}
>
Index
</button>
),
};
}
)}
return {
indexed_at: timeAgo(connectorIndexingStatus?.last_success) || "-",
docs_indexed: connectorIndexingStatus?.docs_indexed
? `${connectorIndexingStatus?.docs_indexed} documents`
: "-",
connector: (
<a
className="text-blue-500 flex"
href={sourceMetadata.adminPageLink}
>
{sourceMetadata.icon({ size: "20" })}
<div className="ml-1">
{getSourceDisplay(connectorIndexingStatus.connector)}
</div>
</a>
),
status: statusDisplay,
// TODO: add the below back in after this is supported in the backend
// reindex: (
// <button
// className={
// "group relative " +
// "py-1 px-2 border border-transparent text-sm " +
// "font-medium rounded-md text-white bg-red-800 " +
// "hover:bg-red-900 focus:outline-none focus:ring-2 " +
// "focus:ring-offset-2 focus:ring-red-500 mx-auto"
// }
// onClick={async () => {
// const { message, isSuccess } = await submitIndexRequest(
// connectorIndexingStatus.connector.source,
// connectorIndexingStatus.connector
// .connector_specific_config
// );
// setPopup({
// message,
// type: isSuccess ? "success" : "error",
// });
// setTimeout(() => {
// setPopup(null);
// }, 4000);
// mutate("/api/admin/connector/index-attempt");
// }}
// >
// Index
// </button>
// ),
};
})}
/>
)}
</div>

View File

@ -29,7 +29,7 @@ const ExistingKeys = () => {
<div>
<h2 className="text-lg font-bold mb-2">Existing Key</h2>
<div className="flex mb-1">
<p className="text-sm italic my-auto">sk- ...{data?.api_key}</p>
<p className="text-sm italic my-auto">sk- ****...**{data?.api_key}</p>
<button
className="ml-1 my-auto hover:bg-gray-700 rounded-full p-1"
onClick={async () => {

View File

@ -0,0 +1,23 @@
interface Props {
onClick: () => void;
children: JSX.Element | string;
disabled?: boolean;
}
export const Button = ({ onClick, children, disabled = false }: Props) => {
return (
<button
className={
"group relative " +
"py-1 px-2 border border-transparent text-sm " +
"font-medium rounded-md text-white bg-red-800 " +
"hover:bg-red-900 focus:outline-none focus:ring-2 " +
"focus:ring-offset-2 focus:ring-red-500 mx-auto"
}
onClick={onClick}
disabled={disabled}
>
{children}
</button>
);
};

View File

@ -0,0 +1,3 @@
// Placeholder popup for attaching a credential to a connector.
// Currently renders an empty div — implementation TODO.
export const AttachCredentialPopup = () => {
  return <div></div>;
};

View File

@ -16,7 +16,7 @@ interface BasicTableProps {
export const BasicTable: FC<BasicTableProps> = ({ columns, data }) => {
return (
<div className="overflow-x-auto">
<div>
<table className="w-full table-auto">
<thead>
<tr className="text-left bg-gray-700">

View File

@ -0,0 +1,114 @@
import React, { useState } from "react";
import { Formik, Form } from "formik";
import * as Yup from "yup";
import { Popup } from "./Popup";
import {
Connector,
ConnectorBase,
ValidInputTypes,
ValidSources,
} from "@/lib/types";
/**
 * POSTs a new connector definition to the backend.
 *
 * @param connector - the connector to create (without server-assigned fields).
 * @returns a user-displayable message, a success flag, and — on success —
 *          the created connector as returned by the server.
 */
export async function submitConnector<T>(
  connector: ConnectorBase<T>
): Promise<{ message: string; isSuccess: boolean; response?: Connector<T> }> {
  try {
    const response = await fetch(`/api/admin/connector`, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
      },
      body: JSON.stringify(connector),
    });
    if (response.ok) {
      const responseJson = await response.json();
      return { message: "Success!", isSuccess: true, response: responseJson };
    }
    // Surface the FastAPI-style `detail` error field to the caller.
    const errorData = await response.json();
    return { message: `Error: ${errorData.detail}`, isSuccess: false };
  } catch (error) {
    return { message: `Error: ${error}`, isSuccess: false };
  }
}
// Props for the generic ConnectorForm.
interface Props<T extends Yup.AnyObject> {
  // Builds the connector's display name from the submitted form values.
  nameBuilder: (values: T) => string;
  source: ValidSources;
  inputType: ValidInputTypes;
  // NOTE(review): not destructured/used by ConnectorForm below — confirm
  // whether this is still needed.
  credentialId?: number;
  // Caller-supplied form fields rendered inside the <Form>.
  formBody: JSX.Element | null;
  validationSchema: Yup.ObjectSchema<T>;
  initialValues: T;
  // Invoked after the submit round-trip; receives the created connector
  // on success.
  onSubmit: (isSuccess: boolean, responseJson?: Connector<T>) => void;
  // Refresh frequency in seconds; treated as 0 when omitted.
  refreshFreq?: number;
}
/**
 * Generic creation form for a connector. Renders the caller-supplied
 * fields, validates them with the given Yup schema, and on submit POSTs
 * a new connector built from the form values. Shows a transient
 * success / error popup and notifies the caller via `onSubmit`.
 */
export function ConnectorForm<T extends Yup.AnyObject>({
  nameBuilder,
  source,
  inputType,
  formBody,
  validationSchema,
  initialValues,
  refreshFreq,
  onSubmit,
}: Props<T>): JSX.Element {
  const [popup, setPopup] = useState<{
    message: string;
    type: "success" | "error";
  } | null>(null);

  const submitButtonClasses =
    "bg-slate-500 hover:bg-slate-700 text-white " +
    "font-bold py-2 px-4 rounded focus:outline-none " +
    "focus:shadow-outline w-full max-w-sm mx-auto";

  return (
    <>
      {popup && <Popup message={popup.message} type={popup.type} />}
      <Formik
        initialValues={initialValues}
        validationSchema={validationSchema}
        onSubmit={(values, formikHelpers) => {
          formikHelpers.setSubmitting(true);
          // The form values double as the connector-specific config.
          const newConnector = {
            name: nameBuilder(values),
            source,
            input_type: inputType,
            connector_specific_config: values,
            refresh_freq: refreshFreq || 0,
            disabled: false,
          };
          submitConnector<T>(newConnector).then(
            ({ message, isSuccess, response }) => {
              setPopup({ message, type: isSuccess ? "success" : "error" });
              formikHelpers.setSubmitting(false);
              if (isSuccess) {
                formikHelpers.resetForm();
              }
              // Auto-dismiss the popup after a few seconds.
              setTimeout(() => setPopup(null), 4000);
              onSubmit(isSuccess, response);
            }
          );
        }}
      >
        {({ isSubmitting }) => (
          <Form>
            {formBody}
            <div className="flex">
              <button
                type="submit"
                disabled={isSubmitting}
                className={submitButtonClasses}
              >
                Connect
              </button>
            </div>
          </Form>
        )}
      </Formik>
    </>
  );
}

View File

@ -1,65 +0,0 @@
"use client";
import {
IndexAttempt,
ListIndexingResponse,
} from "@/components/admin/connectors/types";
import { fetcher } from "@/lib/fetcher";
import { timeAgo } from "@/lib/time";
import { ValidSources } from "@/lib/types";
import { CheckCircle, MinusCircle } from "@phosphor-icons/react";
import useSWR from "swr";
export enum ConnectorStatusEnum {
Setup = "Setup",
Running = "Running",
NotSetup = "Not Setup",
}
const sortIndexAttemptsByTimeUpdated = (a: IndexAttempt, b: IndexAttempt) => {
if (a.time_updated === b.time_updated) {
return 0;
}
return a.time_updated > b.time_updated ? -1 : 1;
};
interface ConnectorStatusProps {
status: ConnectorStatusEnum;
source: ValidSources;
}
export const ConnectorStatus = ({ status, source }: ConnectorStatusProps) => {
const { data } = useSWR<ListIndexingResponse>(
`/api/admin/connectors/${source}/index-attempt`,
fetcher
);
const lastSuccessfulAttempt = data?.index_attempts
.filter((attempt) => attempt.status === "success")
.sort(sortIndexAttemptsByTimeUpdated)[0];
if (
status === ConnectorStatusEnum.Running ||
status == ConnectorStatusEnum.Setup
) {
return (
<div>
<div className="text-emerald-600 flex align-middle text-center">
<CheckCircle size={20} className="my-auto" />
<p className="my-auto ml-1">{status}</p>
</div>
{lastSuccessfulAttempt && (
<p className="text-xs my-auto ml-1">
Last indexed {timeAgo(lastSuccessfulAttempt.time_updated)}
</p>
)}
</div>
);
}
return (
<div className="text-gray-400 flex align-middle text-center">
<MinusCircle size={20} className="my-auto" />
<p className="my-auto ml-1">{status}</p>
</div>
);
};

View File

@ -0,0 +1,92 @@
import React, { useState } from "react";
import { Formik, Form } from "formik";
import * as Yup from "yup";
import { Popup } from "./Popup";
import { CredentialBase } from "@/lib/types";
/**
 * POSTs a new credential to the backend.
 *
 * @param credential - the credential payload (JSON blob + public flag).
 * @returns a user-displayable message and a success flag.
 */
export async function submitCredential<T>(
  credential: CredentialBase<T>
): Promise<{ message: string; isSuccess: boolean }> {
  try {
    const response = await fetch(`/api/admin/credential`, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
      },
      body: JSON.stringify(credential),
    });
    if (response.ok) {
      return { message: "Success!", isSuccess: true };
    }
    // Surface the FastAPI-style `detail` error field to the caller.
    const errorData = await response.json();
    return { message: `Error: ${errorData.detail}`, isSuccess: false };
  } catch (error) {
    return { message: `Error: ${error}`, isSuccess: false };
  }
}
// Props for the generic CredentialForm.
interface Props<YupObjectType extends Yup.AnyObject> {
  // Caller-supplied form fields rendered inside the <Form>.
  formBody: JSX.Element | null;
  validationSchema: Yup.ObjectSchema<YupObjectType>;
  initialValues: YupObjectType;
  // Invoked after the submit round-trip completes.
  onSubmit: (isSuccess: boolean) => void;
}
/**
 * Generic creation/update form for a credential. Renders the
 * caller-supplied fields, validates them with the given Yup schema, and
 * on submit POSTs the values as a public credential. Shows a transient
 * success / error popup and notifies the caller via `onSubmit`.
 */
export function CredentialForm<T extends Yup.AnyObject>({
  formBody,
  validationSchema,
  initialValues,
  onSubmit,
}: Props<T>): JSX.Element {
  const [popup, setPopup] = useState<{
    message: string;
    type: "success" | "error";
  } | null>(null);

  const updateButtonClasses =
    "bg-slate-500 hover:bg-slate-700 text-white " +
    "font-bold py-2 px-4 rounded focus:outline-none " +
    "focus:shadow-outline w-full max-w-sm mx-auto";

  return (
    <>
      {popup && <Popup message={popup.message} type={popup.type} />}
      <Formik
        initialValues={initialValues}
        validationSchema={validationSchema}
        onSubmit={(values, formikHelpers) => {
          formikHelpers.setSubmitting(true);
          submitCredential<T>({
            credential_json: values,
            public_doc: true,
          }).then(({ message, isSuccess }) => {
            setPopup({ message, type: isSuccess ? "success" : "error" });
            formikHelpers.setSubmitting(false);
            // Auto-dismiss the popup after a few seconds.
            setTimeout(() => setPopup(null), 4000);
            onSubmit(isSuccess);
          });
        }}
      >
        {({ isSubmitting }) => (
          <Form>
            {formBody}
            <div className="flex">
              <button
                type="submit"
                disabled={isSubmitting}
                className={updateButtonClasses}
              >
                Update
              </button>
            </div>
          </Form>
        )}
      </Formik>
    </>
  );
}

View File

@ -12,7 +12,7 @@ export const submitIndexRequest = async (
let isSuccess = false;
try {
const response = await fetch(
`/api/admin/connectors/${source}/index-attempt`,
`/api/admin/connector/${source}/index-attempt`,
{
method: "POST",
headers: {
@ -75,7 +75,7 @@ export function IndexForm<YupObjectType extends Yup.AnyObject>({
formikHelpers.setSubmitting(false);
setTimeout(() => {
setPopup(null);
}, 3000);
}, 4000);
onSubmit(isSuccess);
});
}}

View File

@ -1,9 +1,9 @@
interface PopupProps {
export interface PopupSpec {
message: string;
type: "success" | "error";
}
export const Popup: React.FC<PopupProps> = ({ message, type }) => (
export const Popup: React.FC<PopupSpec> = ({ message, type }) => (
<div
className={`fixed bottom-4 left-4 p-4 rounded-md shadow-lg text-white ${
type === "success" ? "bg-green-500" : "bg-red-500"

View File

@ -0,0 +1,20 @@
// Props for the per-row "Attach Credential" action button.
interface Props {
  onClick: () => void;
}

/**
 * Red table-row button that attaches the live credential to a
 * connector when clicked.
 */
export const AttachCredentialButtonForTable = ({ onClick }: Props) => {
  const buttonClasses = [
    "group relative",
    "py-1 px-2 border border-transparent text-sm",
    "font-medium rounded-md text-white bg-red-800",
    "hover:bg-red-900 focus:outline-none focus:ring-2",
    "focus:ring-offset-2 focus:ring-red-500 mx-auto",
  ].join(" ");
  return (
    <button className={buttonClasses} onClick={onClick}>
      Attach Credential
    </button>
  );
};

View File

@ -0,0 +1,20 @@
interface Props {
onClick: () => void;
}
export const IndexButtonForTable = ({ onClick }: Props) => {
return (
<button
className={
"group relative " +
"py-1 px-2 border border-transparent text-sm " +
"font-medium rounded-md text-white bg-red-800 " +
"hover:bg-red-900 focus:outline-none focus:ring-2 " +
"focus:ring-offset-2 focus:ring-red-500 mx-auto"
}
onClick={onClick}
>
Index
</button>
);
};

View File

@ -0,0 +1,223 @@
import { Connector, ConnectorIndexingStatus, Credential } from "@/lib/types";
import { BasicTable } from "@/components/admin/connectors/BasicTable";
import { Popup, PopupSpec } from "@/components/admin/connectors/Popup";
import { useState } from "react";
import { LinkBreakIcon, LinkIcon, TrashIcon } from "@/components/icons/icons";
import { deleteConnector, updateConnector } from "@/lib/connector";
import { AttachCredentialButtonForTable } from "@/components/admin/connectors/buttons/AttachCredentialButtonForTable";
// Props for a single connector-status cell.
interface StatusRowProps<ConnectorConfigType> {
  connectorIndexingStatus: ConnectorIndexingStatus<ConnectorConfigType>;
  // When true, the enable/disable toggle is hidden.
  hasCredentialsIssue: boolean;
  setPopup: (popupSpec: PopupSpec | null) => void;
  // Called after a successful enable/disable so the parent can re-fetch.
  onUpdate: () => void;
}
/**
 * Renders the status cell for a connector row: "Enabled!" / "Failed" /
 * "Disabled", plus a hoverable toggle (hidden when the row has a
 * credentials issue) that flips `connector.disabled` via
 * `updateConnector` and then calls `onUpdate` so the parent re-fetches.
 */
export function StatusRow<ConnectorConfigType>({
  connectorIndexingStatus,
  hasCredentialsIssue,
  setPopup,
  onUpdate,
}: StatusRowProps<ConnectorConfigType>) {
  // Whether the enable/disable toggle is hovered (controls the tooltip).
  const [statusHovered, setStatusHovered] = useState<boolean>(false);
  const connector = connectorIndexingStatus.connector;

  let statusDisplay;
  switch (connectorIndexingStatus.last_status) {
    case "failed":
      statusDisplay = <div className="text-red-700">Failed</div>;
      break;
    default:
      statusDisplay = <div className="text-emerald-600 flex">Enabled!</div>;
  }
  // A disabled connector overrides whatever the last run's status was.
  if (connector.disabled) {
    statusDisplay = <div className="text-red-700">Disabled</div>;
  }

  return (
    <div className="flex">
      {statusDisplay}
      {!hasCredentialsIssue && (
        <div
          className="cursor-pointer ml-1 my-auto relative"
          onMouseEnter={() => setStatusHovered(true)}
          onMouseLeave={() => setStatusHovered(false)}
          onClick={() => {
            // Toggle the connector's disabled flag on the backend.
            updateConnector({
              ...connector,
              disabled: !connector.disabled,
            }).then(() => {
              // `connector.disabled` still holds the PRE-toggle value here,
              // so a previously-disabled connector reports "Enabled".
              setPopup({
                message: connector.disabled
                  ? "Enabled connector!"
                  : "Disabled connector!",
                type: "success",
              });
              setTimeout(() => {
                setPopup(null);
              }, 4000);
              onUpdate();
            });
          }}
        >
          {statusHovered && (
            <div className="flex flex-nowrap absolute top-0 left-0 ml-8 bg-gray-700 px-3 py-2 rounded shadow-lg">
              {connector.disabled ? "Enable!" : "Disable!"}
            </div>
          )}
          {/* NOTE(review): LinkIcon is shown for disabled connectors and
              LinkBreakIcon for enabled ones — confirm this mapping is the
              intended visual metaphor. */}
          {connector.disabled ? (
            <LinkIcon className="my-auto flex flex-shrink-0 text-red-700" />
          ) : (
            <LinkBreakIcon
              className={`my-auto flex flex-shrink-0 ${
                connectorIndexingStatus.last_status === "failed"
                  ? "text-red-700"
                  : "text-emerald-600"
              }`}
            />
          )}
        </div>
      )}
    </div>
  );
}
// Describes an extra, caller-defined column for ConnectorsTable.
interface ColumnSpecification<ConnectorConfigType> {
  header: string;
  key: string;
  // Produces the cell contents for a given connector.
  getValue: (connector: Connector<ConnectorConfigType>) => JSX.Element | string;
}

interface ConnectorsTableProps<ConnectorConfigType, ConnectorCredentialType> {
  connectorIndexingStatuses: ConnectorIndexingStatus<ConnectorConfigType>[];
  // The currently-active credential for this source, if any.
  liveCredential?: Credential<ConnectorCredentialType> | null;
  // Renders a display value for the credential column. The credential
  // column is only shown when BOTH this and `onCredentialLink` are given.
  getCredential?: (
    credential: Credential<ConnectorCredentialType>
  ) => JSX.Element | string;
  // Called after any row action so the parent can re-fetch its data.
  onUpdate: () => void;
  // Links the live credential to the given connector id.
  onCredentialLink?: (connectorId: number) => void;
  specialColumns?: ColumnSpecification<ConnectorConfigType>[];
}
/**
 * Table of connectors (one row per indexing status) with a status
 * toggle, an optional credential column, and a delete action per row.
 *
 * The credential column is rendered only when both `getCredential` and
 * `onCredentialLink` are supplied.
 */
export function ConnectorsTable<ConnectorConfigType, ConnectorCredentialType>({
  connectorIndexingStatuses,
  liveCredential,
  getCredential,
  specialColumns,
  onUpdate,
  onCredentialLink,
}: ConnectorsTableProps<ConnectorConfigType, ConnectorCredentialType>) {
  // Transient success/error popup shown after row actions.
  const [popup, setPopup] = useState<{
    message: string;
    type: "success" | "error";
  } | null>(null);

  const connectorIncludesCredential =
    getCredential !== undefined && onCredentialLink !== undefined;

  // Caller-supplied columns first, then the fixed Status column.
  const columns = [
    ...(specialColumns ?? []),
    {
      header: "Status",
      key: "status",
    },
  ];
  if (connectorIncludesCredential) {
    columns.push({
      header: "Credential",
      key: "credential",
    });
  }
  columns.push({
    header: "Remove",
    key: "remove",
  });

  return (
    <>
      {popup && <Popup message={popup.message} type={popup.type} />}
      <BasicTable
        columns={columns}
        data={connectorIndexingStatuses.map((connectorIndexingStatus) => {
          const connector = connectorIndexingStatus.connector;
          // "Valid" means the live credential is already linked to this
          // connector.
          const hasValidCredentials =
            liveCredential &&
            connector.credential_ids.includes(liveCredential.id);
          const credential = connectorIncludesCredential
            ? {
                credential: hasValidCredentials ? (
                  <p className="max-w-sm truncate">
                    {getCredential(liveCredential)}
                  </p>
                ) : liveCredential ? (
                  // A live credential exists but isn't linked yet —
                  // offer to attach it.
                  <AttachCredentialButtonForTable
                    onClick={() => onCredentialLink(connector.id)}
                  />
                ) : (
                  // No live credential exists at all.
                  <p className="text-red-700">N/A</p>
                ),
              }
            : { credential: "" };
          return {
            status: (
              <StatusRow
                connectorIndexingStatus={connectorIndexingStatus}
                hasCredentialsIssue={
                  !hasValidCredentials && connectorIncludesCredential
                }
                setPopup={setPopup}
                onUpdate={onUpdate}
              />
            ),
            remove: (
              <div
                className="cursor-pointer mx-auto"
                onClick={() => {
                  deleteConnector(connector.id).then(() => {
                    setPopup({
                      message: "Successfully deleted connector",
                      type: "success",
                    });
                    setTimeout(() => {
                      setPopup(null);
                    }, 4000);
                    onUpdate();
                  });
                }}
              >
                <TrashIcon />
              </div>
            ),
            ...credential,
            // Fill in the caller-defined columns keyed by their `key`.
            ...(specialColumns
              ? Object.fromEntries(
                  specialColumns.map(({ key, getValue }, i) => [
                    key,
                    getValue(connector),
                  ])
                )
              : {}),
          };
          // NOTE(review): dead commented-out "index" column below —
          // remove once re-indexing is supported by the backend.
          // index: (
          //   <IndexButtonForTable
          //     onClick={async () => {
          //       const { message, isSuccess } = await submitIndexRequest(
          //         connector.source,
          //         connector.connector_specific_config
          //       );
          //       setPopup({
          //         message,
          //         type: isSuccess ? "success" : "error",
          //       });
          //       setTimeout(() => {
          //         setPopup(null);
          //       }, 4000);
          //       mutate("/api/admin/connector/index-attempt");
          //     }}
          //   />
          // ),
        })}
      />
    </>
  );
}

View File

@ -1,20 +0,0 @@
import { ValidSources } from "@/lib/types";
export interface SlackConfig {
slack_bot_token: string;
workspace_id: string;
pull_frequency: number;
}
export interface IndexAttempt {
connector_specific_config: { [key: string]: any };
status: "success" | "failed" | "in_progress" | "not_started";
source: ValidSources;
time_created: string;
time_updated: string;
docs_indexed: number;
}
export interface ListIndexingResponse {
index_attempts: IndexAttempt[];
}

View File

@ -1,6 +1,14 @@
"use client";
import { Notebook, Key, Trash, Info } from "@phosphor-icons/react";
import {
Notebook,
Key,
Trash,
Info,
XSquare,
LinkBreak,
Link,
} from "@phosphor-icons/react";
import { SiConfluence, SiGithub, SiGoogledrive, SiSlack } from "react-icons/si";
import { FaGlobe } from "react-icons/fa";
@ -32,6 +40,27 @@ export const TrashIcon = ({
return <Trash size={size} className={className} />;
};
export const LinkBreakIcon = ({
size = "16",
className = defaultTailwindCSS,
}: IconProps) => {
return <LinkBreak size={size} className={className} />;
};
export const LinkIcon = ({
size = "16",
className = defaultTailwindCSS,
}: IconProps) => {
return <Link size={size} className={className} />;
};
export const XSquareIcon = ({
size = "16",
className = defaultTailwindCSS,
}: IconProps) => {
return <XSquare size={size} className={className} />;
};
export const GlobeIcon = ({
size = "16",
className = defaultTailwindCSS,

View File

@ -50,7 +50,7 @@ export const ApiKeyForm = ({ handleResponse }: Props) => {
}
setTimeout(() => {
setPopup(null);
}, 3000);
}, 4000);
}
}}
>

View File

@ -46,7 +46,7 @@ export const SearchResultsDisplay: React.FC<SearchResultsDisplayProps> = ({
if (answer === null && documents === null && quotes === null) {
return (
<div className="text-red-800">
<div className="text-red-500">
Something went wrong, please try again.
</div>
);
@ -104,9 +104,9 @@ export const SearchResultsDisplay: React.FC<SearchResultsDisplayProps> = ({
<div className="flex">
<InfoIcon
size="20"
className="text-red-800 my-auto flex flex-shrink-0"
className="text-red-500 my-auto flex flex-shrink-0"
/>
<div className="text-red-800 text-xs my-auto ml-1">
<div className="text-red-500 text-xs my-auto ml-1">
GPT hurt itself in its confusion :(
</div>
</div>

View File

@ -117,10 +117,14 @@ const searchRequestStreamed = async (
updateCurrentAnswer(answer);
}
} else {
const docs = chunk.top_documents as any[];
if (docs) {
relevantDocuments = docs.map((doc) => JSON.parse(doc) as Document);
updateDocs(relevantDocuments);
if (Object.hasOwn(chunk, "top_documents")) {
const docs = chunk.top_documents as any[] | null;
if (docs) {
relevantDocuments = docs.map(
(doc) => JSON.parse(doc) as Document
);
updateDocs(relevantDocuments);
}
} else {
quotes = chunk as Record<string, Quote>;
updateQuotes(quotes);

39
web/src/lib/connector.ts Normal file
View File

@ -0,0 +1,39 @@
import { Connector, ConnectorBase } from "./types";
/**
 * Creates a new connector via the admin API and returns the parsed
 * JSON response body.
 * NOTE(review): `response.ok` is not checked, so an error payload would
 * be returned typed as Connector<T> — confirm callers tolerate this.
 */
export async function createConnector<T>(
  connector: ConnectorBase<T>
): Promise<Connector<T>> {
  const requestInit = {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(connector),
  };
  const response = await fetch(`/api/admin/connector`, requestInit);
  return response.json();
}
/**
 * PATCHes an existing connector (identified by `connector.id`) via the
 * admin API and returns the parsed JSON response body.
 * NOTE(review): `response.ok` is not checked, so an error payload would
 * be returned typed as Connector<T> — confirm callers tolerate this.
 */
export async function updateConnector<T>(
  connector: Connector<T>
): Promise<Connector<T>> {
  const requestInit = {
    method: "PATCH",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(connector),
  };
  const response = await fetch(
    `/api/admin/connector/${connector.id}`,
    requestInit
  );
  return response.json();
}
/**
 * Deletes the connector with the given id via the admin API and returns
 * the parsed JSON response body.
 * NOTE(review): `response.ok` is not checked — confirm callers tolerate
 * an error payload being returned as if it were a Connector<T>.
 */
export async function deleteConnector<T>(
  connectorId: number
): Promise<Connector<T>> {
  const requestInit = {
    method: "DELETE",
    headers: { "Content-Type": "application/json" },
  };
  const response = await fetch(
    `/api/admin/connector/${connectorId}`,
    requestInit
  );
  return response.json();
}

25
web/src/lib/credential.ts Normal file
View File

@ -0,0 +1,25 @@
/**
 * Deletes the credential with the given id via the admin API and
 * returns the parsed JSON response body.
 * NOTE(review): `response.ok` is not checked — an error payload is
 * returned just like a success payload; confirm callers handle this.
 */
export async function deleteCredential<T>(credentialId: number) {
  const requestInit = {
    method: "DELETE",
    headers: { "Content-Type": "application/json" },
  };
  const response = await fetch(
    `/api/admin/credential/${credentialId}`,
    requestInit
  );
  return response.json();
}
/**
 * Links an existing credential to an existing connector via the admin
 * API and returns the parsed JSON response body.
 * NOTE(review): `response.ok` is not checked — an error payload is
 * returned just like a success payload; confirm callers handle this.
 */
export async function linkCredential<T>(
  connectorId: number,
  credentialId: number
) {
  const endpoint = `/api/admin/connector/${connectorId}/credential/${credentialId}`;
  const response = await fetch(endpoint, {
    method: "PUT",
    headers: { "Content-Type": "application/json" },
  });
  return response.json();
}

View File

@ -1,4 +1,6 @@
export const timeAgo = (dateString: string | undefined): string | null => {
export const timeAgo = (
dateString: string | undefined | null
): string | null => {
if (!dateString) {
return null;
}

View File

@ -14,3 +14,74 @@ export type ValidSources =
| "google_drive"
| "confluence";
export type ValidInputTypes = "load_state" | "poll" | "event";
// CONNECTORS

// Fields required to create a connector (no server-assigned fields).
export interface ConnectorBase<T> {
  name: string;
  input_type: ValidInputTypes;
  source: ValidSources;
  // Connector-type-specific settings, e.g. { base_url } for web.
  connector_specific_config: T;
  // Refresh frequency in seconds; 0 presumably means no automatic
  // refresh — confirm against the backend scheduler.
  refresh_freq: number;
  disabled: boolean;
}

// A connector as returned by the server, including server-assigned fields.
export interface Connector<T> extends ConnectorBase<T> {
  id: number;
  // Ids of credentials linked to this connector.
  credential_ids: number[];
  time_created: string;
  time_updated: string;
}

export interface WebConfig {
  base_url: string;
}

export interface GithubConfig {
  repo_owner: string;
  repo_name: string;
}

export interface ConfluenceConfig {
  wiki_page_url: string;
}

export interface SlackConfig {
  workspace: string;
}

// Aggregated indexing state for one connector, as served by
// /api/admin/connector/indexing-status.
export interface ConnectorIndexingStatus<T> {
  connector: Connector<T>;
  last_status: "success" | "failed" | "in_progress" | "not_started";
  // Timestamp of the last successful index run (fed to timeAgo), or
  // null if the connector has never succeeded.
  last_success: string | null;
  docs_indexed: number;
}

// CREDENTIALS

// Fields required to create a credential.
export interface CredentialBase<T> {
  // Source-specific secret payload, e.g. { slack_bot_token } for Slack.
  credential_json: T;
  // NOTE(review): presumably marks documents indexed with this credential
  // as visible to all users — confirm against the backend.
  public_doc: boolean;
}

// A credential as returned by the server, including server-assigned fields.
export interface Credential<T> extends CredentialBase<T> {
  id: number;
  // Owning user id; null when no specific user owns the credential.
  user_id: number | null;
  time_created: string;
  time_updated: string;
}

export interface GithubCredentialJson {
  github_access_token: string;
}

export interface ConfluenceCredentialJson {
  confluence_username: string;
  confluence_access_token: string;
}

export interface SlackCredentialJson {
  slack_bot_token: string;
}

export interface GoogleDriveCredentialJson {
  google_drive_tokens: string;
}