mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-05-03 16:30:21 +02:00
1330 lines
48 KiB
Python
1330 lines
48 KiB
Python
import mimetypes
|
|
import os
|
|
import uuid
|
|
import zipfile
|
|
from io import BytesIO
|
|
from typing import cast
|
|
|
|
from fastapi import APIRouter
|
|
from fastapi import Depends
|
|
from fastapi import HTTPException
|
|
from fastapi import Query
|
|
from fastapi import Request
|
|
from fastapi import Response
|
|
from fastapi import UploadFile
|
|
from google.oauth2.credentials import Credentials # type: ignore
|
|
from pydantic import BaseModel
|
|
from sqlalchemy.orm import Session
|
|
|
|
from onyx.auth.users import current_admin_user
|
|
from onyx.auth.users import current_chat_accessible_user
|
|
from onyx.auth.users import current_curator_or_admin_user
|
|
from onyx.auth.users import current_user
|
|
from onyx.background.celery.versioned_apps.client import app as client_app
|
|
from onyx.configs.app_configs import ENABLED_CONNECTOR_TYPES
|
|
from onyx.configs.app_configs import MOCK_CONNECTOR_FILE_PATH
|
|
from onyx.configs.constants import DocumentSource
|
|
from onyx.configs.constants import FileOrigin
|
|
from onyx.configs.constants import MilestoneRecordType
|
|
from onyx.configs.constants import OnyxCeleryPriority
|
|
from onyx.configs.constants import OnyxCeleryTask
|
|
from onyx.connectors.exceptions import ConnectorValidationError
|
|
from onyx.connectors.factory import validate_ccpair_for_user
|
|
from onyx.connectors.google_utils.google_auth import (
|
|
get_google_oauth_creds,
|
|
)
|
|
from onyx.connectors.google_utils.google_kv import (
|
|
build_service_account_creds,
|
|
)
|
|
from onyx.connectors.google_utils.google_kv import (
|
|
delete_google_app_cred,
|
|
)
|
|
from onyx.connectors.google_utils.google_kv import (
|
|
delete_service_account_key,
|
|
)
|
|
from onyx.connectors.google_utils.google_kv import get_auth_url
|
|
from onyx.connectors.google_utils.google_kv import (
|
|
get_google_app_cred,
|
|
)
|
|
from onyx.connectors.google_utils.google_kv import (
|
|
get_service_account_key,
|
|
)
|
|
from onyx.connectors.google_utils.google_kv import (
|
|
update_credential_access_tokens,
|
|
)
|
|
from onyx.connectors.google_utils.google_kv import (
|
|
upsert_google_app_cred,
|
|
)
|
|
from onyx.connectors.google_utils.google_kv import (
|
|
upsert_service_account_key,
|
|
)
|
|
from onyx.connectors.google_utils.google_kv import verify_csrf
|
|
from onyx.connectors.google_utils.shared_constants import DB_CREDENTIALS_DICT_TOKEN_KEY
|
|
from onyx.connectors.google_utils.shared_constants import (
|
|
GoogleOAuthAuthenticationMethod,
|
|
)
|
|
from onyx.db.connector import create_connector
|
|
from onyx.db.connector import delete_connector
|
|
from onyx.db.connector import fetch_connector_by_id
|
|
from onyx.db.connector import fetch_connectors
|
|
from onyx.db.connector import get_connector_credential_ids
|
|
from onyx.db.connector import mark_ccpair_with_indexing_trigger
|
|
from onyx.db.connector import update_connector
|
|
from onyx.db.connector_credential_pair import add_credential_to_connector
|
|
from onyx.db.connector_credential_pair import get_cc_pair_groups_for_ids
|
|
from onyx.db.connector_credential_pair import get_cc_pair_groups_for_ids_parallel
|
|
from onyx.db.connector_credential_pair import get_connector_credential_pair
|
|
from onyx.db.connector_credential_pair import get_connector_credential_pairs_for_user
|
|
from onyx.db.connector_credential_pair import (
|
|
get_connector_credential_pairs_for_user_parallel,
|
|
)
|
|
from onyx.db.credentials import cleanup_gmail_credentials
|
|
from onyx.db.credentials import cleanup_google_drive_credentials
|
|
from onyx.db.credentials import create_credential
|
|
from onyx.db.credentials import delete_service_account_credentials
|
|
from onyx.db.credentials import fetch_credential_by_id_for_user
|
|
from onyx.db.deletion_attempt import check_deletion_attempt_is_allowed
|
|
from onyx.db.document import get_document_counts_for_cc_pairs_parallel
|
|
from onyx.db.engine import get_current_tenant_id
|
|
from onyx.db.engine import get_session
|
|
from onyx.db.enums import AccessType
|
|
from onyx.db.enums import IndexingMode
|
|
from onyx.db.index_attempt import get_index_attempts_for_cc_pair
|
|
from onyx.db.index_attempt import get_latest_index_attempts_by_status
|
|
from onyx.db.index_attempt import get_latest_index_attempts_parallel
|
|
from onyx.db.models import ConnectorCredentialPair
|
|
from onyx.db.models import IndexAttempt
|
|
from onyx.db.models import IndexingStatus
|
|
from onyx.db.models import User
|
|
from onyx.db.models import UserGroup__ConnectorCredentialPair
|
|
from onyx.db.search_settings import get_current_search_settings
|
|
from onyx.db.search_settings import get_secondary_search_settings
|
|
from onyx.file_processing.extract_file_text import convert_docx_to_txt
|
|
from onyx.file_store.file_store import get_default_file_store
|
|
from onyx.key_value_store.interface import KvKeyNotFoundError
|
|
from onyx.redis.redis_connector import RedisConnector
|
|
from onyx.server.documents.models import AuthStatus
|
|
from onyx.server.documents.models import AuthUrl
|
|
from onyx.server.documents.models import ConnectorCredentialPairIdentifier
|
|
from onyx.server.documents.models import ConnectorIndexingStatus
|
|
from onyx.server.documents.models import ConnectorSnapshot
|
|
from onyx.server.documents.models import ConnectorStatus
|
|
from onyx.server.documents.models import ConnectorUpdateRequest
|
|
from onyx.server.documents.models import CredentialBase
|
|
from onyx.server.documents.models import CredentialSnapshot
|
|
from onyx.server.documents.models import FailedConnectorIndexingStatus
|
|
from onyx.server.documents.models import FileUploadResponse
|
|
from onyx.server.documents.models import GDriveCallback
|
|
from onyx.server.documents.models import GmailCallback
|
|
from onyx.server.documents.models import GoogleAppCredentials
|
|
from onyx.server.documents.models import GoogleServiceAccountCredentialRequest
|
|
from onyx.server.documents.models import GoogleServiceAccountKey
|
|
from onyx.server.documents.models import IndexAttemptSnapshot
|
|
from onyx.server.documents.models import ObjectCreationIdResponse
|
|
from onyx.server.documents.models import RunConnectorRequest
|
|
from onyx.server.models import StatusResponse
|
|
from onyx.utils.logger import setup_logger
|
|
from onyx.utils.telemetry import create_milestone_and_report
|
|
from onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel
|
|
from onyx.utils.variable_functionality import fetch_ee_implementation_or_noop
|
|
|
|
|
|
# Module-level logger created through the project's logging helper.
logger = setup_logger()

# Cookie names for the Gmail / Google Drive credential id.
# NOTE(review): presumably read and written by the OAuth endpoints further
# down the file - they are not referenced in this part of it; confirm.
_GMAIL_CREDENTIAL_ID_COOKIE_NAME = "gmail_credential_id"
_GOOGLE_DRIVE_CREDENTIAL_ID_COOKIE_NAME = "google_drive_credential_id"

# Every route registered on this router is mounted under the "/manage" prefix.
router = APIRouter(prefix="/manage")

# Section marker (a bare string statement with no runtime effect).
"""Admin only API endpoints"""
|
|
|
|
|
|
@router.get("/admin/connector/gmail/app-credential")
def check_google_app_gmail_credentials_exist(
    _: User = Depends(current_curator_or_admin_user),
) -> dict[str, str]:
    """Return the client id of the stored Gmail Google App credentials.

    Requires a curator or admin user.

    Raises:
        HTTPException: 404 when the lookup raises `KvKeyNotFoundError`.
    """
    try:
        client_id = get_google_app_cred(DocumentSource.GMAIL).web.client_id
    except KvKeyNotFoundError:
        raise HTTPException(status_code=404, detail="Google App Credentials not found")

    return {"client_id": client_id}
|
|
|
|
|
|
@router.put("/admin/connector/gmail/app-credential")
def upsert_google_app_gmail_credentials(
    app_credentials: GoogleAppCredentials,
    _: User = Depends(current_admin_user),
) -> StatusResponse:
    """Store the Google App credentials used for Gmail (admin only).

    Raises:
        HTTPException: 400 carrying the message of a `ValueError` raised by
            the upsert.
    """
    try:
        upsert_google_app_cred(app_credentials, DocumentSource.GMAIL)
    except ValueError as err:
        raise HTTPException(status_code=400, detail=str(err))

    outcome = StatusResponse(
        success=True, message="Successfully saved Google App Credentials"
    )
    return outcome
|
|
|
|
|
|
@router.delete("/admin/connector/gmail/app-credential")
def delete_google_app_gmail_credentials(
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> StatusResponse:
    """Delete the Gmail Google App credentials, then run the Gmail cleanup.

    Admin only. The database cleanup only runs when the delete did not raise.

    Raises:
        HTTPException: 400 when either step raises `KvKeyNotFoundError`.
    """
    try:
        delete_google_app_cred(DocumentSource.GMAIL)
        cleanup_gmail_credentials(db_session=db_session)
    except KvKeyNotFoundError as err:
        raise HTTPException(status_code=400, detail=str(err))

    outcome = StatusResponse(
        success=True, message="Successfully deleted Google App Credentials"
    )
    return outcome
|
|
|
|
|
|
@router.get("/admin/connector/google-drive/app-credential")
def check_google_app_credentials_exist(
    _: User = Depends(current_curator_or_admin_user),
) -> dict[str, str]:
    """Return the client id of the stored Google Drive Google App credentials.

    Requires a curator or admin user.

    Raises:
        HTTPException: 404 when the lookup raises `KvKeyNotFoundError`.
    """
    try:
        client_id = get_google_app_cred(DocumentSource.GOOGLE_DRIVE).web.client_id
    except KvKeyNotFoundError:
        raise HTTPException(status_code=404, detail="Google App Credentials not found")

    return {"client_id": client_id}
|
|
|
|
|
|
@router.put("/admin/connector/google-drive/app-credential")
def upsert_google_app_credentials(
    app_credentials: GoogleAppCredentials,
    _: User = Depends(current_admin_user),
) -> StatusResponse:
    """Store the Google App credentials used for Google Drive (admin only).

    Raises:
        HTTPException: 400 carrying the message of a `ValueError` raised by
            the upsert.
    """
    try:
        upsert_google_app_cred(app_credentials, DocumentSource.GOOGLE_DRIVE)
    except ValueError as err:
        raise HTTPException(status_code=400, detail=str(err))

    outcome = StatusResponse(
        success=True, message="Successfully saved Google App Credentials"
    )
    return outcome
|
|
|
|
|
|
@router.delete("/admin/connector/google-drive/app-credential")
def delete_google_app_credentials(
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> StatusResponse:
    """Delete the Google Drive Google App credentials, then run the Drive cleanup.

    Admin only. The database cleanup only runs when the delete did not raise.

    Raises:
        HTTPException: 400 when either step raises `KvKeyNotFoundError`.
    """
    try:
        delete_google_app_cred(DocumentSource.GOOGLE_DRIVE)
        cleanup_google_drive_credentials(db_session=db_session)
    except KvKeyNotFoundError as err:
        raise HTTPException(status_code=400, detail=str(err))

    outcome = StatusResponse(
        success=True, message="Successfully deleted Google App Credentials"
    )
    return outcome
|
|
|
|
|
|
@router.get("/admin/connector/gmail/service-account-key")
def check_google_service_gmail_account_key_exist(
    _: User = Depends(current_curator_or_admin_user),
) -> dict[str, str]:
    """Return the client email of the stored Gmail service account key.

    Requires a curator or admin user.

    Raises:
        HTTPException: 404 when the lookup raises `KvKeyNotFoundError`.
    """
    try:
        account_email = get_service_account_key(DocumentSource.GMAIL).client_email
    except KvKeyNotFoundError:
        raise HTTPException(
            status_code=404, detail="Google Service Account Key not found"
        )

    return {"service_account_email": account_email}
|
|
|
|
|
|
@router.put("/admin/connector/gmail/service-account-key")
def upsert_google_service_gmail_account_key(
    service_account_key: GoogleServiceAccountKey,
    _: User = Depends(current_admin_user),
) -> StatusResponse:
    """Store the Google service account key used for Gmail (admin only).

    Raises:
        HTTPException: 400 carrying the message of a `ValueError` raised by
            the upsert.
    """
    try:
        upsert_service_account_key(service_account_key, DocumentSource.GMAIL)
    except ValueError as err:
        raise HTTPException(status_code=400, detail=str(err))

    outcome = StatusResponse(
        success=True, message="Successfully saved Google Service Account Key"
    )
    return outcome
|
|
|
|
|
|
@router.delete("/admin/connector/gmail/service-account-key")
def delete_google_service_gmail_account_key(
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> StatusResponse:
    """Delete the Gmail service account key, then run the Gmail cleanup.

    Admin only. The database cleanup only runs when the delete did not raise.

    Raises:
        HTTPException: 400 when either step raises `KvKeyNotFoundError`.
    """
    try:
        delete_service_account_key(DocumentSource.GMAIL)
        cleanup_gmail_credentials(db_session=db_session)
    except KvKeyNotFoundError as err:
        raise HTTPException(status_code=400, detail=str(err))

    outcome = StatusResponse(
        success=True, message="Successfully deleted Google Service Account Key"
    )
    return outcome
|
|
|
|
|
|
@router.get("/admin/connector/google-drive/service-account-key")
def check_google_service_account_key_exist(
    _: User = Depends(current_curator_or_admin_user),
) -> dict[str, str]:
    """Return the client email of the stored Google Drive service account key.

    Requires a curator or admin user.

    Raises:
        HTTPException: 404 when the lookup raises `KvKeyNotFoundError`.
    """
    try:
        account_email = get_service_account_key(
            DocumentSource.GOOGLE_DRIVE
        ).client_email
    except KvKeyNotFoundError:
        raise HTTPException(
            status_code=404, detail="Google Service Account Key not found"
        )

    return {"service_account_email": account_email}
|
|
|
|
|
|
@router.put("/admin/connector/google-drive/service-account-key")
def upsert_google_service_account_key(
    service_account_key: GoogleServiceAccountKey,
    _: User = Depends(current_admin_user),
) -> StatusResponse:
    """Store the Google service account key used for Google Drive (admin only).

    Raises:
        HTTPException: 400 carrying the message of a `ValueError` raised by
            the upsert.
    """
    try:
        upsert_service_account_key(service_account_key, DocumentSource.GOOGLE_DRIVE)
    except ValueError as err:
        raise HTTPException(status_code=400, detail=str(err))

    outcome = StatusResponse(
        success=True, message="Successfully saved Google Service Account Key"
    )
    return outcome
|
|
|
|
|
|
@router.delete("/admin/connector/google-drive/service-account-key")
def delete_google_service_account_key(
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> StatusResponse:
    """Delete the Google Drive service account key, then run the Drive cleanup.

    Admin only. The database cleanup only runs when the delete did not raise.

    Raises:
        HTTPException: 400 when either step raises `KvKeyNotFoundError`.
    """
    try:
        delete_service_account_key(DocumentSource.GOOGLE_DRIVE)
        cleanup_google_drive_credentials(db_session=db_session)
    except KvKeyNotFoundError as err:
        raise HTTPException(status_code=400, detail=str(err))

    outcome = StatusResponse(
        success=True, message="Successfully deleted Google Service Account Key"
    )
    return outcome
|
|
|
|
|
|
@router.put("/admin/connector/google-drive/service-account-credential")
def upsert_service_account_credential(
    service_account_credential_request: GoogleServiceAccountCredentialRequest,
    user: User | None = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
) -> ObjectCreationIdResponse:
    """Special API that creates a credential for a Google Drive service account.

    The request input is combined with the saved service account key to
    produce an entry in the `Credential` table. Existing Google Drive service
    account credentials are deleted before the new one is created.

    Raises:
        HTTPException: 400 when building the credential raises
            `KvKeyNotFoundError`.
    """
    primary_admin = service_account_credential_request.google_primary_admin
    try:
        new_credential_data = build_service_account_creds(
            DocumentSource.GOOGLE_DRIVE,
            primary_admin_email=primary_admin,
            name="Service Account (uploaded)",
        )
    except KvKeyNotFoundError as err:
        raise HTTPException(status_code=400, detail=str(err))

    # Remove every existing service account credential first.
    delete_service_account_credentials(user, db_session, DocumentSource.GOOGLE_DRIVE)

    # NOTE(review): an earlier comment here said `user=None` (credential not
    # personal), but the requesting user is what is actually passed - confirm
    # which one is intended.
    created = create_credential(
        credential_data=new_credential_data, user=user, db_session=db_session
    )
    return ObjectCreationIdResponse(id=created.id)
|
|
|
|
|
|
@router.put("/admin/connector/gmail/service-account-credential")
def upsert_gmail_service_account_credential(
    service_account_credential_request: GoogleServiceAccountCredentialRequest,
    user: User | None = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
) -> ObjectCreationIdResponse:
    """Special API that creates a credential for a Gmail service account.

    The request input is combined with the saved service account key to
    produce an entry in the `Credential` table. Existing Gmail service account
    credentials are deleted before the new one is created.

    Raises:
        HTTPException: 400 when building the credential raises
            `KvKeyNotFoundError`.
    """
    primary_admin = service_account_credential_request.google_primary_admin
    try:
        new_credential_data = build_service_account_creds(
            DocumentSource.GMAIL,
            primary_admin_email=primary_admin,
        )
    except KvKeyNotFoundError as err:
        raise HTTPException(status_code=400, detail=str(err))

    # Remove every existing service account credential first.
    delete_service_account_credentials(user, db_session, DocumentSource.GMAIL)

    # NOTE(review): an earlier comment here said `user=None` (credential not
    # personal), but the requesting user is what is actually passed - confirm
    # which one is intended.
    created = create_credential(
        credential_data=new_credential_data, user=user, db_session=db_session
    )
    return ObjectCreationIdResponse(id=created.id)
|
|
|
|
|
|
@router.get("/admin/connector/google-drive/check-auth/{credential_id}")
def check_drive_tokens(
    credential_id: int,
    user: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> AuthStatus:
    """Report whether a stored credential yields Google Drive OAuth credentials.

    The result is `authenticated=False` when the credential cannot be fetched
    for this user, when its JSON has no token entry, or when
    `get_google_oauth_creds` returns `None`; otherwise `authenticated=True`.
    """
    stored_credential = fetch_credential_by_id_for_user(
        credential_id, user, db_session
    )
    token_missing = (
        not stored_credential
        or DB_CREDENTIALS_DICT_TOKEN_KEY not in stored_credential.credential_json
    )
    if token_missing:
        return AuthStatus(authenticated=False)

    token_json_str = str(
        stored_credential.credential_json[DB_CREDENTIALS_DICT_TOKEN_KEY]
    )
    oauth_creds = get_google_oauth_creds(
        token_json_str=token_json_str,
        source=DocumentSource.GOOGLE_DRIVE,
    )
    return AuthStatus(authenticated=oauth_creds is not None)
|
|
|
|
|
|
def upload_files(files: list[UploadFile], db_session: Session) -> FileUploadResponse:
    """Persist uploaded files in the default file store and return their paths.

    Handling per upload, chosen by its content type:
      * zip archives are expanded and each kept member is stored on its own;
      * docx files are passed to `convert_docx_to_txt` and only the path it
        returns is recorded;
      * anything else is stored as-is.

    Stored names are prefixed with a fresh UUID directory so that identical
    file names do not collide.

    Args:
        files: The uploaded files; each must have a non-empty filename.
        db_session: Session used to obtain the file store.

    Returns:
        A `FileUploadResponse` listing the stored paths in processing order.

    Raises:
        HTTPException: 400 when a filename is empty, when an upload labelled
            as a zip is not a valid archive, or when a `ValueError` is raised
            while storing.
    """
    for file in files:
        if not file.filename:
            raise HTTPException(status_code=400, detail="File name cannot be empty")

    # Skip any entry that has a path component starting with "." (hidden
    # files/directories, which includes macOS "._*" metadata entries).
    def should_process_file(file_path: str) -> bool:
        normalized_path = os.path.normpath(file_path)
        return not any(part.startswith(".") for part in normalized_path.split(os.sep))

    try:
        file_store = get_default_file_store(db_session)
        deduped_file_paths: list[str] = []

        for file in files:
            if file.content_type and file.content_type.startswith("application/zip"):
                with zipfile.ZipFile(file.file, "r") as zf:
                    for file_info in zf.namelist():
                        if zf.getinfo(file_info).is_dir():
                            continue

                        if not should_process_file(file_info):
                            continue

                        sub_file_bytes = zf.read(file_info)
                        sub_file_name = os.path.join(str(uuid.uuid4()), file_info)
                        deduped_file_paths.append(sub_file_name)

                        # Fall back to a generic binary type when the
                        # extension is not recognised.
                        mime_type, __ = mimetypes.guess_type(file_info)
                        if mime_type is None:
                            mime_type = "application/octet-stream"

                        file_store.save_file(
                            file_name=sub_file_name,
                            content=BytesIO(sub_file_bytes),
                            display_name=os.path.basename(file_info),
                            file_origin=FileOrigin.CONNECTOR,
                            file_type=mime_type,
                        )
                continue

            # Special handling for docx files - only store the plaintext version
            if file.content_type and file.content_type.startswith(
                "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
            ):
                file_path = convert_docx_to_txt(file, file_store)
                deduped_file_paths.append(file_path)
                continue

            # Default handling for all other file types
            file_path = os.path.join(str(uuid.uuid4()), cast(str, file.filename))
            deduped_file_paths.append(file_path)
            file_store.save_file(
                file_name=file_path,
                content=file.file,
                display_name=file.filename,
                file_origin=FileOrigin.CONNECTOR,
                file_type=file.content_type or "text/plain",
            )

    except zipfile.BadZipFile as e:
        # BadZipFile is not a ValueError; without this clause a corrupt or
        # mislabelled archive would surface as a 500 instead of a client error.
        raise HTTPException(status_code=400, detail=str(e))
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))

    return FileUploadResponse(file_paths=deduped_file_paths)
|
|
|
|
|
|
@router.post("/admin/connector/file/upload")
def upload_files_api(
    files: list[UploadFile],
    _: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
) -> FileUploadResponse:
    """HTTP entry point for file uploads; delegates to `upload_files`.

    Requires a curator or admin user.
    """
    upload_result = upload_files(files, db_session)
    return upload_result
|
|
|
|
|
|
@router.get("/admin/connector")
def get_connectors_by_credential(
    _: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
    credential: int | None = None,
) -> list[ConnectorSnapshot]:
    """Get a list of connectors. Allow filtering by a specific credential id.

    Connectors whose source is `INGESTION_API` are never returned. When
    `credential` is given, a connector is kept only if one of its credential
    associations has that credential id.
    """
    snapshots = []
    for connector in fetch_connectors(db_session):
        # INGESTION_API is a system-level connector that users do not manage.
        if connector.source == DocumentSource.INGESTION_API:
            continue

        if credential is not None and not any(
            credential == cc_pair.credential_id for cc_pair in connector.credentials
        ):
            continue

        snapshots.append(ConnectorSnapshot.from_connector_db_model(connector))

    return snapshots
|
|
|
|
|
|
# Retrieves most recent failure cases for connectors that are currently failing
@router.get("/admin/connector/failed-indexing-status")
def get_currently_failed_indexing_status(
    secondary_index: bool = False,
    user: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
    get_editable: bool = Query(
        False, description="If true, return editable document sets"
    ),
) -> list[FailedConnectorIndexingStatus]:
    """List connector/credential pairs whose latest failure is still current.

    A failed attempt counts as current unless a successful attempt for the
    same connector/credential pair has a later `time_updated`. Only pairs
    returned by `get_connector_credential_pairs_for_user` are reported, and
    the pair named "DefaultCCPair" is always skipped.

    Args:
        secondary_index: Forwarded to the index-attempt queries.
        user: The requesting curator or admin.
        db_session: Database session for all lookups.
        get_editable: Forwarded to the connector/credential pair query.

    Returns:
        One `FailedConnectorIndexingStatus` per currently failing pair.
    """
    # Get the latest failed indexing attempts
    latest_failed_indexing_attempts = get_latest_index_attempts_by_status(
        secondary_index=secondary_index,
        db_session=db_session,
        status=IndexingStatus.FAILED,
    )

    # Get the latest successful indexing attempts
    latest_successful_indexing_attempts = get_latest_index_attempts_by_status(
        secondary_index=secondary_index,
        db_session=db_session,
        status=IndexingStatus.SUCCESS,
    )

    # Get all connector credential pairs
    cc_pairs = get_connector_credential_pairs_for_user(
        db_session=db_session,
        user=user,
        get_editable=get_editable,
    )

    # Newest success time per cc_pair id. Built once so every failed attempt is
    # checked with a dictionary lookup instead of a scan over all successes.
    newest_success_time: dict = {}
    for success_attempt in latest_successful_indexing_attempts:
        pair_id = success_attempt.connector_credential_pair_id
        known = newest_success_time.get(pair_id)
        if known is None or success_attempt.time_updated > known:
            newest_success_time[pair_id] = success_attempt.time_updated

    # Map cc_pair id -> its failed attempt, dropping failures that have been
    # superseded by a more recent success.
    cc_pair_to_latest_index_attempt = {}
    for failed_attempt in latest_failed_indexing_attempts:
        pair_id = failed_attempt.connector_credential_pair_id
        success_time = newest_success_time.get(pair_id)
        if success_time is not None and success_time > failed_attempt.time_updated:
            continue
        cc_pair_to_latest_index_attempt[pair_id] = failed_attempt

    indexing_statuses = []

    for cc_pair in cc_pairs:
        # Only pairs that still have a current failure are reported.
        latest_index_attempt = cc_pair_to_latest_index_attempt.get(cc_pair.id)
        if latest_index_attempt is None:
            continue

        # Skip DefaultCCPair
        if cc_pair.name == "DefaultCCPair":
            continue

        indexing_statuses.append(
            FailedConnectorIndexingStatus(
                cc_pair_id=cc_pair.id,
                name=cc_pair.name,
                error_msg=latest_index_attempt.error_msg,
                connector_id=cc_pair.connector_id,
                credential_id=cc_pair.credential_id,
                # Deletable when the check returns None.
                is_deletable=check_deletion_attempt_is_allowed(
                    connector_credential_pair=cc_pair,
                    db_session=db_session,
                    allow_scheduled=True,
                )
                is None,
            )
        )

    return indexing_statuses
|
|
|
|
|
|
@router.get("/admin/connector/status")
def get_connector_status(
    user: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
) -> list[ConnectorStatus]:
    """Return a status entry for each connector/credential pair of the user.

    Pairs named "DefaultCCPair" and pairs whose connector or credential is
    missing are left out. Each entry lists the user group ids related to it.
    """
    cc_pairs = get_connector_credential_pairs_for_user(
        db_session=db_session,
        user=user,
        eager_load_connector=True,
        eager_load_credential=True,
    )

    relationships = get_cc_pair_groups_for_ids(
        db_session=db_session,
        cc_pair_ids=[cc_pair.id for cc_pair in cc_pairs],
    )

    # cc_pair id -> ids of the user groups related to it
    group_ids_by_cc_pair: dict[int, list[int]] = {}
    for relationship in relationships:
        group_ids = group_ids_by_cc_pair.setdefault(relationship.cc_pair_id, [])
        group_ids.append(relationship.user_group_id)

    statuses = []
    for cc_pair in cc_pairs:
        if cc_pair.name == "DefaultCCPair":
            continue
        if not cc_pair.connector or not cc_pair.credential:
            continue

        statuses.append(
            ConnectorStatus(
                cc_pair_id=cc_pair.id,
                name=cc_pair.name,
                connector=ConnectorSnapshot.from_connector_db_model(cc_pair.connector),
                credential=CredentialSnapshot.from_credential_db_model(
                    cc_pair.credential
                ),
                access_type=cc_pair.access_type,
                groups=group_ids_by_cc_pair.get(cc_pair.id, []),
            )
        )

    return statuses
|
|
|
|
|
|
@router.get("/admin/connector/indexing-status")
def get_connector_indexing_status(
    secondary_index: bool = False,
    user: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
    get_editable: bool = Query(
        False, description="If true, return editable document sets"
    ),
) -> list[ConnectorIndexingStatus]:
    """Return the indexing status of each connector/credential pair of the user.

    When `MOCK_CONNECTOR_FILE_PATH` is set, the statuses are read from that
    JSON file and returned directly. Otherwise the pairs, their latest index
    attempts, document counts and user-group relationships are fetched and
    combined into one `ConnectorIndexingStatus` per pair. Pairs named
    "DefaultCCPair" and pairs whose connector or credential is missing are
    skipped. A `VISITED_ADMIN_PAGE` milestone is reported before returning.
    """
    tenant_id = get_current_tenant_id()

    if MOCK_CONNECTOR_FILE_PATH:
        import json

        with open(MOCK_CONNECTOR_FILE_PATH, "r") as mock_file:
            mock_entries = json.load(mock_file)
        return [ConnectorIndexingStatus(**entry) for entry in mock_entries]

    # NOTE: If the connector is deleting behind the scenes,
    # accessing cc_pairs can be inconsistent and members like
    # connector or credential may be None.
    # Additional checks are done to make sure the connector and credential still exist.
    # TODO: make this one query ... possibly eager load or wrap in a read transaction
    # to avoid the complexity of trying to error check throughout the function

    # see https://stackoverflow.com/questions/75758327/
    # sqlalchemy-method-connection-for-bind-is-already-in-progress
    # for why we can't pass in the current db_session to these functions
    first_batch = [
        # Gets the connector/credential pairs for the user
        (
            get_connector_credential_pairs_for_user_parallel,
            (user, get_editable, None, True, True, True),
        ),
        # Gets the most recent index attempt for each connector/credential pair
        (
            get_latest_index_attempts_parallel,
            (secondary_index, True, False),
        ),
        # Gets the most recent FINISHED index attempt for each connector/credential pair
        (
            get_latest_index_attempts_parallel,
            (secondary_index, True, True),
        ),
    ]
    (
        cc_pairs,
        latest_index_attempts,
        latest_finished_index_attempts,
    ) = run_functions_tuples_in_parallel(first_batch)
    cc_pairs = cast(list[ConnectorCredentialPair], cc_pairs)
    latest_index_attempts = cast(list[IndexAttempt], latest_index_attempts)

    # (connector_id, credential_id) -> most recent index attempt
    latest_attempt_by_pair = {}
    for attempt in latest_index_attempts:
        owning_pair = attempt.connector_credential_pair
        pair_key = (owning_pair.connector_id, owning_pair.credential_id)
        latest_attempt_by_pair[pair_key] = attempt

    # (connector_id, credential_id) -> most recent finished index attempt
    latest_finished_attempt_by_pair = {}
    for attempt in latest_finished_index_attempts:
        owning_pair = attempt.connector_credential_pair
        pair_key = (owning_pair.connector_id, owning_pair.credential_id)
        latest_finished_attempt_by_pair[pair_key] = attempt

    pair_identifiers = [
        ConnectorCredentialPairIdentifier(
            connector_id=cc_pair.connector_id,
            credential_id=cc_pair.credential_id,
        )
        for cc_pair in cc_pairs
    ]
    all_cc_pair_ids = [cc_pair.id for cc_pair in cc_pairs]
    document_count_info, group_cc_pair_relationships = run_functions_tuples_in_parallel(
        [
            (get_document_counts_for_cc_pairs_parallel, (pair_identifiers,)),
            (get_cc_pair_groups_for_ids_parallel, (all_cc_pair_ids,)),
        ]
    )
    document_count_info = cast(list[tuple[int, int, int]], document_count_info)
    group_cc_pair_relationships = cast(
        list[UserGroup__ConnectorCredentialPair], group_cc_pair_relationships
    )

    # (connector_id, credential_id) -> document count
    document_count_by_pair = {}
    for connector_id, credential_id, cnt in document_count_info:
        document_count_by_pair[(connector_id, credential_id)] = cnt

    # cc_pair id -> ids of the user groups related to it
    group_ids_by_cc_pair: dict[int, list[int]] = {}
    for relationship in group_cc_pair_relationships:
        group_ids = group_ids_by_cc_pair.setdefault(relationship.cc_pair_id, [])
        group_ids.append(relationship.user_group_id)

    # connector id -> ids of every cc_pair that uses the connector
    cc_pair_ids_by_connector: dict[int, list[int]] = {}
    for cc_pair in cc_pairs:
        sibling_ids = cc_pair_ids_by_connector.setdefault(cc_pair.connector_id, [])
        sibling_ids.append(cc_pair.id)

    if secondary_index:
        search_settings = get_secondary_search_settings(db_session)
    else:
        search_settings = get_current_search_settings(db_session)

    indexing_statuses: list[ConnectorIndexingStatus] = []
    for cc_pair in cc_pairs:
        # TODO remove this to enable ingestion API
        if cc_pair.name == "DefaultCCPair":
            continue

        connector = cc_pair.connector
        credential = cc_pair.credential
        if not (connector and credential):
            # This may happen if background deletion is happening
            continue

        # The pair counts as in progress when its index fence is set in Redis;
        # without search settings nothing is looked up.
        in_progress = False
        if search_settings:
            redis_connector = RedisConnector(tenant_id, cc_pair.id)
            redis_connector_index = redis_connector.new_index(search_settings.id)
            if redis_connector_index.fenced:
                in_progress = True

        latest_index_attempt = latest_attempt_by_pair.get(
            (connector.id, credential.id)
        )
        latest_finished_attempt = latest_finished_attempt_by_pair.get(
            (connector.id, credential.id)
        )

        # Safely get the owner email, handling detached instances
        owner_email = ""
        try:
            if credential.user:
                owner_email = credential.user.email
        except Exception:
            # Deliberate best effort: any error while accessing the user (such
            # as DetachedInstanceError) leaves the owner email empty.
            pass

        if latest_finished_attempt:
            last_finished_status = latest_finished_attempt.status
        else:
            last_finished_status = None

        if latest_index_attempt:
            last_status = latest_index_attempt.status
        else:
            last_status = None

        indexing_statuses.append(
            ConnectorIndexingStatus(
                cc_pair_id=cc_pair.id,
                name=cc_pair.name,
                in_progress=in_progress,
                cc_pair_status=cc_pair.status,
                in_repeated_error_state=cc_pair.in_repeated_error_state,
                connector=ConnectorSnapshot.from_connector_db_model(
                    connector, cc_pair_ids_by_connector.get(connector.id, [])
                ),
                credential=CredentialSnapshot.from_credential_db_model(credential),
                access_type=cc_pair.access_type,
                owner=owner_email,
                groups=group_ids_by_cc_pair.get(cc_pair.id, []),
                last_finished_status=last_finished_status,
                last_status=last_status,
                last_success=cc_pair.last_successful_index_time,
                docs_indexed=document_count_by_pair.get(
                    (connector.id, credential.id), 0
                ),
                latest_index_attempt=(
                    IndexAttemptSnapshot.from_index_attempt_db_model(
                        latest_index_attempt
                    )
                    if latest_index_attempt
                    else None
                ),
            )
        )

    # Visiting admin page brings the user to the current connectors page which calls this endpoint
    create_milestone_and_report(
        user=user,
        distinct_id=user.email if user else tenant_id or "N/A",
        event_type=MilestoneRecordType.VISITED_ADMIN_PAGE,
        properties=None,
        db_session=db_session,
    )

    return indexing_statuses
|
|
|
|
|
|
def _validate_connector_allowed(source: DocumentSource) -> None:
    """Raise if `source` is not among the enabled connector types.

    `ENABLED_CONNECTOR_TYPES` is read as a comma-separated list. Underscores
    are ignored on both sides of the comparison, and the match is
    case-insensitive and tolerant of whitespace around each entry (so
    "Slack, google_drive" enables both). When the list has no entries, every
    connector type is allowed.

    Args:
        source: The connector type being created.

    Raises:
        ValueError: If the list is non-empty and `source` is not in it.
    """
    # Empty entries are dropped *before* stripping, as before, so a
    # whitespace-only entry still counts as a (never matching) restriction
    # rather than silently lifting the allow-list.
    valid_connectors = [
        x.strip().lower()
        for x in ENABLED_CONNECTOR_TYPES.replace("_", "").split(",")
        if x
    ]
    if not valid_connectors:
        return

    normalized_source = source.value.lower().replace("_", "")
    if normalized_source in valid_connectors:
        return

    raise ValueError(
        "This connector type has been disabled by your system admin. "
        "Please contact them to get it enabled if you wish to use it."
    )
|
|
|
|
|
|
@router.post("/admin/connector")
def create_connector_from_model(
    connector_data: ConnectorUpdateRequest,
    user: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
) -> ObjectCreationIdResponse:
    """Create a connector from the request and report a milestone.

    The connector type is checked against the enabled types and the request is
    validated for this user before anything is created.

    Raises:
        HTTPException: 400 carrying the message of any `ValueError` raised
            during validation or creation (the error is also logged).
    """
    tenant_id = get_current_tenant_id()

    try:
        _validate_connector_allowed(connector_data.source)

        # Resolves to the enterprise validation hook when one exists, and to a
        # no-op otherwise.
        validate_creation = fetch_ee_implementation_or_noop(
            "onyx.db.user_group", "validate_object_creation_for_user", None
        )
        validate_creation(
            db_session=db_session,
            user=user,
            target_group_ids=connector_data.groups,
            object_is_public=connector_data.access_type == AccessType.PUBLIC,
            object_is_perm_sync=connector_data.access_type == AccessType.SYNC,
        )

        created_connector = create_connector(
            db_session=db_session,
            connector_data=connector_data.to_connector_base(),
        )

        create_milestone_and_report(
            user=user,
            distinct_id=user.email if user else tenant_id or "N/A",
            event_type=MilestoneRecordType.CREATED_CONNECTOR,
            properties=None,
            db_session=db_session,
        )

        return created_connector
    except ValueError as err:
        logger.error(f"Error creating connector: {err}")
        raise HTTPException(status_code=400, detail=str(err))
|
|
|
|
|
|
@router.post("/admin/connector-with-mock-credential")
def create_connector_with_mock_credential(
    connector_data: ConnectorUpdateRequest,
    user: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
) -> StatusResponse:
    """Create a connector together with an empty placeholder credential.

    After the `validate_object_creation_for_user` hook passes, this creates
    the connector, creates a credential with an empty `credential_json`,
    validates and links the two into a cc pair, sends the CHECK_FOR_INDEXING
    task, and records a CREATED_CONNECTOR milestone.

    Raises:
        HTTPException: 400 on `ConnectorValidationError` or `ValueError`
            raised while creating and linking the connector.
    """
    tenant_id = get_current_tenant_id()

    validate_object_creation = fetch_ee_implementation_or_noop(
        "onyx.db.user_group", "validate_object_creation_for_user", None
    )
    validate_object_creation(
        db_session=db_session,
        user=user,
        target_group_ids=connector_data.groups,
        object_is_public=connector_data.access_type == AccessType.PUBLIC,
        object_is_perm_sync=connector_data.access_type == AccessType.SYNC,
    )

    try:
        _validate_connector_allowed(connector_data.source)
        connector_response = create_connector(
            db_session=db_session,
            connector_data=connector_data,
        )

        credential = create_credential(
            credential_data=CredentialBase(
                credential_json={},
                admin_public=True,
                source=connector_data.source,
            ),
            user=user,
            db_session=db_session,
        )

        # Keep the ids of the freshly created connector and credential
        connector_id = cast(int, connector_response.id)
        credential_id = credential.id

        validate_ccpair_for_user(
            connector_id=connector_id,
            credential_id=credential_id,
            db_session=db_session,
        )
        response = add_credential_to_connector(
            db_session=db_session,
            user=user,
            connector_id=connector_id,
            credential_id=credential_id,
            access_type=connector_data.access_type,
            cc_pair_name=connector_data.name,
            groups=connector_data.groups,
        )

        # Kick off the indexing check right away
        client_app.send_task(
            OnyxCeleryTask.CHECK_FOR_INDEXING,
            priority=OnyxCeleryPriority.HIGH,
            kwargs={"tenant_id": tenant_id},
        )

        logger.info(
            f"create_connector_with_mock_credential - running check_for_indexing: "
            f"cc_pair={response.data}"
        )

        # Fall back to the tenant id (or "N/A") when there is no user
        milestone_distinct_id = user.email if user else tenant_id or "N/A"
        create_milestone_and_report(
            user=user,
            distinct_id=milestone_distinct_id,
            event_type=MilestoneRecordType.CREATED_CONNECTOR,
            properties=None,
            db_session=db_session,
        )
        return response

    except ConnectorValidationError as e:
        raise HTTPException(
            status_code=400, detail="Connector validation error: " + str(e)
        )
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
|
|
|
|
|
|
@router.patch("/admin/connector/{connector_id}")
def update_connector_from_model(
    connector_id: int,
    connector_data: ConnectorUpdateRequest,
    user: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
) -> ConnectorSnapshot | StatusResponse[int]:
    """Update an existing connector and return a snapshot of the result.

    Raises:
        HTTPException: 400 when validation raises `ValueError`; 404 when
            `update_connector` returns `None` for `connector_id`.
    """
    try:
        _validate_connector_allowed(connector_data.source)

        validate_object_creation = fetch_ee_implementation_or_noop(
            "onyx.db.user_group", "validate_object_creation_for_user", None
        )
        validate_object_creation(
            db_session=db_session,
            user=user,
            target_group_ids=connector_data.groups,
            object_is_public=connector_data.access_type == AccessType.PUBLIC,
            object_is_perm_sync=connector_data.access_type == AccessType.SYNC,
        )
        connector_base = connector_data.to_connector_base()
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))

    connector = update_connector(connector_id, connector_base, db_session)
    if connector is None:
        raise HTTPException(
            status_code=404, detail=f"Connector {connector_id} does not exist"
        )

    snapshot_fields = dict(
        id=connector.id,
        name=connector.name,
        source=connector.source,
        input_type=connector.input_type,
        connector_specific_config=connector.connector_specific_config,
        refresh_freq=connector.refresh_freq,
        prune_freq=connector.prune_freq,
        credential_ids=[
            association.credential.id for association in connector.credentials
        ],
        indexing_start=connector.indexing_start,
        time_created=connector.time_created,
        time_updated=connector.time_updated,
    )
    return ConnectorSnapshot(**snapshot_fields)
|
|
|
|
|
|
@router.delete("/admin/connector/{connector_id}", response_model=StatusResponse[int])
def delete_connector_by_id(
    connector_id: int,
    _: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
) -> StatusResponse[int]:
    """Delete the connector with the given id inside a session transaction.

    Raises:
        HTTPException: 400 when the deletion raises `AssertionError`.
    """
    try:
        with db_session.begin():
            deletion_status = delete_connector(
                db_session=db_session,
                connector_id=connector_id,
            )
        return deletion_status
    except AssertionError:
        raise HTTPException(status_code=400, detail="Connector is not deletable")
|
|
|
|
|
|
@router.post("/admin/connector/run-once")
def connector_run_once(
    run_info: RunConnectorRequest,
    _: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
) -> StatusResponse[int]:
    """Used to trigger indexing on a set of cc_pairs associated with a
    single connector."""
    tenant_id = get_current_tenant_id()

    connector_id = run_info.connector_id
    specified_credential_ids = run_info.credential_ids

    try:
        possible_credential_ids = get_connector_credential_ids(
            run_info.connector_id, db_session
        )
    except ValueError:
        raise HTTPException(
            status_code=404,
            detail=f"Connector by id {connector_id} does not exist.",
        )

    if specified_credential_ids:
        # Every requested credential must already belong to this connector
        if not set(specified_credential_ids).issubset(set(possible_credential_ids)):
            raise HTTPException(
                status_code=400,
                detail="Not all specified credentials are associated with connector",
            )
        credential_ids = specified_credential_ids
    else:
        # Nothing requested explicitly: use all credentials of the connector
        credential_ids = possible_credential_ids

    if not credential_ids:
        raise HTTPException(
            status_code=400,
            detail="Connector has no valid credentials, cannot create index attempts.",
        )

    try:
        num_triggers = trigger_indexing_for_cc_pair(
            credential_ids,
            connector_id,
            run_info.from_beginning,
            tenant_id,
            db_session,
        )
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))

    logger.info("connector_run_once - running check_for_indexing")

    return StatusResponse(
        success=True,
        message=f"Marked {num_triggers} index attempts with indexing triggers.",
        data=num_triggers,
    )
|
|
|
|
|
|
"""Endpoints for basic users"""
|
|
|
|
|
|
@router.get("/connector/gmail/authorize/{credential_id}")
def gmail_auth(
    response: Response, credential_id: str, _: User = Depends(current_user)
) -> AuthUrl:
    """Return the Gmail OAuth authorization URL for a credential.

    The credential id is also stored in a short-lived, HTTP-only cookie so
    that the callback endpoint can read it back.

    Raises:
        HTTPException: 400 when `credential_id` cannot be parsed as an
            integer (previously this surfaced as an unhandled `ValueError`).
    """
    # Parse before touching the response so a bad id never sets the cookie
    try:
        parsed_credential_id = int(credential_id)
    except ValueError:
        raise HTTPException(
            status_code=400, detail="Credential id must be an integer."
        )

    # set a cookie that we can read in the callback (used for `verify_csrf`)
    response.set_cookie(
        key=_GMAIL_CREDENTIAL_ID_COOKIE_NAME,
        value=credential_id,
        httponly=True,
        max_age=600,
    )
    return AuthUrl(auth_url=get_auth_url(parsed_credential_id, DocumentSource.GMAIL))
|
|
|
|
|
|
@router.get("/connector/google-drive/authorize/{credential_id}")
def google_drive_auth(
    response: Response, credential_id: str, _: User = Depends(current_user)
) -> AuthUrl:
    """Return the Google Drive OAuth authorization URL for a credential.

    The credential id is also stored in a short-lived, HTTP-only cookie so
    that the callback endpoint can read it back.

    Raises:
        HTTPException: 400 when `credential_id` cannot be parsed as an
            integer (previously this surfaced as an unhandled `ValueError`).
    """
    # Parse before touching the response so a bad id never sets the cookie
    try:
        parsed_credential_id = int(credential_id)
    except ValueError:
        raise HTTPException(
            status_code=400, detail="Credential id must be an integer."
        )

    # set a cookie that we can read in the callback (used for `verify_csrf`)
    response.set_cookie(
        key=_GOOGLE_DRIVE_CREDENTIAL_ID_COOKIE_NAME,
        value=credential_id,
        httponly=True,
        max_age=600,
    )
    return AuthUrl(
        auth_url=get_auth_url(parsed_credential_id, DocumentSource.GOOGLE_DRIVE)
    )
|
|
|
|
|
|
@router.get("/connector/gmail/callback")
def gmail_callback(
    request: Request,
    callback: GmailCallback = Depends(),
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> StatusResponse:
    """Handle the Gmail OAuth callback and store the resulting access tokens.

    The credential id is read from the cookie named by
    `_GMAIL_CREDENTIAL_ID_COOKIE_NAME`, checked with `verify_csrf` against
    the callback state, and then used to update the credential's tokens.

    Raises:
        HTTPException: 401 when the cookie is missing or not a decimal
            integer; 500 when no credentials come back from the token update.
    """
    credential_id_cookie = request.cookies.get(_GMAIL_CREDENTIAL_ID_COOKIE_NAME)
    # `isdecimal` (not `isdigit`): `isdigit` also accepts characters such as
    # "²" that `int()` rejects, which would raise instead of returning 401.
    if credential_id_cookie is None or not credential_id_cookie.isdecimal():
        raise HTTPException(
            status_code=401, detail="Request did not pass CSRF verification."
        )
    credential_id = int(credential_id_cookie)
    verify_csrf(credential_id, callback.state)

    credentials: Credentials | None = update_credential_access_tokens(
        callback.code,
        credential_id,
        user,
        db_session,
        DocumentSource.GMAIL,
        GoogleOAuthAuthenticationMethod.UPLOADED,
    )
    if credentials is None:
        raise HTTPException(
            status_code=500, detail="Unable to fetch Gmail access tokens"
        )

    return StatusResponse(success=True, message="Updated Gmail access tokens")
|
|
|
|
|
|
@router.get("/connector/google-drive/callback")
def google_drive_callback(
    request: Request,
    callback: GDriveCallback = Depends(),
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> StatusResponse:
    """Handle the Google Drive OAuth callback and store the access tokens.

    The credential id is read from the cookie named by
    `_GOOGLE_DRIVE_CREDENTIAL_ID_COOKIE_NAME`, checked with `verify_csrf`
    against the callback state, and then used to update the credential's
    tokens.

    Raises:
        HTTPException: 401 when the cookie is missing or not a decimal
            integer; 500 when no credentials come back from the token update.
    """
    credential_id_cookie = request.cookies.get(_GOOGLE_DRIVE_CREDENTIAL_ID_COOKIE_NAME)
    # `isdecimal` (not `isdigit`): `isdigit` also accepts characters such as
    # "²" that `int()` rejects, which would raise instead of returning 401.
    if credential_id_cookie is None or not credential_id_cookie.isdecimal():
        raise HTTPException(
            status_code=401, detail="Request did not pass CSRF verification."
        )
    credential_id = int(credential_id_cookie)
    verify_csrf(credential_id, callback.state)

    credentials: Credentials | None = update_credential_access_tokens(
        callback.code,
        credential_id,
        user,
        db_session,
        DocumentSource.GOOGLE_DRIVE,
        GoogleOAuthAuthenticationMethod.UPLOADED,
    )
    if credentials is None:
        raise HTTPException(
            status_code=500, detail="Unable to fetch Google Drive access tokens"
        )

    return StatusResponse(success=True, message="Updated Google Drive access tokens")
|
|
|
|
|
|
@router.get("/connector")
def get_connectors(
    _: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> list[ConnectorSnapshot]:
    """List snapshots of all connectors except INGESTION_API ones."""
    # don't include INGESTION_API, as it's not a "real"
    # connector like those created by the user
    user_facing_connectors = (
        connector
        for connector in fetch_connectors(db_session)
        if connector.source != DocumentSource.INGESTION_API
    )
    return [
        ConnectorSnapshot.from_connector_db_model(connector)
        for connector in user_facing_connectors
    ]
|
|
|
|
|
|
@router.get("/connector/{connector_id}")
def get_connector_by_id(
    connector_id: int,
    _: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> ConnectorSnapshot | StatusResponse[int]:
    """Return a snapshot of the connector with the given id.

    Raises:
        HTTPException: 404 when no connector with `connector_id` is found.
    """
    found = fetch_connector_by_id(connector_id, db_session)
    if found is None:
        raise HTTPException(
            status_code=404, detail=f"Connector {connector_id} does not exist"
        )

    snapshot_fields = dict(
        id=found.id,
        name=found.name,
        source=found.source,
        indexing_start=found.indexing_start,
        input_type=found.input_type,
        connector_specific_config=found.connector_specific_config,
        refresh_freq=found.refresh_freq,
        prune_freq=found.prune_freq,
        credential_ids=[
            association.credential.id for association in found.credentials
        ],
        time_created=found.time_created,
        time_updated=found.time_updated,
    )
    return ConnectorSnapshot(**snapshot_fields)
|
|
|
|
|
|
class BasicCCPairInfo(BaseModel):
    """Minimal status of a connector-credential pair, as returned by the
    `/connector-status` endpoint."""

    # True when the cc pair has a `last_successful_index_time` set
    has_successful_run: bool
    # Source of the connector that the cc pair belongs to
    source: DocumentSource
|
|
|
|
|
|
@router.get("/connector-status")
def get_basic_connector_indexing_status(
    user: User = Depends(current_chat_accessible_user),
    db_session: Session = Depends(get_session),
) -> list[BasicCCPairInfo]:
    """Report, for each cc pair fetched for the user, its source and whether
    it has ever been indexed successfully. INGESTION_API pairs are left out.
    """
    user_cc_pairs = get_connector_credential_pairs_for_user(
        db_session=db_session,
        eager_load_connector=True,
        get_editable=False,
        user=user,
    )
    reportable_pairs = (
        cc_pair
        for cc_pair in user_cc_pairs
        if cc_pair.connector.source != DocumentSource.INGESTION_API
    )
    return [
        BasicCCPairInfo(
            has_successful_run=cc_pair.last_successful_index_time is not None,
            source=cc_pair.connector.source,
        )
        for cc_pair in reportable_pairs
    ]
|
|
|
|
|
|
def trigger_indexing_for_cc_pair(
    specified_credential_ids: list[int],
    connector_id: int,
    from_beginning: bool,
    tenant_id: str,
    db_session: Session,
    is_user_file: bool = False,
) -> int:
    """Mark the connector's cc pairs with an indexing trigger.

    Uses `specified_credential_ids` when given (they must all belong to the
    connector), otherwise every credential of the connector. Credentials for
    which `get_index_attempts_for_cc_pair` returns any unfinished current
    attempt are skipped. Afterwards the CHECK_FOR_INDEXING task is sent, at
    HIGHEST priority when `is_user_file` is set and HIGH otherwise.

    Returns:
        The number of cc pairs that were marked with a trigger.

    Raises:
        ValueError: if the connector's credential lookup fails, if a
            specified credential is not associated with the connector, or if
            there are no credentials to index with.
    """
    try:
        possible_credential_ids = get_connector_credential_ids(connector_id, db_session)
    except ValueError as e:
        raise ValueError(f"Connector by id {connector_id} does not exist: {str(e)}")

    if specified_credential_ids:
        if not set(specified_credential_ids).issubset(set(possible_credential_ids)):
            raise ValueError(
                "Not all specified credentials are associated with connector"
            )
        credential_ids = specified_credential_ids
    else:
        credential_ids = possible_credential_ids

    if not credential_ids:
        raise ValueError(
            "Connector has no valid credentials, cannot create index attempts."
        )

    def _has_unfinished_attempt(credential_id: int) -> bool:
        # Any current, not-yet-finished index attempt for this cc pair
        return bool(
            get_index_attempts_for_cc_pair(
                cc_pair_identifier=ConnectorCredentialPairIdentifier(
                    connector_id=connector_id,
                    credential_id=credential_id,
                ),
                only_current=True,
                db_session=db_session,
                disinclude_finished=True,
            )
        )

    # Prevents index attempts for cc pairs that already have an index attempt currently running
    skipped_credentials = [
        credential_id
        for credential_id in credential_ids
        if _has_unfinished_attempt(credential_id)
    ]

    connector_credential_pairs = [
        get_connector_credential_pair(
            db_session=db_session,
            connector_id=connector_id,
            credential_id=credential_id,
        )
        for credential_id in credential_ids
        if credential_id not in skipped_credentials
    ]

    # The mode is the same for every pair, so pick it once up front
    indexing_mode = IndexingMode.REINDEX if from_beginning else IndexingMode.UPDATE

    num_triggers = 0
    for cc_pair in connector_credential_pairs:
        if cc_pair is None:
            continue

        mark_ccpair_with_indexing_trigger(cc_pair.id, indexing_mode, db_session)
        num_triggers += 1

        logger.info(
            f"connector_run_once - marking cc_pair with indexing trigger: "
            f"connector={connector_id} "
            f"cc_pair={cc_pair.id} "
            f"indexing_trigger={indexing_mode}"
        )

    # run the beat task to pick up the triggers immediately
    if is_user_file:
        priority = OnyxCeleryPriority.HIGHEST
    else:
        priority = OnyxCeleryPriority.HIGH
    logger.info(f"Sending indexing check task with priority {priority}")
    client_app.send_task(
        OnyxCeleryTask.CHECK_FOR_INDEXING,
        priority=priority,
        kwargs={"tenant_id": tenant_id},
    )

    return num_triggers
|