2025-04-13 15:57:47 -07:00

1330 lines
48 KiB
Python

import mimetypes
import os
import uuid
import zipfile
from io import BytesIO
from typing import cast
from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException
from fastapi import Query
from fastapi import Request
from fastapi import Response
from fastapi import UploadFile
from google.oauth2.credentials import Credentials # type: ignore
from pydantic import BaseModel
from sqlalchemy.orm import Session
from onyx.auth.users import current_admin_user
from onyx.auth.users import current_chat_accessible_user
from onyx.auth.users import current_curator_or_admin_user
from onyx.auth.users import current_user
from onyx.background.celery.versioned_apps.client import app as client_app
from onyx.configs.app_configs import ENABLED_CONNECTOR_TYPES
from onyx.configs.app_configs import MOCK_CONNECTOR_FILE_PATH
from onyx.configs.constants import DocumentSource
from onyx.configs.constants import FileOrigin
from onyx.configs.constants import MilestoneRecordType
from onyx.configs.constants import OnyxCeleryPriority
from onyx.configs.constants import OnyxCeleryTask
from onyx.connectors.exceptions import ConnectorValidationError
from onyx.connectors.factory import validate_ccpair_for_user
from onyx.connectors.google_utils.google_auth import (
get_google_oauth_creds,
)
from onyx.connectors.google_utils.google_kv import (
build_service_account_creds,
)
from onyx.connectors.google_utils.google_kv import (
delete_google_app_cred,
)
from onyx.connectors.google_utils.google_kv import (
delete_service_account_key,
)
from onyx.connectors.google_utils.google_kv import get_auth_url
from onyx.connectors.google_utils.google_kv import (
get_google_app_cred,
)
from onyx.connectors.google_utils.google_kv import (
get_service_account_key,
)
from onyx.connectors.google_utils.google_kv import (
update_credential_access_tokens,
)
from onyx.connectors.google_utils.google_kv import (
upsert_google_app_cred,
)
from onyx.connectors.google_utils.google_kv import (
upsert_service_account_key,
)
from onyx.connectors.google_utils.google_kv import verify_csrf
from onyx.connectors.google_utils.shared_constants import DB_CREDENTIALS_DICT_TOKEN_KEY
from onyx.connectors.google_utils.shared_constants import (
GoogleOAuthAuthenticationMethod,
)
from onyx.db.connector import create_connector
from onyx.db.connector import delete_connector
from onyx.db.connector import fetch_connector_by_id
from onyx.db.connector import fetch_connectors
from onyx.db.connector import get_connector_credential_ids
from onyx.db.connector import mark_ccpair_with_indexing_trigger
from onyx.db.connector import update_connector
from onyx.db.connector_credential_pair import add_credential_to_connector
from onyx.db.connector_credential_pair import get_cc_pair_groups_for_ids
from onyx.db.connector_credential_pair import get_cc_pair_groups_for_ids_parallel
from onyx.db.connector_credential_pair import get_connector_credential_pair
from onyx.db.connector_credential_pair import get_connector_credential_pairs_for_user
from onyx.db.connector_credential_pair import (
get_connector_credential_pairs_for_user_parallel,
)
from onyx.db.credentials import cleanup_gmail_credentials
from onyx.db.credentials import cleanup_google_drive_credentials
from onyx.db.credentials import create_credential
from onyx.db.credentials import delete_service_account_credentials
from onyx.db.credentials import fetch_credential_by_id_for_user
from onyx.db.deletion_attempt import check_deletion_attempt_is_allowed
from onyx.db.document import get_document_counts_for_cc_pairs_parallel
from onyx.db.engine import get_current_tenant_id
from onyx.db.engine import get_session
from onyx.db.enums import AccessType
from onyx.db.enums import IndexingMode
from onyx.db.index_attempt import get_index_attempts_for_cc_pair
from onyx.db.index_attempt import get_latest_index_attempts_by_status
from onyx.db.index_attempt import get_latest_index_attempts_parallel
from onyx.db.models import ConnectorCredentialPair
from onyx.db.models import IndexAttempt
from onyx.db.models import IndexingStatus
from onyx.db.models import User
from onyx.db.models import UserGroup__ConnectorCredentialPair
from onyx.db.search_settings import get_current_search_settings
from onyx.db.search_settings import get_secondary_search_settings
from onyx.file_processing.extract_file_text import convert_docx_to_txt
from onyx.file_store.file_store import get_default_file_store
from onyx.key_value_store.interface import KvKeyNotFoundError
from onyx.redis.redis_connector import RedisConnector
from onyx.server.documents.models import AuthStatus
from onyx.server.documents.models import AuthUrl
from onyx.server.documents.models import ConnectorCredentialPairIdentifier
from onyx.server.documents.models import ConnectorIndexingStatus
from onyx.server.documents.models import ConnectorSnapshot
from onyx.server.documents.models import ConnectorStatus
from onyx.server.documents.models import ConnectorUpdateRequest
from onyx.server.documents.models import CredentialBase
from onyx.server.documents.models import CredentialSnapshot
from onyx.server.documents.models import FailedConnectorIndexingStatus
from onyx.server.documents.models import FileUploadResponse
from onyx.server.documents.models import GDriveCallback
from onyx.server.documents.models import GmailCallback
from onyx.server.documents.models import GoogleAppCredentials
from onyx.server.documents.models import GoogleServiceAccountCredentialRequest
from onyx.server.documents.models import GoogleServiceAccountKey
from onyx.server.documents.models import IndexAttemptSnapshot
from onyx.server.documents.models import ObjectCreationIdResponse
from onyx.server.documents.models import RunConnectorRequest
from onyx.server.models import StatusResponse
from onyx.utils.logger import setup_logger
from onyx.utils.telemetry import create_milestone_and_report
from onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel
from onyx.utils.variable_functionality import fetch_ee_implementation_or_noop
# Module-level logger obtained from the project's logging helper.
logger = setup_logger()
# Cookie names keyed to the Gmail / Google Drive credential id.
# NOTE(review): their use is outside this section — presumably the OAuth
# authorize/callback endpoints; confirm against the rest of the file.
_GMAIL_CREDENTIAL_ID_COOKIE_NAME = "gmail_credential_id"
_GOOGLE_DRIVE_CREDENTIAL_ID_COOKIE_NAME = "google_drive_credential_id"
# Every route registered on this router is served under the "/manage" prefix.
router = APIRouter(prefix="/manage")
"""Admin only API endpoints"""
@router.get("/admin/connector/gmail/app-credential")
def check_google_app_gmail_credentials_exist(
    _: User = Depends(current_curator_or_admin_user),
) -> dict[str, str]:
    """Return the client id of the Gmail Google App credentials.

    Access is gated by the `current_curator_or_admin_user` dependency.

    Raises:
        HTTPException: 404 when the lookup raises `KvKeyNotFoundError`.
    """
    try:
        gmail_client_id = get_google_app_cred(DocumentSource.GMAIL).web.client_id
    except KvKeyNotFoundError:
        raise HTTPException(status_code=404, detail="Google App Credentials not found")
    return {"client_id": gmail_client_id}
@router.put("/admin/connector/gmail/app-credential")
def upsert_google_app_gmail_credentials(
    app_credentials: GoogleAppCredentials, _: User = Depends(current_admin_user)
) -> StatusResponse:
    """Save the supplied Google App credentials for the Gmail source.

    Access is gated by the `current_admin_user` dependency.

    Raises:
        HTTPException: 400 carrying the message of any `ValueError` raised
            while saving.
    """
    try:
        upsert_google_app_cred(app_credentials, DocumentSource.GMAIL)
    except ValueError as err:
        raise HTTPException(status_code=400, detail=str(err))
    else:
        return StatusResponse(
            success=True, message="Successfully saved Google App Credentials"
        )
@router.delete("/admin/connector/gmail/app-credential")
def delete_google_app_gmail_credentials(
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> StatusResponse:
    """Delete the Gmail Google App credentials, then clean up Gmail credentials.

    The app credential is removed first; `cleanup_gmail_credentials` runs
    afterwards with the request's database session.

    Raises:
        HTTPException: 400 carrying the message of a `KvKeyNotFoundError`
            raised by either step.
    """
    try:
        delete_google_app_cred(DocumentSource.GMAIL)
        cleanup_gmail_credentials(db_session=db_session)
    except KvKeyNotFoundError as err:
        raise HTTPException(status_code=400, detail=str(err))

    outcome = StatusResponse(
        success=True, message="Successfully deleted Google App Credentials"
    )
    return outcome
@router.get("/admin/connector/google-drive/app-credential")
def check_google_app_credentials_exist(
    _: User = Depends(current_curator_or_admin_user),
) -> dict[str, str]:
    """Return the client id of the Google Drive Google App credentials.

    Access is gated by the `current_curator_or_admin_user` dependency.

    Raises:
        HTTPException: 404 when the lookup raises `KvKeyNotFoundError`.
    """
    try:
        drive_app_cred = get_google_app_cred(DocumentSource.GOOGLE_DRIVE)
        payload = {"client_id": drive_app_cred.web.client_id}
    except KvKeyNotFoundError:
        raise HTTPException(status_code=404, detail="Google App Credentials not found")
    return payload
@router.put("/admin/connector/google-drive/app-credential")
def upsert_google_app_credentials(
    app_credentials: GoogleAppCredentials, _: User = Depends(current_admin_user)
) -> StatusResponse:
    """Save the supplied Google App credentials for the Google Drive source.

    Access is gated by the `current_admin_user` dependency.

    Raises:
        HTTPException: 400 carrying the message of any `ValueError` raised
            while saving.
    """
    try:
        upsert_google_app_cred(app_credentials, DocumentSource.GOOGLE_DRIVE)
    except ValueError as err:
        raise HTTPException(status_code=400, detail=str(err))

    saved = StatusResponse(
        success=True, message="Successfully saved Google App Credentials"
    )
    return saved
@router.delete("/admin/connector/google-drive/app-credential")
def delete_google_app_credentials(
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> StatusResponse:
    """Delete the Google Drive Google App credentials, then clean up Drive credentials.

    The app credential is removed first; `cleanup_google_drive_credentials`
    runs afterwards with the request's database session.

    Raises:
        HTTPException: 400 carrying the message of a `KvKeyNotFoundError`
            raised by either step.
    """
    try:
        delete_google_app_cred(DocumentSource.GOOGLE_DRIVE)
        cleanup_google_drive_credentials(db_session=db_session)
    except KvKeyNotFoundError as err:
        raise HTTPException(status_code=400, detail=str(err))
    else:
        return StatusResponse(
            success=True, message="Successfully deleted Google App Credentials"
        )
@router.get("/admin/connector/gmail/service-account-key")
def check_google_service_gmail_account_key_exist(
    _: User = Depends(current_curator_or_admin_user),
) -> dict[str, str]:
    """Return the client email of the Gmail service account key.

    Access is gated by the `current_curator_or_admin_user` dependency.

    Raises:
        HTTPException: 404 when the lookup raises `KvKeyNotFoundError`.
    """
    try:
        account_email = get_service_account_key(DocumentSource.GMAIL).client_email
    except KvKeyNotFoundError:
        raise HTTPException(
            status_code=404, detail="Google Service Account Key not found"
        )
    return {"service_account_email": account_email}
@router.put("/admin/connector/gmail/service-account-key")
def upsert_google_service_gmail_account_key(
    service_account_key: GoogleServiceAccountKey, _: User = Depends(current_admin_user)
) -> StatusResponse:
    """Save the supplied service account key for the Gmail source.

    Access is gated by the `current_admin_user` dependency.

    Raises:
        HTTPException: 400 carrying the message of any `ValueError` raised
            while saving.
    """
    try:
        upsert_service_account_key(service_account_key, DocumentSource.GMAIL)
    except ValueError as err:
        raise HTTPException(status_code=400, detail=str(err))
    else:
        return StatusResponse(
            success=True, message="Successfully saved Google Service Account Key"
        )
@router.delete("/admin/connector/gmail/service-account-key")
def delete_google_service_gmail_account_key(
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> StatusResponse:
    """Delete the Gmail service account key, then clean up Gmail credentials.

    The key is removed first; `cleanup_gmail_credentials` runs afterwards
    with the request's database session.

    Raises:
        HTTPException: 400 carrying the message of a `KvKeyNotFoundError`
            raised by either step.
    """
    try:
        delete_service_account_key(DocumentSource.GMAIL)
        cleanup_gmail_credentials(db_session=db_session)
    except KvKeyNotFoundError as err:
        raise HTTPException(status_code=400, detail=str(err))

    outcome = StatusResponse(
        success=True, message="Successfully deleted Google Service Account Key"
    )
    return outcome
@router.get("/admin/connector/google-drive/service-account-key")
def check_google_service_account_key_exist(
    _: User = Depends(current_curator_or_admin_user),
) -> dict[str, str]:
    """Return the client email of the Google Drive service account key.

    Access is gated by the `current_curator_or_admin_user` dependency.

    Raises:
        HTTPException: 404 when the lookup raises `KvKeyNotFoundError`.
    """
    try:
        drive_key = get_service_account_key(DocumentSource.GOOGLE_DRIVE)
        payload = {"service_account_email": drive_key.client_email}
    except KvKeyNotFoundError:
        raise HTTPException(
            status_code=404, detail="Google Service Account Key not found"
        )
    return payload
@router.put("/admin/connector/google-drive/service-account-key")
def upsert_google_service_account_key(
    service_account_key: GoogleServiceAccountKey, _: User = Depends(current_admin_user)
) -> StatusResponse:
    """Save the supplied service account key for the Google Drive source.

    Access is gated by the `current_admin_user` dependency.

    Raises:
        HTTPException: 400 carrying the message of any `ValueError` raised
            while saving.
    """
    try:
        upsert_service_account_key(service_account_key, DocumentSource.GOOGLE_DRIVE)
    except ValueError as err:
        raise HTTPException(status_code=400, detail=str(err))

    saved = StatusResponse(
        success=True, message="Successfully saved Google Service Account Key"
    )
    return saved
@router.delete("/admin/connector/google-drive/service-account-key")
def delete_google_service_account_key(
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> StatusResponse:
    """Delete the Google Drive service account key, then clean up Drive credentials.

    The key is removed first; `cleanup_google_drive_credentials` runs
    afterwards with the request's database session.

    Raises:
        HTTPException: 400 carrying the message of a `KvKeyNotFoundError`
            raised by either step.
    """
    try:
        delete_service_account_key(DocumentSource.GOOGLE_DRIVE)
        cleanup_google_drive_credentials(db_session=db_session)
    except KvKeyNotFoundError as err:
        raise HTTPException(status_code=400, detail=str(err))
    else:
        return StatusResponse(
            success=True, message="Successfully deleted Google Service Account Key"
        )
@router.put("/admin/connector/google-drive/service-account-credential")
def upsert_service_account_credential(
    service_account_credential_request: GoogleServiceAccountCredentialRequest,
    user: User | None = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
) -> ObjectCreationIdResponse:
    """Create a Google Drive service-account credential and return its id.

    The credential data is built by `build_service_account_creds` from the
    primary admin email in the request. Existing service account credentials
    for Google Drive are deleted before the new one is created.

    Raises:
        HTTPException: 400 when building the credential data raises
            `KvKeyNotFoundError`.
    """
    primary_admin = service_account_credential_request.google_primary_admin
    try:
        new_credential_data = build_service_account_creds(
            DocumentSource.GOOGLE_DRIVE,
            primary_admin_email=primary_admin,
            name="Service Account (uploaded)",
        )
    except KvKeyNotFoundError as err:
        raise HTTPException(status_code=400, detail=str(err))

    # Replace rather than accumulate: clear the old service account
    # credentials before inserting the new one.
    delete_service_account_credentials(user, db_session, DocumentSource.GOOGLE_DRIVE)

    # The requesting user is passed through as the credential's `user`.
    created = create_credential(
        credential_data=new_credential_data, user=user, db_session=db_session
    )
    return ObjectCreationIdResponse(id=created.id)
@router.put("/admin/connector/gmail/service-account-credential")
def upsert_gmail_service_account_credential(
    service_account_credential_request: GoogleServiceAccountCredentialRequest,
    user: User | None = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
) -> ObjectCreationIdResponse:
    """Create a Gmail service-account credential and return its id.

    The credential data is built by `build_service_account_creds` from the
    primary admin email in the request. Existing service account credentials
    for Gmail are deleted before the new one is created.

    Raises:
        HTTPException: 400 when building the credential data raises
            `KvKeyNotFoundError`.
    """
    primary_admin = service_account_credential_request.google_primary_admin
    try:
        new_credential_data = build_service_account_creds(
            DocumentSource.GMAIL,
            primary_admin_email=primary_admin,
        )
    except KvKeyNotFoundError as err:
        raise HTTPException(status_code=400, detail=str(err))

    # Replace rather than accumulate: clear the old service account
    # credentials before inserting the new one.
    delete_service_account_credentials(user, db_session, DocumentSource.GMAIL)

    # The requesting user is passed through as the credential's `user`.
    created = create_credential(
        credential_data=new_credential_data, user=user, db_session=db_session
    )
    return ObjectCreationIdResponse(id=created.id)
@router.get("/admin/connector/google-drive/check-auth/{credential_id}")
def check_drive_tokens(
    credential_id: int,
    user: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> AuthStatus:
    """Report whether the given credential yields Google Drive OAuth creds.

    `authenticated` is False when the credential cannot be fetched for the
    user, when its JSON has no token entry, or when
    `get_google_oauth_creds` returns None for that token; otherwise True.
    """
    stored_credential = fetch_credential_by_id_for_user(
        credential_id, user, db_session
    )
    if not stored_credential:
        return AuthStatus(authenticated=False)

    credential_json = stored_credential.credential_json
    if DB_CREDENTIALS_DICT_TOKEN_KEY not in credential_json:
        return AuthStatus(authenticated=False)

    oauth_creds = get_google_oauth_creds(
        token_json_str=str(credential_json[DB_CREDENTIALS_DICT_TOKEN_KEY]),
        source=DocumentSource.GOOGLE_DRIVE,
    )
    return AuthStatus(authenticated=oauth_creds is not None)
def upload_files(files: list[UploadFile], db_session: Session) -> FileUploadResponse:
    """Save uploaded files to the default file store and return their paths.

    Handling depends on the upload's content type:
      * zip archives are expanded and every processable member is stored
        individually (directories and dot-prefixed path parts are skipped);
      * docx uploads are passed to `convert_docx_to_txt`, whose returned
        path is recorded;
      * everything else is stored as-is.

    Every stored name is prefixed with a fresh UUID directory so that
    uploads sharing a file name do not collide.

    Args:
        files: The uploaded files; each must have a non-empty filename.
        db_session: Session handed to `get_default_file_store`.

    Returns:
        A `FileUploadResponse` listing the stored path of every saved file.

    Raises:
        HTTPException: 400 if a filename is empty, if a zip upload is not a
            readable zip archive, or if saving raises `ValueError`.
    """
    for file in files:
        if not file.filename:
            raise HTTPException(status_code=400, detail="File name cannot be empty")

    # Skip directories and known macOS metadata entries
    def should_process_file(file_path: str) -> bool:
        normalized_path = os.path.normpath(file_path)
        return not any(part.startswith(".") for part in normalized_path.split(os.sep))

    def unique_store_path(original_name: str) -> str:
        # os.path.join drops every earlier segment when a later one is
        # absolute, which would silently discard the UUID prefix. Strip
        # leading separators so the prefix is always kept.
        relative_name = original_name.lstrip("/" + os.sep)
        return os.path.join(str(uuid.uuid4()), relative_name)

    try:
        file_store = get_default_file_store(db_session)
        deduped_file_paths = []

        for file in files:
            if file.content_type and file.content_type.startswith("application/zip"):
                with zipfile.ZipFile(file.file, "r") as zf:
                    for file_info in zf.namelist():
                        if zf.getinfo(file_info).is_dir():
                            continue

                        if not should_process_file(file_info):
                            continue

                        sub_file_bytes = zf.read(file_info)
                        sub_file_name = unique_store_path(file_info)
                        deduped_file_paths.append(sub_file_name)

                        mime_type, __ = mimetypes.guess_type(file_info)
                        if mime_type is None:
                            mime_type = "application/octet-stream"

                        file_store.save_file(
                            file_name=sub_file_name,
                            content=BytesIO(sub_file_bytes),
                            display_name=os.path.basename(file_info),
                            file_origin=FileOrigin.CONNECTOR,
                            file_type=mime_type,
                        )
                continue

            # Special handling for docx files - only store the plaintext version
            if file.content_type and file.content_type.startswith(
                "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
            ):
                file_path = convert_docx_to_txt(file, file_store)
                deduped_file_paths.append(file_path)
                continue

            # Default handling for all other file types
            file_path = unique_store_path(cast(str, file.filename))
            deduped_file_paths.append(file_path)
            file_store.save_file(
                file_name=file_path,
                content=file.file,
                display_name=file.filename,
                file_origin=FileOrigin.CONNECTOR,
                file_type=file.content_type or "text/plain",
            )

    except zipfile.BadZipFile as e:
        # BadZipFile is not a ValueError; without this branch a corrupt or
        # mislabeled archive would escape this handler instead of being
        # reported as a bad request.
        raise HTTPException(status_code=400, detail=f"Invalid zip file: {e}")
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
    return FileUploadResponse(file_paths=deduped_file_paths)
@router.post("/admin/connector/file/upload")
def upload_files_api(
    files: list[UploadFile],
    _: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
) -> FileUploadResponse:
    """HTTP entry point that hands the uploaded files to `upload_files`.

    Access is gated by the `current_curator_or_admin_user` dependency.
    """
    upload_result = upload_files(files=files, db_session=db_session)
    return upload_result
@router.get("/admin/connector")
def get_connectors_by_credential(
    _: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
    credential: int | None = None,
) -> list[ConnectorSnapshot]:
    """List connector snapshots, optionally limited to one credential id.

    Connectors whose source is `INGESTION_API` are always left out. When
    `credential` is given, only connectors associated with that credential
    id are returned.
    """
    snapshots = []
    for connector in fetch_connectors(db_session):
        # INGESTION_API is a system level connector that users cannot manage.
        if connector.source == DocumentSource.INGESTION_API:
            continue

        if credential is not None and not any(
            credential == cc_pair.credential_id for cc_pair in connector.credentials
        ):
            continue

        snapshots.append(ConnectorSnapshot.from_connector_db_model(connector))
    return snapshots
# Retrieves most recent failure cases for connectors that are currently failing
@router.get("/admin/connector/failed-indexing-status")
def get_currently_failed_indexing_status(
    secondary_index: bool = False,
    user: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
    get_editable: bool = Query(
        False, description="If true, return editable document sets"
    ),
) -> list[FailedConnectorIndexingStatus]:
    """Return the failing state of each cc_pair visible to the user.

    A cc_pair is reported when it has a latest FAILED index attempt that is
    not followed by a SUCCESS attempt with a later `time_updated`. The
    cc_pair named "DefaultCCPair" is never reported.

    Args:
        secondary_index: Forwarded to the index attempt lookups.
        user: The requesting user; limits which cc_pairs are considered.
        db_session: Database session used for every query.
        get_editable: Forwarded to the cc_pair lookup.

    Returns:
        One `FailedConnectorIndexingStatus` per failing cc_pair, in the
        order the cc_pairs were fetched.
    """
    # Get the latest failed indexing attempts
    latest_failed_indexing_attempts = get_latest_index_attempts_by_status(
        secondary_index=secondary_index,
        db_session=db_session,
        status=IndexingStatus.FAILED,
    )
    # Get the latest successful indexing attempts
    latest_successful_indexing_attempts = get_latest_index_attempts_by_status(
        secondary_index=secondary_index,
        db_session=db_session,
        status=IndexingStatus.SUCCESS,
    )
    # Get all connector credential pairs
    cc_pairs = get_connector_credential_pairs_for_user(
        db_session=db_session,
        user=user,
        get_editable=get_editable,
    )

    # Index the newest success per cc_pair once, so each failed attempt is
    # checked with a single lookup instead of a scan over every success.
    newest_success_by_cc_pair: dict[int, IndexAttempt] = {}
    for success_attempt in latest_successful_indexing_attempts:
        cc_pair_id = success_attempt.connector_credential_pair_id
        known = newest_success_by_cc_pair.get(cc_pair_id)
        if known is None or success_attempt.time_updated > known.time_updated:
            newest_success_by_cc_pair[cc_pair_id] = success_attempt

    # Map cc_pair id -> latest failed attempt, dropping failures that have a
    # more recent successful attempt.
    cc_pair_to_latest_index_attempt: dict[int, IndexAttempt] = {}
    for failed_attempt in latest_failed_indexing_attempts:
        cc_pair_id = failed_attempt.connector_credential_pair_id
        newer_success = newest_success_by_cc_pair.get(cc_pair_id)
        if (
            newer_success is not None
            and newer_success.time_updated > failed_attempt.time_updated
        ):
            continue
        cc_pair_to_latest_index_attempt[cc_pair_id] = failed_attempt

    indexing_statuses = []
    for cc_pair in cc_pairs:
        # Only cc_pairs that still have an unsuperseded failure are reported.
        latest_index_attempt = cc_pair_to_latest_index_attempt.get(cc_pair.id)
        if latest_index_attempt is None:
            continue

        # Skip DefaultCCPair
        if cc_pair.name == "DefaultCCPair":
            continue

        # The cc_pair is reported as deletable exactly when the check
        # returns None.
        deletion_check = check_deletion_attempt_is_allowed(
            connector_credential_pair=cc_pair,
            db_session=db_session,
            allow_scheduled=True,
        )
        indexing_statuses.append(
            FailedConnectorIndexingStatus(
                cc_pair_id=cc_pair.id,
                name=cc_pair.name,
                error_msg=latest_index_attempt.error_msg,
                connector_id=cc_pair.connector_id,
                credential_id=cc_pair.credential_id,
                is_deletable=deletion_check is None,
            )
        )
    return indexing_statuses
@router.get("/admin/connector/status")
def get_connector_status(
    user: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
) -> list[ConnectorStatus]:
    """Return a `ConnectorStatus` for each of the user's cc_pairs.

    The cc_pair named "DefaultCCPair" is skipped, as is any cc_pair whose
    connector or credential is missing. Each status carries the ids of the
    user groups related to that cc_pair.
    """
    cc_pairs = get_connector_credential_pairs_for_user(
        db_session=db_session,
        user=user,
        eager_load_connector=True,
        eager_load_credential=True,
    )

    relationships = get_cc_pair_groups_for_ids(
        db_session=db_session,
        cc_pair_ids=[cc_pair.id for cc_pair in cc_pairs],
    )

    # cc_pair id -> ids of the user groups related to it
    group_ids_by_cc_pair: dict[int, list[int]] = {}
    for relationship in relationships:
        group_ids = group_ids_by_cc_pair.setdefault(relationship.cc_pair_id, [])
        group_ids.append(relationship.user_group_id)

    statuses = []
    for cc_pair in cc_pairs:
        if (
            cc_pair.name == "DefaultCCPair"
            or not cc_pair.connector
            or not cc_pair.credential
        ):
            continue
        statuses.append(
            ConnectorStatus(
                cc_pair_id=cc_pair.id,
                name=cc_pair.name,
                connector=ConnectorSnapshot.from_connector_db_model(cc_pair.connector),
                credential=CredentialSnapshot.from_credential_db_model(
                    cc_pair.credential
                ),
                access_type=cc_pair.access_type,
                groups=group_ids_by_cc_pair.get(cc_pair.id, []),
            )
        )
    return statuses
@router.get("/admin/connector/indexing-status")
def get_connector_indexing_status(
    secondary_index: bool = False,
    user: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
    get_editable: bool = Query(
        False, description="If true, return editable document sets"
    ),
) -> list[ConnectorIndexingStatus]:
    """Return a `ConnectorIndexingStatus` for each of the user's cc_pairs.

    When `MOCK_CONNECTOR_FILE_PATH` is set, the statuses are read from that
    JSON file and returned directly. Otherwise the cc_pairs, their latest
    and latest finished index attempts, document counts and user-group
    relationships are fetched and combined into one status per cc_pair.
    The cc_pair named "DefaultCCPair" and cc_pairs whose connector or
    credential is missing are skipped. A VISITED_ADMIN_PAGE milestone is
    reported before returning.
    """
    tenant_id = get_current_tenant_id()

    if MOCK_CONNECTOR_FILE_PATH:
        import json

        with open(MOCK_CONNECTOR_FILE_PATH, "r") as mock_file:
            mock_entries = json.load(mock_file)
        return [ConnectorIndexingStatus(**entry) for entry in mock_entries]

    # NOTE: If the connector is deleting behind the scenes,
    # accessing cc_pairs can be inconsistent and members like
    # connector or credential may be None.
    # Additional checks are done to make sure the connector and credential still exist.
    # TODO: make this one query ... possibly eager load or wrap in a read transaction
    # to avoid the complexity of trying to error check throughout the function

    # see https://stackoverflow.com/questions/75758327/
    # sqlalchemy-method-connection-for-bind-is-already-in-progress
    # for why we can't pass in the current db_session to these functions
    (
        cc_pairs,
        latest_index_attempts,
        latest_finished_index_attempts,
    ) = run_functions_tuples_in_parallel(
        [
            (
                # Gets the connector/credential pairs for the user
                get_connector_credential_pairs_for_user_parallel,
                (user, get_editable, None, True, True, True),
            ),
            (
                # Gets the most recent index attempt for each connector/credential pair
                get_latest_index_attempts_parallel,
                (secondary_index, True, False),
            ),
            (
                # Gets the most recent FINISHED index attempt for each connector/credential pair
                get_latest_index_attempts_parallel,
                (secondary_index, True, True),
            ),
        ]
    )
    cc_pairs = cast(list[ConnectorCredentialPair], cc_pairs)
    latest_index_attempts = cast(list[IndexAttempt], latest_index_attempts)

    # Both attempt lookups are keyed by (connector_id, credential_id).
    latest_attempt_by_key = {}
    for attempt in latest_index_attempts:
        attempt_key = (
            attempt.connector_credential_pair.connector_id,
            attempt.connector_credential_pair.credential_id,
        )
        latest_attempt_by_key[attempt_key] = attempt

    latest_finished_attempt_by_key = {}
    for attempt in latest_finished_index_attempts:
        attempt_key = (
            attempt.connector_credential_pair.connector_id,
            attempt.connector_credential_pair.credential_id,
        )
        latest_finished_attempt_by_key[attempt_key] = attempt

    document_count_info, group_cc_pair_relationships = run_functions_tuples_in_parallel(
        [
            (
                get_document_counts_for_cc_pairs_parallel,
                (
                    [
                        ConnectorCredentialPairIdentifier(
                            connector_id=cc_pair.connector_id,
                            credential_id=cc_pair.credential_id,
                        )
                        for cc_pair in cc_pairs
                    ],
                ),
            ),
            (
                get_cc_pair_groups_for_ids_parallel,
                ([cc_pair.id for cc_pair in cc_pairs],),
            ),
        ]
    )
    document_count_info = cast(list[tuple[int, int, int]], document_count_info)
    group_cc_pair_relationships = cast(
        list[UserGroup__ConnectorCredentialPair], group_cc_pair_relationships
    )

    doc_count_by_key = {
        (connector_id, credential_id): cnt
        for connector_id, credential_id, cnt in document_count_info
    }

    # cc_pair id -> ids of the user groups related to it
    group_ids_by_cc_pair: dict[int, list[int]] = {}
    for relationship in group_cc_pair_relationships:
        group_ids = group_ids_by_cc_pair.setdefault(relationship.cc_pair_id, [])
        group_ids.append(relationship.user_group_id)

    # connector id -> ids of every cc_pair that uses the connector
    connector_to_cc_pair_ids: dict[int, list[int]] = {}
    for cc_pair in cc_pairs:
        sibling_ids = connector_to_cc_pair_ids.setdefault(cc_pair.connector_id, [])
        sibling_ids.append(cc_pair.id)

    if secondary_index:
        search_settings = get_secondary_search_settings(db_session)
    else:
        search_settings = get_current_search_settings(db_session)

    indexing_statuses: list[ConnectorIndexingStatus] = []
    for cc_pair in cc_pairs:
        # TODO remove this to enable ingestion API
        if cc_pair.name == "DefaultCCPair":
            continue

        connector = cc_pair.connector
        credential = cc_pair.credential
        if not connector or not credential:
            # This may happen if background deletion is happening
            continue

        # Indexing counts as in progress when the redis index for the
        # selected search settings reports itself as fenced.
        in_progress = False
        if search_settings:
            redis_index = RedisConnector(tenant_id, cc_pair.id).new_index(
                search_settings.id
            )
            if redis_index.fenced:
                in_progress = True

        pair_key = (connector.id, credential.id)
        latest_index_attempt = latest_attempt_by_key.get(pair_key)
        latest_finished_attempt = latest_finished_attempt_by_key.get(pair_key)

        # Best effort: any failure while reading the credential's user
        # (e.g. a DetachedInstanceError) leaves the owner email empty.
        owner_email = ""
        try:
            if credential.user:
                owner_email = credential.user.email
        except Exception:
            pass

        if latest_finished_attempt:
            last_finished_status = latest_finished_attempt.status
        else:
            last_finished_status = None

        if latest_index_attempt:
            last_status = latest_index_attempt.status
            latest_attempt_snapshot = IndexAttemptSnapshot.from_index_attempt_db_model(
                latest_index_attempt
            )
        else:
            last_status = None
            latest_attempt_snapshot = None

        indexing_statuses.append(
            ConnectorIndexingStatus(
                cc_pair_id=cc_pair.id,
                name=cc_pair.name,
                in_progress=in_progress,
                cc_pair_status=cc_pair.status,
                in_repeated_error_state=cc_pair.in_repeated_error_state,
                connector=ConnectorSnapshot.from_connector_db_model(
                    connector, connector_to_cc_pair_ids.get(connector.id, [])
                ),
                credential=CredentialSnapshot.from_credential_db_model(credential),
                access_type=cc_pair.access_type,
                owner=owner_email,
                groups=group_ids_by_cc_pair.get(cc_pair.id, []),
                last_finished_status=last_finished_status,
                last_status=last_status,
                last_success=cc_pair.last_successful_index_time,
                docs_indexed=doc_count_by_key.get(pair_key, 0),
                latest_index_attempt=latest_attempt_snapshot,
            )
        )

    # Visiting admin page brings the user to the current connectors page which calls this endpoint
    create_milestone_and_report(
        user=user,
        distinct_id=user.email if user else tenant_id or "N/A",
        event_type=MilestoneRecordType.VISITED_ADMIN_PAGE,
        properties=None,
        db_session=db_session,
    )
    return indexing_statuses
def _validate_connector_allowed(source: DocumentSource) -> None:
    """Raise if `source` is not one of the enabled connector types.

    `ENABLED_CONNECTOR_TYPES` is read as a comma-separated list. Entries and
    `source.value` are compared case-insensitively with underscores removed,
    and whitespace around each entry is ignored, so a value such as
    "Google_Drive, slack" enables both connectors. When the list has no
    usable entries, every connector type is allowed.

    Args:
        source: The connector source being created or updated.

    Raises:
        ValueError: If the list is non-empty and does not contain `source`.
    """
    normalized_entries = (
        entry.strip().lower().replace("_", "")
        for entry in ENABLED_CONNECTOR_TYPES.split(",")
    )
    # Drop entries that are empty after normalisation (e.g. trailing commas).
    enabled_types = {entry for entry in normalized_entries if entry}
    if not enabled_types:
        return

    if source.value.lower().replace("_", "") in enabled_types:
        return

    raise ValueError(
        "This connector type has been disabled by your system admin. "
        "Please contact them to get it enabled if you wish to use it."
    )
@router.post("/admin/connector")
def create_connector_from_model(
    connector_data: ConnectorUpdateRequest,
    user: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
) -> ObjectCreationIdResponse:
    """Create a connector from the request and return the creation response.

    The source is checked with `_validate_connector_allowed`, then the
    `validate_object_creation_for_user` hook (or its no-op fallback) is
    invoked before the connector is created. A CREATED_CONNECTOR milestone
    is reported afterwards.

    Raises:
        HTTPException: 400 carrying the message of any `ValueError` raised
            along the way; the error is logged first.
    """
    tenant_id = get_current_tenant_id()

    try:
        _validate_connector_allowed(connector_data.source)

        validate_creation = fetch_ee_implementation_or_noop(
            "onyx.db.user_group", "validate_object_creation_for_user", None
        )
        validate_creation(
            db_session=db_session,
            user=user,
            target_group_ids=connector_data.groups,
            object_is_public=connector_data.access_type == AccessType.PUBLIC,
            object_is_perm_sync=connector_data.access_type == AccessType.SYNC,
        )

        creation_response = create_connector(
            db_session=db_session,
            connector_data=connector_data.to_connector_base(),
        )

        create_milestone_and_report(
            user=user,
            distinct_id=user.email if user else tenant_id or "N/A",
            event_type=MilestoneRecordType.CREATED_CONNECTOR,
            properties=None,
            db_session=db_session,
        )
        return creation_response
    except ValueError as err:
        logger.error(f"Error creating connector: {err}")
        raise HTTPException(status_code=400, detail=str(err))
@router.post("/admin/connector-with-mock-credential")
def create_connector_with_mock_credential(
    connector_data: ConnectorUpdateRequest,
    user: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
) -> StatusResponse:
    """Create a connector plus an empty credential and link the two.

    After the `validate_object_creation_for_user` hook (or its no-op
    fallback) runs, the connector is created, a credential with an empty
    `credential_json` is created for the same source, the pair is validated
    and associated, a high-priority CHECK_FOR_INDEXING task is sent, and a
    CREATED_CONNECTOR milestone is reported.

    Returns:
        The response produced by `add_credential_to_connector`.

    Raises:
        HTTPException: 400 on `ConnectorValidationError` (message prefixed
            with "Connector validation error: ") or on `ValueError`.
    """
    tenant_id = get_current_tenant_id()

    validate_creation = fetch_ee_implementation_or_noop(
        "onyx.db.user_group", "validate_object_creation_for_user", None
    )
    validate_creation(
        db_session=db_session,
        user=user,
        target_group_ids=connector_data.groups,
        object_is_public=connector_data.access_type == AccessType.PUBLIC,
        object_is_perm_sync=connector_data.access_type == AccessType.SYNC,
    )

    try:
        _validate_connector_allowed(connector_data.source)

        created_connector = create_connector(
            db_session=db_session,
            connector_data=connector_data,
        )

        placeholder_credential = create_credential(
            credential_data=CredentialBase(
                credential_json={},
                admin_public=True,
                source=connector_data.source,
            ),
            user=user,
            db_session=db_session,
        )

        # Keep the ids of the freshly created connector and credential
        connector_id = cast(int, created_connector.id)
        credential_id = placeholder_credential.id

        validate_ccpair_for_user(
            connector_id=connector_id,
            credential_id=credential_id,
            db_session=db_session,
        )
        link_response = add_credential_to_connector(
            db_session=db_session,
            user=user,
            connector_id=connector_id,
            credential_id=credential_id,
            access_type=connector_data.access_type,
            cc_pair_name=connector_data.name,
            groups=connector_data.groups,
        )

        # trigger indexing immediately
        client_app.send_task(
            OnyxCeleryTask.CHECK_FOR_INDEXING,
            priority=OnyxCeleryPriority.HIGH,
            kwargs={"tenant_id": tenant_id},
        )

        logger.info(
            f"create_connector_with_mock_credential - running check_for_indexing: "
            f"cc_pair={link_response.data}"
        )

        create_milestone_and_report(
            user=user,
            distinct_id=user.email if user else tenant_id or "N/A",
            event_type=MilestoneRecordType.CREATED_CONNECTOR,
            properties=None,
            db_session=db_session,
        )
        return link_response

    except ConnectorValidationError as err:
        raise HTTPException(
            status_code=400, detail="Connector validation error: " + str(err)
        )
    except ValueError as err:
        raise HTTPException(status_code=400, detail=str(err))
@router.patch("/admin/connector/{connector_id}")
def update_connector_from_model(
    connector_id: int,
    connector_data: ConnectorUpdateRequest,
    user: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
) -> ConnectorSnapshot | StatusResponse[int]:
    """Update an existing connector and return a snapshot of the result.

    The source is checked with `_validate_connector_allowed` and the
    `validate_object_creation_for_user` hook (or its no-op fallback) is
    invoked before the update is applied.

    Raises:
        HTTPException: 400 carrying the message of a `ValueError` raised
            during validation or conversion; 404 when `update_connector`
            returns None for `connector_id`.
    """
    try:
        _validate_connector_allowed(connector_data.source)

        validate_creation = fetch_ee_implementation_or_noop(
            "onyx.db.user_group", "validate_object_creation_for_user", None
        )
        validate_creation(
            db_session=db_session,
            user=user,
            target_group_ids=connector_data.groups,
            object_is_public=connector_data.access_type == AccessType.PUBLIC,
            object_is_perm_sync=connector_data.access_type == AccessType.SYNC,
        )
        connector_base = connector_data.to_connector_base()
    except ValueError as err:
        raise HTTPException(status_code=400, detail=str(err))

    refreshed = update_connector(connector_id, connector_base, db_session)
    if refreshed is None:
        raise HTTPException(
            status_code=404, detail=f"Connector {connector_id} does not exist"
        )

    linked_credential_ids = [
        association.credential.id for association in refreshed.credentials
    ]
    return ConnectorSnapshot(
        id=refreshed.id,
        name=refreshed.name,
        source=refreshed.source,
        input_type=refreshed.input_type,
        connector_specific_config=refreshed.connector_specific_config,
        refresh_freq=refreshed.refresh_freq,
        prune_freq=refreshed.prune_freq,
        credential_ids=linked_credential_ids,
        indexing_start=refreshed.indexing_start,
        time_created=refreshed.time_created,
        time_updated=refreshed.time_updated,
    )
@router.delete("/admin/connector/{connector_id}", response_model=StatusResponse[int])
def delete_connector_by_id(
    connector_id: int,
    _: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
) -> StatusResponse[int]:
    """Delete the connector with the given id inside a single transaction.

    Returns whatever `delete_connector` returns.

    Raises:
        HTTPException: 400 if an `AssertionError` is raised while deleting.
    """
    try:
        # `begin()` scopes the deletion to one transaction block.
        with db_session.begin():
            deletion_status = delete_connector(
                db_session=db_session,
                connector_id=connector_id,
            )
        return deletion_status
    except AssertionError:
        raise HTTPException(status_code=400, detail="Connector is not deletable")
@router.post("/admin/connector/run-once")
def connector_run_once(
    run_info: RunConnectorRequest,
    _: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
) -> StatusResponse[int]:
    """Used to trigger indexing on a set of cc_pairs associated with a
    single connector.

    When the request names no credentials, every credential attached to the
    connector is used; otherwise the named credentials must all be attached to it.

    Raises:
        HTTPException: 404 if the connector's credentials cannot be looked up;
            400 if a requested credential is not attached to the connector, if
            no credentials remain, or if `trigger_indexing_for_cc_pair` raises
            `ValueError`.
    """
    tenant_id = get_current_tenant_id()
    connector_id = run_info.connector_id
    requested_ids = run_info.credential_ids

    try:
        attached_ids = get_connector_credential_ids(connector_id, db_session)
    except ValueError:
        raise HTTPException(
            status_code=404,
            detail=f"Connector by id {connector_id} does not exist.",
        )

    # Guard: an explicit selection must be a subset of what the connector has.
    if requested_ids and not set(requested_ids).issubset(set(attached_ids)):
        raise HTTPException(
            status_code=400,
            detail="Not all specified credentials are associated with connector",
        )

    credential_ids = requested_ids if requested_ids else attached_ids
    if not credential_ids:
        raise HTTPException(
            status_code=400,
            detail="Connector has no valid credentials, cannot create index attempts.",
        )

    try:
        num_triggers = trigger_indexing_for_cc_pair(
            credential_ids,
            connector_id,
            run_info.from_beginning,
            tenant_id,
            db_session,
        )
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))

    logger.info("connector_run_once - running check_for_indexing")

    return StatusResponse(
        success=True,
        message=f"Marked {num_triggers} index attempts with indexing triggers.",
        data=num_triggers,
    )
"""Endpoints for basic users"""
@router.get("/connector/gmail/authorize/{credential_id}")
def gmail_auth(
    response: Response, credential_id: str, _: User = Depends(current_user)
) -> AuthUrl:
    """Return the Gmail authorization URL for a credential.

    Also stores the credential id in an HTTP-only cookie (10 minute lifetime)
    so the callback endpoint can read it back.

    Raises:
        HTTPException: 400 if `credential_id` cannot be parsed as an integer.
    """
    # Parse before doing anything else: the path parameter is a free-form
    # string, and an unparsable value would otherwise escape as an unhandled
    # ValueError (HTTP 500) instead of a client error.
    try:
        parsed_credential_id = int(credential_id)
    except ValueError:
        raise HTTPException(
            status_code=400, detail="Credential id must be an integer."
        )

    # set a cookie that we can read in the callback (used for `verify_csrf`)
    response.set_cookie(
        key=_GMAIL_CREDENTIAL_ID_COOKIE_NAME,
        value=credential_id,
        httponly=True,
        max_age=600,
    )
    return AuthUrl(auth_url=get_auth_url(parsed_credential_id, DocumentSource.GMAIL))
@router.get("/connector/google-drive/authorize/{credential_id}")
def google_drive_auth(
    response: Response, credential_id: str, _: User = Depends(current_user)
) -> AuthUrl:
    """Return the Google Drive authorization URL for a credential.

    Also stores the credential id in an HTTP-only cookie (10 minute lifetime)
    so the callback endpoint can read it back.

    Raises:
        HTTPException: 400 if `credential_id` cannot be parsed as an integer.
    """
    # Parse before doing anything else: the path parameter is a free-form
    # string, and an unparsable value would otherwise escape as an unhandled
    # ValueError (HTTP 500) instead of a client error.
    try:
        parsed_credential_id = int(credential_id)
    except ValueError:
        raise HTTPException(
            status_code=400, detail="Credential id must be an integer."
        )

    # set a cookie that we can read in the callback (used for `verify_csrf`)
    response.set_cookie(
        key=_GOOGLE_DRIVE_CREDENTIAL_ID_COOKIE_NAME,
        value=credential_id,
        httponly=True,
        max_age=600,
    )
    return AuthUrl(
        auth_url=get_auth_url(parsed_credential_id, DocumentSource.GOOGLE_DRIVE)
    )
@router.get("/connector/gmail/callback")
def gmail_callback(
    request: Request,
    callback: GmailCallback = Depends(),
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> StatusResponse:
    """Finish the Gmail authorization flow and store the resulting tokens.

    The credential id is read from the request's Gmail credential-id cookie,
    checked against the callback `state` with `verify_csrf`, and then passed
    with the callback `code` to `update_credential_access_tokens`.

    Raises:
        HTTPException: 401 if the cookie is missing or not all digits;
            500 if no credentials come back from the token update.
    """
    cookie_value = request.cookies.get(_GMAIL_CREDENTIAL_ID_COOKIE_NAME)
    # A missing, empty or non-numeric cookie cannot identify a credential.
    if not (cookie_value and cookie_value.isdigit()):
        raise HTTPException(
            status_code=401, detail="Request did not pass CSRF verification."
        )

    credential_id = int(cookie_value)
    verify_csrf(credential_id, callback.state)

    refreshed_credentials: Credentials | None = update_credential_access_tokens(
        callback.code,
        credential_id,
        user,
        db_session,
        DocumentSource.GMAIL,
        GoogleOAuthAuthenticationMethod.UPLOADED,
    )
    if refreshed_credentials is not None:
        return StatusResponse(success=True, message="Updated Gmail access tokens")

    raise HTTPException(status_code=500, detail="Unable to fetch Gmail access tokens")
@router.get("/connector/google-drive/callback")
def google_drive_callback(
    request: Request,
    callback: GDriveCallback = Depends(),
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> StatusResponse:
    """Finish the Google Drive authorization flow and store the resulting tokens.

    The credential id is read from the request's Google Drive credential-id
    cookie, checked against the callback `state` with `verify_csrf`, and then
    passed with the callback `code` to `update_credential_access_tokens`.

    Raises:
        HTTPException: 401 if the cookie is missing or not all digits;
            500 if no credentials come back from the token update.
    """
    cookie_value = request.cookies.get(_GOOGLE_DRIVE_CREDENTIAL_ID_COOKIE_NAME)
    # A missing, empty or non-numeric cookie cannot identify a credential.
    if not (cookie_value and cookie_value.isdigit()):
        raise HTTPException(
            status_code=401, detail="Request did not pass CSRF verification."
        )

    credential_id = int(cookie_value)
    verify_csrf(credential_id, callback.state)

    refreshed_credentials: Credentials | None = update_credential_access_tokens(
        callback.code,
        credential_id,
        user,
        db_session,
        DocumentSource.GOOGLE_DRIVE,
        GoogleOAuthAuthenticationMethod.UPLOADED,
    )
    if refreshed_credentials is not None:
        return StatusResponse(
            success=True, message="Updated Google Drive access tokens"
        )

    raise HTTPException(
        status_code=500, detail="Unable to fetch Google Drive access tokens"
    )
@router.get("/connector")
def get_connectors(
    _: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> list[ConnectorSnapshot]:
    """List snapshots of all connectors except those with the INGESTION_API source."""
    snapshots: list[ConnectorSnapshot] = []
    for connector in fetch_connectors(db_session):
        # don't include INGESTION_API, as it's not a "real"
        # connector like those created by the user
        if connector.source == DocumentSource.INGESTION_API:
            continue
        snapshots.append(ConnectorSnapshot.from_connector_db_model(connector))
    return snapshots
@router.get("/connector/{connector_id}")
def get_connector_by_id(
    connector_id: int,
    _: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> ConnectorSnapshot | StatusResponse[int]:
    """Return a snapshot of the connector with the given id.

    Raises:
        HTTPException: 404 if `fetch_connector_by_id` finds no such connector.
    """
    found = fetch_connector_by_id(connector_id, db_session)
    if found is None:
        raise HTTPException(
            status_code=404, detail=f"Connector {connector_id} does not exist"
        )

    # One credential id per connector<->credential association row.
    linked_credential_ids = [link.credential.id for link in found.credentials]

    return ConnectorSnapshot(
        id=found.id,
        name=found.name,
        source=found.source,
        indexing_start=found.indexing_start,
        input_type=found.input_type,
        connector_specific_config=found.connector_specific_config,
        refresh_freq=found.refresh_freq,
        prune_freq=found.prune_freq,
        credential_ids=linked_credential_ids,
        time_created=found.time_created,
        time_updated=found.time_updated,
    )
class BasicCCPairInfo(BaseModel):
    # Minimal per-cc_pair status entry, as returned by the
    # `/connector-status` endpoint.

    # True when the cc_pair has a recorded `last_successful_index_time`.
    has_successful_run: bool
    # Source of the connector that the cc_pair belongs to.
    source: DocumentSource
@router.get("/connector-status")
def get_basic_connector_indexing_status(
    user: User = Depends(current_chat_accessible_user),
    db_session: Session = Depends(get_session),
) -> list[BasicCCPairInfo]:
    """Summarize, per cc_pair available to the user, whether it has ever indexed.

    cc_pairs whose connector source is INGESTION_API are left out.
    """
    visible_pairs = get_connector_credential_pairs_for_user(
        db_session=db_session,
        eager_load_connector=True,
        get_editable=False,
        user=user,
    )

    summaries: list[BasicCCPairInfo] = []
    for pair in visible_pairs:
        if pair.connector.source == DocumentSource.INGESTION_API:
            continue
        summaries.append(
            BasicCCPairInfo(
                has_successful_run=pair.last_successful_index_time is not None,
                source=pair.connector.source,
            )
        )
    return summaries
def trigger_indexing_for_cc_pair(
    specified_credential_ids: list[int],
    connector_id: int,
    from_beginning: bool,
    tenant_id: str,
    db_session: Session,
    is_user_file: bool = False,
) -> int:
    """Mark cc_pairs of a connector with an indexing trigger and send the check task.

    Args:
        specified_credential_ids: Credentials to index; when empty, every
            credential attached to the connector is used.
        connector_id: Connector whose cc_pairs should be indexed.
        from_beginning: Use `IndexingMode.REINDEX` instead of `IndexingMode.UPDATE`.
        tenant_id: Passed to the `CHECK_FOR_INDEXING` task as a keyword argument.
        db_session: Session used for all lookups and for marking the triggers.
        is_user_file: Send the task with HIGHEST rather than HIGH priority.

    Returns:
        The number of cc_pairs that were marked with an indexing trigger.

    Raises:
        ValueError: If the connector's credentials cannot be looked up, if a
            specified credential is not attached to the connector, or if no
            credentials remain.
    """
    try:
        attached_ids = get_connector_credential_ids(connector_id, db_session)
    except ValueError as e:
        raise ValueError(f"Connector by id {connector_id} does not exist: {str(e)}")

    # Guard: an explicit selection must be a subset of what the connector has.
    if specified_credential_ids and not set(specified_credential_ids).issubset(
        set(attached_ids)
    ):
        raise ValueError("Not all specified credentials are associated with connector")

    credential_ids = specified_credential_ids or attached_ids
    if not credential_ids:
        raise ValueError(
            "Connector has no valid credentials, cannot create index attempts."
        )

    # Prevents index attempts for cc pairs that already have an index attempt currently running
    busy_credential_ids = []
    for credential_id in credential_ids:
        unfinished_attempts = get_index_attempts_for_cc_pair(
            cc_pair_identifier=ConnectorCredentialPairIdentifier(
                connector_id=connector_id,
                credential_id=credential_id,
            ),
            only_current=True,
            db_session=db_session,
            disinclude_finished=True,
        )
        if unfinished_attempts:
            busy_credential_ids.append(credential_id)

    # Look up the cc_pair for every credential that is not currently busy.
    candidate_pairs = []
    for credential_id in credential_ids:
        if credential_id in busy_credential_ids:
            continue
        candidate_pairs.append(
            get_connector_credential_pair(
                db_session=db_session,
                connector_id=connector_id,
                credential_id=credential_id,
            )
        )

    indexing_mode = IndexingMode.REINDEX if from_beginning else IndexingMode.UPDATE

    num_triggers = 0
    for cc_pair in candidate_pairs:
        # The lookup may come back empty; such entries are not counted.
        if cc_pair is None:
            continue

        mark_ccpair_with_indexing_trigger(cc_pair.id, indexing_mode, db_session)
        num_triggers += 1

        logger.info(
            f"connector_run_once - marking cc_pair with indexing trigger: "
            f"connector={connector_id} "
            f"cc_pair={cc_pair.id} "
            f"indexing_trigger={indexing_mode}"
        )

    # run the beat task to pick up the triggers immediately
    if is_user_file:
        priority = OnyxCeleryPriority.HIGHEST
    else:
        priority = OnyxCeleryPriority.HIGH
    logger.info(f"Sending indexing check task with priority {priority}")
    client_app.send_task(
        OnyxCeleryTask.CHECK_FOR_INDEXING,
        priority=priority,
        kwargs={"tenant_id": tenant_id},
    )

    return num_triggers