diff --git a/backend/onyx/background/celery/tasks/monitoring/tasks.py b/backend/onyx/background/celery/tasks/monitoring/tasks.py index 5fa1440ca88..5fc6dbb455c 100644 --- a/backend/onyx/background/celery/tasks/monitoring/tasks.py +++ b/backend/onyx/background/celery/tasks/monitoring/tasks.py @@ -190,9 +190,9 @@ def _build_connector_start_latency_metric( desired_start_time = cc_pair.connector.time_created else: if not cc_pair.connector.refresh_freq: - task_logger.error( - "Found non-initial index attempt for connector " - "without refresh_freq. This should never happen." + task_logger.debug( + "Connector has no refresh_freq and this is a non-initial index attempt. " + "Assuming user manually triggered indexing, so we'll skip start latency metric." ) return None diff --git a/backend/onyx/background/celery/tasks/vespa/tasks.py b/backend/onyx/background/celery/tasks/vespa/tasks.py index d033f9e9282..d15481ca09a 100644 --- a/backend/onyx/background/celery/tasks/vespa/tasks.py +++ b/backend/onyx/background/celery/tasks/vespa/tasks.py @@ -78,6 +78,10 @@ logger = setup_logger() def check_for_vespa_sync_task(self: Task, *, tenant_id: str | None) -> bool | None: """Runs periodically to check if any document needs syncing. Generates sets of tasks for Celery if syncing is needed.""" + + # Useful for debugging timing issues with reacquisitions. TODO: remove once more generalized logging is in place + task_logger.info("check_for_vespa_sync_task started") + time_start = time.monotonic() r = get_redis_client(tenant_id=tenant_id) @@ -492,13 +496,21 @@ def monitor_document_set_taskset( task_logger.info( f"Successfully synced document set: document_set={document_set_id}" ) - update_sync_record_status( - db_session=db_session, - entity_id=document_set_id, - sync_type=SyncType.DOCUMENT_SET, - sync_status=SyncStatus.SUCCESS, - num_docs_synced=initial_count, - ) + + try: + update_sync_record_status( + db_session=db_session, + entity_id=document_set_id, + sync_type=SyncType.DOCUMENT_SET, + sync_status=SyncStatus.SUCCESS, + num_docs_synced=initial_count, + ) + except Exception: + task_logger.exception( + "update_sync_record_status exceptioned. " + f"document_set_id={document_set_id} " + "Resetting document set regardless." + ) rds.reset() diff --git a/backend/onyx/connectors/google_drive/connector.py b/backend/onyx/connectors/google_drive/connector.py index b04cb58279c..1287a896076 100644 --- a/backend/onyx/connectors/google_drive/connector.py +++ b/backend/onyx/connectors/google_drive/connector.py @@ -302,7 +302,7 @@ class GoogleDriveConnector(LoadConnector, PollConnector, SlimConnector): if e.status_code == 401: # fail gracefully, let the other impersonations continue # one user without access shouldn't block the entire connector - logger.exception( + logger.warning( f"User '{user_email}' does not have access to the drive APIs." ) return diff --git a/backend/onyx/db/chat.py b/backend/onyx/db/chat.py index 93258a8b126..a620e8d2443 100644 --- a/backend/onyx/db/chat.py +++ b/backend/onyx/db/chat.py @@ -350,13 +350,14 @@ def delete_chat_session( user_id: UUID | None, chat_session_id: UUID, db_session: Session, + include_deleted: bool = False, hard_delete: bool = HARD_DELETE_CHATS, ) -> None: chat_session = get_chat_session_by_id( chat_session_id=chat_session_id, user_id=user_id, db_session=db_session ) - if chat_session.deleted: + if chat_session.deleted and not include_deleted: raise ValueError("Cannot delete an already deleted chat session") if hard_delete: @@ -380,7 +381,15 @@ def delete_chat_sessions_older_than(days_old: int, db_session: Session) -> None: ).fetchall() for user_id, session_id in old_sessions: - delete_chat_session(user_id, session_id, db_session, hard_delete=True) + try: + delete_chat_session( + user_id, session_id, db_session, include_deleted=True, hard_delete=True + ) + except Exception: + logger.exception( + "delete_chat_session exceptioned. " + f"user_id={user_id} session_id={session_id}" + ) def get_chat_message( diff --git a/backend/onyx/main.py b/backend/onyx/main.py index c8d0e2d1482..594870b363f 100644 --- a/backend/onyx/main.py +++ b/backend/onyx/main.py @@ -238,12 +238,17 @@ async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]: await close_auth_limiter() -def log_http_error(_: Request, exc: Exception) -> JSONResponse: +def log_http_error(request: Request, exc: Exception) -> JSONResponse: status_code = getattr(exc, "status_code", 500) if isinstance(exc, BasicAuthenticationError): - # For BasicAuthenticationError, just log a brief message without stack trace (almost always spam) - logger.warning(f"Authentication failed: {str(exc)}") + # For BasicAuthenticationError, just log a brief message without stack trace + # (almost always spammy) + logger.debug(f"Authentication failed: {str(exc)}") + + elif status_code == 404 and request.url.path == "/metrics": + # Log 404 errors for the /metrics endpoint with debug level + logger.debug(f"404 error for /metrics endpoint: {str(exc)}") elif status_code >= 400: error_msg = f"{str(exc)}\n" diff --git a/backend/onyx/server/settings/store.py b/backend/onyx/server/settings/store.py index f21af46c2e9..df693043e1c 100644 --- a/backend/onyx/server/settings/store.py +++ b/backend/onyx/server/settings/store.py @@ -1,6 +1,7 @@ from onyx.configs.constants import KV_SETTINGS_KEY from onyx.configs.constants import OnyxRedisLocks from onyx.key_value_store.factory import get_kv_store +from onyx.key_value_store.interface import KvKeyNotFoundError from onyx.redis.redis_pool import get_redis_client from onyx.server.settings.models import Settings from onyx.utils.logger import setup_logger @@ -17,6 +18,9 @@ def load_settings() -> Settings: settings = ( Settings.model_validate(stored_settings) if stored_settings else Settings() ) + except KvKeyNotFoundError: + logger.error(f"No settings found in KV store for key: {KV_SETTINGS_KEY}") + settings = Settings() except Exception as e: logger.error(f"Error loading settings from KV store: {str(e)}") settings = Settings()