Reduce background errors (#4004)

This commit is contained in:
pablonyx
2025-02-14 17:35:26 -08:00
committed by GitHub
parent 3ba65214b8
commit 697f8bc1c6
6 changed files with 46 additions and 16 deletions

View File

@@ -190,9 +190,9 @@ def _build_connector_start_latency_metric(
desired_start_time = cc_pair.connector.time_created
else:
if not cc_pair.connector.refresh_freq:
task_logger.error(
"Found non-initial index attempt for connector "
"without refresh_freq. This should never happen."
task_logger.debug(
"Connector has no refresh_freq and this is a non-initial index attempt. "
"Assuming user manually triggered indexing, so we'll skip start latency metric."
)
return None

View File

@@ -78,6 +78,10 @@ logger = setup_logger()
def check_for_vespa_sync_task(self: Task, *, tenant_id: str | None) -> bool | None:
"""Runs periodically to check if any document needs syncing.
Generates sets of tasks for Celery if syncing is needed."""
# Useful for debugging timing issues with reacquisitions. TODO: remove once more generalized logging is in place
task_logger.info("check_for_vespa_sync_task started")
time_start = time.monotonic()
r = get_redis_client(tenant_id=tenant_id)
@@ -492,13 +496,21 @@ def monitor_document_set_taskset(
task_logger.info(
f"Successfully synced document set: document_set={document_set_id}"
)
update_sync_record_status(
db_session=db_session,
entity_id=document_set_id,
sync_type=SyncType.DOCUMENT_SET,
sync_status=SyncStatus.SUCCESS,
num_docs_synced=initial_count,
)
try:
update_sync_record_status(
db_session=db_session,
entity_id=document_set_id,
sync_type=SyncType.DOCUMENT_SET,
sync_status=SyncStatus.SUCCESS,
num_docs_synced=initial_count,
)
except Exception:
task_logger.exception(
"update_sync_record_status exceptioned. "
f"document_set_id={document_set_id} "
"Resetting document set regardless."
)
rds.reset()

View File

@@ -302,7 +302,7 @@ class GoogleDriveConnector(LoadConnector, PollConnector, SlimConnector):
if e.status_code == 401:
# fail gracefully, let the other impersonations continue
# one user without access shouldn't block the entire connector
logger.exception(
logger.warning(
f"User '{user_email}' does not have access to the drive APIs."
)
return

View File

@@ -350,13 +350,14 @@ def delete_chat_session(
user_id: UUID | None,
chat_session_id: UUID,
db_session: Session,
include_deleted: bool = False,
hard_delete: bool = HARD_DELETE_CHATS,
) -> None:
chat_session = get_chat_session_by_id(
chat_session_id=chat_session_id, user_id=user_id, db_session=db_session
)
if chat_session.deleted:
if chat_session.deleted and not include_deleted:
raise ValueError("Cannot delete an already deleted chat session")
if hard_delete:
@@ -380,7 +381,15 @@ def delete_chat_sessions_older_than(days_old: int, db_session: Session) -> None:
).fetchall()
for user_id, session_id in old_sessions:
delete_chat_session(user_id, session_id, db_session, hard_delete=True)
try:
delete_chat_session(
user_id, session_id, db_session, include_deleted=True, hard_delete=True
)
except Exception:
logger.exception(
"delete_chat_session exceptioned. "
f"user_id={user_id} session_id={session_id}"
)
def get_chat_message(

View File

@@ -238,12 +238,17 @@ async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]:
await close_auth_limiter()
def log_http_error(_: Request, exc: Exception) -> JSONResponse:
def log_http_error(request: Request, exc: Exception) -> JSONResponse:
status_code = getattr(exc, "status_code", 500)
if isinstance(exc, BasicAuthenticationError):
# For BasicAuthenticationError, just log a brief message without stack trace (almost always spam)
logger.warning(f"Authentication failed: {str(exc)}")
# For BasicAuthenticationError, just log a brief message without stack trace
# (almost always spammy)
logger.debug(f"Authentication failed: {str(exc)}")
elif status_code == 404 and request.url.path == "/metrics":
# Log 404 errors for the /metrics endpoint with debug level
logger.debug(f"404 error for /metrics endpoint: {str(exc)}")
elif status_code >= 400:
error_msg = f"{str(exc)}\n"

View File

@@ -1,6 +1,7 @@
from onyx.configs.constants import KV_SETTINGS_KEY
from onyx.configs.constants import OnyxRedisLocks
from onyx.key_value_store.factory import get_kv_store
from onyx.key_value_store.interface import KvKeyNotFoundError
from onyx.redis.redis_pool import get_redis_client
from onyx.server.settings.models import Settings
from onyx.utils.logger import setup_logger
@@ -17,6 +18,9 @@ def load_settings() -> Settings:
settings = (
Settings.model_validate(stored_settings) if stored_settings else Settings()
)
except KvKeyNotFoundError:
logger.error(f"No settings found in KV store for key: {KV_SETTINGS_KEY}")
settings = Settings()
except Exception as e:
logger.error(f"Error loading settings from KV store: {str(e)}")
settings = Settings()