Mirror of https://github.com/danswer-ai/danswer.git
Reduce errors in workers (#3962)
@@ -478,14 +478,15 @@ def update_external_document_permissions_task(
     )
     doc_id = document_external_access.doc_id
     external_access = document_external_access.external_access

     try:
         with get_session_with_tenant(tenant_id) as db_session:
             # Add the users to the DB if they don't exist
             batch_add_ext_perm_user_if_not_exists(
                 db_session=db_session,
                 emails=list(external_access.external_user_emails),
+                continue_on_error=True,
             )
-            # Then we upsert the document's external permissions in postgres
+            # Then upsert the document's external permissions
             created_new_doc = upsert_document_external_perms(
                 db_session=db_session,
                 doc_id=doc_id,
@@ -509,11 +510,11 @@ def update_external_document_permissions_task(
                 f"action=update_permissions "
                 f"elapsed={elapsed:.2f}"
             )

     except Exception:
         task_logger.exception(
             f"Exception in update_external_document_permissions_task: "
-            f"connector_id={connector_id} "
-            f"doc_id={doc_id}"
+            f"connector_id={connector_id} doc_id={doc_id}"
         )
         return False
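In the updated task, a failed permission update is logged once via task_logger.exception and the task returns False instead of letting the exception propagate out of the worker. A minimal, self-contained sketch of this catch-log-return pattern (the function and its arguments below are illustrative, not the repo's actual API):

```python
import logging
from typing import Callable

task_logger = logging.getLogger(__name__)

def run_permission_update(
    work: Callable[[], None], connector_id: int, doc_id: str
) -> bool:
    """Run `work`; on failure, log once and signal via the return value
    instead of letting the exception kill the worker."""
    try:
        work()
        return True
    except Exception:
        # exception() logs the message plus the full traceback
        task_logger.exception(
            f"Exception in run_permission_update: "
            f"connector_id={connector_id} doc_id={doc_id}"
        )
        return False
```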
@@ -421,6 +421,7 @@ def _collect_sync_metrics(db_session: Session, redis_std: Redis) -> list[Metric]
     - Throughput (docs/min) (only if success)
     - Raw start/end times for each sync
     """
+
     one_hour_ago = get_db_current_time(db_session) - timedelta(hours=1)

     # Get all sync records that ended in the last hour
@@ -588,6 +589,10 @@ def _collect_sync_metrics(db_session: Session, redis_std: Redis) -> list[Metric]
             entity = db_session.scalar(
                 select(UserGroup).where(UserGroup.id == sync_record.entity_id)
             )
+        else:
+            # Only user groups and document set sync records have
+            # an associated entity we can use for latency metrics
+            continue

         if entity is None:
             task_logger.error(
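The added else branch means sync records whose type carries no associated entity are now skipped outright instead of falling through to the `entity is None` error path below. A self-contained sketch of that guard (the SyncType values and lookup tables are illustrative):

```python
from dataclasses import dataclass
from enum import Enum

class SyncType(str, Enum):
    USER_GROUP = "user_group"
    DOCUMENT_SET = "document_set"
    EXTERNAL_PERMISSIONS = "external_permissions"  # illustrative extra type

@dataclass
class SyncRecord:
    sync_type: SyncType
    entity_id: int

user_groups = {1: "engineering"}
document_sets = {2: "handbook"}

for record in [SyncRecord(SyncType.USER_GROUP, 1),
               SyncRecord(SyncType.EXTERNAL_PERMISSIONS, 9)]:
    if record.sync_type is SyncType.DOCUMENT_SET:
        entity = document_sets.get(record.entity_id)
    elif record.sync_type is SyncType.USER_GROUP:
        entity = user_groups.get(record.entity_id)
    else:
        # Only user groups and document sets have an entity usable for
        # latency metrics; skip rather than logging a spurious error.
        continue
    if entity is None:
        print(f"error: missing entity for {record}")
```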
@@ -778,7 +783,7 @@ def cloud_check_alembic() -> bool | None:

             tenant_to_revision[tenant_id] = result_scalar
         except Exception:
-            task_logger.warning(f"Tenant {tenant_id} has no revision!")
+            task_logger.error(f"Tenant {tenant_id} has no revision!")
             tenant_to_revision[tenant_id] = ALEMBIC_NULL_REVISION

     # get the total count of each revision
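Once every tenant is mapped to a revision, with ALEMBIC_NULL_REVISION standing in for tenants whose revision could not be read, the next step tallies how many tenants sit on each revision. A hedged sketch of that tally (the sentinel's actual value is an assumption here, only its name comes from the diff):

```python
from collections import Counter

ALEMBIC_NULL_REVISION = "null"  # assumed sentinel value

tenant_to_revision = {
    "tenant_a": "abc123",
    "tenant_b": "abc123",
    "tenant_c": ALEMBIC_NULL_REVISION,  # tenant whose revision lookup failed
}

# get the total count of each revision
revision_counts = Counter(tenant_to_revision.values())
print(revision_counts.most_common())  # [('abc123', 2), ('null', 1)]
```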
@@ -39,19 +39,6 @@ def get_message_link(
     return permalink


-def _make_slack_api_call_logged(
-    call: Callable[..., SlackResponse],
-) -> Callable[..., SlackResponse]:
-    @wraps(call)
-    def logged_call(**kwargs: Any) -> SlackResponse:
-        logger.debug(f"Making call to Slack API '{call.__name__}' with args '{kwargs}'")
-        result = call(**kwargs)
-        logger.debug(f"Call to Slack API '{call.__name__}' returned '{result}'")
-        return result
-
-    return logged_call
-
-
 def _make_slack_api_call_paginated(
     call: Callable[..., SlackResponse],
 ) -> Callable[..., Generator[dict[str, Any], None, None]]:
@@ -127,18 +114,14 @@ def make_slack_api_rate_limited(
 def make_slack_api_call_w_retries(
     call: Callable[..., SlackResponse], **kwargs: Any
 ) -> SlackResponse:
-    return basic_retry_wrapper(
-        make_slack_api_rate_limited(_make_slack_api_call_logged(call))
-    )(**kwargs)
+    return basic_retry_wrapper(make_slack_api_rate_limited(call))(**kwargs)


 def make_paginated_slack_api_call_w_retries(
     call: Callable[..., SlackResponse], **kwargs: Any
 ) -> Generator[dict[str, Any], None, None]:
     return _make_slack_api_call_paginated(
-        basic_retry_wrapper(
-            make_slack_api_rate_limited(_make_slack_api_call_logged(call))
-        )
+        basic_retry_wrapper(make_slack_api_rate_limited(call))
     )(**kwargs)
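Both helpers now compose just two layers, retry around rate limiting, instead of three; the `_make_slack_api_call_logged` layer that debug-logged every call's arguments and results is gone. A self-contained sketch of the two-layer composition (the wrapper internals below are illustrative stand-ins for the repo's real `basic_retry_wrapper` and `make_slack_api_rate_limited`):

```python
import time
from typing import Any, Callable

def basic_retry_wrapper(call: Callable[..., dict]) -> Callable[..., dict]:
    # Illustrative retry layer: three attempts with exponential backoff.
    def wrapped(**kwargs: Any) -> dict:
        for attempt in range(3):
            try:
                return call(**kwargs)
            except Exception:
                if attempt == 2:
                    raise
                time.sleep(2**attempt)
        raise AssertionError("unreachable")
    return wrapped

def make_slack_api_rate_limited(call: Callable[..., dict]) -> Callable[..., dict]:
    # Illustrative rate-limit layer; the real one inspects Slack responses.
    def wrapped(**kwargs: Any) -> dict:
        time.sleep(0.05)
        return call(**kwargs)
    return wrapped

def make_slack_api_call_w_retries(call: Callable[..., dict], **kwargs: Any) -> dict:
    # Same shape as the simplified helper above: retry(rate_limited(call))
    return basic_retry_wrapper(make_slack_api_rate_limited(call))(**kwargs)

print(make_slack_api_call_w_retries(lambda **kw: {"ok": True, **kw}, channel="C123"))
```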
@@ -6,6 +6,7 @@ from fastapi import HTTPException
 from fastapi_users.password import PasswordHelper
 from sqlalchemy import func
 from sqlalchemy import select
+from sqlalchemy.exc import IntegrityError
 from sqlalchemy.orm import Session
 from sqlalchemy.sql import expression
 from sqlalchemy.sql.elements import ColumnElement
@@ -274,7 +275,7 @@ def _generate_ext_permissioned_user(email: str) -> User:


 def batch_add_ext_perm_user_if_not_exists(
-    db_session: Session, emails: list[str]
+    db_session: Session, emails: list[str], continue_on_error: bool = False
 ) -> list[User]:
     lower_emails = [email.lower() for email in emails]
     found_users, missing_lower_emails = _get_users_by_emails(db_session, lower_emails)
@@ -283,10 +284,23 @@ def batch_add_ext_perm_user_if_not_exists(
     for email in missing_lower_emails:
         new_users.append(_generate_ext_permissioned_user(email=email))

-    db_session.add_all(new_users)
-    db_session.commit()
-
-    return found_users + new_users
+    try:
+        db_session.add_all(new_users)
+        db_session.commit()
+    except IntegrityError:
+        db_session.rollback()
+        if not continue_on_error:
+            raise
+        for user in new_users:
+            try:
+                db_session.add(user)
+                db_session.commit()
+            except IntegrityError:
+                db_session.rollback()
+                continue
+    # Fetch all users again to ensure we have the most up-to-date list
+    all_users, _ = _get_users_by_emails(db_session, lower_emails)
+    return all_users


 def delete_user_from_db(
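The rewritten helper tries one batch insert first and, if the commit hits an IntegrityError (for example, a concurrent worker already created one of the users), rolls back and, when continue_on_error is set, retries row by row so conflicting rows are skipped instead of failing the whole batch. A runnable sketch of the same pattern against an in-memory SQLite database (the model and session setup are illustrative):

```python
from sqlalchemy import Column, Integer, String, create_engine
from sqlalchemy.exc import IntegrityError
from sqlalchemy.orm import Session, declarative_base

Base = declarative_base()

class User(Base):
    __tablename__ = "users"
    id = Column(Integer, primary_key=True)
    email = Column(String, unique=True, nullable=False)

def add_users(
    db_session: Session, emails: list[str], continue_on_error: bool = False
) -> None:
    new_users = [User(email=email) for email in emails]
    try:
        db_session.add_all(new_users)
        db_session.commit()
    except IntegrityError:
        db_session.rollback()
        if not continue_on_error:
            raise
        # Per-row fallback: conflicting rows are skipped, the rest land.
        for user in new_users:
            try:
                db_session.add(user)
                db_session.commit()
            except IntegrityError:
                db_session.rollback()

engine = create_engine("sqlite://")
Base.metadata.create_all(engine)
with Session(engine) as session:
    session.add(User(email="a@example.com"))
    session.commit()
    add_users(session, ["a@example.com", "b@example.com"], continue_on_error=True)
    print(session.query(User).count())  # prints 2: duplicate skipped, new row added
```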
@@ -17,6 +17,7 @@ from uuid import UUID

 import httpx  # type: ignore
 import requests  # type: ignore
+from retry import retry

 from onyx.configs.chat_configs import DOC_TIME_DECAY
 from onyx.configs.chat_configs import NUM_RETURNED_HITS
@@ -549,6 +550,11 @@ class VespaIndex(DocumentIndex):
             time.monotonic() - update_start,
         )

+    @retry(
+        tries=3,
+        delay=1,
+        backoff=2,
+    )
     def _update_single_chunk(
         self,
         doc_chunk_id: UUID,
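`retry` here is the decorator from the PyPI `retry` package, imported earlier in this commit: with tries=3, delay=1, backoff=2, a raising call is attempted up to three times, sleeping 1 s after the first failure and 2 s after the second. A small sketch of that schedule:

```python
from retry import retry

attempts = 0

@retry(tries=3, delay=1, backoff=2)
def flaky() -> str:
    global attempts
    attempts += 1
    if attempts < 3:
        raise RuntimeError("transient failure")
    return "ok"

print(flaky(), "after", attempts, "attempts")  # ok after 3 attempts (~3 s of waits)
```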
@@ -559,6 +565,7 @@ class VespaIndex(DocumentIndex):
     ) -> None:
         """
         Update a single "chunk" (document) in Vespa using its chunk ID.
+        Retries if we encounter transient HTTPStatusError (e.g., overload).
         """

         update_dict: dict[str, dict] = {"fields": {}}
@@ -567,13 +574,11 @@ class VespaIndex(DocumentIndex):
             update_dict["fields"][BOOST] = {"assign": fields.boost}

         if fields.document_sets is not None:
-            # WeightedSet<string> needs a map { item: weight, ... }
             update_dict["fields"][DOCUMENT_SETS] = {
                 "assign": {document_set: 1 for document_set in fields.document_sets}
             }

         if fields.access is not None:
-            # Similar to above
             update_dict["fields"][ACCESS_CONTROL_LIST] = {
                 "assign": {acl_entry: 1 for acl_entry in fields.access.to_acl()}
             }
@@ -585,7 +590,10 @@ class VespaIndex(DocumentIndex):
             logger.error("Update request received but nothing to update.")
             return

-        vespa_url = f"{DOCUMENT_ID_ENDPOINT.format(index_name=index_name)}/{doc_chunk_id}?create=true"
+        vespa_url = (
+            f"{DOCUMENT_ID_ENDPOINT.format(index_name=index_name)}/{doc_chunk_id}"
+            "?create=true"
+        )

         try:
             resp = http_client.put(
@@ -595,8 +603,11 @@ class VespaIndex(DocumentIndex):
             )
             resp.raise_for_status()
         except httpx.HTTPStatusError as e:
-            error_message = f"Failed to update doc chunk {doc_chunk_id} (doc_id={doc_id}). Details: {e.response.text}"
-            logger.error(error_message)
+            logger.error(
+                f"Failed to update doc chunk {doc_chunk_id} (doc_id={doc_id}). "
+                f"Details: {e.response.text}"
+            )
+            # Re-raise so the @retry decorator will catch and retry
             raise

     def update_single(
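The update body uses Vespa's partial-update "assign" syntax, and the ?create=true query parameter makes Vespa create the document if it does not exist instead of returning an error. A hedged sketch of issuing such a PUT with httpx (the endpoint URL, namespace, and field names are illustrative placeholders, not the repo's actual configuration):

```python
import httpx

doc_chunk_id = "00000000-0000-0000-0000-000000000000"
index_name = "danswer_chunk"  # illustrative index/doctype name
vespa_url = (
    f"http://localhost:8080/document/v1/default/{index_name}/docid/{doc_chunk_id}"
    "?create=true"  # create the document if it does not exist yet
)

# WeightedSet<string> fields take an {item: weight} map under "assign"
update_dict = {"fields": {"document_sets": {"assign": {"some_set": 1}}}}

with httpx.Client() as http_client:
    try:
        resp = http_client.put(
            vespa_url,
            headers={"Content-Type": "application/json"},
            json=update_dict,
        )
        resp.raise_for_status()
    except httpx.HTTPStatusError as e:
        # Log and re-raise so a decorator like @retry can take another pass
        print(f"Failed to update doc chunk {doc_chunk_id}. Details: {e.response.text}")
        raise
```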