Reduce errors in workers (#3962)
@@ -478,14 +478,15 @@ def update_external_document_permissions_task(
     )
     doc_id = document_external_access.doc_id
     external_access = document_external_access.external_access

     try:
         with get_session_with_tenant(tenant_id) as db_session:
             # Add the users to the DB if they don't exist
             batch_add_ext_perm_user_if_not_exists(
                 db_session=db_session,
                 emails=list(external_access.external_user_emails),
+                continue_on_error=True,
             )
-            # Then we upsert the document's external permissions in postgres
+            # Then upsert the document's external permissions
             created_new_doc = upsert_document_external_perms(
                 db_session=db_session,
                 doc_id=doc_id,

@@ -509,11 +510,11 @@ def update_external_document_permissions_task(
                 f"action=update_permissions "
                 f"elapsed={elapsed:.2f}"
             )

     except Exception:
         task_logger.exception(
             f"Exception in update_external_document_permissions_task: "
-            f"connector_id={connector_id} "
-            f"doc_id={doc_id}"
+            f"connector_id={connector_id} doc_id={doc_id}"
         )
         return False

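The two hunks above tighten the worker's failure path: the exception log now carries connector_id and doc_id on a single line, and the task reports failure by returning False rather than letting the exception escape the worker. A minimal sketch of the pattern, with hypothetical names standing in for the real task body:

import logging

logging.basicConfig(level=logging.INFO)
task_logger = logging.getLogger("celery.task")

def update_permissions_safely(connector_id: int, doc_id: str) -> bool:
    try:
        # stand-in for the real permission-upsert work
        raise RuntimeError("simulated transient failure")
    except Exception:
        # log the full traceback with enough context to locate the document,
        # then signal failure via the return value instead of re-raising
        task_logger.exception(
            f"Exception in update_external_document_permissions_task: "
            f"connector_id={connector_id} doc_id={doc_id}"
        )
        return False

print(update_permissions_safely(connector_id=1, doc_id="doc-42"))  # False
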
@@ -421,6 +421,7 @@ def _collect_sync_metrics(db_session: Session, redis_std: Redis) -> list[Metric]
     - Throughput (docs/min) (only if success)
     - Raw start/end times for each sync
     """

     one_hour_ago = get_db_current_time(db_session) - timedelta(hours=1)

     # Get all sync records that ended in the last hour

@@ -588,6 +589,10 @@ def _collect_sync_metrics(db_session: Session, redis_std: Redis) -> list[Metric]
             entity = db_session.scalar(
                 select(UserGroup).where(UserGroup.id == sync_record.entity_id)
             )
+        else:
+            # Only user groups and document set sync records have
+            # an associated entity we can use for latency metrics
+            continue

         if entity is None:
             task_logger.error(

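The added else branch keeps sync records of any other type from falling through to the `entity is None` check below and producing a spurious error log; such records are now skipped as soon as it is clear they have no associated entity.
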
@@ -778,7 +783,7 @@ def cloud_check_alembic() -> bool | None:

             tenant_to_revision[tenant_id] = result_scalar
         except Exception:
-            task_logger.warning(f"Tenant {tenant_id} has no revision!")
+            task_logger.error(f"Tenant {tenant_id} has no revision!")
             tenant_to_revision[tenant_id] = ALEMBIC_NULL_REVISION

     # get the total count of each revision

@@ -39,19 +39,6 @@ def get_message_link(
     return permalink


-def _make_slack_api_call_logged(
-    call: Callable[..., SlackResponse],
-) -> Callable[..., SlackResponse]:
-    @wraps(call)
-    def logged_call(**kwargs: Any) -> SlackResponse:
-        logger.debug(f"Making call to Slack API '{call.__name__}' with args '{kwargs}'")
-        result = call(**kwargs)
-        logger.debug(f"Call to Slack API '{call.__name__}' returned '{result}'")
-        return result
-
-    return logged_call
-
-
 def _make_slack_api_call_paginated(
     call: Callable[..., SlackResponse],
 ) -> Callable[..., Generator[dict[str, Any], None, None]]:

@@ -127,18 +114,14 @@ def make_slack_api_rate_limited(
 def make_slack_api_call_w_retries(
     call: Callable[..., SlackResponse], **kwargs: Any
 ) -> SlackResponse:
-    return basic_retry_wrapper(
-        make_slack_api_rate_limited(_make_slack_api_call_logged(call))
-    )(**kwargs)
+    return basic_retry_wrapper(make_slack_api_rate_limited(call))(**kwargs)


 def make_paginated_slack_api_call_w_retries(
     call: Callable[..., SlackResponse], **kwargs: Any
 ) -> Generator[dict[str, Any], None, None]:
     return _make_slack_api_call_paginated(
-        basic_retry_wrapper(
-            make_slack_api_rate_limited(_make_slack_api_call_logged(call))
-        )
+        basic_retry_wrapper(make_slack_api_rate_limited(call))
     )(**kwargs)

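With the per-call debug-logging layer removed, the public helpers compose just two wrappers: rate limiting plus basic retries. A hypothetical usage sketch, assuming the helpers above are importable and using placeholder credentials and channel IDs:

from slack_sdk import WebClient

client = WebClient(token="xoxb-placeholder")

# single call: rate limited and retried on transient failures
info = make_slack_api_call_w_retries(
    client.conversations_info,
    channel="C0123456789",
)

# paginated call: yields one response page (a dict) at a time
for page in make_paginated_slack_api_call_w_retries(
    client.conversations_history,
    channel="C0123456789",
):
    print(len(page.get("messages", [])))
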
@@ -6,6 +6,7 @@ from fastapi import HTTPException
 from fastapi_users.password import PasswordHelper
 from sqlalchemy import func
 from sqlalchemy import select
+from sqlalchemy.exc import IntegrityError
 from sqlalchemy.orm import Session
 from sqlalchemy.sql import expression
 from sqlalchemy.sql.elements import ColumnElement

@@ -274,7 +275,7 @@ def _generate_ext_permissioned_user(email: str) -> User:


 def batch_add_ext_perm_user_if_not_exists(
-    db_session: Session, emails: list[str]
+    db_session: Session, emails: list[str], continue_on_error: bool = False
 ) -> list[User]:
     lower_emails = [email.lower() for email in emails]
     found_users, missing_lower_emails = _get_users_by_emails(db_session, lower_emails)

@@ -283,10 +284,23 @@ def batch_add_ext_perm_user_if_not_exists(
     for email in missing_lower_emails:
         new_users.append(_generate_ext_permissioned_user(email=email))

-    db_session.add_all(new_users)
-    db_session.commit()
-
-    return found_users + new_users
+    try:
+        db_session.add_all(new_users)
+        db_session.commit()
+    except IntegrityError:
+        db_session.rollback()
+        if not continue_on_error:
+            raise
+        for user in new_users:
+            try:
+                db_session.add(user)
+                db_session.commit()
+            except IntegrityError:
+                db_session.rollback()
+                continue
+
+    # Fetch all users again to ensure we have the most up-to-date list
+    all_users, _ = _get_users_by_emails(db_session, lower_emails)
+    return all_users


 def delete_user_from_db(

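The batch insert now degrades gracefully: one commit for the whole batch, and on a unique-constraint collision with continue_on_error=True, a per-user fallback so that a single duplicate email cannot poison the rest of the batch. A self-contained sketch of the same pattern against an in-memory SQLite database (the toy User model and table here are hypothetical):

from sqlalchemy import Column, Integer, String, create_engine
from sqlalchemy.exc import IntegrityError
from sqlalchemy.orm import Session, declarative_base

Base = declarative_base()

class User(Base):
    __tablename__ = "users"
    id = Column(Integer, primary_key=True)
    email = Column(String, unique=True)

engine = create_engine("sqlite://")
Base.metadata.create_all(engine)

def batch_add_users(
    db_session: Session, emails: list[str], continue_on_error: bool = False
) -> None:
    new_users = [User(email=email) for email in emails]
    try:
        # fast path: one commit for the whole batch
        db_session.add_all(new_users)
        db_session.commit()
    except IntegrityError:
        db_session.rollback()
        if not continue_on_error:
            raise
        # slow path: insert one at a time so a single duplicate
        # doesn't abort the entire batch
        for user in new_users:
            try:
                db_session.add(user)
                db_session.commit()
            except IntegrityError:
                db_session.rollback()

with Session(engine) as session:
    batch_add_users(session, ["a@x.com", "a@x.com", "b@x.com"], continue_on_error=True)
    print(sorted(u.email for u in session.query(User)))  # ['a@x.com', 'b@x.com']
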
@@ -17,6 +17,7 @@ from uuid import UUID
 import httpx  # type: ignore
 import requests  # type: ignore
+from retry import retry

 from onyx.configs.chat_configs import DOC_TIME_DECAY
 from onyx.configs.chat_configs import NUM_RETURNED_HITS

@@ -549,6 +550,11 @@ class VespaIndex(DocumentIndex):
             time.monotonic() - update_start,
         )

+    @retry(
+        tries=3,
+        delay=1,
+        backoff=2,
+    )
     def _update_single_chunk(
         self,
         doc_chunk_id: UUID,

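The decorator comes from the `retry` PyPI package: tries=3 allows up to three attempts in total, delay=1 with backoff=2 gives waits of roughly 1s and then 2s between attempts, and by default any exception raised by the body triggers a retry. A minimal sketch with a hypothetical flaky function:

from retry import retry

attempts = 0

@retry(tries=3, delay=1, backoff=2)
def flaky_update() -> str:
    global attempts
    attempts += 1
    if attempts < 3:
        raise RuntimeError(f"transient failure #{attempts}")
    return "ok"

print(flaky_update())  # fails twice, sleeps ~1s then ~2s, prints "ok"
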
@@ -559,6 +565,7 @@ class VespaIndex(DocumentIndex):
     ) -> None:
         """
         Update a single "chunk" (document) in Vespa using its chunk ID.
+        Retries if we encounter transient HTTPStatusError (e.g., overload).
         """

         update_dict: dict[str, dict] = {"fields": {}}

@@ -567,13 +574,11 @@ class VespaIndex(DocumentIndex):
             update_dict["fields"][BOOST] = {"assign": fields.boost}

         if fields.document_sets is not None:
-            # WeightedSet<string> needs a map { item: weight, ... }
             update_dict["fields"][DOCUMENT_SETS] = {
                 "assign": {document_set: 1 for document_set in fields.document_sets}
             }

         if fields.access is not None:
-            # Similar to above
             update_dict["fields"][ACCESS_CONTROL_LIST] = {
                 "assign": {acl_entry: 1 for acl_entry in fields.access.to_acl()}
             }

@@ -585,7 +590,10 @@ class VespaIndex(DocumentIndex):
             logger.error("Update request received but nothing to update.")
             return

-        vespa_url = f"{DOCUMENT_ID_ENDPOINT.format(index_name=index_name)}/{doc_chunk_id}?create=true"
+        vespa_url = (
+            f"{DOCUMENT_ID_ENDPOINT.format(index_name=index_name)}/{doc_chunk_id}"
+            "?create=true"
+        )

         try:
             resp = http_client.put(

@@ -595,8 +603,11 @@ class VespaIndex(DocumentIndex):
             )
             resp.raise_for_status()
         except httpx.HTTPStatusError as e:
-            error_message = f"Failed to update doc chunk {doc_chunk_id} (doc_id={doc_id}). Details: {e.response.text}"
-            logger.error(error_message)
+            logger.error(
+                f"Failed to update doc chunk {doc_chunk_id} (doc_id={doc_id}). "
+                f"Details: {e.response.text}"
+            )
+            # Re-raise so the @retry decorator will catch and retry
             raise

     def update_single(

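Note that the except block still re-raises after logging: if the HTTPStatusError were swallowed here, the @retry decorator added earlier in this commit would never observe a failure, and transient Vespa errors would not be retried.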