mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-05-31 02:01:16 +02:00
More logging/fixes (#3364)
* More logging for external group syncing * Fixed edge case where some spaces were not being fetched * made refresh frequency for confluence syncs configurable * clarity
This commit is contained in:
parent
53b3dcbace
commit
53428f6e9c
@ -11,6 +11,14 @@ SAML_CONF_DIR = os.environ.get("SAML_CONF_DIR") or "/app/ee/danswer/configs/saml
|
||||
#####
|
||||
# Auto Permission Sync
|
||||
#####
|
||||
# In seconds, default is 5 minutes
|
||||
CONFLUENCE_PERMISSION_GROUP_SYNC_FREQUENCY = int(
|
||||
os.environ.get("CONFLUENCE_PERMISSION_GROUP_SYNC_FREQUENCY") or 5 * 60
|
||||
)
|
||||
# In seconds, default is 5 minutes
|
||||
CONFLUENCE_PERMISSION_DOC_SYNC_FREQUENCY = int(
|
||||
os.environ.get("CONFLUENCE_PERMISSION_DOC_SYNC_FREQUENCY") or 5 * 60
|
||||
)
|
||||
NUM_PERMISSION_WORKERS = int(os.environ.get("NUM_PERMISSION_WORKERS") or 2)
|
||||
|
||||
|
||||
|
@ -10,6 +10,9 @@ from danswer.access.utils import prefix_group_w_source
|
||||
from danswer.configs.constants import DocumentSource
|
||||
from danswer.db.models import User__ExternalUserGroupId
|
||||
from danswer.db.users import batch_add_ext_perm_user_if_not_exists
|
||||
from danswer.utils.logger import setup_logger
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
class ExternalUserGroup(BaseModel):
|
||||
@ -73,7 +76,13 @@ def replace_user__ext_group_for_cc_pair(
|
||||
new_external_permissions = []
|
||||
for external_group in group_defs:
|
||||
for user_email in external_group.user_emails:
|
||||
user_id = email_id_map[user_email]
|
||||
user_id = email_id_map.get(user_email)
|
||||
if user_id is None:
|
||||
logger.warning(
|
||||
f"User in group {external_group.id}"
|
||||
f" with email {user_email} not found"
|
||||
)
|
||||
continue
|
||||
new_external_permissions.append(
|
||||
User__ExternalUserGroupId(
|
||||
user_id=user_id,
|
||||
|
@ -195,6 +195,7 @@ def _fetch_all_page_restrictions_for_space(
|
||||
confluence_client: OnyxConfluence,
|
||||
slim_docs: list[SlimDocument],
|
||||
space_permissions_by_space_key: dict[str, ExternalAccess],
|
||||
is_cloud: bool,
|
||||
) -> list[DocExternalAccess]:
|
||||
"""
|
||||
For all pages, if a page has restrictions, then use those restrictions.
|
||||
@ -222,29 +223,50 @@ def _fetch_all_page_restrictions_for_space(
|
||||
continue
|
||||
|
||||
space_key = slim_doc.perm_sync_data.get("space_key")
|
||||
if space_permissions := space_permissions_by_space_key.get(space_key):
|
||||
# If there are no restrictions, then use the space's restrictions
|
||||
document_restrictions.append(
|
||||
DocExternalAccess(
|
||||
doc_id=slim_doc.id,
|
||||
external_access=space_permissions,
|
||||
)
|
||||
if not (space_permissions := space_permissions_by_space_key.get(space_key)):
|
||||
logger.debug(
|
||||
f"Individually fetching space permissions for space {space_key}"
|
||||
)
|
||||
if (
|
||||
not space_permissions.is_public
|
||||
and not space_permissions.external_user_emails
|
||||
and not space_permissions.external_user_group_ids
|
||||
):
|
||||
try:
|
||||
# If the space permissions are not in the cache, then fetch them
|
||||
if is_cloud:
|
||||
retrieved_space_permissions = _get_cloud_space_permissions(
|
||||
confluence_client=confluence_client, space_key=space_key
|
||||
)
|
||||
else:
|
||||
retrieved_space_permissions = _get_server_space_permissions(
|
||||
confluence_client=confluence_client, space_key=space_key
|
||||
)
|
||||
space_permissions_by_space_key[space_key] = retrieved_space_permissions
|
||||
space_permissions = retrieved_space_permissions
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
f"Permissions are empty for document: {slim_doc.id}\n"
|
||||
"This means space permissions are may be wrong for"
|
||||
f" Space key: {space_key}"
|
||||
f"Error fetching space permissions for space {space_key}: {e}"
|
||||
)
|
||||
|
||||
if not space_permissions:
|
||||
logger.warning(
|
||||
f"No permissions found for document {slim_doc.id} in space {space_key}"
|
||||
)
|
||||
continue
|
||||
|
||||
logger.warning(
|
||||
f"No permissions found for document {slim_doc.id} in space {space_key}"
|
||||
# If there are no restrictions, then use the space's restrictions
|
||||
document_restrictions.append(
|
||||
DocExternalAccess(
|
||||
doc_id=slim_doc.id,
|
||||
external_access=space_permissions,
|
||||
)
|
||||
)
|
||||
if (
|
||||
not space_permissions.is_public
|
||||
and not space_permissions.external_user_emails
|
||||
and not space_permissions.external_user_group_ids
|
||||
):
|
||||
logger.warning(
|
||||
f"Permissions are empty for document: {slim_doc.id}\n"
|
||||
"This means space permissions are may be wrong for"
|
||||
f" Space key: {space_key}"
|
||||
)
|
||||
|
||||
logger.debug("Finished fetching all page restrictions for space")
|
||||
return document_restrictions
|
||||
@ -283,4 +305,5 @@ def confluence_doc_sync(
|
||||
confluence_client=confluence_connector.confluence_client,
|
||||
slim_docs=slim_docs,
|
||||
space_permissions_by_space_key=space_permissions_by_space_key,
|
||||
is_cloud=is_cloud,
|
||||
)
|
||||
|
@ -3,6 +3,8 @@ from collections.abc import Callable
|
||||
from danswer.access.models import DocExternalAccess
|
||||
from danswer.configs.constants import DocumentSource
|
||||
from danswer.db.models import ConnectorCredentialPair
|
||||
from ee.danswer.configs.app_configs import CONFLUENCE_PERMISSION_DOC_SYNC_FREQUENCY
|
||||
from ee.danswer.configs.app_configs import CONFLUENCE_PERMISSION_GROUP_SYNC_FREQUENCY
|
||||
from ee.danswer.db.external_perm import ExternalUserGroup
|
||||
from ee.danswer.external_permissions.confluence.doc_sync import confluence_doc_sync
|
||||
from ee.danswer.external_permissions.confluence.group_sync import confluence_group_sync
|
||||
@ -56,7 +58,7 @@ GROUP_PERMISSIONS_IS_CC_PAIR_AGNOSTIC: set[DocumentSource] = {
|
||||
# If nothing is specified here, we run the doc_sync every time the celery beat runs
|
||||
DOC_PERMISSION_SYNC_PERIODS: dict[DocumentSource, int] = {
|
||||
# Polling is not supported, so we re-fetch all doc permissions periodically (values are in seconds; 5 minutes by default, configurable for Confluence)
|
||||
DocumentSource.CONFLUENCE: 5 * 60,
|
||||
DocumentSource.CONFLUENCE: CONFLUENCE_PERMISSION_DOC_SYNC_FREQUENCY,
|
||||
DocumentSource.SLACK: 5 * 60,
|
||||
}
|
||||
|
||||
@ -64,7 +66,7 @@ DOC_PERMISSION_SYNC_PERIODS: dict[DocumentSource, int] = {
|
||||
EXTERNAL_GROUP_SYNC_PERIODS: dict[DocumentSource, int] = {
|
||||
# Polling is not supported, so we re-fetch all group permissions periodically (values are in seconds; the Confluence period is configurable)
|
||||
DocumentSource.GOOGLE_DRIVE: 5 * 60,
|
||||
DocumentSource.CONFLUENCE: 30 * 60,
|
||||
DocumentSource.CONFLUENCE: CONFLUENCE_PERMISSION_GROUP_SYNC_FREQUENCY,
|
||||
}
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user