mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-06-12 00:50:52 +02:00
More logging/fixes (#3364)
* More logging for external group syncing
* Fixed edge case where some spaces were not being fetched
* Made refresh frequency for confluence syncs configurable
* Clarity
This commit is contained in:
parent
53b3dcbace
commit
53428f6e9c
@ -11,6 +11,14 @@ SAML_CONF_DIR = os.environ.get("SAML_CONF_DIR") or "/app/ee/danswer/configs/saml
|
|||||||
#####
|
#####
|
||||||
# Auto Permission Sync
|
# Auto Permission Sync
|
||||||
#####
|
#####
|
||||||
|
# In seconds, default is 5 minutes
|
||||||
|
CONFLUENCE_PERMISSION_GROUP_SYNC_FREQUENCY = int(
|
||||||
|
os.environ.get("CONFLUENCE_PERMISSION_GROUP_SYNC_FREQUENCY") or 5 * 60
|
||||||
|
)
|
||||||
|
# In seconds, default is 5 minutes
|
||||||
|
CONFLUENCE_PERMISSION_DOC_SYNC_FREQUENCY = int(
|
||||||
|
os.environ.get("CONFLUENCE_PERMISSION_DOC_SYNC_FREQUENCY") or 5 * 60
|
||||||
|
)
|
||||||
NUM_PERMISSION_WORKERS = int(os.environ.get("NUM_PERMISSION_WORKERS") or 2)
|
NUM_PERMISSION_WORKERS = int(os.environ.get("NUM_PERMISSION_WORKERS") or 2)
|
||||||
|
|
||||||
|
|
||||||
|
@ -10,6 +10,9 @@ from danswer.access.utils import prefix_group_w_source
|
|||||||
from danswer.configs.constants import DocumentSource
|
from danswer.configs.constants import DocumentSource
|
||||||
from danswer.db.models import User__ExternalUserGroupId
|
from danswer.db.models import User__ExternalUserGroupId
|
||||||
from danswer.db.users import batch_add_ext_perm_user_if_not_exists
|
from danswer.db.users import batch_add_ext_perm_user_if_not_exists
|
||||||
|
from danswer.utils.logger import setup_logger
|
||||||
|
|
||||||
|
logger = setup_logger()
|
||||||
|
|
||||||
|
|
||||||
class ExternalUserGroup(BaseModel):
|
class ExternalUserGroup(BaseModel):
|
||||||
@ -73,7 +76,13 @@ def replace_user__ext_group_for_cc_pair(
|
|||||||
new_external_permissions = []
|
new_external_permissions = []
|
||||||
for external_group in group_defs:
|
for external_group in group_defs:
|
||||||
for user_email in external_group.user_emails:
|
for user_email in external_group.user_emails:
|
||||||
user_id = email_id_map[user_email]
|
user_id = email_id_map.get(user_email)
|
||||||
|
if user_id is None:
|
||||||
|
logger.warning(
|
||||||
|
f"User in group {external_group.id}"
|
||||||
|
f" with email {user_email} not found"
|
||||||
|
)
|
||||||
|
continue
|
||||||
new_external_permissions.append(
|
new_external_permissions.append(
|
||||||
User__ExternalUserGroupId(
|
User__ExternalUserGroupId(
|
||||||
user_id=user_id,
|
user_id=user_id,
|
||||||
|
@ -195,6 +195,7 @@ def _fetch_all_page_restrictions_for_space(
|
|||||||
confluence_client: OnyxConfluence,
|
confluence_client: OnyxConfluence,
|
||||||
slim_docs: list[SlimDocument],
|
slim_docs: list[SlimDocument],
|
||||||
space_permissions_by_space_key: dict[str, ExternalAccess],
|
space_permissions_by_space_key: dict[str, ExternalAccess],
|
||||||
|
is_cloud: bool,
|
||||||
) -> list[DocExternalAccess]:
|
) -> list[DocExternalAccess]:
|
||||||
"""
|
"""
|
||||||
For all pages, if a page has restrictions, then use those restrictions.
|
For all pages, if a page has restrictions, then use those restrictions.
|
||||||
@ -222,7 +223,33 @@ def _fetch_all_page_restrictions_for_space(
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
space_key = slim_doc.perm_sync_data.get("space_key")
|
space_key = slim_doc.perm_sync_data.get("space_key")
|
||||||
if space_permissions := space_permissions_by_space_key.get(space_key):
|
if not (space_permissions := space_permissions_by_space_key.get(space_key)):
|
||||||
|
logger.debug(
|
||||||
|
f"Individually fetching space permissions for space {space_key}"
|
||||||
|
)
|
||||||
|
try:
|
||||||
|
# If the space permissions are not in the cache, then fetch them
|
||||||
|
if is_cloud:
|
||||||
|
retrieved_space_permissions = _get_cloud_space_permissions(
|
||||||
|
confluence_client=confluence_client, space_key=space_key
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
retrieved_space_permissions = _get_server_space_permissions(
|
||||||
|
confluence_client=confluence_client, space_key=space_key
|
||||||
|
)
|
||||||
|
space_permissions_by_space_key[space_key] = retrieved_space_permissions
|
||||||
|
space_permissions = retrieved_space_permissions
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(
|
||||||
|
f"Error fetching space permissions for space {space_key}: {e}"
|
||||||
|
)
|
||||||
|
|
||||||
|
if not space_permissions:
|
||||||
|
logger.warning(
|
||||||
|
f"No permissions found for document {slim_doc.id} in space {space_key}"
|
||||||
|
)
|
||||||
|
continue
|
||||||
|
|
||||||
# If there are no restrictions, then use the space's restrictions
|
# If there are no restrictions, then use the space's restrictions
|
||||||
document_restrictions.append(
|
document_restrictions.append(
|
||||||
DocExternalAccess(
|
DocExternalAccess(
|
||||||
@ -240,11 +267,6 @@ def _fetch_all_page_restrictions_for_space(
|
|||||||
"This means space permissions are may be wrong for"
|
"This means space permissions are may be wrong for"
|
||||||
f" Space key: {space_key}"
|
f" Space key: {space_key}"
|
||||||
)
|
)
|
||||||
continue
|
|
||||||
|
|
||||||
logger.warning(
|
|
||||||
f"No permissions found for document {slim_doc.id} in space {space_key}"
|
|
||||||
)
|
|
||||||
|
|
||||||
logger.debug("Finished fetching all page restrictions for space")
|
logger.debug("Finished fetching all page restrictions for space")
|
||||||
return document_restrictions
|
return document_restrictions
|
||||||
@ -283,4 +305,5 @@ def confluence_doc_sync(
|
|||||||
confluence_client=confluence_connector.confluence_client,
|
confluence_client=confluence_connector.confluence_client,
|
||||||
slim_docs=slim_docs,
|
slim_docs=slim_docs,
|
||||||
space_permissions_by_space_key=space_permissions_by_space_key,
|
space_permissions_by_space_key=space_permissions_by_space_key,
|
||||||
|
is_cloud=is_cloud,
|
||||||
)
|
)
|
||||||
|
@ -3,6 +3,8 @@ from collections.abc import Callable
|
|||||||
from danswer.access.models import DocExternalAccess
|
from danswer.access.models import DocExternalAccess
|
||||||
from danswer.configs.constants import DocumentSource
|
from danswer.configs.constants import DocumentSource
|
||||||
from danswer.db.models import ConnectorCredentialPair
|
from danswer.db.models import ConnectorCredentialPair
|
||||||
|
from ee.danswer.configs.app_configs import CONFLUENCE_PERMISSION_DOC_SYNC_FREQUENCY
|
||||||
|
from ee.danswer.configs.app_configs import CONFLUENCE_PERMISSION_GROUP_SYNC_FREQUENCY
|
||||||
from ee.danswer.db.external_perm import ExternalUserGroup
|
from ee.danswer.db.external_perm import ExternalUserGroup
|
||||||
from ee.danswer.external_permissions.confluence.doc_sync import confluence_doc_sync
|
from ee.danswer.external_permissions.confluence.doc_sync import confluence_doc_sync
|
||||||
from ee.danswer.external_permissions.confluence.group_sync import confluence_group_sync
|
from ee.danswer.external_permissions.confluence.group_sync import confluence_group_sync
|
||||||
@ -56,7 +58,7 @@ GROUP_PERMISSIONS_IS_CC_PAIR_AGNOSTIC: set[DocumentSource] = {
|
|||||||
# If nothing is specified here, we run the doc_sync every time the celery beat runs
|
# If nothing is specified here, we run the doc_sync every time the celery beat runs
|
||||||
DOC_PERMISSION_SYNC_PERIODS: dict[DocumentSource, int] = {
|
DOC_PERMISSION_SYNC_PERIODS: dict[DocumentSource, int] = {
|
||||||
# Polling is not supported so we fetch all doc permissions every 5 minutes
|
# Polling is not supported so we fetch all doc permissions every 5 minutes
|
||||||
DocumentSource.CONFLUENCE: 5 * 60,
|
DocumentSource.CONFLUENCE: CONFLUENCE_PERMISSION_DOC_SYNC_FREQUENCY,
|
||||||
DocumentSource.SLACK: 5 * 60,
|
DocumentSource.SLACK: 5 * 60,
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -64,7 +66,7 @@ DOC_PERMISSION_SYNC_PERIODS: dict[DocumentSource, int] = {
|
|||||||
EXTERNAL_GROUP_SYNC_PERIODS: dict[DocumentSource, int] = {
|
EXTERNAL_GROUP_SYNC_PERIODS: dict[DocumentSource, int] = {
|
||||||
# Polling is not supported so we fetch all group permissions every 30 minutes
|
# Polling is not supported so we fetch all group permissions every 30 minutes
|
||||||
DocumentSource.GOOGLE_DRIVE: 5 * 60,
|
DocumentSource.GOOGLE_DRIVE: 5 * 60,
|
||||||
DocumentSource.CONFLUENCE: 30 * 60,
|
DocumentSource.CONFLUENCE: CONFLUENCE_PERMISSION_GROUP_SYNC_FREQUENCY,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user