mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-06-28 08:51:00 +02:00
Properly account for anonymous access in Confluence
This commit is contained in:
parent
ddec239fef
commit
e100a5e965
@ -15,6 +15,12 @@ SAML_CONF_DIR = os.environ.get("SAML_CONF_DIR") or "/app/ee/onyx/configs/saml_co
|
|||||||
CONFLUENCE_PERMISSION_GROUP_SYNC_FREQUENCY = int(
|
CONFLUENCE_PERMISSION_GROUP_SYNC_FREQUENCY = int(
|
||||||
os.environ.get("CONFLUENCE_PERMISSION_GROUP_SYNC_FREQUENCY") or 5 * 60
|
os.environ.get("CONFLUENCE_PERMISSION_GROUP_SYNC_FREQUENCY") or 5 * 60
|
||||||
)
|
)
|
||||||
|
# This is a boolean that determines if anonymous access is public
|
||||||
|
# Default behavior is to not make the page public and instead add a group
|
||||||
|
# that contains all the users that we found in Confluence
|
||||||
|
CONFLUENCE_ANONYMOUS_ACCESS_IS_PUBLIC = (
|
||||||
|
os.environ.get("CONFLUENCE_ANONYMOUS_ACCESS_IS_PUBLIC", "").lower() == "true"
|
||||||
|
)
|
||||||
# In seconds, default is 5 minutes
|
# In seconds, default is 5 minutes
|
||||||
CONFLUENCE_PERMISSION_DOC_SYNC_FREQUENCY = int(
|
CONFLUENCE_PERMISSION_DOC_SYNC_FREQUENCY = int(
|
||||||
os.environ.get("CONFLUENCE_PERMISSION_DOC_SYNC_FREQUENCY") or 5 * 60
|
os.environ.get("CONFLUENCE_PERMISSION_DOC_SYNC_FREQUENCY") or 5 * 60
|
||||||
|
@ -0,0 +1,4 @@
|
|||||||
|
# This is a group that we use to store all the users that we found in Confluence
|
||||||
|
# Instead of setting a page to public, we just add this group so that the page
|
||||||
|
# is only accessible to users who have confluence accounts.
|
||||||
|
ALL_CONF_EMAILS_GROUP_NAME = "All_Confluence_Users_Found_By_Onyx"
|
@ -4,6 +4,8 @@ https://confluence.atlassian.com/conf85/check-who-can-view-a-page-1283360557.htm
|
|||||||
"""
|
"""
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
|
from ee.onyx.configs.app_configs import CONFLUENCE_ANONYMOUS_ACCESS_IS_PUBLIC
|
||||||
|
from ee.onyx.external_permissions.confluence.constants import ALL_CONF_EMAILS_GROUP_NAME
|
||||||
from onyx.access.models import DocExternalAccess
|
from onyx.access.models import DocExternalAccess
|
||||||
from onyx.access.models import ExternalAccess
|
from onyx.access.models import ExternalAccess
|
||||||
from onyx.connectors.confluence.connector import ConfluenceConnector
|
from onyx.connectors.confluence.connector import ConfluenceConnector
|
||||||
@ -31,14 +33,32 @@ def _get_server_space_permissions(
|
|||||||
permission_category.get("spacePermissions", [])
|
permission_category.get("spacePermissions", [])
|
||||||
)
|
)
|
||||||
|
|
||||||
|
is_public = False
|
||||||
user_names = set()
|
user_names = set()
|
||||||
group_names = set()
|
group_names = set()
|
||||||
for permission in viewspace_permissions:
|
for permission in viewspace_permissions:
|
||||||
if user_name := permission.get("userName"):
|
user_name = permission.get("userName")
|
||||||
|
if user_name:
|
||||||
user_names.add(user_name)
|
user_names.add(user_name)
|
||||||
if group_name := permission.get("groupName"):
|
group_name = permission.get("groupName")
|
||||||
|
if group_name:
|
||||||
group_names.add(group_name)
|
group_names.add(group_name)
|
||||||
|
|
||||||
|
# It seems that if anonymous access is turned on for the site and space,
|
||||||
|
# then the space is publicly accessible.
|
||||||
|
# For confluence server, we make a group that contains all users
|
||||||
|
# that exist in confluence and then just add that group to the space permissions
|
||||||
|
# if anonymous access is turned on for the site and space or we set is_public = True
|
||||||
|
# if they set the env variable CONFLUENCE_ANONYMOUS_ACCESS_IS_PUBLIC to True so
|
||||||
|
# that we can support confluence server deployments that want anonymous access
|
||||||
|
# to be public (we cant test this because its paywalled)
|
||||||
|
if user_name is None and group_name is None:
|
||||||
|
# Defaults to False
|
||||||
|
if CONFLUENCE_ANONYMOUS_ACCESS_IS_PUBLIC:
|
||||||
|
is_public = True
|
||||||
|
else:
|
||||||
|
group_names.add(ALL_CONF_EMAILS_GROUP_NAME)
|
||||||
|
|
||||||
user_emails = set()
|
user_emails = set()
|
||||||
for user_name in user_names:
|
for user_name in user_names:
|
||||||
user_email = get_user_email_from_username__server(confluence_client, user_name)
|
user_email = get_user_email_from_username__server(confluence_client, user_name)
|
||||||
@ -50,11 +70,7 @@ def _get_server_space_permissions(
|
|||||||
return ExternalAccess(
|
return ExternalAccess(
|
||||||
external_user_emails=user_emails,
|
external_user_emails=user_emails,
|
||||||
external_user_group_ids=group_names,
|
external_user_group_ids=group_names,
|
||||||
# TODO: Check if the space is publicly accessible
|
is_public=is_public,
|
||||||
# Currently, we assume the space is not public
|
|
||||||
# We need to check if anonymous access is turned on for the site and space
|
|
||||||
# This information is paywalled so it remains unimplemented
|
|
||||||
is_public=False,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -134,7 +150,7 @@ def _get_space_permissions(
|
|||||||
|
|
||||||
def _extract_read_access_restrictions(
|
def _extract_read_access_restrictions(
|
||||||
confluence_client: OnyxConfluence, restrictions: dict[str, Any]
|
confluence_client: OnyxConfluence, restrictions: dict[str, Any]
|
||||||
) -> ExternalAccess | None:
|
) -> tuple[set[str], set[str]]:
|
||||||
"""
|
"""
|
||||||
Converts a page's restrictions dict into an ExternalAccess object.
|
Converts a page's restrictions dict into an ExternalAccess object.
|
||||||
If there are no restrictions, then return None
|
If there are no restrictions, then return None
|
||||||
@ -177,21 +193,57 @@ def _extract_read_access_restrictions(
|
|||||||
group["name"] for group in read_access_group_jsons if group.get("name")
|
group["name"] for group in read_access_group_jsons if group.get("name")
|
||||||
]
|
]
|
||||||
|
|
||||||
|
return set(read_access_user_emails), set(read_access_group_names)
|
||||||
|
|
||||||
|
|
||||||
|
def _get_all_page_restrictions(
|
||||||
|
confluence_client: OnyxConfluence,
|
||||||
|
perm_sync_data: dict[str, Any],
|
||||||
|
) -> ExternalAccess | None:
|
||||||
|
"""
|
||||||
|
This function gets the restrictions for a page by taking the intersection
|
||||||
|
of the page's restrictions and the restrictions of all the ancestors
|
||||||
|
of the page.
|
||||||
|
If the page/ancestor has no restrictions, then it is ignored (no intersection).
|
||||||
|
If no restrictions are found anywhere, then return None, indicating that the page
|
||||||
|
should inherit the space's restrictions.
|
||||||
|
"""
|
||||||
|
found_user_emails: set[str] = set()
|
||||||
|
found_group_names: set[str] = set()
|
||||||
|
|
||||||
|
found_user_emails, found_group_names = _extract_read_access_restrictions(
|
||||||
|
confluence_client=confluence_client,
|
||||||
|
restrictions=perm_sync_data.get("restrictions", {}),
|
||||||
|
)
|
||||||
|
|
||||||
|
ancestors: list[dict[str, Any]] = perm_sync_data.get("ancestors", [])
|
||||||
|
for ancestor in ancestors:
|
||||||
|
ancestor_user_emails, ancestor_group_names = _extract_read_access_restrictions(
|
||||||
|
confluence_client=confluence_client,
|
||||||
|
restrictions=ancestor.get("restrictions", {}),
|
||||||
|
)
|
||||||
|
if not ancestor_user_emails and not ancestor_group_names:
|
||||||
|
# This ancestor has no restrictions, so it has no effect on
|
||||||
|
# the page's restrictions, so we ignore it
|
||||||
|
continue
|
||||||
|
|
||||||
|
found_user_emails.intersection_update(ancestor_user_emails)
|
||||||
|
found_group_names.intersection_update(ancestor_group_names)
|
||||||
|
|
||||||
# If there are no restrictions found, then the page
|
# If there are no restrictions found, then the page
|
||||||
# inherits the space's restrictions so return None
|
# inherits the space's restrictions so return None
|
||||||
is_space_public = read_access_user_emails == [] and read_access_group_names == []
|
if not found_user_emails and not found_group_names:
|
||||||
if is_space_public:
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
return ExternalAccess(
|
return ExternalAccess(
|
||||||
external_user_emails=set(read_access_user_emails),
|
external_user_emails=found_user_emails,
|
||||||
external_user_group_ids=set(read_access_group_names),
|
external_user_group_ids=found_group_names,
|
||||||
# there is no way for a page to be individually public if the space isn't public
|
# there is no way for a page to be individually public if the space isn't public
|
||||||
is_public=False,
|
is_public=False,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def _fetch_all_page_restrictions_for_space(
|
def _fetch_all_page_restrictions(
|
||||||
confluence_client: OnyxConfluence,
|
confluence_client: OnyxConfluence,
|
||||||
slim_docs: list[SlimDocument],
|
slim_docs: list[SlimDocument],
|
||||||
space_permissions_by_space_key: dict[str, ExternalAccess],
|
space_permissions_by_space_key: dict[str, ExternalAccess],
|
||||||
@ -208,11 +260,11 @@ def _fetch_all_page_restrictions_for_space(
|
|||||||
raise ValueError(
|
raise ValueError(
|
||||||
f"No permission sync data found for document {slim_doc.id}"
|
f"No permission sync data found for document {slim_doc.id}"
|
||||||
)
|
)
|
||||||
restrictions = _extract_read_access_restrictions(
|
|
||||||
|
if restrictions := _get_all_page_restrictions(
|
||||||
confluence_client=confluence_client,
|
confluence_client=confluence_client,
|
||||||
restrictions=slim_doc.perm_sync_data.get("restrictions", {}),
|
perm_sync_data=slim_doc.perm_sync_data,
|
||||||
)
|
):
|
||||||
if restrictions:
|
|
||||||
document_restrictions.append(
|
document_restrictions.append(
|
||||||
DocExternalAccess(
|
DocExternalAccess(
|
||||||
doc_id=slim_doc.id,
|
doc_id=slim_doc.id,
|
||||||
@ -301,7 +353,7 @@ def confluence_doc_sync(
|
|||||||
slim_docs.extend(doc_batch)
|
slim_docs.extend(doc_batch)
|
||||||
|
|
||||||
logger.debug("Fetching all page restrictions for space")
|
logger.debug("Fetching all page restrictions for space")
|
||||||
return _fetch_all_page_restrictions_for_space(
|
return _fetch_all_page_restrictions(
|
||||||
confluence_client=confluence_connector.confluence_client,
|
confluence_client=confluence_connector.confluence_client,
|
||||||
slim_docs=slim_docs,
|
slim_docs=slim_docs,
|
||||||
space_permissions_by_space_key=space_permissions_by_space_key,
|
space_permissions_by_space_key=space_permissions_by_space_key,
|
||||||
|
@ -1,11 +1,11 @@
|
|||||||
from ee.onyx.db.external_perm import ExternalUserGroup
|
from ee.onyx.db.external_perm import ExternalUserGroup
|
||||||
|
from ee.onyx.external_permissions.confluence.constants import ALL_CONF_EMAILS_GROUP_NAME
|
||||||
from onyx.connectors.confluence.onyx_confluence import build_confluence_client
|
from onyx.connectors.confluence.onyx_confluence import build_confluence_client
|
||||||
from onyx.connectors.confluence.onyx_confluence import OnyxConfluence
|
from onyx.connectors.confluence.onyx_confluence import OnyxConfluence
|
||||||
from onyx.connectors.confluence.utils import get_user_email_from_username__server
|
from onyx.connectors.confluence.utils import get_user_email_from_username__server
|
||||||
from onyx.db.models import ConnectorCredentialPair
|
from onyx.db.models import ConnectorCredentialPair
|
||||||
from onyx.utils.logger import setup_logger
|
from onyx.utils.logger import setup_logger
|
||||||
|
|
||||||
|
|
||||||
logger = setup_logger()
|
logger = setup_logger()
|
||||||
|
|
||||||
|
|
||||||
@ -53,6 +53,7 @@ def confluence_group_sync(
|
|||||||
confluence_client=confluence_client,
|
confluence_client=confluence_client,
|
||||||
)
|
)
|
||||||
onyx_groups: list[ExternalUserGroup] = []
|
onyx_groups: list[ExternalUserGroup] = []
|
||||||
|
all_found_emails = set()
|
||||||
for group_id, group_member_emails in group_member_email_map.items():
|
for group_id, group_member_emails in group_member_email_map.items():
|
||||||
onyx_groups.append(
|
onyx_groups.append(
|
||||||
ExternalUserGroup(
|
ExternalUserGroup(
|
||||||
@ -60,5 +61,15 @@ def confluence_group_sync(
|
|||||||
user_emails=list(group_member_emails),
|
user_emails=list(group_member_emails),
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
all_found_emails.update(group_member_emails)
|
||||||
|
|
||||||
|
# This is so that when we find a public confleunce server page, we can
|
||||||
|
# give access to all users only in if they have an email in Confluence
|
||||||
|
if cc_pair.connector.connector_specific_config.get("is_cloud", False):
|
||||||
|
all_found_group = ExternalUserGroup(
|
||||||
|
id=ALL_CONF_EMAILS_GROUP_NAME,
|
||||||
|
user_emails=list(all_found_emails),
|
||||||
|
)
|
||||||
|
onyx_groups.append(all_found_group)
|
||||||
|
|
||||||
return onyx_groups
|
return onyx_groups
|
||||||
|
@ -52,6 +52,8 @@ _RESTRICTIONS_EXPANSION_FIELDS = [
|
|||||||
"space",
|
"space",
|
||||||
"restrictions.read.restrictions.user",
|
"restrictions.read.restrictions.user",
|
||||||
"restrictions.read.restrictions.group",
|
"restrictions.read.restrictions.group",
|
||||||
|
"ancestors.restrictions.read.restrictions.user",
|
||||||
|
"ancestors.restrictions.read.restrictions.group",
|
||||||
]
|
]
|
||||||
|
|
||||||
_SLIM_DOC_BATCH_SIZE = 5000
|
_SLIM_DOC_BATCH_SIZE = 5000
|
||||||
@ -323,9 +325,11 @@ class ConfluenceConnector(LoadConnector, PollConnector, SlimConnector):
|
|||||||
# These will be used by doc_sync.py to sync permissions
|
# These will be used by doc_sync.py to sync permissions
|
||||||
page_restrictions = page.get("restrictions")
|
page_restrictions = page.get("restrictions")
|
||||||
page_space_key = page.get("space", {}).get("key")
|
page_space_key = page.get("space", {}).get("key")
|
||||||
|
page_ancestors = page.get("ancestors", [])
|
||||||
page_perm_sync_data = {
|
page_perm_sync_data = {
|
||||||
"restrictions": page_restrictions or {},
|
"restrictions": page_restrictions or {},
|
||||||
"space_key": page_space_key,
|
"space_key": page_space_key,
|
||||||
|
"ancestors": page_ancestors or [],
|
||||||
}
|
}
|
||||||
|
|
||||||
doc_metadata_list.append(
|
doc_metadata_list.append(
|
||||||
|
Loading…
x
Reference in New Issue
Block a user