Fix confluence perm sync ancestry (#4536)

* Fix confluence perm sync ancestry

* Address EL comments

* add test for special case

* remove print

* Fix test
This commit is contained in:
Chris Weaver
2025-04-15 20:02:54 -07:00
committed by GitHub
parent 1dd32ebfce
commit 7e7b6e08ff
2 changed files with 163 additions and 19 deletions

View File

@@ -220,10 +220,9 @@ def _get_all_page_restrictions(
perm_sync_data: dict[str, Any],
) -> ExternalAccess | None:
"""
This function gets the restrictions for a page by taking the intersection
of the page's restrictions and the restrictions of all the ancestors
of the page.
If the page/ancestor has no restrictions, then it is ignored (no intersection).
This function gets the restrictions for a page. In Confluence, a child can have
at MOST the same level accessibility as its immediate parent.
If no restrictions are found anywhere, then return None, indicating that the page
should inherit the space's restrictions.
"""
@@ -234,9 +233,22 @@ def _get_all_page_restrictions(
confluence_client=confluence_client,
restrictions=perm_sync_data.get("restrictions", {}),
)
# if there are individual page-level restrictions, then this is the accurate
# restriction for the page. You cannot both have page-level restrictions AND
# inherit restrictions from the parent.
if bool(found_user_emails or found_group_names):
return ExternalAccess(
external_user_emails=found_user_emails,
external_user_group_ids=found_group_names,
is_public=False,
)
ancestors: list[dict[str, Any]] = perm_sync_data.get("ancestors", [])
for ancestor in ancestors:
# ancestors seem to be in order from root to immediate parent
# https://community.atlassian.com/forums/Confluence-questions/Order-of-ancestors-in-REST-API-response-Confluence-Server-amp/qaq-p/2385981
# we want the restrictions from the immediate parent to take precedence, so we should
# reverse the list
for ancestor in reversed(ancestors):
ancestor_user_emails, ancestor_group_names = _extract_read_access_restrictions(
confluence_client=confluence_client,
restrictions=ancestor.get("restrictions", {}),
@@ -246,20 +258,18 @@ def _get_all_page_restrictions(
# the page's restrictions, so we ignore it
continue
found_user_emails.intersection_update(ancestor_user_emails)
found_group_names.intersection_update(ancestor_group_names)
# if inheriting restrictions from the parent, then the first one we run into
# should be applied (the reason why we'd traverse more than one ancestor is if
# the ancestor also is in "inherit" mode.)
if ancestor_user_emails or ancestor_group_names:
return ExternalAccess(
external_user_emails=ancestor_user_emails,
external_user_group_ids=ancestor_group_names,
is_public=False,
)
# If there are no restrictions found, then the page
# inherits the space's restrictions so return None
if not found_user_emails and not found_group_names:
return None
return ExternalAccess(
external_user_emails=found_user_emails,
external_user_group_ids=found_group_names,
# there is no way for a page to be individually public if the space isn't public
is_public=False,
)
# we didn't find any restrictions, so the page inherits the space's restrictions
return None
def _fetch_all_page_restrictions(

View File

@@ -1,11 +1,15 @@
import os
import time
from unittest.mock import MagicMock
from unittest.mock import patch
import pytest
from ee.onyx.external_permissions.confluence.doc_sync import confluence_doc_sync
from onyx.configs.constants import DocumentSource
from onyx.connectors.confluence.connector import ConfluenceConnector
from onyx.connectors.credentials_provider import OnyxStaticCredentialsProvider
from onyx.db.models import ConnectorCredentialPair
from tests.daily.connectors.utils import load_all_docs_from_checkpoint_connector
@@ -30,7 +34,6 @@ def confluence_connector() -> ConfluenceConnector:
# This should never fail because even if the docs in the cloud change,
# the full doc ids retrieved should always be a subset of the slim doc ids
@pytest.mark.skip(reason="Skipping this test")
def test_confluence_connector_permissions(
confluence_connector: ConfluenceConnector,
) -> None:
@@ -53,3 +56,134 @@ def test_confluence_connector_permissions(
assert all_full_doc_ids.issubset(
all_slim_doc_ids
), f"Full doc IDs are not a subset of slim doc IDs. Found {len(difference)} IDs in full docs but not in slim docs."
@patch("ee.onyx.external_permissions.confluence.doc_sync.OnyxDBCredentialsProvider")
@patch(
"onyx.file_processing.extract_file_text.get_unstructured_api_key",
return_value=None,
)
def test_confluence_connector_restriction_handling(
mock_get_api_key: MagicMock,
mock_db_provider_class: MagicMock,
) -> None:
# Test space key
test_space_key = "DailyPermS"
# Configure the mock provider instance that will be returned
mock_provider_instance = MagicMock()
mock_provider_instance.get_credentials.return_value = {
"confluence_username": os.environ["CONFLUENCE_USER_NAME"],
"confluence_access_token": os.environ["CONFLUENCE_ACCESS_TOKEN"],
}
# Make the class return our configured instance when called
mock_db_provider_class.return_value = mock_provider_instance
# Mock the cc_pair to pass to the function
mock_cc_pair = MagicMock(spec=ConnectorCredentialPair)
# Mock the nested connector attribute and its config
mock_cc_pair.connector = MagicMock()
mock_cc_pair.connector.connector_specific_config = {
"wiki_base": "https://danswerai.atlassian.net",
"is_cloud": True,
"space": test_space_key,
}
# Set a mock credential ID
mock_cc_pair.credential_id = 1
# Call the confluence_doc_sync function directly with the mock cc_pair
doc_access_generator = confluence_doc_sync(mock_cc_pair, None)
doc_access_list = list(doc_access_generator)
assert len(doc_access_list) == 7
assert all(
not doc_access.external_access.is_public for doc_access in doc_access_list
)
# if no restriction is applied, the groups should give access, so no need
# for more emails outside of the owner
non_restricted_emails = {"chris@onyx.app"}
non_restricted_user_groups = {
"confluence-admins-danswerai",
"org-admins",
"atlassian-addons-admin",
"confluence-users-danswerai",
}
# if restriction is applied, only should be visible to shared users / groups
restricted_emails = {"chris@onyx.app", "hagen@danswer.ai"}
restricted_user_groups = {"confluence-admins-danswerai"}
extra_restricted_emails = {"chris@onyx.app"}
extra_restricted_user_groups: set[str] = set()
# note that this is only allowed since yuhong@onyx.app is a member of the
# confluence-admins-danswerai group
special_restricted_emails = {"chris@onyx.app", "yuhong@onyx.app"}
special_restricted_user_groups: set[str] = set()
# Check Root+Page+2 is public
root_page_2 = next(d for d in doc_access_list if d.doc_id.endswith("Root+Page+2"))
assert root_page_2.external_access.external_user_emails == non_restricted_emails
assert (
root_page_2.external_access.external_user_group_ids
== non_restricted_user_groups
)
# Check Overview page is public
overview_page = next(
d for d in doc_access_list if d.doc_id.lower().endswith("overview")
)
assert (
overview_page.external_access.external_user_emails == non_restricted_emails
), "Overview page emails do not match expected values"
assert (
overview_page.external_access.external_user_group_ids
== non_restricted_user_groups
), "Overview page groups do not match expected values"
# check root page is restricted
root_page = next(d for d in doc_access_list if d.doc_id.endswith("Root+Page"))
assert (
root_page.external_access.external_user_emails == restricted_emails
), "Root page emails do not match expected values"
assert (
root_page.external_access.external_user_group_ids == restricted_user_groups
), "Root page groups do not match expected values"
# check child page has restriction propagated
child_page = next(d for d in doc_access_list if d.doc_id.endswith("Child+Page"))
assert (
child_page.external_access.external_user_emails == restricted_emails
), "Child page emails do not match expected values"
assert (
child_page.external_access.external_user_group_ids == restricted_user_groups
), "Child page groups do not match expected values"
# check doubly nested child page has restriction propagated
child_page_2 = next(d for d in doc_access_list if d.doc_id.endswith("Child+Page+2"))
assert (
child_page_2.external_access.external_user_emails == restricted_emails
), "Child page 2 emails do not match expected values"
assert (
child_page_2.external_access.external_user_group_ids == restricted_user_groups
), "Child page 2 groups do not match expected values"
# check child page w/ specific restrictions have those applied
child_page_3 = next(d for d in doc_access_list if d.doc_id.endswith("Child+Page+3"))
assert (
child_page_3.external_access.external_user_emails == extra_restricted_emails
), "Child page 3 emails do not match expected values"
assert (
child_page_3.external_access.external_user_group_ids
== extra_restricted_user_groups
), "Child page 3 groups do not match expected values"
# check child page w/ specific restrictions have those applied
child_page_4 = next(d for d in doc_access_list if d.doc_id.endswith("Child+Page+4"))
assert (
child_page_4.external_access.external_user_emails == special_restricted_emails
), "Child page 4 emails do not match expected values"
assert (
child_page_4.external_access.external_user_group_ids
== special_restricted_user_groups
), "Child page 4 groups do not match expected values"