diff --git a/backend/ee/onyx/external_permissions/confluence/doc_sync.py b/backend/ee/onyx/external_permissions/confluence/doc_sync.py index 39db2355d63..22159d0afae 100644 --- a/backend/ee/onyx/external_permissions/confluence/doc_sync.py +++ b/backend/ee/onyx/external_permissions/confluence/doc_sync.py @@ -220,10 +220,9 @@ def _get_all_page_restrictions( perm_sync_data: dict[str, Any], ) -> ExternalAccess | None: """ - This function gets the restrictions for a page by taking the intersection - of the page's restrictions and the restrictions of all the ancestors - of the page. - If the page/ancestor has no restrictions, then it is ignored (no intersection). + This function gets the restrictions for a page. In Confluence, a child can have + at MOST the same level accessibility as its immediate parent. + If no restrictions are found anywhere, then return None, indicating that the page should inherit the space's restrictions. """ @@ -234,9 +233,22 @@ def _get_all_page_restrictions( confluence_client=confluence_client, restrictions=perm_sync_data.get("restrictions", {}), ) + # if there are individual page-level restrictions, then this is the accurate + # restriction for the page. You cannot both have page-level restrictions AND + # inherit restrictions from the parent. + if bool(found_user_emails or found_group_names): + return ExternalAccess( + external_user_emails=found_user_emails, + external_user_group_ids=found_group_names, + is_public=False, + ) ancestors: list[dict[str, Any]] = perm_sync_data.get("ancestors", []) - for ancestor in ancestors: + # ancestors seem to be in order from root to immediate parent + # https://community.atlassian.com/forums/Confluence-questions/Order-of-ancestors-in-REST-API-response-Confluence-Server-amp/qaq-p/2385981 + # we want the restrictions from the immediate parent to take precedence, so we should + # reverse the list + for ancestor in reversed(ancestors): ancestor_user_emails, ancestor_group_names = _extract_read_access_restrictions( confluence_client=confluence_client, restrictions=ancestor.get("restrictions", {}), @@ -246,20 +258,18 @@ def _get_all_page_restrictions( # the page's restrictions, so we ignore it continue - found_user_emails.intersection_update(ancestor_user_emails) - found_group_names.intersection_update(ancestor_group_names) + # if inheriting restrictions from the parent, then the first one we run into + # should be applied (the reason why we'd traverse more than one ancestor is if + # the ancestor also is in "inherit" mode.) + if ancestor_user_emails or ancestor_group_names: + return ExternalAccess( + external_user_emails=ancestor_user_emails, + external_user_group_ids=ancestor_group_names, + is_public=False, + ) - # If there are no restrictions found, then the page - # inherits the space's restrictions so return None - if not found_user_emails and not found_group_names: - return None - - return ExternalAccess( - external_user_emails=found_user_emails, - external_user_group_ids=found_group_names, - # there is no way for a page to be individually public if the space isn't public - is_public=False, - ) + # we didn't find any restrictions, so the page inherits the space's restrictions + return None def _fetch_all_page_restrictions( diff --git a/backend/tests/daily/connectors/confluence/test_confluence_permissions_basic.py b/backend/tests/daily/connectors/confluence/test_confluence_permissions_basic.py index 6501f728539..9b0c15b0a6e 100644 --- a/backend/tests/daily/connectors/confluence/test_confluence_permissions_basic.py +++ b/backend/tests/daily/connectors/confluence/test_confluence_permissions_basic.py @@ -1,11 +1,15 @@ import os import time +from unittest.mock import MagicMock +from unittest.mock import patch import pytest +from ee.onyx.external_permissions.confluence.doc_sync import confluence_doc_sync from onyx.configs.constants import DocumentSource from onyx.connectors.confluence.connector import ConfluenceConnector from onyx.connectors.credentials_provider import OnyxStaticCredentialsProvider +from onyx.db.models import ConnectorCredentialPair from tests.daily.connectors.utils import load_all_docs_from_checkpoint_connector @@ -30,7 +34,6 @@ def confluence_connector() -> ConfluenceConnector: # This should never fail because even if the docs in the cloud change, # the full doc ids retrieved should always be a subset of the slim doc ids -@pytest.mark.skip(reason="Skipping this test") def test_confluence_connector_permissions( confluence_connector: ConfluenceConnector, ) -> None: @@ -53,3 +56,134 @@ def test_confluence_connector_permissions( assert all_full_doc_ids.issubset( all_slim_doc_ids ), f"Full doc IDs are not a subset of slim doc IDs. Found {len(difference)} IDs in full docs but not in slim docs." + + +@patch("ee.onyx.external_permissions.confluence.doc_sync.OnyxDBCredentialsProvider") +@patch( + "onyx.file_processing.extract_file_text.get_unstructured_api_key", + return_value=None, +) +def test_confluence_connector_restriction_handling( + mock_get_api_key: MagicMock, + mock_db_provider_class: MagicMock, +) -> None: + # Test space key + test_space_key = "DailyPermS" + + # Configure the mock provider instance that will be returned + mock_provider_instance = MagicMock() + mock_provider_instance.get_credentials.return_value = { + "confluence_username": os.environ["CONFLUENCE_USER_NAME"], + "confluence_access_token": os.environ["CONFLUENCE_ACCESS_TOKEN"], + } + # Make the class return our configured instance when called + mock_db_provider_class.return_value = mock_provider_instance + + # Mock the cc_pair to pass to the function + mock_cc_pair = MagicMock(spec=ConnectorCredentialPair) + # Mock the nested connector attribute and its config + mock_cc_pair.connector = MagicMock() + mock_cc_pair.connector.connector_specific_config = { + "wiki_base": "https://danswerai.atlassian.net", + "is_cloud": True, + "space": test_space_key, + } + # Set a mock credential ID + mock_cc_pair.credential_id = 1 + + # Call the confluence_doc_sync function directly with the mock cc_pair + doc_access_generator = confluence_doc_sync(mock_cc_pair, None) + doc_access_list = list(doc_access_generator) + assert len(doc_access_list) == 7 + assert all( + not doc_access.external_access.is_public for doc_access in doc_access_list + ) + + # if no restriction is applied, the groups should give access, so no need + # for more emails outside of the owner + non_restricted_emails = {"chris@onyx.app"} + non_restricted_user_groups = { + "confluence-admins-danswerai", + "org-admins", + "atlassian-addons-admin", + "confluence-users-danswerai", + } + + # if restriction is applied, only should be visible to shared users / groups + restricted_emails = {"chris@onyx.app", "hagen@danswer.ai"} + restricted_user_groups = {"confluence-admins-danswerai"} + + extra_restricted_emails = {"chris@onyx.app"} + extra_restricted_user_groups: set[str] = set() + + # note that this is only allowed since yuhong@onyx.app is a member of the + # confluence-admins-danswerai group + special_restricted_emails = {"chris@onyx.app", "yuhong@onyx.app"} + special_restricted_user_groups: set[str] = set() + + # Check Root+Page+2 is public + root_page_2 = next(d for d in doc_access_list if d.doc_id.endswith("Root+Page+2")) + assert root_page_2.external_access.external_user_emails == non_restricted_emails + assert ( + root_page_2.external_access.external_user_group_ids + == non_restricted_user_groups + ) + + # Check Overview page is public + overview_page = next( + d for d in doc_access_list if d.doc_id.lower().endswith("overview") + ) + assert ( + overview_page.external_access.external_user_emails == non_restricted_emails + ), "Overview page emails do not match expected values" + assert ( + overview_page.external_access.external_user_group_ids + == non_restricted_user_groups + ), "Overview page groups do not match expected values" + + # check root page is restricted + root_page = next(d for d in doc_access_list if d.doc_id.endswith("Root+Page")) + assert ( + root_page.external_access.external_user_emails == restricted_emails + ), "Root page emails do not match expected values" + assert ( + root_page.external_access.external_user_group_ids == restricted_user_groups + ), "Root page groups do not match expected values" + + # check child page has restriction propagated + child_page = next(d for d in doc_access_list if d.doc_id.endswith("Child+Page")) + assert ( + child_page.external_access.external_user_emails == restricted_emails + ), "Child page emails do not match expected values" + assert ( + child_page.external_access.external_user_group_ids == restricted_user_groups + ), "Child page groups do not match expected values" + + # check doubly nested child page has restriction propagated + child_page_2 = next(d for d in doc_access_list if d.doc_id.endswith("Child+Page+2")) + assert ( + child_page_2.external_access.external_user_emails == restricted_emails + ), "Child page 2 emails do not match expected values" + assert ( + child_page_2.external_access.external_user_group_ids == restricted_user_groups + ), "Child page 2 groups do not match expected values" + + # check child page w/ specific restrictions have those applied + child_page_3 = next(d for d in doc_access_list if d.doc_id.endswith("Child+Page+3")) + assert ( + child_page_3.external_access.external_user_emails == extra_restricted_emails + ), "Child page 3 emails do not match expected values" + assert ( + child_page_3.external_access.external_user_group_ids + == extra_restricted_user_groups + ), "Child page 3 groups do not match expected values" + + # check child page w/ specific restrictions have those applied + child_page_4 = next(d for d in doc_access_list if d.doc_id.endswith("Child+Page+4")) + assert ( + child_page_4.external_access.external_user_emails == special_restricted_emails + ), "Child page 4 emails do not match expected values" + assert ( + child_page_4.external_access.external_user_group_ids + == special_restricted_user_groups + ), "Child page 4 groups do not match expected values"