mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-08-02 05:02:48 +02:00
Fix confluence perm sync ancestry (#4536)
* Fix confluence perm sync ancestry * Address EL comments * add test for special case * remove print * Fix test
This commit is contained in:
@@ -220,10 +220,9 @@ def _get_all_page_restrictions(
|
||||
perm_sync_data: dict[str, Any],
|
||||
) -> ExternalAccess | None:
|
||||
"""
|
||||
This function gets the restrictions for a page by taking the intersection
|
||||
of the page's restrictions and the restrictions of all the ancestors
|
||||
of the page.
|
||||
If the page/ancestor has no restrictions, then it is ignored (no intersection).
|
||||
This function gets the restrictions for a page. In Confluence, a child can have
|
||||
at MOST the same level accessibility as its immediate parent.
|
||||
|
||||
If no restrictions are found anywhere, then return None, indicating that the page
|
||||
should inherit the space's restrictions.
|
||||
"""
|
||||
@@ -234,9 +233,22 @@ def _get_all_page_restrictions(
|
||||
confluence_client=confluence_client,
|
||||
restrictions=perm_sync_data.get("restrictions", {}),
|
||||
)
|
||||
# if there are individual page-level restrictions, then this is the accurate
|
||||
# restriction for the page. You cannot both have page-level restrictions AND
|
||||
# inherit restrictions from the parent.
|
||||
if bool(found_user_emails or found_group_names):
|
||||
return ExternalAccess(
|
||||
external_user_emails=found_user_emails,
|
||||
external_user_group_ids=found_group_names,
|
||||
is_public=False,
|
||||
)
|
||||
|
||||
ancestors: list[dict[str, Any]] = perm_sync_data.get("ancestors", [])
|
||||
for ancestor in ancestors:
|
||||
# ancestors seem to be in order from root to immediate parent
|
||||
# https://community.atlassian.com/forums/Confluence-questions/Order-of-ancestors-in-REST-API-response-Confluence-Server-amp/qaq-p/2385981
|
||||
# we want the restrictions from the immediate parent to take precedence, so we should
|
||||
# reverse the list
|
||||
for ancestor in reversed(ancestors):
|
||||
ancestor_user_emails, ancestor_group_names = _extract_read_access_restrictions(
|
||||
confluence_client=confluence_client,
|
||||
restrictions=ancestor.get("restrictions", {}),
|
||||
@@ -246,20 +258,18 @@ def _get_all_page_restrictions(
|
||||
# the page's restrictions, so we ignore it
|
||||
continue
|
||||
|
||||
found_user_emails.intersection_update(ancestor_user_emails)
|
||||
found_group_names.intersection_update(ancestor_group_names)
|
||||
# if inheriting restrictions from the parent, then the first one we run into
|
||||
# should be applied (the reason why we'd traverse more than one ancestor is if
|
||||
# the ancestor also is in "inherit" mode.)
|
||||
if ancestor_user_emails or ancestor_group_names:
|
||||
return ExternalAccess(
|
||||
external_user_emails=ancestor_user_emails,
|
||||
external_user_group_ids=ancestor_group_names,
|
||||
is_public=False,
|
||||
)
|
||||
|
||||
# If there are no restrictions found, then the page
|
||||
# inherits the space's restrictions so return None
|
||||
if not found_user_emails and not found_group_names:
|
||||
return None
|
||||
|
||||
return ExternalAccess(
|
||||
external_user_emails=found_user_emails,
|
||||
external_user_group_ids=found_group_names,
|
||||
# there is no way for a page to be individually public if the space isn't public
|
||||
is_public=False,
|
||||
)
|
||||
# we didn't find any restrictions, so the page inherits the space's restrictions
|
||||
return None
|
||||
|
||||
|
||||
def _fetch_all_page_restrictions(
|
||||
|
@@ -1,11 +1,15 @@
|
||||
import os
|
||||
import time
|
||||
from unittest.mock import MagicMock
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
from ee.onyx.external_permissions.confluence.doc_sync import confluence_doc_sync
|
||||
from onyx.configs.constants import DocumentSource
|
||||
from onyx.connectors.confluence.connector import ConfluenceConnector
|
||||
from onyx.connectors.credentials_provider import OnyxStaticCredentialsProvider
|
||||
from onyx.db.models import ConnectorCredentialPair
|
||||
from tests.daily.connectors.utils import load_all_docs_from_checkpoint_connector
|
||||
|
||||
|
||||
@@ -30,7 +34,6 @@ def confluence_connector() -> ConfluenceConnector:
|
||||
|
||||
# This should never fail because even if the docs in the cloud change,
|
||||
# the full doc ids retrieved should always be a subset of the slim doc ids
|
||||
@pytest.mark.skip(reason="Skipping this test")
|
||||
def test_confluence_connector_permissions(
|
||||
confluence_connector: ConfluenceConnector,
|
||||
) -> None:
|
||||
@@ -53,3 +56,134 @@ def test_confluence_connector_permissions(
|
||||
assert all_full_doc_ids.issubset(
|
||||
all_slim_doc_ids
|
||||
), f"Full doc IDs are not a subset of slim doc IDs. Found {len(difference)} IDs in full docs but not in slim docs."
|
||||
|
||||
|
||||
@patch("ee.onyx.external_permissions.confluence.doc_sync.OnyxDBCredentialsProvider")
|
||||
@patch(
|
||||
"onyx.file_processing.extract_file_text.get_unstructured_api_key",
|
||||
return_value=None,
|
||||
)
|
||||
def test_confluence_connector_restriction_handling(
|
||||
mock_get_api_key: MagicMock,
|
||||
mock_db_provider_class: MagicMock,
|
||||
) -> None:
|
||||
# Test space key
|
||||
test_space_key = "DailyPermS"
|
||||
|
||||
# Configure the mock provider instance that will be returned
|
||||
mock_provider_instance = MagicMock()
|
||||
mock_provider_instance.get_credentials.return_value = {
|
||||
"confluence_username": os.environ["CONFLUENCE_USER_NAME"],
|
||||
"confluence_access_token": os.environ["CONFLUENCE_ACCESS_TOKEN"],
|
||||
}
|
||||
# Make the class return our configured instance when called
|
||||
mock_db_provider_class.return_value = mock_provider_instance
|
||||
|
||||
# Mock the cc_pair to pass to the function
|
||||
mock_cc_pair = MagicMock(spec=ConnectorCredentialPair)
|
||||
# Mock the nested connector attribute and its config
|
||||
mock_cc_pair.connector = MagicMock()
|
||||
mock_cc_pair.connector.connector_specific_config = {
|
||||
"wiki_base": "https://danswerai.atlassian.net",
|
||||
"is_cloud": True,
|
||||
"space": test_space_key,
|
||||
}
|
||||
# Set a mock credential ID
|
||||
mock_cc_pair.credential_id = 1
|
||||
|
||||
# Call the confluence_doc_sync function directly with the mock cc_pair
|
||||
doc_access_generator = confluence_doc_sync(mock_cc_pair, None)
|
||||
doc_access_list = list(doc_access_generator)
|
||||
assert len(doc_access_list) == 7
|
||||
assert all(
|
||||
not doc_access.external_access.is_public for doc_access in doc_access_list
|
||||
)
|
||||
|
||||
# if no restriction is applied, the groups should give access, so no need
|
||||
# for more emails outside of the owner
|
||||
non_restricted_emails = {"chris@onyx.app"}
|
||||
non_restricted_user_groups = {
|
||||
"confluence-admins-danswerai",
|
||||
"org-admins",
|
||||
"atlassian-addons-admin",
|
||||
"confluence-users-danswerai",
|
||||
}
|
||||
|
||||
# if restriction is applied, only should be visible to shared users / groups
|
||||
restricted_emails = {"chris@onyx.app", "hagen@danswer.ai"}
|
||||
restricted_user_groups = {"confluence-admins-danswerai"}
|
||||
|
||||
extra_restricted_emails = {"chris@onyx.app"}
|
||||
extra_restricted_user_groups: set[str] = set()
|
||||
|
||||
# note that this is only allowed since yuhong@onyx.app is a member of the
|
||||
# confluence-admins-danswerai group
|
||||
special_restricted_emails = {"chris@onyx.app", "yuhong@onyx.app"}
|
||||
special_restricted_user_groups: set[str] = set()
|
||||
|
||||
# Check Root+Page+2 is public
|
||||
root_page_2 = next(d for d in doc_access_list if d.doc_id.endswith("Root+Page+2"))
|
||||
assert root_page_2.external_access.external_user_emails == non_restricted_emails
|
||||
assert (
|
||||
root_page_2.external_access.external_user_group_ids
|
||||
== non_restricted_user_groups
|
||||
)
|
||||
|
||||
# Check Overview page is public
|
||||
overview_page = next(
|
||||
d for d in doc_access_list if d.doc_id.lower().endswith("overview")
|
||||
)
|
||||
assert (
|
||||
overview_page.external_access.external_user_emails == non_restricted_emails
|
||||
), "Overview page emails do not match expected values"
|
||||
assert (
|
||||
overview_page.external_access.external_user_group_ids
|
||||
== non_restricted_user_groups
|
||||
), "Overview page groups do not match expected values"
|
||||
|
||||
# check root page is restricted
|
||||
root_page = next(d for d in doc_access_list if d.doc_id.endswith("Root+Page"))
|
||||
assert (
|
||||
root_page.external_access.external_user_emails == restricted_emails
|
||||
), "Root page emails do not match expected values"
|
||||
assert (
|
||||
root_page.external_access.external_user_group_ids == restricted_user_groups
|
||||
), "Root page groups do not match expected values"
|
||||
|
||||
# check child page has restriction propagated
|
||||
child_page = next(d for d in doc_access_list if d.doc_id.endswith("Child+Page"))
|
||||
assert (
|
||||
child_page.external_access.external_user_emails == restricted_emails
|
||||
), "Child page emails do not match expected values"
|
||||
assert (
|
||||
child_page.external_access.external_user_group_ids == restricted_user_groups
|
||||
), "Child page groups do not match expected values"
|
||||
|
||||
# check doubly nested child page has restriction propagated
|
||||
child_page_2 = next(d for d in doc_access_list if d.doc_id.endswith("Child+Page+2"))
|
||||
assert (
|
||||
child_page_2.external_access.external_user_emails == restricted_emails
|
||||
), "Child page 2 emails do not match expected values"
|
||||
assert (
|
||||
child_page_2.external_access.external_user_group_ids == restricted_user_groups
|
||||
), "Child page 2 groups do not match expected values"
|
||||
|
||||
# check child page w/ specific restrictions have those applied
|
||||
child_page_3 = next(d for d in doc_access_list if d.doc_id.endswith("Child+Page+3"))
|
||||
assert (
|
||||
child_page_3.external_access.external_user_emails == extra_restricted_emails
|
||||
), "Child page 3 emails do not match expected values"
|
||||
assert (
|
||||
child_page_3.external_access.external_user_group_ids
|
||||
== extra_restricted_user_groups
|
||||
), "Child page 3 groups do not match expected values"
|
||||
|
||||
# check child page w/ specific restrictions have those applied
|
||||
child_page_4 = next(d for d in doc_access_list if d.doc_id.endswith("Child+Page+4"))
|
||||
assert (
|
||||
child_page_4.external_access.external_user_emails == special_restricted_emails
|
||||
), "Child page 4 emails do not match expected values"
|
||||
assert (
|
||||
child_page_4.external_access.external_user_group_ids
|
||||
== special_restricted_user_groups
|
||||
), "Child page 4 groups do not match expected values"
|
||||
|
Reference in New Issue
Block a user