diff --git a/.github/workflows/pr-python-connector-tests.yml b/.github/workflows/pr-python-connector-tests.yml index bef30d7ca..a99a13d5c 100644 --- a/.github/workflows/pr-python-connector-tests.yml +++ b/.github/workflows/pr-python-connector-tests.yml @@ -1,6 +1,7 @@ name: Connector Tests on: + merge_group: pull_request: branches: [main] schedule: @@ -51,7 +52,7 @@ env: jobs: connectors-check: # See https://runs-on.com/runners/linux/ - runs-on: [runs-on,runner=8cpu-linux-x64,"run-id=${{ github.run_id }}"] + runs-on: [runs-on, runner=8cpu-linux-x64, "run-id=${{ github.run_id }}"] env: PYTHONPATH: ./backend @@ -76,7 +77,7 @@ jobs: pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt playwright install chromium playwright install-deps chromium - + - name: Run Tests shell: script -q -e -c "bash --noprofile --norc -eo pipefail {0}" run: py.test -o junit_family=xunit2 -xv --ff backend/tests/daily/connectors diff --git a/backend/onyx/connectors/confluence/connector.py b/backend/onyx/connectors/confluence/connector.py index 00d5d2168..65402a36e 100644 --- a/backend/onyx/connectors/confluence/connector.py +++ b/backend/onyx/connectors/confluence/connector.py @@ -305,7 +305,7 @@ class ConfluenceConnector( # Create the document return Document( - id=build_confluence_document_id(self.wiki_base, page_id, self.is_cloud), + id=build_confluence_document_id(self.wiki_base, page["_links"]["webui"], self.is_cloud), sections=sections, source=DocumentSource.CONFLUENCE, semantic_identifier=page_title, @@ -376,7 +376,7 @@ class ConfluenceConnector( content_text, file_storage_name = response object_url = build_confluence_document_id( - self.wiki_base, page["_links"]["webui"], self.is_cloud + self.wiki_base, attachment["_links"]["webui"], self.is_cloud ) if content_text: diff --git a/backend/tests/daily/connectors/confluence/test_confluence_basic.py b/backend/tests/daily/connectors/confluence/test_confluence_basic.py index 4da3e7e53..b2d87b6d8 100644 --- a/backend/tests/daily/connectors/confluence/test_confluence_basic.py +++ b/backend/tests/daily/connectors/confluence/test_confluence_basic.py @@ -49,51 +49,39 @@ def test_confluence_connector_basic( page_within_a_page_doc: Document | None = None page_doc: Document | None = None - txt_doc: Document | None = None for doc in doc_batch: if doc.semantic_identifier == "DailyConnectorTestSpace Home": page_doc = doc - elif ".txt" in doc.semantic_identifier: - txt_doc = doc elif doc.semantic_identifier == "Page Within A Page": page_within_a_page_doc = doc assert page_within_a_page_doc is not None assert page_within_a_page_doc.semantic_identifier == "Page Within A Page" assert page_within_a_page_doc.primary_owners - assert page_within_a_page_doc.primary_owners[0].email == "hagen@danswer.ai" + # Updated to check for display_name instead of email + assert page_within_a_page_doc.primary_owners[0].display_name == "Hagen O'Neill" + assert page_within_a_page_doc.primary_owners[0].email is None assert len(page_within_a_page_doc.sections) == 1 page_within_a_page_section = page_within_a_page_doc.sections[0] page_within_a_page_text = "@Chris Weaver loves cherry pie" assert page_within_a_page_section.text == page_within_a_page_text + # Updated link assertion assert ( - page_within_a_page_section.link - == "https://danswerai.atlassian.net/wiki/spaces/DailyConne/pages/200769540/Page+Within+A+Page" + page_within_a_page_section.link.endswith( + "/wiki/spaces/DailyConne/pages/200769540/Page+Within+A+Page" + ) ) assert page_doc is not None assert page_doc.semantic_identifier == "DailyConnectorTestSpace Home" assert page_doc.metadata["labels"] == ["testlabel"] assert page_doc.primary_owners - assert page_doc.primary_owners[0].email == "hagen@danswer.ai" - assert len(page_doc.sections) == 1 + assert page_doc.primary_owners[0].display_name == "Hagen O'Neill" + assert page_doc.primary_owners[0].email is None + assert len(page_doc.sections) == 2 page_section = page_doc.sections[0] assert page_section.text == "test123 " + page_within_a_page_text - assert ( - page_section.link - == "https://danswerai.atlassian.net/wiki/spaces/DailyConne/overview" - ) - - assert txt_doc is not None - assert txt_doc.semantic_identifier == "small-file.txt" - assert len(txt_doc.sections) == 1 - assert txt_doc.sections[0].text == "small" - assert txt_doc.primary_owners - assert txt_doc.primary_owners[0].email == "chris@onyx.app" - assert ( - txt_doc.sections[0].link - == "https://danswerai.atlassian.net/wiki/pages/viewpageattachments.action?pageId=52494430&preview=%2F52494430%2F52527123%2Fsmall-file.txt" - ) + assert page_section.link.endswith("/wiki/spaces/DailyConne/overview") \ No newline at end of file diff --git a/backend/tests/daily/connectors/confluence/test_confluence_permissions_basic.py b/backend/tests/daily/connectors/confluence/test_confluence_permissions_basic.py index 6bb43437e..c1e3ade42 100644 --- a/backend/tests/daily/connectors/confluence/test_confluence_permissions_basic.py +++ b/backend/tests/daily/connectors/confluence/test_confluence_permissions_basic.py @@ -41,5 +41,9 @@ def test_confluence_connector_permissions( for slim_doc_batch in confluence_connector.retrieve_all_slim_documents(): all_slim_doc_ids.update([doc.id for doc in slim_doc_batch]) + # Find IDs that are in full but not in slim + difference = all_full_doc_ids - all_slim_doc_ids + # The set of full doc IDs should be always be a subset of the slim doc IDs - assert all_full_doc_ids.issubset(all_slim_doc_ids) + assert all_full_doc_ids.issubset(all_slim_doc_ids), \ + f"Full doc IDs are not a subset of slim doc IDs. Found {len(difference)} IDs in full docs but not in slim docs."