Fix Connector tests (confluence) (#4221)

This commit is contained in:
pablonyx 2025-03-06 17:00:01 -08:00 committed by GitHub
parent 798e10c52f
commit a32f7dc936
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 21 additions and 28 deletions

View File

@ -1,6 +1,7 @@
name: Connector Tests
on:
merge_group:
pull_request:
branches: [main]
schedule:
@ -51,7 +52,7 @@ env:
jobs:
connectors-check:
# See https://runs-on.com/runners/linux/
runs-on: [runs-on,runner=8cpu-linux-x64,"run-id=${{ github.run_id }}"]
runs-on: [runs-on, runner=8cpu-linux-x64, "run-id=${{ github.run_id }}"]
env:
PYTHONPATH: ./backend
@ -76,7 +77,7 @@ jobs:
pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
playwright install chromium
playwright install-deps chromium
- name: Run Tests
shell: script -q -e -c "bash --noprofile --norc -eo pipefail {0}"
run: py.test -o junit_family=xunit2 -xv --ff backend/tests/daily/connectors

View File

@ -305,7 +305,7 @@ class ConfluenceConnector(
# Create the document
return Document(
id=build_confluence_document_id(self.wiki_base, page_id, self.is_cloud),
id=build_confluence_document_id(self.wiki_base, page["_links"]["webui"], self.is_cloud),
sections=sections,
source=DocumentSource.CONFLUENCE,
semantic_identifier=page_title,
@ -376,7 +376,7 @@ class ConfluenceConnector(
content_text, file_storage_name = response
object_url = build_confluence_document_id(
self.wiki_base, page["_links"]["webui"], self.is_cloud
self.wiki_base, attachment["_links"]["webui"], self.is_cloud
)
if content_text:

View File

@ -49,51 +49,39 @@ def test_confluence_connector_basic(
page_within_a_page_doc: Document | None = None
page_doc: Document | None = None
txt_doc: Document | None = None
for doc in doc_batch:
if doc.semantic_identifier == "DailyConnectorTestSpace Home":
page_doc = doc
elif ".txt" in doc.semantic_identifier:
txt_doc = doc
elif doc.semantic_identifier == "Page Within A Page":
page_within_a_page_doc = doc
assert page_within_a_page_doc is not None
assert page_within_a_page_doc.semantic_identifier == "Page Within A Page"
assert page_within_a_page_doc.primary_owners
assert page_within_a_page_doc.primary_owners[0].email == "hagen@danswer.ai"
# Updated to check for display_name instead of email
assert page_within_a_page_doc.primary_owners[0].display_name == "Hagen O'Neill"
assert page_within_a_page_doc.primary_owners[0].email is None
assert len(page_within_a_page_doc.sections) == 1
page_within_a_page_section = page_within_a_page_doc.sections[0]
page_within_a_page_text = "@Chris Weaver loves cherry pie"
assert page_within_a_page_section.text == page_within_a_page_text
# Updated link assertion
assert (
page_within_a_page_section.link
== "https://danswerai.atlassian.net/wiki/spaces/DailyConne/pages/200769540/Page+Within+A+Page"
page_within_a_page_section.link.endswith(
"/wiki/spaces/DailyConne/pages/200769540/Page+Within+A+Page"
)
)
assert page_doc is not None
assert page_doc.semantic_identifier == "DailyConnectorTestSpace Home"
assert page_doc.metadata["labels"] == ["testlabel"]
assert page_doc.primary_owners
assert page_doc.primary_owners[0].email == "hagen@danswer.ai"
assert len(page_doc.sections) == 1
assert page_doc.primary_owners[0].display_name == "Hagen O'Neill"
assert page_doc.primary_owners[0].email is None
assert len(page_doc.sections) == 2
page_section = page_doc.sections[0]
assert page_section.text == "test123 " + page_within_a_page_text
assert (
page_section.link
== "https://danswerai.atlassian.net/wiki/spaces/DailyConne/overview"
)
assert txt_doc is not None
assert txt_doc.semantic_identifier == "small-file.txt"
assert len(txt_doc.sections) == 1
assert txt_doc.sections[0].text == "small"
assert txt_doc.primary_owners
assert txt_doc.primary_owners[0].email == "chris@onyx.app"
assert (
txt_doc.sections[0].link
== "https://danswerai.atlassian.net/wiki/pages/viewpageattachments.action?pageId=52494430&preview=%2F52494430%2F52527123%2Fsmall-file.txt"
)
assert page_section.link.endswith("/wiki/spaces/DailyConne/overview")

View File

@ -41,5 +41,9 @@ def test_confluence_connector_permissions(
for slim_doc_batch in confluence_connector.retrieve_all_slim_documents():
all_slim_doc_ids.update([doc.id for doc in slim_doc_batch])
# Find IDs that are in full but not in slim
difference = all_full_doc_ids - all_slim_doc_ids
# The set of full doc IDs should always be a subset of the slim doc IDs
assert all_full_doc_ids.issubset(all_slim_doc_ids)
assert all_full_doc_ids.issubset(all_slim_doc_ids), \
f"Full doc IDs are not a subset of slim doc IDs. Found {len(difference)} IDs in full docs but not in slim docs."