Fix issue with Confluence errors not being ignored

This commit is contained in:
Weves 2023-08-21 08:47:53 -07:00 committed by Chris Weaver
parent 9f1898c384
commit e5352b6af8

View File

@@ -3,6 +3,7 @@ from collections.abc import Collection
from datetime import datetime from datetime import datetime
from datetime import timezone from datetime import timezone
from typing import Any from typing import Any
from typing import cast
from urllib.parse import urlparse from urllib.parse import urlparse
from atlassian import Confluence # type:ignore from atlassian import Confluence # type:ignore
@@ -125,17 +126,19 @@ class ConfluenceConnector(LoadConnector, PollConnector):
return pages return pages
def _fetch_comments( def _fetch_comments(self, confluence_client: Confluence, page_id: str) -> str:
self, confluence_client: Confluence, page_id: str
) -> Collection[dict[str, Any]]:
try: try:
return confluence_client.get_page_child_by_type( comment_pages = cast(
page_id, Collection[dict[str, Any]],
type="comment", confluence_client.get_page_child_by_type(
start=None, page_id,
limit=None, type="comment",
expand="body.storage.value", start=None,
limit=None,
expand="body.storage.value",
),
) )
return _comment_dfs("", comment_pages, confluence_client)
except Exception as e: except Exception as e:
if not self.continue_on_failure: if not self.continue_on_failure:
raise e raise e
@@ -143,7 +146,7 @@ class ConfluenceConnector(LoadConnector, PollConnector):
logger.exception( logger.exception(
"Ran into exception when fetching comments from Confluence" "Ran into exception when fetching comments from Confluence"
) )
return [] return ""
def _get_doc_batch( def _get_doc_batch(
self, start_ind: int, time_filter: Callable[[datetime], bool] | None = None self, start_ind: int, time_filter: Callable[[datetime], bool] | None = None
@@ -163,8 +166,7 @@ class ConfluenceConnector(LoadConnector, PollConnector):
page_text = ( page_text = (
page.get("title", "") + "\n" + parse_html_page_basic(page_html) page.get("title", "") + "\n" + parse_html_page_basic(page_html)
) )
comment_pages = self._fetch_comments(self.confluence_client, page["id"]) comments_text = self._fetch_comments(self.confluence_client, page["id"])
comments_text = _comment_dfs("", comment_pages, self.confluence_client)
page_text += comments_text page_text += comments_text
page_url = self.wiki_base + page["_links"]["webui"] page_url = self.wiki_base + page["_links"]["webui"]