decreased confluence retry times and added more logging (#3184)

* decreased confluence retry times and added more logging

* added check on connector startup

* no retries!

* fr no retries
This commit is contained in:
hagen-danswer 2024-11-20 16:00:14 -08:00 committed by GitHub
parent 3dac24542b
commit 049091eb01
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 42 additions and 4 deletions

View File

@ -3,6 +3,8 @@ from datetime import timezone
from typing import Any
from urllib.parse import quote
from atlassian import Confluence # type: ignore
from danswer.configs.app_configs import CONFLUENCE_CONNECTOR_LABELS_TO_SKIP
from danswer.configs.app_configs import CONTINUE_ON_CONNECTOR_FAILURE
from danswer.configs.app_configs import INDEX_BATCH_SIZE
@ -116,6 +118,21 @@ class ConfluenceConnector(LoadConnector, PollConnector, SlimConnector):
is_cloud=self.is_cloud,
wiki_base=self.wiki_base,
)
client_without_retries = Confluence(
api_version="cloud" if self.is_cloud else "latest",
url=self.wiki_base.rstrip("/"),
username=credentials["confluence_username"] if self.is_cloud else None,
password=credentials["confluence_access_token"] if self.is_cloud else None,
token=credentials["confluence_access_token"] if not self.is_cloud else None,
)
spaces = client_without_retries.get_all_spaces(limit=1)
if not spaces:
raise RuntimeError(
f"No spaces found at {self.wiki_base}! "
"Check your credentials and wiki_base and make sure "
"is_cloud is set correctly."
)
return None
def _get_comment_string_for_page_id(self, page_id: str) -> str:

View File

@ -84,7 +84,7 @@ def handle_confluence_rate_limit(confluence_call: F) -> F:
def wrapped_call(*args: list[Any], **kwargs: Any) -> Any:
MAX_RETRIES = 5
TIMEOUT = 3600
TIMEOUT = 600
timeout_at = time.monotonic() + TIMEOUT
for attempt in range(MAX_RETRIES):
@ -99,6 +99,10 @@ def handle_confluence_rate_limit(confluence_call: F) -> F:
return confluence_call(*args, **kwargs)
except HTTPError as e:
delay_until = _handle_http_error(e, attempt)
logger.warning(
f"HTTPError in confluence call. "
f"Retrying in {delay_until} seconds..."
)
while time.monotonic() < delay_until:
# in the future, check a signal here to exit
time.sleep(1)

View File

@ -283,6 +283,6 @@ def build_confluence_client(
password=credentials_json["confluence_access_token"] if is_cloud else None,
token=credentials_json["confluence_access_token"] if not is_cloud else None,
backoff_and_retry=True,
max_backoff_retries=60,
max_backoff_retries=10,
max_backoff_seconds=60,
)

View File

@ -1,3 +1,5 @@
from atlassian import Confluence # type: ignore
from danswer.connectors.confluence.onyx_confluence import OnyxConfluence
from danswer.connectors.confluence.utils import build_confluence_client
from danswer.connectors.confluence.utils import get_user_email_from_username__server
@ -32,11 +34,26 @@ def _get_group_members_email_paginated(
def confluence_group_sync(
cc_pair: ConnectorCredentialPair,
) -> list[ExternalUserGroup]:
credentials = cc_pair.credential.credential_json
is_cloud = cc_pair.connector.connector_specific_config.get("is_cloud", False)
wiki_base = cc_pair.connector.connector_specific_config["wiki_base"]
# test connection with direct client, no retries
confluence_client = Confluence(
api_version="cloud" if is_cloud else "latest",
url=wiki_base.rstrip("/"),
username=credentials["confluence_username"] if is_cloud else None,
password=credentials["confluence_access_token"] if is_cloud else None,
token=credentials["confluence_access_token"] if not is_cloud else None,
)
spaces = confluence_client.get_all_spaces(limit=1)
if not spaces:
raise RuntimeError(f"No spaces found at {wiki_base}!")
confluence_client = build_confluence_client(
credentials_json=cc_pair.credential.credential_json,
credentials_json=credentials,
is_cloud=is_cloud,
wiki_base=cc_pair.connector.connector_specific_config["wiki_base"],
wiki_base=wiki_base,
)
# Get all group names