mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-05-11 20:30:07 +02:00
Add confluence rate limit handling (#1174)
This commit is contained in:
parent
563df1f952
commit
a7c820147e
@ -15,6 +15,9 @@ from danswer.configs.app_configs import CONFLUENCE_CONNECTOR_LABELS_TO_SKIP
|
||||
from danswer.configs.app_configs import CONTINUE_ON_CONNECTOR_FAILURE
|
||||
from danswer.configs.app_configs import INDEX_BATCH_SIZE
|
||||
from danswer.configs.constants import DocumentSource
|
||||
from danswer.connectors.confluence.rate_limit_handler import (
|
||||
make_confluence_call_handle_rate_limit,
|
||||
)
|
||||
from danswer.connectors.cross_connector_utils.html_utils import format_document_soup
|
||||
from danswer.connectors.interfaces import GenerateDocumentsOutput
|
||||
from danswer.connectors.interfaces import LoadConnector
|
||||
@ -100,10 +103,11 @@ def _get_user(user_id: str, confluence_client: Confluence) -> str:
|
||||
"""
|
||||
user_not_found = "Unknown User"
|
||||
|
||||
get_user_details_by_accountid = make_confluence_call_handle_rate_limit(
|
||||
confluence_client.get_user_details_by_accountid
|
||||
)
|
||||
try:
|
||||
return confluence_client.get_user_details_by_accountid(user_id).get(
|
||||
"displayName", user_not_found
|
||||
)
|
||||
return get_user_details_by_accountid(user_id).get("displayName", user_not_found)
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
f"Unable to get the User Display Name with the id: '{user_id}' - {e}"
|
||||
@ -144,12 +148,16 @@ def _comment_dfs(
|
||||
comment_pages: Collection[dict[str, Any]],
|
||||
confluence_client: Confluence,
|
||||
) -> str:
|
||||
get_page_child_by_type = make_confluence_call_handle_rate_limit(
|
||||
confluence_client.get_page_child_by_type
|
||||
)
|
||||
|
||||
for comment_page in comment_pages:
|
||||
comment_html = comment_page["body"]["storage"]["value"]
|
||||
comments_str += "\nComment:\n" + parse_html_page(
|
||||
comment_html, confluence_client
|
||||
)
|
||||
child_comment_pages = confluence_client.get_page_child_by_type(
|
||||
child_comment_pages = get_page_child_by_type(
|
||||
comment_page["id"],
|
||||
type="comment",
|
||||
start=None,
|
||||
@ -200,8 +208,11 @@ class ConfluenceConnector(LoadConnector, PollConnector):
|
||||
start_ind: int,
|
||||
) -> Collection[dict[str, Any]]:
|
||||
def _fetch(start_ind: int, batch_size: int) -> Collection[dict[str, Any]]:
|
||||
get_all_pages_from_space = make_confluence_call_handle_rate_limit(
|
||||
confluence_client.get_all_pages_from_space
|
||||
)
|
||||
try:
|
||||
return confluence_client.get_all_pages_from_space(
|
||||
return get_all_pages_from_space(
|
||||
self.space,
|
||||
start=start_ind,
|
||||
limit=batch_size,
|
||||
@ -219,7 +230,7 @@ class ConfluenceConnector(LoadConnector, PollConnector):
|
||||
# Could be that one of the pages here failed due to this bug:
|
||||
# https://jira.atlassian.com/browse/CONFCLOUD-76433
|
||||
view_pages.extend(
|
||||
confluence_client.get_all_pages_from_space(
|
||||
get_all_pages_from_space(
|
||||
self.space,
|
||||
start=start_ind + i,
|
||||
limit=1,
|
||||
@ -233,7 +244,7 @@ class ConfluenceConnector(LoadConnector, PollConnector):
|
||||
)
|
||||
# Use view instead, which captures most info but is less complete
|
||||
view_pages.extend(
|
||||
confluence_client.get_all_pages_from_space(
|
||||
get_all_pages_from_space(
|
||||
self.space,
|
||||
start=start_ind + i,
|
||||
limit=1,
|
||||
@ -262,10 +273,13 @@ class ConfluenceConnector(LoadConnector, PollConnector):
|
||||
return pages
|
||||
|
||||
def _fetch_comments(self, confluence_client: Confluence, page_id: str) -> str:
|
||||
get_page_child_by_type = make_confluence_call_handle_rate_limit(
|
||||
confluence_client.get_page_child_by_type
|
||||
)
|
||||
try:
|
||||
comment_pages = cast(
|
||||
Collection[dict[str, Any]],
|
||||
confluence_client.get_page_child_by_type(
|
||||
get_page_child_by_type(
|
||||
page_id,
|
||||
type="comment",
|
||||
start=None,
|
||||
@ -284,8 +298,11 @@ class ConfluenceConnector(LoadConnector, PollConnector):
|
||||
return ""
|
||||
|
||||
def _fetch_labels(self, confluence_client: Confluence, page_id: str) -> list[str]:
|
||||
get_page_labels = make_confluence_call_handle_rate_limit(
|
||||
confluence_client.get_page_labels
|
||||
)
|
||||
try:
|
||||
labels_response = confluence_client.get_page_labels(page_id)
|
||||
labels_response = get_page_labels(page_id)
|
||||
return [label["name"] for label in labels_response["results"]]
|
||||
except Exception as e:
|
||||
if not self.continue_on_failure:
|
||||
|
34
backend/danswer/connectors/confluence/rate_limit_handler.py
Normal file
34
backend/danswer/connectors/confluence/rate_limit_handler.py
Normal file
@ -0,0 +1,34 @@
|
||||
from collections.abc import Callable
|
||||
from typing import Any
|
||||
from typing import cast
|
||||
from typing import TypeVar
|
||||
|
||||
from requests import HTTPError
|
||||
from retry import retry
|
||||
|
||||
|
||||
F = TypeVar("F", bound=Callable[..., Any])
|
||||
|
||||
|
||||
class ConfluenceRateLimitError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
def make_confluence_call_handle_rate_limit(confluence_call: F) -> F:
|
||||
@retry(
|
||||
exceptions=ConfluenceRateLimitError,
|
||||
tries=10,
|
||||
delay=1,
|
||||
max_delay=600, # 10 minutes
|
||||
backoff=2,
|
||||
jitter=1,
|
||||
)
|
||||
def wrapped_call(*args: list[Any], **kwargs: Any) -> Any:
|
||||
try:
|
||||
return confluence_call(*args, **kwargs)
|
||||
except HTTPError as e:
|
||||
if e.response.status_code == 429:
|
||||
raise ConfluenceRateLimitError()
|
||||
raise
|
||||
|
||||
return cast(F, wrapped_call)
|
Loading…
x
Reference in New Issue
Block a user