mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-09-29 05:15:12 +02:00
jira daylight savings handling (#4797)
This commit is contained in:
@@ -21,6 +21,9 @@ from onyx.connectors.confluence.utils import datetime_from_string
|
||||
from onyx.connectors.confluence.utils import process_attachment
|
||||
from onyx.connectors.confluence.utils import update_param_in_path
|
||||
from onyx.connectors.confluence.utils import validate_attachment_filetype
|
||||
from onyx.connectors.cross_connector_utils.miscellaneous_utils import (
|
||||
is_atlassian_date_error,
|
||||
)
|
||||
from onyx.connectors.exceptions import ConnectorValidationError
|
||||
from onyx.connectors.exceptions import CredentialExpiredError
|
||||
from onyx.connectors.exceptions import InsufficientPermissionsError
|
||||
@@ -76,10 +79,6 @@ ONE_DAY = ONE_HOUR * 24
|
||||
MAX_CACHED_IDS = 100
|
||||
|
||||
|
||||
def _should_propagate_error(e: Exception) -> bool:
|
||||
return "field 'updated' is invalid" in str(e)
|
||||
|
||||
|
||||
class ConfluenceCheckpoint(ConnectorCheckpoint):
|
||||
|
||||
next_page_url: str | None
|
||||
@@ -367,7 +366,7 @@ class ConfluenceConnector(
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Error converting page {page.get('id', 'unknown')}: {e}")
|
||||
if _should_propagate_error(e):
|
||||
if is_atlassian_date_error(e): # propagate error to be caught and retried
|
||||
raise
|
||||
return ConnectorFailure(
|
||||
failed_document=DocumentFailure(
|
||||
@@ -446,7 +445,9 @@ class ConfluenceConnector(
|
||||
f"Failed to extract/summarize attachment {attachment['title']}",
|
||||
exc_info=e,
|
||||
)
|
||||
if _should_propagate_error(e):
|
||||
if is_atlassian_date_error(
|
||||
e
|
||||
): # propagate error to be caught and retried
|
||||
raise
|
||||
return ConnectorFailure(
|
||||
failed_document=DocumentFailure(
|
||||
@@ -536,7 +537,7 @@ class ConfluenceConnector(
|
||||
try:
|
||||
return self._fetch_document_batches(checkpoint, start, end)
|
||||
except Exception as e:
|
||||
if _should_propagate_error(e) and start is not None:
|
||||
if is_atlassian_date_error(e) and start is not None:
|
||||
logger.warning(
|
||||
"Confluence says we provided an invalid 'updated' field. This may indicate"
|
||||
"a real issue, but can also appear during edge cases like daylight"
|
||||
|
@@ -86,3 +86,7 @@ def get_oauth_callback_uri(base_domain: str, connector_id: str) -> str:
|
||||
# Used for development
|
||||
base_domain = CONNECTOR_LOCALHOST_OVERRIDE
|
||||
return f"{base_domain.strip('/')}/connector/oauth/callback/{connector_id}"
|
||||
|
||||
|
||||
def is_atlassian_date_error(e: Exception) -> bool:
|
||||
return "field 'updated' is invalid" in str(e)
|
||||
|
@@ -12,6 +12,9 @@ from onyx.configs.app_configs import INDEX_BATCH_SIZE
|
||||
from onyx.configs.app_configs import JIRA_CONNECTOR_LABELS_TO_SKIP
|
||||
from onyx.configs.app_configs import JIRA_CONNECTOR_MAX_TICKET_SIZE
|
||||
from onyx.configs.constants import DocumentSource
|
||||
from onyx.connectors.cross_connector_utils.miscellaneous_utils import (
|
||||
is_atlassian_date_error,
|
||||
)
|
||||
from onyx.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc
|
||||
from onyx.connectors.exceptions import ConnectorValidationError
|
||||
from onyx.connectors.exceptions import CredentialExpiredError
|
||||
@@ -40,6 +43,8 @@ from onyx.utils.logger import setup_logger
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
ONE_HOUR = 3600
|
||||
|
||||
JIRA_API_VERSION = os.environ.get("JIRA_API_VERSION") or "2"
|
||||
_JIRA_SLIM_PAGE_SIZE = 500
|
||||
_JIRA_FULL_PAGE_SIZE = 50
|
||||
@@ -240,7 +245,17 @@ class JiraConnector(CheckpointedConnector[JiraConnectorCheckpoint], SlimConnecto
|
||||
checkpoint: JiraConnectorCheckpoint,
|
||||
) -> CheckpointOutput[JiraConnectorCheckpoint]:
|
||||
jql = self._get_jql_query(start, end)
|
||||
try:
|
||||
return self._load_from_checkpoint(jql, checkpoint)
|
||||
except Exception as e:
|
||||
if is_atlassian_date_error(e):
|
||||
jql = self._get_jql_query(start - ONE_HOUR, end)
|
||||
return self._load_from_checkpoint(jql, checkpoint)
|
||||
raise e
|
||||
|
||||
def _load_from_checkpoint(
|
||||
self, jql: str, checkpoint: JiraConnectorCheckpoint
|
||||
) -> CheckpointOutput[JiraConnectorCheckpoint]:
|
||||
# Get the current offset from checkpoint or start at 0
|
||||
starting_offset = checkpoint.offset or 0
|
||||
current_offset = starting_offset
|
||||
|
Reference in New Issue
Block a user