From 71712df3202bb6a31d20756cbcaf972b8bfac09e Mon Sep 17 00:00:00 2001 From: Evan Lohn Date: Fri, 30 May 2025 15:13:38 -0400 Subject: [PATCH] jira daylight savings handling (#4797) --- backend/onyx/connectors/confluence/connector.py | 15 ++++++++------- .../cross_connector_utils/miscellaneous_utils.py | 4 ++++ backend/onyx/connectors/onyx_jira/connector.py | 15 +++++++++++++++ 3 files changed, 27 insertions(+), 7 deletions(-) diff --git a/backend/onyx/connectors/confluence/connector.py b/backend/onyx/connectors/confluence/connector.py index 208c3f5ae523..503cab542a65 100644 --- a/backend/onyx/connectors/confluence/connector.py +++ b/backend/onyx/connectors/confluence/connector.py @@ -21,6 +21,9 @@ from onyx.connectors.confluence.utils import datetime_from_string from onyx.connectors.confluence.utils import process_attachment from onyx.connectors.confluence.utils import update_param_in_path from onyx.connectors.confluence.utils import validate_attachment_filetype +from onyx.connectors.cross_connector_utils.miscellaneous_utils import ( + is_atlassian_date_error, +) from onyx.connectors.exceptions import ConnectorValidationError from onyx.connectors.exceptions import CredentialExpiredError from onyx.connectors.exceptions import InsufficientPermissionsError @@ -76,10 +79,6 @@ ONE_DAY = ONE_HOUR * 24 MAX_CACHED_IDS = 100 -def _should_propagate_error(e: Exception) -> bool: - return "field 'updated' is invalid" in str(e) - - class ConfluenceCheckpoint(ConnectorCheckpoint): next_page_url: str | None @@ -367,7 +366,7 @@ class ConfluenceConnector( ) except Exception as e: logger.error(f"Error converting page {page.get('id', 'unknown')}: {e}") - if _should_propagate_error(e): + if is_atlassian_date_error(e): # propagate error to be caught and retried raise return ConnectorFailure( failed_document=DocumentFailure( @@ -446,7 +445,9 @@ class ConfluenceConnector( f"Failed to extract/summarize attachment {attachment['title']}", exc_info=e, ) - if _should_propagate_error(e): + if is_atlassian_date_error( + e + ): # propagate error to be caught and retried raise return ConnectorFailure( failed_document=DocumentFailure( @@ -536,7 +537,7 @@ class ConfluenceConnector( try: return self._fetch_document_batches(checkpoint, start, end) except Exception as e: - if _should_propagate_error(e) and start is not None: + if is_atlassian_date_error(e) and start is not None: logger.warning( "Confluence says we provided an invalid 'updated' field. This may indicate" "a real issue, but can also appear during edge cases like daylight" diff --git a/backend/onyx/connectors/cross_connector_utils/miscellaneous_utils.py b/backend/onyx/connectors/cross_connector_utils/miscellaneous_utils.py index d32c4847466c..6eb82ea5f733 100644 --- a/backend/onyx/connectors/cross_connector_utils/miscellaneous_utils.py +++ b/backend/onyx/connectors/cross_connector_utils/miscellaneous_utils.py @@ -86,3 +86,7 @@ def get_oauth_callback_uri(base_domain: str, connector_id: str) -> str: # Used for development base_domain = CONNECTOR_LOCALHOST_OVERRIDE return f"{base_domain.strip('/')}/connector/oauth/callback/{connector_id}" + + +def is_atlassian_date_error(e: Exception) -> bool: + return "field 'updated' is invalid" in str(e) diff --git a/backend/onyx/connectors/onyx_jira/connector.py b/backend/onyx/connectors/onyx_jira/connector.py index 3e29d3acd4f0..16bb271463e6 100644 --- a/backend/onyx/connectors/onyx_jira/connector.py +++ b/backend/onyx/connectors/onyx_jira/connector.py @@ -12,6 +12,9 @@ from onyx.configs.app_configs import INDEX_BATCH_SIZE from onyx.configs.app_configs import JIRA_CONNECTOR_LABELS_TO_SKIP from onyx.configs.app_configs import JIRA_CONNECTOR_MAX_TICKET_SIZE from onyx.configs.constants import DocumentSource +from onyx.connectors.cross_connector_utils.miscellaneous_utils import ( + is_atlassian_date_error, +) from onyx.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc from onyx.connectors.exceptions import ConnectorValidationError from onyx.connectors.exceptions import CredentialExpiredError @@ -40,6 +43,8 @@ from onyx.utils.logger import setup_logger logger = setup_logger() +ONE_HOUR = 3600 + JIRA_API_VERSION = os.environ.get("JIRA_API_VERSION") or "2" _JIRA_SLIM_PAGE_SIZE = 500 _JIRA_FULL_PAGE_SIZE = 50 @@ -240,7 +245,17 @@ class JiraConnector(CheckpointedConnector[JiraConnectorCheckpoint], SlimConnecto checkpoint: JiraConnectorCheckpoint, ) -> CheckpointOutput[JiraConnectorCheckpoint]: jql = self._get_jql_query(start, end) + try: + return self._load_from_checkpoint(jql, checkpoint) + except Exception as e: + if is_atlassian_date_error(e): + jql = self._get_jql_query(start - ONE_HOUR, end) + return self._load_from_checkpoint(jql, checkpoint) + raise e + def _load_from_checkpoint( + self, jql: str, checkpoint: JiraConnectorCheckpoint + ) -> CheckpointOutput[JiraConnectorCheckpoint]: # Get the current offset from checkpoint or start at 0 starting_offset = checkpoint.offset or 0 current_offset = starting_offset