jira daylight savings handling (#4797)

This commit is contained in:
Evan Lohn
2025-05-30 15:13:38 -04:00
committed by Weves
parent e78637d632
commit 71712df320
3 changed files with 27 additions and 7 deletions

View File

@@ -21,6 +21,9 @@ from onyx.connectors.confluence.utils import datetime_from_string
from onyx.connectors.confluence.utils import process_attachment from onyx.connectors.confluence.utils import process_attachment
from onyx.connectors.confluence.utils import update_param_in_path from onyx.connectors.confluence.utils import update_param_in_path
from onyx.connectors.confluence.utils import validate_attachment_filetype from onyx.connectors.confluence.utils import validate_attachment_filetype
from onyx.connectors.cross_connector_utils.miscellaneous_utils import (
is_atlassian_date_error,
)
from onyx.connectors.exceptions import ConnectorValidationError from onyx.connectors.exceptions import ConnectorValidationError
from onyx.connectors.exceptions import CredentialExpiredError from onyx.connectors.exceptions import CredentialExpiredError
from onyx.connectors.exceptions import InsufficientPermissionsError from onyx.connectors.exceptions import InsufficientPermissionsError
@@ -76,10 +79,6 @@ ONE_DAY = ONE_HOUR * 24
MAX_CACHED_IDS = 100 MAX_CACHED_IDS = 100
def _should_propagate_error(e: Exception) -> bool:
return "field 'updated' is invalid" in str(e)
class ConfluenceCheckpoint(ConnectorCheckpoint): class ConfluenceCheckpoint(ConnectorCheckpoint):
next_page_url: str | None next_page_url: str | None
@@ -367,7 +366,7 @@ class ConfluenceConnector(
) )
except Exception as e: except Exception as e:
logger.error(f"Error converting page {page.get('id', 'unknown')}: {e}") logger.error(f"Error converting page {page.get('id', 'unknown')}: {e}")
if _should_propagate_error(e): if is_atlassian_date_error(e): # propagate error to be caught and retried
raise raise
return ConnectorFailure( return ConnectorFailure(
failed_document=DocumentFailure( failed_document=DocumentFailure(
@@ -446,7 +445,9 @@ class ConfluenceConnector(
f"Failed to extract/summarize attachment {attachment['title']}", f"Failed to extract/summarize attachment {attachment['title']}",
exc_info=e, exc_info=e,
) )
if _should_propagate_error(e): if is_atlassian_date_error(
e
): # propagate error to be caught and retried
raise raise
return ConnectorFailure( return ConnectorFailure(
failed_document=DocumentFailure( failed_document=DocumentFailure(
@@ -536,7 +537,7 @@ class ConfluenceConnector(
try: try:
return self._fetch_document_batches(checkpoint, start, end) return self._fetch_document_batches(checkpoint, start, end)
except Exception as e: except Exception as e:
if _should_propagate_error(e) and start is not None: if is_atlassian_date_error(e) and start is not None:
logger.warning( logger.warning(
"Confluence says we provided an invalid 'updated' field. This may indicate" "Confluence says we provided an invalid 'updated' field. This may indicate"
"a real issue, but can also appear during edge cases like daylight" "a real issue, but can also appear during edge cases like daylight"

View File

@@ -86,3 +86,7 @@ def get_oauth_callback_uri(base_domain: str, connector_id: str) -> str:
# Used for development # Used for development
base_domain = CONNECTOR_LOCALHOST_OVERRIDE base_domain = CONNECTOR_LOCALHOST_OVERRIDE
return f"{base_domain.strip('/')}/connector/oauth/callback/{connector_id}" return f"{base_domain.strip('/')}/connector/oauth/callback/{connector_id}"
def is_atlassian_date_error(e: Exception) -> bool:
    """Return whether *e* reports an invalid ``updated`` field.

    The check is a plain substring match on ``str(e)``; the exception type is
    not inspected. Callers use the result to decide whether an error should be
    propagated and retried rather than recorded as a document failure.

    Args:
        e: The exception to inspect.

    Returns:
        True if ``"field 'updated' is invalid"`` occurs in ``str(e)``,
        otherwise False.
    """
    return "field 'updated' is invalid" in str(e)

View File

@@ -12,6 +12,9 @@ from onyx.configs.app_configs import INDEX_BATCH_SIZE
from onyx.configs.app_configs import JIRA_CONNECTOR_LABELS_TO_SKIP from onyx.configs.app_configs import JIRA_CONNECTOR_LABELS_TO_SKIP
from onyx.configs.app_configs import JIRA_CONNECTOR_MAX_TICKET_SIZE from onyx.configs.app_configs import JIRA_CONNECTOR_MAX_TICKET_SIZE
from onyx.configs.constants import DocumentSource from onyx.configs.constants import DocumentSource
from onyx.connectors.cross_connector_utils.miscellaneous_utils import (
is_atlassian_date_error,
)
from onyx.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc from onyx.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc
from onyx.connectors.exceptions import ConnectorValidationError from onyx.connectors.exceptions import ConnectorValidationError
from onyx.connectors.exceptions import CredentialExpiredError from onyx.connectors.exceptions import CredentialExpiredError
@@ -40,6 +43,8 @@ from onyx.utils.logger import setup_logger
logger = setup_logger() logger = setup_logger()
ONE_HOUR = 3600
JIRA_API_VERSION = os.environ.get("JIRA_API_VERSION") or "2" JIRA_API_VERSION = os.environ.get("JIRA_API_VERSION") or "2"
_JIRA_SLIM_PAGE_SIZE = 500 _JIRA_SLIM_PAGE_SIZE = 500
_JIRA_FULL_PAGE_SIZE = 50 _JIRA_FULL_PAGE_SIZE = 50
@@ -240,7 +245,17 @@ class JiraConnector(CheckpointedConnector[JiraConnectorCheckpoint], SlimConnecto
checkpoint: JiraConnectorCheckpoint, checkpoint: JiraConnectorCheckpoint,
) -> CheckpointOutput[JiraConnectorCheckpoint]: ) -> CheckpointOutput[JiraConnectorCheckpoint]:
jql = self._get_jql_query(start, end) jql = self._get_jql_query(start, end)
try:
return self._load_from_checkpoint(jql, checkpoint)
except Exception as e:
if is_atlassian_date_error(e):
jql = self._get_jql_query(start - ONE_HOUR, end)
return self._load_from_checkpoint(jql, checkpoint)
raise e
def _load_from_checkpoint(
self, jql: str, checkpoint: JiraConnectorCheckpoint
) -> CheckpointOutput[JiraConnectorCheckpoint]:
# Get the current offset from checkpoint or start at 0 # Get the current offset from checkpoint or start at 0
starting_offset = checkpoint.offset or 0 starting_offset = checkpoint.offset or 0
current_offset = starting_offset current_offset = starting_offset