Jira daylight saving time handling (#4797)

This commit is contained in:
Evan Lohn
2025-05-30 15:13:38 -04:00
committed by Weves
parent e78637d632
commit 71712df320
3 changed files with 27 additions and 7 deletions

View File

@@ -21,6 +21,9 @@ from onyx.connectors.confluence.utils import datetime_from_string
from onyx.connectors.confluence.utils import process_attachment
from onyx.connectors.confluence.utils import update_param_in_path
from onyx.connectors.confluence.utils import validate_attachment_filetype
from onyx.connectors.cross_connector_utils.miscellaneous_utils import (
is_atlassian_date_error,
)
from onyx.connectors.exceptions import ConnectorValidationError
from onyx.connectors.exceptions import CredentialExpiredError
from onyx.connectors.exceptions import InsufficientPermissionsError
@@ -76,10 +79,6 @@ ONE_DAY = ONE_HOUR * 24
MAX_CACHED_IDS = 100
def _should_propagate_error(e: Exception) -> bool:
    """Report whether ``e`` mentions an invalid 'updated' field.

    The decision is purely textual: the result is True exactly when the
    string form of the exception contains ``field 'updated' is invalid``
    (case-sensitive). The exception's type is not considered.
    """
    error_text = str(e)
    invalid_updated_marker = "field 'updated' is invalid"
    return invalid_updated_marker in error_text
class ConfluenceCheckpoint(ConnectorCheckpoint):
next_page_url: str | None
@@ -367,7 +366,7 @@ class ConfluenceConnector(
)
except Exception as e:
logger.error(f"Error converting page {page.get('id', 'unknown')}: {e}")
if _should_propagate_error(e):
if is_atlassian_date_error(e): # propagate error to be caught and retried
raise
return ConnectorFailure(
failed_document=DocumentFailure(
@@ -446,7 +445,9 @@ class ConfluenceConnector(
f"Failed to extract/summarize attachment {attachment['title']}",
exc_info=e,
)
if _should_propagate_error(e):
if is_atlassian_date_error(
e
): # propagate error to be caught and retried
raise
return ConnectorFailure(
failed_document=DocumentFailure(
@@ -536,7 +537,7 @@ class ConfluenceConnector(
try:
return self._fetch_document_batches(checkpoint, start, end)
except Exception as e:
if _should_propagate_error(e) and start is not None:
if is_atlassian_date_error(e) and start is not None:
logger.warning(
"Confluence says we provided an invalid 'updated' field. This may indicate"
"a real issue, but can also appear during edge cases like daylight"

View File

@@ -86,3 +86,7 @@ def get_oauth_callback_uri(base_domain: str, connector_id: str) -> str:
# Used for development
base_domain = CONNECTOR_LOCALHOST_OVERRIDE
return f"{base_domain.strip('/')}/connector/oauth/callback/{connector_id}"
def is_atlassian_date_error(e: Exception) -> bool:
    """Tell whether ``e`` carries the "field 'updated' is invalid" message.

    Only the exception's string form is inspected. The match is a
    case-sensitive substring test, so any exception type whose message
    contains that phrase is reported as a date error.
    """
    rendered = str(e)
    return rendered.find("field 'updated' is invalid") >= 0

View File

@@ -12,6 +12,9 @@ from onyx.configs.app_configs import INDEX_BATCH_SIZE
from onyx.configs.app_configs import JIRA_CONNECTOR_LABELS_TO_SKIP
from onyx.configs.app_configs import JIRA_CONNECTOR_MAX_TICKET_SIZE
from onyx.configs.constants import DocumentSource
from onyx.connectors.cross_connector_utils.miscellaneous_utils import (
is_atlassian_date_error,
)
from onyx.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc
from onyx.connectors.exceptions import ConnectorValidationError
from onyx.connectors.exceptions import CredentialExpiredError
@@ -40,6 +43,8 @@ from onyx.utils.logger import setup_logger
logger = setup_logger()
ONE_HOUR = 3600
JIRA_API_VERSION = os.environ.get("JIRA_API_VERSION") or "2"
_JIRA_SLIM_PAGE_SIZE = 500
_JIRA_FULL_PAGE_SIZE = 50
@@ -240,7 +245,17 @@ class JiraConnector(CheckpointedConnector[JiraConnectorCheckpoint], SlimConnecto
checkpoint: JiraConnectorCheckpoint,
) -> CheckpointOutput[JiraConnectorCheckpoint]:
jql = self._get_jql_query(start, end)
try:
return self._load_from_checkpoint(jql, checkpoint)
except Exception as e:
if is_atlassian_date_error(e):
jql = self._get_jql_query(start - ONE_HOUR, end)
return self._load_from_checkpoint(jql, checkpoint)
raise e
def _load_from_checkpoint(
self, jql: str, checkpoint: JiraConnectorCheckpoint
) -> CheckpointOutput[JiraConnectorCheckpoint]:
# Get the current offset from checkpoint or start at 0
starting_offset = checkpoint.offset or 0
current_offset = starting_offset