mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-10-04 12:58:42 +02:00
Jira daylight saving time handling (#4797)
This commit is contained in:
@@ -21,6 +21,9 @@ from onyx.connectors.confluence.utils import datetime_from_string
|
|||||||
from onyx.connectors.confluence.utils import process_attachment
|
from onyx.connectors.confluence.utils import process_attachment
|
||||||
from onyx.connectors.confluence.utils import update_param_in_path
|
from onyx.connectors.confluence.utils import update_param_in_path
|
||||||
from onyx.connectors.confluence.utils import validate_attachment_filetype
|
from onyx.connectors.confluence.utils import validate_attachment_filetype
|
||||||
|
from onyx.connectors.cross_connector_utils.miscellaneous_utils import (
|
||||||
|
is_atlassian_date_error,
|
||||||
|
)
|
||||||
from onyx.connectors.exceptions import ConnectorValidationError
|
from onyx.connectors.exceptions import ConnectorValidationError
|
||||||
from onyx.connectors.exceptions import CredentialExpiredError
|
from onyx.connectors.exceptions import CredentialExpiredError
|
||||||
from onyx.connectors.exceptions import InsufficientPermissionsError
|
from onyx.connectors.exceptions import InsufficientPermissionsError
|
||||||
@@ -76,10 +79,6 @@ ONE_DAY = ONE_HOUR * 24
|
|||||||
MAX_CACHED_IDS = 100
|
MAX_CACHED_IDS = 100
|
||||||
|
|
||||||
|
|
||||||
def _should_propagate_error(e: Exception) -> bool:
|
|
||||||
return "field 'updated' is invalid" in str(e)
|
|
||||||
|
|
||||||
|
|
||||||
class ConfluenceCheckpoint(ConnectorCheckpoint):
|
class ConfluenceCheckpoint(ConnectorCheckpoint):
|
||||||
|
|
||||||
next_page_url: str | None
|
next_page_url: str | None
|
||||||
@@ -367,7 +366,7 @@ class ConfluenceConnector(
|
|||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error converting page {page.get('id', 'unknown')}: {e}")
|
logger.error(f"Error converting page {page.get('id', 'unknown')}: {e}")
|
||||||
if _should_propagate_error(e):
|
if is_atlassian_date_error(e): # propagate error to be caught and retried
|
||||||
raise
|
raise
|
||||||
return ConnectorFailure(
|
return ConnectorFailure(
|
||||||
failed_document=DocumentFailure(
|
failed_document=DocumentFailure(
|
||||||
@@ -446,7 +445,9 @@ class ConfluenceConnector(
|
|||||||
f"Failed to extract/summarize attachment {attachment['title']}",
|
f"Failed to extract/summarize attachment {attachment['title']}",
|
||||||
exc_info=e,
|
exc_info=e,
|
||||||
)
|
)
|
||||||
if _should_propagate_error(e):
|
if is_atlassian_date_error(
|
||||||
|
e
|
||||||
|
): # propagate error to be caught and retried
|
||||||
raise
|
raise
|
||||||
return ConnectorFailure(
|
return ConnectorFailure(
|
||||||
failed_document=DocumentFailure(
|
failed_document=DocumentFailure(
|
||||||
@@ -536,7 +537,7 @@ class ConfluenceConnector(
|
|||||||
try:
|
try:
|
||||||
return self._fetch_document_batches(checkpoint, start, end)
|
return self._fetch_document_batches(checkpoint, start, end)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
if _should_propagate_error(e) and start is not None:
|
if is_atlassian_date_error(e) and start is not None:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
"Confluence says we provided an invalid 'updated' field. This may indicate"
|
"Confluence says we provided an invalid 'updated' field. This may indicate"
|
||||||
"a real issue, but can also appear during edge cases like daylight"
|
"a real issue, but can also appear during edge cases like daylight"
|
||||||
|
@@ -86,3 +86,7 @@ def get_oauth_callback_uri(base_domain: str, connector_id: str) -> str:
|
|||||||
# Used for development
|
# Used for development
|
||||||
base_domain = CONNECTOR_LOCALHOST_OVERRIDE
|
base_domain = CONNECTOR_LOCALHOST_OVERRIDE
|
||||||
return f"{base_domain.strip('/')}/connector/oauth/callback/{connector_id}"
|
return f"{base_domain.strip('/')}/connector/oauth/callback/{connector_id}"
|
||||||
|
|
||||||
|
|
||||||
|
def is_atlassian_date_error(e: Exception) -> bool:
|
||||||
|
return "field 'updated' is invalid" in str(e)
|
||||||
|
@@ -12,6 +12,9 @@ from onyx.configs.app_configs import INDEX_BATCH_SIZE
|
|||||||
from onyx.configs.app_configs import JIRA_CONNECTOR_LABELS_TO_SKIP
|
from onyx.configs.app_configs import JIRA_CONNECTOR_LABELS_TO_SKIP
|
||||||
from onyx.configs.app_configs import JIRA_CONNECTOR_MAX_TICKET_SIZE
|
from onyx.configs.app_configs import JIRA_CONNECTOR_MAX_TICKET_SIZE
|
||||||
from onyx.configs.constants import DocumentSource
|
from onyx.configs.constants import DocumentSource
|
||||||
|
from onyx.connectors.cross_connector_utils.miscellaneous_utils import (
|
||||||
|
is_atlassian_date_error,
|
||||||
|
)
|
||||||
from onyx.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc
|
from onyx.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc
|
||||||
from onyx.connectors.exceptions import ConnectorValidationError
|
from onyx.connectors.exceptions import ConnectorValidationError
|
||||||
from onyx.connectors.exceptions import CredentialExpiredError
|
from onyx.connectors.exceptions import CredentialExpiredError
|
||||||
@@ -40,6 +43,8 @@ from onyx.utils.logger import setup_logger
|
|||||||
|
|
||||||
logger = setup_logger()
|
logger = setup_logger()
|
||||||
|
|
||||||
|
ONE_HOUR = 3600
|
||||||
|
|
||||||
JIRA_API_VERSION = os.environ.get("JIRA_API_VERSION") or "2"
|
JIRA_API_VERSION = os.environ.get("JIRA_API_VERSION") or "2"
|
||||||
_JIRA_SLIM_PAGE_SIZE = 500
|
_JIRA_SLIM_PAGE_SIZE = 500
|
||||||
_JIRA_FULL_PAGE_SIZE = 50
|
_JIRA_FULL_PAGE_SIZE = 50
|
||||||
@@ -240,7 +245,17 @@ class JiraConnector(CheckpointedConnector[JiraConnectorCheckpoint], SlimConnecto
|
|||||||
checkpoint: JiraConnectorCheckpoint,
|
checkpoint: JiraConnectorCheckpoint,
|
||||||
) -> CheckpointOutput[JiraConnectorCheckpoint]:
|
) -> CheckpointOutput[JiraConnectorCheckpoint]:
|
||||||
jql = self._get_jql_query(start, end)
|
jql = self._get_jql_query(start, end)
|
||||||
|
try:
|
||||||
|
return self._load_from_checkpoint(jql, checkpoint)
|
||||||
|
except Exception as e:
|
||||||
|
if is_atlassian_date_error(e):
|
||||||
|
jql = self._get_jql_query(start - ONE_HOUR, end)
|
||||||
|
return self._load_from_checkpoint(jql, checkpoint)
|
||||||
|
raise e
|
||||||
|
|
||||||
|
def _load_from_checkpoint(
|
||||||
|
self, jql: str, checkpoint: JiraConnectorCheckpoint
|
||||||
|
) -> CheckpointOutput[JiraConnectorCheckpoint]:
|
||||||
# Get the current offset from checkpoint or start at 0
|
# Get the current offset from checkpoint or start at 0
|
||||||
starting_offset = checkpoint.offset or 0
|
starting_offset = checkpoint.offset or 0
|
||||||
current_offset = starting_offset
|
current_offset = starting_offset
|
||||||
|
Reference in New Issue
Block a user