diff --git a/backend/onyx/connectors/airtable/airtable_connector.py b/backend/onyx/connectors/airtable/airtable_connector.py index 48b538aa1..50b63632d 100644 --- a/backend/onyx/connectors/airtable/airtable_connector.py +++ b/backend/onyx/connectors/airtable/airtable_connector.py @@ -70,17 +70,23 @@ class AirtableConnector(LoadConnector): self.base_id = base_id self.table_name_or_id = table_name_or_id self.batch_size = batch_size - self.airtable_client: AirtableApi | None = None + self._airtable_client: AirtableApi | None = None self.treat_all_non_attachment_fields_as_metadata = ( treat_all_non_attachment_fields_as_metadata ) def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None: - self.airtable_client = AirtableApi(credentials["airtable_access_token"]) + self._airtable_client = AirtableApi(credentials["airtable_access_token"]) return None - @staticmethod + @property + def airtable_client(self) -> AirtableApi: + if not self._airtable_client: + raise AirtableClientNotSetUpError() + return self._airtable_client + def _extract_field_values( + self, field_id: str, field_info: Any, field_type: str, @@ -120,12 +126,29 @@ class AirtableConnector(LoadConnector): backoff=2, max_delay=10, ) - def get_attachment_with_retry(url: str) -> bytes | None: - attachment_response = requests.get(url) - attachment_response.raise_for_status() - return attachment_response.content + def get_attachment_with_retry(url: str, record_id: str) -> bytes | None: + try: + attachment_response = requests.get(url) + attachment_response.raise_for_status() + return attachment_response.content + except requests.exceptions.HTTPError as e: + if e.response.status_code == 410: + # Re-fetch the record to get a fresh URL + refreshed_record = self.airtable_client.table( + base_id, table_id + ).get(record_id) + for refreshed_attachment in refreshed_record["fields"].get( + field_id, [] + ): + if refreshed_attachment.get("filename") == filename: + new_url = refreshed_attachment.get("url") + if new_url: + attachment_response = requests.get(new_url) + attachment_response.raise_for_status() + return attachment_response.content + raise - attachment_content = get_attachment_with_retry(url) + attachment_content = get_attachment_with_retry(url, record_id) if attachment_content: try: file_ext = get_file_ext(filename)