Handle expiring attachments

This commit is contained in:
Weves 2025-02-02 12:01:34 -08:00 committed by Chris Weaver
parent 569639eb90
commit 87bccc13cc

View File

@ -70,17 +70,23 @@ class AirtableConnector(LoadConnector):
self.base_id = base_id
self.table_name_or_id = table_name_or_id
self.batch_size = batch_size
self.airtable_client: AirtableApi | None = None
self._airtable_client: AirtableApi | None = None
self.treat_all_non_attachment_fields_as_metadata = (
treat_all_non_attachment_fields_as_metadata
)
def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
self.airtable_client = AirtableApi(credentials["airtable_access_token"])
self._airtable_client = AirtableApi(credentials["airtable_access_token"])
return None
@staticmethod
@property
def airtable_client(self) -> AirtableApi:
if not self._airtable_client:
raise AirtableClientNotSetUpError()
return self._airtable_client
def _extract_field_values(
self,
field_id: str,
field_info: Any,
field_type: str,
@ -120,12 +126,29 @@ class AirtableConnector(LoadConnector):
backoff=2,
max_delay=10,
)
def get_attachment_with_retry(url: str) -> bytes | None:
attachment_response = requests.get(url)
attachment_response.raise_for_status()
return attachment_response.content
def get_attachment_with_retry(url: str, record_id: str) -> bytes | None:
try:
attachment_response = requests.get(url)
attachment_response.raise_for_status()
return attachment_response.content
except requests.exceptions.HTTPError as e:
if e.response.status_code == 410:
# Re-fetch the record to get a fresh URL
refreshed_record = self.airtable_client.table(
base_id, table_id
).get(record_id)
for refreshed_attachment in refreshed_record["fields"].get(
field_id, []
):
if refreshed_attachment.get("filename") == filename:
new_url = refreshed_attachment.get("url")
if new_url:
attachment_response = requests.get(new_url)
attachment_response.raise_for_status()
return attachment_response.content
raise
attachment_content = get_attachment_with_retry(url)
attachment_content = get_attachment_with_retry(url, record_id)
if attachment_content:
try:
file_ext = get_file_ext(filename)