Support share/view IDs for Airtable (#3967)

This commit is contained in:
Chris Weaver
2025-02-11 16:19:38 -08:00
committed by GitHub
parent f9485b1325
commit 037943c6ff
3 changed files with 168 additions and 15 deletions

View File

@ -65,10 +65,25 @@ class AirtableConnector(LoadConnector):
base_id: str, base_id: str,
table_name_or_id: str, table_name_or_id: str,
treat_all_non_attachment_fields_as_metadata: bool = False, treat_all_non_attachment_fields_as_metadata: bool = False,
view_id: str | None = None,
share_id: str | None = None,
batch_size: int = INDEX_BATCH_SIZE, batch_size: int = INDEX_BATCH_SIZE,
) -> None: ) -> None:
"""Initialize an AirtableConnector.
Args:
base_id: The ID of the Airtable base to connect to
table_name_or_id: The name or ID of the table to index
treat_all_non_attachment_fields_as_metadata: If True, all fields except attachments will be treated as metadata.
If False, only fields with types in DEFAULT_METADATA_FIELD_TYPES will be treated as metadata.
view_id: Optional ID of a specific view to use
share_id: Optional ID of a "share" to use for generating record URLs (https://airtable.com/developers/web/api/list-shares)
batch_size: Number of records to process in each batch
"""
self.base_id = base_id self.base_id = base_id
self.table_name_or_id = table_name_or_id self.table_name_or_id = table_name_or_id
self.view_id = view_id
self.share_id = share_id
self.batch_size = batch_size self.batch_size = batch_size
self._airtable_client: AirtableApi | None = None self._airtable_client: AirtableApi | None = None
self.treat_all_non_attachment_fields_as_metadata = ( self.treat_all_non_attachment_fields_as_metadata = (
@ -85,6 +100,39 @@ class AirtableConnector(LoadConnector):
raise AirtableClientNotSetUpError() raise AirtableClientNotSetUpError()
return self._airtable_client return self._airtable_client
@classmethod
def _get_record_url(
cls,
base_id: str,
table_id: str,
record_id: str,
share_id: str | None,
view_id: str | None,
field_id: str | None = None,
attachment_id: str | None = None,
) -> str:
"""Constructs the URL for a record, optionally including field and attachment IDs
Full possible structure is:
https://airtable.com/BASE_ID/SHARE_ID/TABLE_ID/VIEW_ID/RECORD_ID/FIELD_ID/ATTACHMENT_ID
"""
# If we have a shared link, use that view for better UX
if share_id:
base_url = f"https://airtable.com/{base_id}/{share_id}/{table_id}"
else:
base_url = f"https://airtable.com/{base_id}/{table_id}"
if view_id:
base_url = f"{base_url}/{view_id}"
base_url = f"{base_url}/{record_id}"
if field_id and attachment_id:
return f"{base_url}/{field_id}/{attachment_id}?blocks=hide"
return base_url
def _extract_field_values( def _extract_field_values(
self, self,
field_id: str, field_id: str,
@ -110,8 +158,10 @@ class AirtableConnector(LoadConnector):
if field_type == "multipleRecordLinks": if field_type == "multipleRecordLinks":
return [] return []
# default link to use for non-attachment fields # Get the base URL for this record
default_link = f"https://airtable.com/{base_id}/{table_id}/{record_id}" default_link = self._get_record_url(
base_id, table_id, record_id, self.share_id, self.view_id or view_id
)
if field_type == "multipleAttachments": if field_type == "multipleAttachments":
attachment_texts: list[tuple[str, str]] = [] attachment_texts: list[tuple[str, str]] = []
@ -165,16 +215,15 @@ class AirtableConnector(LoadConnector):
extension=file_ext, extension=file_ext,
) )
if attachment_text: if attachment_text:
# slightly nicer loading experience if we can specify the view ID # Use the helper method to construct attachment URLs
if view_id: attachment_link = self._get_record_url(
attachment_link = ( base_id,
f"https://airtable.com/{base_id}/{table_id}/{view_id}/{record_id}" table_id,
f"/{field_id}/{attachment_id}?blocks=hide" record_id,
) self.share_id,
else: self.view_id or view_id,
attachment_link = ( field_id,
f"https://airtable.com/{base_id}/{table_id}/{record_id}" attachment_id,
f"/{field_id}/{attachment_id}?blocks=hide"
) )
attachment_texts.append( attachment_texts.append(
(f"{filename}:\n{attachment_text}", attachment_link) (f"{filename}:\n{attachment_text}", attachment_link)

View File

@ -9,6 +9,8 @@ from onyx.connectors.airtable.airtable_connector import AirtableConnector
from onyx.connectors.models import Document from onyx.connectors.models import Document
from onyx.connectors.models import Section from onyx.connectors.models import Section
BASE_VIEW_ID = "viwVUEJjWPd8XYjh8"
class AirtableConfig(BaseModel): class AirtableConfig(BaseModel):
base_id: str base_id: str
@ -46,6 +48,8 @@ def create_test_document(
days_since_status_change: int | None, days_since_status_change: int | None,
attachments: list[tuple[str, str]] | None = None, attachments: list[tuple[str, str]] | None = None,
all_fields_as_metadata: bool = False, all_fields_as_metadata: bool = False,
share_id: str | None = None,
view_id: str | None = None,
) -> Document: ) -> Document:
base_id = os.environ.get("AIRTABLE_TEST_BASE_ID") base_id = os.environ.get("AIRTABLE_TEST_BASE_ID")
table_id = os.environ.get("AIRTABLE_TEST_TABLE_ID") table_id = os.environ.get("AIRTABLE_TEST_TABLE_ID")
@ -60,7 +64,13 @@ def create_test_document(
f"Required environment variables not set: {', '.join(missing_vars)}. " f"Required environment variables not set: {', '.join(missing_vars)}. "
"These variables are required to run Airtable connector tests." "These variables are required to run Airtable connector tests."
) )
link_base = f"https://airtable.com/{base_id}/{table_id}" link_base = f"https://airtable.com/{base_id}"
if share_id:
link_base = f"{link_base}/{share_id}"
link_base = f"{link_base}/{table_id}"
if view_id:
link_base = f"{link_base}/{view_id}"
sections = [] sections = []
if not all_fields_as_metadata: if not all_fields_as_metadata:
@ -214,6 +224,7 @@ def test_airtable_connector_basic(
assignee="Chris Weaver (chris@onyx.app)", assignee="Chris Weaver (chris@onyx.app)",
submitted_by="Chris Weaver (chris@onyx.app)", submitted_by="Chris Weaver (chris@onyx.app)",
all_fields_as_metadata=False, all_fields_as_metadata=False,
view_id=BASE_VIEW_ID,
), ),
create_test_document( create_test_document(
id="reccSlIA4pZEFxPBg", id="reccSlIA4pZEFxPBg",
@ -234,6 +245,7 @@ def test_airtable_connector_basic(
) )
], ],
all_fields_as_metadata=False, all_fields_as_metadata=False,
view_id=BASE_VIEW_ID,
), ),
] ]
@ -285,6 +297,81 @@ def test_airtable_connector_all_metadata(
) )
], ],
all_fields_as_metadata=True, all_fields_as_metadata=True,
view_id=BASE_VIEW_ID,
),
]
# Compare documents using the utility function
compare_documents(doc_batch, expected_docs)
def test_airtable_connector_with_share_and_view(
mock_get_unstructured_api_key: MagicMock, airtable_config: AirtableConfig
) -> None:
"""Test behavior when using share_id and view_id for URL generation."""
SHARE_ID = "shrkfjEzDmLaDtK83"
connector = AirtableConnector(
base_id=airtable_config.base_id,
table_name_or_id=airtable_config.table_identifier,
treat_all_non_attachment_fields_as_metadata=False,
share_id=SHARE_ID,
view_id=BASE_VIEW_ID,
)
connector.load_credentials(
{
"airtable_access_token": airtable_config.access_token,
}
)
doc_batch_generator = connector.load_from_state()
doc_batch = next(doc_batch_generator)
with pytest.raises(StopIteration):
next(doc_batch_generator)
assert len(doc_batch) == 2
expected_docs = [
create_test_document(
id="rec8BnxDLyWeegOuO",
title="Slow Internet",
description="The internet connection is very slow.",
priority="Medium",
status="In Progress",
ticket_id="2",
created_time="2024-12-24T21:02:49.000Z",
status_last_changed="2024-12-24T21:02:49.000Z",
days_since_status_change=0,
assignee="Chris Weaver (chris@onyx.app)",
submitted_by="Chris Weaver (chris@onyx.app)",
all_fields_as_metadata=False,
share_id=SHARE_ID,
view_id=BASE_VIEW_ID,
),
create_test_document(
id="reccSlIA4pZEFxPBg",
title="Printer Issue",
description="The office printer is not working.",
priority="High",
status="Open",
ticket_id="1",
created_time="2024-12-24T21:02:49.000Z",
status_last_changed="2024-12-24T21:02:49.000Z",
days_since_status_change=0,
assignee="Chris Weaver (chris@onyx.app)",
submitted_by="Chris Weaver (chris@onyx.app)",
attachments=[
(
"Test.pdf:\ntesting!!!",
(
f"https://airtable.com/{airtable_config.base_id}/{SHARE_ID}/"
f"{os.environ['AIRTABLE_TEST_TABLE_ID']}/{BASE_VIEW_ID}/reccSlIA4pZEFxPBg/"
"fld1u21zkJACIvAEF/attlj2UBWNEDZngCc?blocks=hide"
),
)
],
all_fields_as_metadata=False,
share_id=SHARE_ID,
view_id=BASE_VIEW_ID,
), ),
] ]

View File

@ -1115,7 +1115,24 @@ For example, specifying .*-support.* as a "channel" will cause the connector to
optional: false, optional: false,
}, },
], ],
advanced_values: [], advanced_values: [
{
type: "text",
label: "View ID",
name: "view_id",
optional: true,
description:
"If you need to link to a specific View, put that ID here e.g. viwVUEJjWPd8XYjh8.",
},
{
type: "text",
label: "Share ID",
name: "share_id",
optional: true,
description:
"If you need to link to a specific Share, put that ID here e.g. shrkfjEzDmLaDtK83.",
},
],
overrideDefaultFreq: 60 * 60 * 24, overrideDefaultFreq: 60 * 60 * 24,
}, },
}; };