Add optional view_id / share_id settings to the Airtable connector and use
them when building record and attachment URLs; expose both as advanced
fields in the web connector config and cover them in the daily tests.

NOTE(review): line breaks and indentation in this patch were reconstructed
from a whitespace-collapsed copy. Line counts match every hunk header, but
leading whitespace on context lines is inferred - confirm against the target
files before applying.

diff --git a/backend/onyx/connectors/airtable/airtable_connector.py b/backend/onyx/connectors/airtable/airtable_connector.py
index c4d0e63d901..aaf88acacf9 100644
--- a/backend/onyx/connectors/airtable/airtable_connector.py
+++ b/backend/onyx/connectors/airtable/airtable_connector.py
@@ -65,10 +65,25 @@ class AirtableConnector(LoadConnector):
         base_id: str,
         table_name_or_id: str,
         treat_all_non_attachment_fields_as_metadata: bool = False,
+        view_id: str | None = None,
+        share_id: str | None = None,
         batch_size: int = INDEX_BATCH_SIZE,
     ) -> None:
+        """Initialize an AirtableConnector.
+
+        Args:
+            base_id: The ID of the Airtable base to connect to
+            table_name_or_id: The name or ID of the table to index
+            treat_all_non_attachment_fields_as_metadata: If True, all fields except attachments will be treated as metadata.
+                If False, only fields with types in DEFAULT_METADATA_FIELD_TYPES will be treated as metadata.
+            view_id: Optional view ID; when set, it is preferred over the per-call view ID in record URLs
+            share_id: Optional ID of a "share" to use for generating record URLs (https://airtable.com/developers/web/api/list-shares)
+            batch_size: Number of records to process in each batch
+        """
         self.base_id = base_id
         self.table_name_or_id = table_name_or_id
+        self.view_id = view_id
+        self.share_id = share_id
         self.batch_size = batch_size
         self._airtable_client: AirtableApi | None = None
         self.treat_all_non_attachment_fields_as_metadata = (
@@ -85,6 +100,39 @@ class AirtableConnector(LoadConnector):
             raise AirtableClientNotSetUpError()
         return self._airtable_client
 
+    @classmethod
+    def _get_record_url(
+        cls,
+        base_id: str,
+        table_id: str,
+        record_id: str,
+        share_id: str | None,
+        view_id: str | None,
+        field_id: str | None = None,
+        attachment_id: str | None = None,
+    ) -> str:
+        """Constructs the URL for a record (field + attachment IDs are added only when both are given)
+
+        Full possible structure is:
+
+        https://airtable.com/BASE_ID/SHARE_ID/TABLE_ID/VIEW_ID/RECORD_ID/FIELD_ID/ATTACHMENT_ID
+        """
+        # If we have a share ID, it goes between the base ID and the table ID
+        if share_id:
+            base_url = f"https://airtable.com/{base_id}/{share_id}/{table_id}"
+        else:
+            base_url = f"https://airtable.com/{base_id}/{table_id}"
+
+        if view_id:
+            base_url = f"{base_url}/{view_id}"
+
+        base_url = f"{base_url}/{record_id}"
+
+        if field_id and attachment_id:
+            return f"{base_url}/{field_id}/{attachment_id}?blocks=hide"
+
+        return base_url
+
     def _extract_field_values(
         self,
         field_id: str,
@@ -110,8 +158,10 @@ class AirtableConnector(LoadConnector):
         if field_type == "multipleRecordLinks":
             return []
 
-        # default link to use for non-attachment fields
-        default_link = f"https://airtable.com/{base_id}/{table_id}/{record_id}"
+        # Default link for non-attachment fields; a connector-level view_id wins
+        default_link = self._get_record_url(
+            base_id, table_id, record_id, self.share_id, self.view_id or view_id
+        )
 
         if field_type == "multipleAttachments":
             attachment_texts: list[tuple[str, str]] = []
@@ -165,17 +215,16 @@ class AirtableConnector(LoadConnector):
                             extension=file_ext,
                         )
                         if attachment_text:
-                            # slightly nicer loading experience if we can specify the view ID
-                            if view_id:
-                                attachment_link = (
-                                    f"https://airtable.com/{base_id}/{table_id}/{view_id}/{record_id}"
-                                    f"/{field_id}/{attachment_id}?blocks=hide"
-                                )
-                            else:
-                                attachment_link = (
-                                    f"https://airtable.com/{base_id}/{table_id}/{record_id}"
-                                    f"/{field_id}/{attachment_id}?blocks=hide"
-                                )
+                            # Link straight to the attachment; a connector-level view_id wins
+                            attachment_link = self._get_record_url(
+                                base_id,
+                                table_id,
+                                record_id,
+                                self.share_id,
+                                self.view_id or view_id,
+                                field_id,
+                                attachment_id,
+                            )
                             attachment_texts.append(
                                 (f"{filename}:\n{attachment_text}", attachment_link)
                             )
diff --git a/backend/tests/daily/connectors/airtable/test_airtable_basic.py b/backend/tests/daily/connectors/airtable/test_airtable_basic.py
index 6610d91d6f6..788543fe073 100644
--- a/backend/tests/daily/connectors/airtable/test_airtable_basic.py
+++ b/backend/tests/daily/connectors/airtable/test_airtable_basic.py
@@ -9,6 +9,8 @@ from onyx.connectors.airtable.airtable_connector import AirtableConnector
 from onyx.connectors.models import Document
 from onyx.connectors.models import Section
 
+BASE_VIEW_ID = "viwVUEJjWPd8XYjh8"
+
 
 class AirtableConfig(BaseModel):
     base_id: str
@@ -46,6 +48,8 @@ def create_test_document(
     days_since_status_change: int | None,
     attachments: list[tuple[str, str]] | None = None,
     all_fields_as_metadata: bool = False,
+    share_id: str | None = None,
+    view_id: str | None = None,
 ) -> Document:
     base_id = os.environ.get("AIRTABLE_TEST_BASE_ID")
     table_id = os.environ.get("AIRTABLE_TEST_TABLE_ID")
@@ -60,7 +64,13 @@ def create_test_document(
             f"Required environment variables not set: {', '.join(missing_vars)}. "
             "These variables are required to run Airtable connector tests."
         )
-    link_base = f"https://airtable.com/{base_id}/{table_id}"
+    link_base = f"https://airtable.com/{base_id}"
+    if share_id:
+        link_base = f"{link_base}/{share_id}"
+    link_base = f"{link_base}/{table_id}"
+    if view_id:
+        link_base = f"{link_base}/{view_id}"
+
     sections = []
 
     if not all_fields_as_metadata:
@@ -214,6 +224,7 @@ def test_airtable_connector_basic(
             assignee="Chris Weaver (chris@onyx.app)",
             submitted_by="Chris Weaver (chris@onyx.app)",
             all_fields_as_metadata=False,
+            view_id=BASE_VIEW_ID,
         ),
         create_test_document(
             id="reccSlIA4pZEFxPBg",
@@ -234,6 +245,7 @@ def test_airtable_connector_basic(
                 )
             ],
             all_fields_as_metadata=False,
+            view_id=BASE_VIEW_ID,
         ),
     ]
 
@@ -285,6 +297,81 @@ def test_airtable_connector_all_metadata(
                 )
             ],
             all_fields_as_metadata=True,
+            view_id=BASE_VIEW_ID,
+        ),
+    ]
+
+    # Compare documents using the utility function
+    compare_documents(doc_batch, expected_docs)
+
+
+def test_airtable_connector_with_share_and_view(
+    mock_get_unstructured_api_key: MagicMock, airtable_config: AirtableConfig
+) -> None:
+    """Test behavior when using share_id and view_id for URL generation."""
+    SHARE_ID = "shrkfjEzDmLaDtK83"
+
+    connector = AirtableConnector(
+        base_id=airtable_config.base_id,
+        table_name_or_id=airtable_config.table_identifier,
+        treat_all_non_attachment_fields_as_metadata=False,
+        share_id=SHARE_ID,
+        view_id=BASE_VIEW_ID,
+    )
+    connector.load_credentials(
+        {
+            "airtable_access_token": airtable_config.access_token,
+        }
+    )
+    doc_batch_generator = connector.load_from_state()
+    doc_batch = next(doc_batch_generator)
+    with pytest.raises(StopIteration):
+        next(doc_batch_generator)
+
+    assert len(doc_batch) == 2
+
+    expected_docs = [
+        create_test_document(
+            id="rec8BnxDLyWeegOuO",
+            title="Slow Internet",
+            description="The internet connection is very slow.",
+            priority="Medium",
+            status="In Progress",
+            ticket_id="2",
+            created_time="2024-12-24T21:02:49.000Z",
+            status_last_changed="2024-12-24T21:02:49.000Z",
+            days_since_status_change=0,
+            assignee="Chris Weaver (chris@onyx.app)",
+            submitted_by="Chris Weaver (chris@onyx.app)",
+            all_fields_as_metadata=False,
+            share_id=SHARE_ID,
+            view_id=BASE_VIEW_ID,
+        ),
+        create_test_document(
+            id="reccSlIA4pZEFxPBg",
+            title="Printer Issue",
+            description="The office printer is not working.",
+            priority="High",
+            status="Open",
+            ticket_id="1",
+            created_time="2024-12-24T21:02:49.000Z",
+            status_last_changed="2024-12-24T21:02:49.000Z",
+            days_since_status_change=0,
+            assignee="Chris Weaver (chris@onyx.app)",
+            submitted_by="Chris Weaver (chris@onyx.app)",
+            attachments=[
+                (
+                    "Test.pdf:\ntesting!!!",
+                    (
+                        f"https://airtable.com/{airtable_config.base_id}/{SHARE_ID}/"
+                        f"{os.environ['AIRTABLE_TEST_TABLE_ID']}/{BASE_VIEW_ID}/reccSlIA4pZEFxPBg/"
+                        "fld1u21zkJACIvAEF/attlj2UBWNEDZngCc?blocks=hide"
+                    ),
+                )
+            ],
+            all_fields_as_metadata=False,
+            share_id=SHARE_ID,
+            view_id=BASE_VIEW_ID,
         ),
     ]
 
diff --git a/web/src/lib/connectors/connectors.tsx b/web/src/lib/connectors/connectors.tsx
index 2478e78560f..58958c7422d 100644
--- a/web/src/lib/connectors/connectors.tsx
+++ b/web/src/lib/connectors/connectors.tsx
@@ -1115,7 +1115,24 @@ For example, specifying .*-support.* as a "channel" will cause the connector to
         optional: false,
       },
     ],
-    advanced_values: [],
+    advanced_values: [
+      {
+        type: "text",
+        label: "View ID",
+        name: "view_id",
+        optional: true,
+        description:
+          "If you need to link to a specific View, put that ID here e.g. viwVUEJjWPd8XYjh8.",
+      },
+      {
+        type: "text",
+        label: "Share ID",
+        name: "share_id",
+        optional: true,
+        description:
+          "If you need to link to a specific Share, put that ID here e.g. shrkfjEzDmLaDtK83.",
+      },
+    ],
     overrideDefaultFreq: 60 * 60 * 24,
   },
 };