mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-07-21 18:43:30 +02:00
Support share/view IDs for Airtable (#3967)
This commit is contained in:
@ -65,10 +65,25 @@ class AirtableConnector(LoadConnector):
|
|||||||
base_id: str,
|
base_id: str,
|
||||||
table_name_or_id: str,
|
table_name_or_id: str,
|
||||||
treat_all_non_attachment_fields_as_metadata: bool = False,
|
treat_all_non_attachment_fields_as_metadata: bool = False,
|
||||||
|
view_id: str | None = None,
|
||||||
|
share_id: str | None = None,
|
||||||
batch_size: int = INDEX_BATCH_SIZE,
|
batch_size: int = INDEX_BATCH_SIZE,
|
||||||
) -> None:
|
) -> None:
|
||||||
|
"""Initialize an AirtableConnector.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
base_id: The ID of the Airtable base to connect to
|
||||||
|
table_name_or_id: The name or ID of the table to index
|
||||||
|
treat_all_non_attachment_fields_as_metadata: If True, all fields except attachments will be treated as metadata.
|
||||||
|
If False, only fields with types in DEFAULT_METADATA_FIELD_TYPES will be treated as metadata.
|
||||||
|
view_id: Optional ID of a specific view to use
|
||||||
|
share_id: Optional ID of a "share" to use for generating record URLs (https://airtable.com/developers/web/api/list-shares)
|
||||||
|
batch_size: Number of records to process in each batch
|
||||||
|
"""
|
||||||
self.base_id = base_id
|
self.base_id = base_id
|
||||||
self.table_name_or_id = table_name_or_id
|
self.table_name_or_id = table_name_or_id
|
||||||
|
self.view_id = view_id
|
||||||
|
self.share_id = share_id
|
||||||
self.batch_size = batch_size
|
self.batch_size = batch_size
|
||||||
self._airtable_client: AirtableApi | None = None
|
self._airtable_client: AirtableApi | None = None
|
||||||
self.treat_all_non_attachment_fields_as_metadata = (
|
self.treat_all_non_attachment_fields_as_metadata = (
|
||||||
@ -85,6 +100,39 @@ class AirtableConnector(LoadConnector):
|
|||||||
raise AirtableClientNotSetUpError()
|
raise AirtableClientNotSetUpError()
|
||||||
return self._airtable_client
|
return self._airtable_client
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def _get_record_url(
|
||||||
|
cls,
|
||||||
|
base_id: str,
|
||||||
|
table_id: str,
|
||||||
|
record_id: str,
|
||||||
|
share_id: str | None,
|
||||||
|
view_id: str | None,
|
||||||
|
field_id: str | None = None,
|
||||||
|
attachment_id: str | None = None,
|
||||||
|
) -> str:
|
||||||
|
"""Constructs the URL for a record, optionally including field and attachment IDs
|
||||||
|
|
||||||
|
Full possible structure is:
|
||||||
|
|
||||||
|
https://airtable.com/BASE_ID/SHARE_ID/TABLE_ID/VIEW_ID/RECORD_ID/FIELD_ID/ATTACHMENT_ID
|
||||||
|
"""
|
||||||
|
# If we have a shared link, use that view for better UX
|
||||||
|
if share_id:
|
||||||
|
base_url = f"https://airtable.com/{base_id}/{share_id}/{table_id}"
|
||||||
|
else:
|
||||||
|
base_url = f"https://airtable.com/{base_id}/{table_id}"
|
||||||
|
|
||||||
|
if view_id:
|
||||||
|
base_url = f"{base_url}/{view_id}"
|
||||||
|
|
||||||
|
base_url = f"{base_url}/{record_id}"
|
||||||
|
|
||||||
|
if field_id and attachment_id:
|
||||||
|
return f"{base_url}/{field_id}/{attachment_id}?blocks=hide"
|
||||||
|
|
||||||
|
return base_url
|
||||||
|
|
||||||
def _extract_field_values(
|
def _extract_field_values(
|
||||||
self,
|
self,
|
||||||
field_id: str,
|
field_id: str,
|
||||||
@ -110,8 +158,10 @@ class AirtableConnector(LoadConnector):
|
|||||||
if field_type == "multipleRecordLinks":
|
if field_type == "multipleRecordLinks":
|
||||||
return []
|
return []
|
||||||
|
|
||||||
# default link to use for non-attachment fields
|
# Get the base URL for this record
|
||||||
default_link = f"https://airtable.com/{base_id}/{table_id}/{record_id}"
|
default_link = self._get_record_url(
|
||||||
|
base_id, table_id, record_id, self.share_id, self.view_id or view_id
|
||||||
|
)
|
||||||
|
|
||||||
if field_type == "multipleAttachments":
|
if field_type == "multipleAttachments":
|
||||||
attachment_texts: list[tuple[str, str]] = []
|
attachment_texts: list[tuple[str, str]] = []
|
||||||
@ -165,16 +215,15 @@ class AirtableConnector(LoadConnector):
|
|||||||
extension=file_ext,
|
extension=file_ext,
|
||||||
)
|
)
|
||||||
if attachment_text:
|
if attachment_text:
|
||||||
# slightly nicer loading experience if we can specify the view ID
|
# Use the helper method to construct attachment URLs
|
||||||
if view_id:
|
attachment_link = self._get_record_url(
|
||||||
attachment_link = (
|
base_id,
|
||||||
f"https://airtable.com/{base_id}/{table_id}/{view_id}/{record_id}"
|
table_id,
|
||||||
f"/{field_id}/{attachment_id}?blocks=hide"
|
record_id,
|
||||||
)
|
self.share_id,
|
||||||
else:
|
self.view_id or view_id,
|
||||||
attachment_link = (
|
field_id,
|
||||||
f"https://airtable.com/{base_id}/{table_id}/{record_id}"
|
attachment_id,
|
||||||
f"/{field_id}/{attachment_id}?blocks=hide"
|
|
||||||
)
|
)
|
||||||
attachment_texts.append(
|
attachment_texts.append(
|
||||||
(f"{filename}:\n{attachment_text}", attachment_link)
|
(f"{filename}:\n{attachment_text}", attachment_link)
|
||||||
|
@ -9,6 +9,8 @@ from onyx.connectors.airtable.airtable_connector import AirtableConnector
|
|||||||
from onyx.connectors.models import Document
|
from onyx.connectors.models import Document
|
||||||
from onyx.connectors.models import Section
|
from onyx.connectors.models import Section
|
||||||
|
|
||||||
|
BASE_VIEW_ID = "viwVUEJjWPd8XYjh8"
|
||||||
|
|
||||||
|
|
||||||
class AirtableConfig(BaseModel):
|
class AirtableConfig(BaseModel):
|
||||||
base_id: str
|
base_id: str
|
||||||
@ -46,6 +48,8 @@ def create_test_document(
|
|||||||
days_since_status_change: int | None,
|
days_since_status_change: int | None,
|
||||||
attachments: list[tuple[str, str]] | None = None,
|
attachments: list[tuple[str, str]] | None = None,
|
||||||
all_fields_as_metadata: bool = False,
|
all_fields_as_metadata: bool = False,
|
||||||
|
share_id: str | None = None,
|
||||||
|
view_id: str | None = None,
|
||||||
) -> Document:
|
) -> Document:
|
||||||
base_id = os.environ.get("AIRTABLE_TEST_BASE_ID")
|
base_id = os.environ.get("AIRTABLE_TEST_BASE_ID")
|
||||||
table_id = os.environ.get("AIRTABLE_TEST_TABLE_ID")
|
table_id = os.environ.get("AIRTABLE_TEST_TABLE_ID")
|
||||||
@ -60,7 +64,13 @@ def create_test_document(
|
|||||||
f"Required environment variables not set: {', '.join(missing_vars)}. "
|
f"Required environment variables not set: {', '.join(missing_vars)}. "
|
||||||
"These variables are required to run Airtable connector tests."
|
"These variables are required to run Airtable connector tests."
|
||||||
)
|
)
|
||||||
link_base = f"https://airtable.com/{base_id}/{table_id}"
|
link_base = f"https://airtable.com/{base_id}"
|
||||||
|
if share_id:
|
||||||
|
link_base = f"{link_base}/{share_id}"
|
||||||
|
link_base = f"{link_base}/{table_id}"
|
||||||
|
if view_id:
|
||||||
|
link_base = f"{link_base}/{view_id}"
|
||||||
|
|
||||||
sections = []
|
sections = []
|
||||||
|
|
||||||
if not all_fields_as_metadata:
|
if not all_fields_as_metadata:
|
||||||
@ -214,6 +224,7 @@ def test_airtable_connector_basic(
|
|||||||
assignee="Chris Weaver (chris@onyx.app)",
|
assignee="Chris Weaver (chris@onyx.app)",
|
||||||
submitted_by="Chris Weaver (chris@onyx.app)",
|
submitted_by="Chris Weaver (chris@onyx.app)",
|
||||||
all_fields_as_metadata=False,
|
all_fields_as_metadata=False,
|
||||||
|
view_id=BASE_VIEW_ID,
|
||||||
),
|
),
|
||||||
create_test_document(
|
create_test_document(
|
||||||
id="reccSlIA4pZEFxPBg",
|
id="reccSlIA4pZEFxPBg",
|
||||||
@ -234,6 +245,7 @@ def test_airtable_connector_basic(
|
|||||||
)
|
)
|
||||||
],
|
],
|
||||||
all_fields_as_metadata=False,
|
all_fields_as_metadata=False,
|
||||||
|
view_id=BASE_VIEW_ID,
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
|
|
||||||
@ -285,6 +297,81 @@ def test_airtable_connector_all_metadata(
|
|||||||
)
|
)
|
||||||
],
|
],
|
||||||
all_fields_as_metadata=True,
|
all_fields_as_metadata=True,
|
||||||
|
view_id=BASE_VIEW_ID,
|
||||||
|
),
|
||||||
|
]
|
||||||
|
|
||||||
|
# Compare documents using the utility function
|
||||||
|
compare_documents(doc_batch, expected_docs)
|
||||||
|
|
||||||
|
|
||||||
|
def test_airtable_connector_with_share_and_view(
|
||||||
|
mock_get_unstructured_api_key: MagicMock, airtable_config: AirtableConfig
|
||||||
|
) -> None:
|
||||||
|
"""Test behavior when using share_id and view_id for URL generation."""
|
||||||
|
SHARE_ID = "shrkfjEzDmLaDtK83"
|
||||||
|
|
||||||
|
connector = AirtableConnector(
|
||||||
|
base_id=airtable_config.base_id,
|
||||||
|
table_name_or_id=airtable_config.table_identifier,
|
||||||
|
treat_all_non_attachment_fields_as_metadata=False,
|
||||||
|
share_id=SHARE_ID,
|
||||||
|
view_id=BASE_VIEW_ID,
|
||||||
|
)
|
||||||
|
connector.load_credentials(
|
||||||
|
{
|
||||||
|
"airtable_access_token": airtable_config.access_token,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
doc_batch_generator = connector.load_from_state()
|
||||||
|
doc_batch = next(doc_batch_generator)
|
||||||
|
with pytest.raises(StopIteration):
|
||||||
|
next(doc_batch_generator)
|
||||||
|
|
||||||
|
assert len(doc_batch) == 2
|
||||||
|
|
||||||
|
expected_docs = [
|
||||||
|
create_test_document(
|
||||||
|
id="rec8BnxDLyWeegOuO",
|
||||||
|
title="Slow Internet",
|
||||||
|
description="The internet connection is very slow.",
|
||||||
|
priority="Medium",
|
||||||
|
status="In Progress",
|
||||||
|
ticket_id="2",
|
||||||
|
created_time="2024-12-24T21:02:49.000Z",
|
||||||
|
status_last_changed="2024-12-24T21:02:49.000Z",
|
||||||
|
days_since_status_change=0,
|
||||||
|
assignee="Chris Weaver (chris@onyx.app)",
|
||||||
|
submitted_by="Chris Weaver (chris@onyx.app)",
|
||||||
|
all_fields_as_metadata=False,
|
||||||
|
share_id=SHARE_ID,
|
||||||
|
view_id=BASE_VIEW_ID,
|
||||||
|
),
|
||||||
|
create_test_document(
|
||||||
|
id="reccSlIA4pZEFxPBg",
|
||||||
|
title="Printer Issue",
|
||||||
|
description="The office printer is not working.",
|
||||||
|
priority="High",
|
||||||
|
status="Open",
|
||||||
|
ticket_id="1",
|
||||||
|
created_time="2024-12-24T21:02:49.000Z",
|
||||||
|
status_last_changed="2024-12-24T21:02:49.000Z",
|
||||||
|
days_since_status_change=0,
|
||||||
|
assignee="Chris Weaver (chris@onyx.app)",
|
||||||
|
submitted_by="Chris Weaver (chris@onyx.app)",
|
||||||
|
attachments=[
|
||||||
|
(
|
||||||
|
"Test.pdf:\ntesting!!!",
|
||||||
|
(
|
||||||
|
f"https://airtable.com/{airtable_config.base_id}/{SHARE_ID}/"
|
||||||
|
f"{os.environ['AIRTABLE_TEST_TABLE_ID']}/{BASE_VIEW_ID}/reccSlIA4pZEFxPBg/"
|
||||||
|
"fld1u21zkJACIvAEF/attlj2UBWNEDZngCc?blocks=hide"
|
||||||
|
),
|
||||||
|
)
|
||||||
|
],
|
||||||
|
all_fields_as_metadata=False,
|
||||||
|
share_id=SHARE_ID,
|
||||||
|
view_id=BASE_VIEW_ID,
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@ -1115,7 +1115,24 @@ For example, specifying .*-support.* as a "channel" will cause the connector to
|
|||||||
optional: false,
|
optional: false,
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
advanced_values: [],
|
advanced_values: [
|
||||||
|
{
|
||||||
|
type: "text",
|
||||||
|
label: "View ID",
|
||||||
|
name: "view_id",
|
||||||
|
optional: true,
|
||||||
|
description:
|
||||||
|
"If you need to link to a specific View, put that ID here e.g. viwVUEJjWPd8XYjh8.",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
type: "text",
|
||||||
|
label: "Share ID",
|
||||||
|
name: "share_id",
|
||||||
|
optional: true,
|
||||||
|
description:
|
||||||
|
"If you need to link to a specific Share, put that ID here e.g. shrkfjEzDmLaDtK83.",
|
||||||
|
},
|
||||||
|
],
|
||||||
overrideDefaultFreq: 60 * 60 * 24,
|
overrideDefaultFreq: 60 * 60 * 24,
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
Reference in New Issue
Block a user