mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-09-21 14:12:42 +02:00
feat: add option to treat all non-attachment fields as metadata in Airtable connector (#3817)
* feat: add option to treat all non-attachment fields as metadata in Airtable connector
  - Added new UI option 'treat_all_non_attachment_fields_as_metadata'
  - Updated backend logic to support treating all fields except attachments as metadata
  - Added tests for both default and all-metadata behaviors
  Co-Authored-By: Chris Weaver <chris@onyx.app>
* fix: handle missing environment variables gracefully in airtable tests
  Co-Authored-By: Chris Weaver <chris@onyx.app>
* fix: clean up test file and handle environment variables properly
  Co-Authored-By: Chris Weaver <chris@onyx.app>
* fix: add missing test fixture and fix formatting
  Co-Authored-By: Chris Weaver <chris@onyx.app>
* chore: fix black formatting
  Co-Authored-By: Chris Weaver <chris@onyx.app>
* fix: add type annotation for metadata dict in airtable tests
  Co-Authored-By: Chris Weaver <chris@onyx.app>
* fix: add type annotation for mock_get_api_key fixture
  Co-Authored-By: Chris Weaver <chris@onyx.app>
* fix: update Generator import to use collections.abc
  Co-Authored-By: Chris Weaver <chris@onyx.app>
* refactor: make treat_all_non_attachment_fields_as_metadata a direct required parameter
  - Move parameter from connector_config to direct class parameter
  - Place parameter right under table_name_or_id argument
  - Make parameter required in UI with no default value
  - Update tests to use new parameter structure
  Co-Authored-By: Chris Weaver <chris@onyx.app>
* chore: fix black formatting
  Co-Authored-By: Chris Weaver <chris@onyx.app>
* chore: rename _METADATA_FIELD_TYPES to DEFAULT_METADATA_FIELD_TYPES and clarify usage
  Co-Authored-By: Chris Weaver <chris@onyx.app>
* chore: fix black formatting in docstring
  Co-Authored-By: Chris Weaver <chris@onyx.app>
* test: make airtable tests fail loudly on missing env vars
  Co-Authored-By: Chris Weaver <chris@onyx.app>
* style: fix black formatting in test file
  Co-Authored-By: Chris Weaver <chris@onyx.app>
* style: add required newline between test functions
  Co-Authored-By: Chris Weaver <chris@onyx.app>
* test: update error message pattern in parameter validation test
  Co-Authored-By: Chris Weaver <chris@onyx.app>
* style: fix black formatting in test file
  Co-Authored-By: Chris Weaver <chris@onyx.app>
* test: fix error message pattern in parameter validation test
  Co-Authored-By: Chris Weaver <chris@onyx.app>
* style: fix line length in test file
  Co-Authored-By: Chris Weaver <chris@onyx.app>
* test: simplify error message pattern in parameter validation test
  Co-Authored-By: Chris Weaver <chris@onyx.app>
* test: add type validation test for treat_all_non_attachment_fields_as_metadata
  Co-Authored-By: Chris Weaver <chris@onyx.app>
* fix: add missing required parameter in test
  Co-Authored-By: Chris Weaver <chris@onyx.app>
* fix: remove parameter from test to properly validate it is required
  Co-Authored-By: Chris Weaver <chris@onyx.app>
* fix: add type validation for treat_all_non_attachment_fields_as_metadata parameter
  Co-Authored-By: Chris Weaver <chris@onyx.app>
* style: fix black formatting in airtable_connector.py
  Co-Authored-By: Chris Weaver <chris@onyx.app>
* fix: update type validation test to handle mypy errors
  Co-Authored-By: Chris Weaver <chris@onyx.app>
* fix: specify mypy ignore type for call-arg
  Co-Authored-By: Chris Weaver <chris@onyx.app>
* Also handle rows w/o sections
* style: fix black formatting in test assertion
  Co-Authored-By: Chris Weaver <chris@onyx.app>
* add TODO
* Remove unnecessary check
* Fix test
* Do not break existing airtable connectors
---------
Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Co-authored-by: Chris Weaver <chris@onyx.app>
Co-authored-by: Weves <chrisweaver101@gmail.com>
This commit is contained in:
committed by
GitHub
parent
d2aea63573
commit
d903e5912a
@@ -20,9 +20,9 @@ from onyx.utils.logger import setup_logger
|
||||
logger = setup_logger()
|
||||
|
||||
# NOTE: all are made lowercase to avoid case sensitivity issues
|
||||
# these are the field types that are considered metadata rather
|
||||
# than sections
|
||||
_METADATA_FIELD_TYPES = {
|
||||
# These field types are considered metadata by default when
|
||||
# treat_all_non_attachment_fields_as_metadata is False
|
||||
DEFAULT_METADATA_FIELD_TYPES = {
|
||||
"singlecollaborator",
|
||||
"collaborator",
|
||||
"createdby",
|
||||
@@ -60,12 +60,16 @@ class AirtableConnector(LoadConnector):
|
||||
self,
|
||||
base_id: str,
|
||||
table_name_or_id: str,
|
||||
treat_all_non_attachment_fields_as_metadata: bool = False,
|
||||
batch_size: int = INDEX_BATCH_SIZE,
|
||||
) -> None:
|
||||
self.base_id = base_id
|
||||
self.table_name_or_id = table_name_or_id
|
||||
self.batch_size = batch_size
|
||||
self.airtable_client: AirtableApi | None = None
|
||||
self.treat_all_non_attachment_fields_as_metadata = (
|
||||
treat_all_non_attachment_fields_as_metadata
|
||||
)
|
||||
|
||||
def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
|
||||
self.airtable_client = AirtableApi(credentials["airtable_access_token"])
|
||||
@@ -166,8 +170,14 @@ class AirtableConnector(LoadConnector):
|
||||
return [(str(field_info), default_link)]
|
||||
|
||||
def _should_be_metadata(self, field_type: str) -> bool:
|
||||
"""Determine if a field type should be treated as metadata."""
|
||||
return field_type.lower() in _METADATA_FIELD_TYPES
|
||||
"""Determine if a field type should be treated as metadata.
|
||||
|
||||
When treat_all_non_attachment_fields_as_metadata is True, all fields except
|
||||
attachments are treated as metadata. Otherwise, only fields with types listed
|
||||
in DEFAULT_METADATA_FIELD_TYPES are treated as metadata."""
|
||||
if self.treat_all_non_attachment_fields_as_metadata:
|
||||
return field_type.lower() != "multipleattachments"
|
||||
return field_type.lower() in DEFAULT_METADATA_FIELD_TYPES
|
||||
|
||||
def _process_field(
|
||||
self,
|
||||
@@ -233,7 +243,7 @@ class AirtableConnector(LoadConnector):
|
||||
record: RecordDict,
|
||||
table_schema: TableSchema,
|
||||
primary_field_name: str | None,
|
||||
) -> Document:
|
||||
) -> Document | None:
|
||||
"""Process a single Airtable record into a Document.
|
||||
|
||||
Args:
|
||||
@@ -277,6 +287,10 @@ class AirtableConnector(LoadConnector):
|
||||
sections.extend(field_sections)
|
||||
metadata.update(field_metadata)
|
||||
|
||||
if not sections:
|
||||
logger.warning(f"No sections found for record {record_id}")
|
||||
return None
|
||||
|
||||
semantic_id = (
|
||||
f"{table_name}: {primary_field_value}"
|
||||
if primary_field_value
|
||||
@@ -320,7 +334,8 @@ class AirtableConnector(LoadConnector):
|
||||
table_schema=table_schema,
|
||||
primary_field_name=primary_field_name,
|
||||
)
|
||||
record_documents.append(document)
|
||||
if document:
|
||||
record_documents.append(document)
|
||||
|
||||
if len(record_documents) >= self.batch_size:
|
||||
yield record_documents
|
||||
|
Reference in New Issue
Block a user