mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-07-28 13:53:28 +02:00
380 lines
13 KiB
Python
380 lines
13 KiB
Python
import os
|
|
from unittest.mock import MagicMock
|
|
|
|
import pytest
|
|
from pydantic import BaseModel
|
|
|
|
from onyx.configs.constants import DocumentSource
|
|
from onyx.connectors.airtable.airtable_connector import AirtableConnector
|
|
from onyx.connectors.models import Document
|
|
from onyx.connectors.models import Section
|
|
|
|
# Airtable view ID passed as `view_id` when building the expected record links
# in the tests below (and handed to the connector in the share/view test).
BASE_VIEW_ID = "viwVUEJjWPd8XYjh8"
|
|
|
|
|
|
class AirtableConfig(BaseModel):
    """Connection settings the Airtable connector tests are run with."""

    # Airtable base the connector is pointed at.
    base_id: str
    # Table name or table ID; forwarded to the connector as `table_name_or_id`.
    table_identifier: str
    # Token supplied to the connector under the "airtable_access_token" key.
    access_token: str
|
|
|
|
|
|
@pytest.fixture(params=[True, False])
def airtable_config(request: pytest.FixtureRequest) -> AirtableConfig:
    """Build the connector test configuration from environment variables.

    The fixture is parametrized so every test using it runs twice: once
    addressing the table by name (param ``True``) and once by table ID
    (param ``False``).

    Raises:
        KeyError: if a required environment variable is not set.
    """
    # Choose which environment variable identifies the table for this run.
    if request.param:
        table_env_key = "AIRTABLE_TEST_TABLE_NAME"
    else:
        table_env_key = "AIRTABLE_TEST_TABLE_ID"
    table_identifier = os.environ[table_env_key]

    return AirtableConfig(
        base_id=os.environ["AIRTABLE_TEST_BASE_ID"],
        table_identifier=table_identifier,
        access_token=os.environ["AIRTABLE_ACCESS_TOKEN"],
    )
|
|
|
|
|
|
def create_test_document(
    id: str,
    title: str,
    description: str,
    priority: str,
    status: str,
    # Link to another record is skipped for now
    # category: str,
    ticket_id: str,
    created_time: str,
    status_last_changed: str,
    submitted_by: str,
    assignee: str,
    days_since_status_change: int | None,
    attachments: list[tuple[str, str]] | None = None,
    all_fields_as_metadata: bool = False,
    share_id: str | None = None,
    view_id: str | None = None,
) -> Document:
    """Build the ``Document`` a test expects for one Airtable record.

    Args:
        id: Airtable record ID; used in the document ID and record links.
        title: Value of the record's "Title" field.
        description: Value of the record's "Description" field.
        priority: Value stored under the "Priority" metadata key.
        status: Value stored under the "Status" metadata key.
        ticket_id: Value stored under the "ID" metadata key.
        created_time: Value stored under the "Created time" metadata key.
        status_last_changed: Value stored under "Status last changed".
        submitted_by: Value stored under the "Submitted by" metadata key.
        assignee: Value stored under the "Assignee" metadata key.
        days_since_status_change: Stringified into metadata unless ``None``.
        attachments: Optional ``(text, link)`` pairs, one section each.
        all_fields_as_metadata: When true, title and description go into
            metadata instead of becoming sections.
        share_id: Optional URL segment inserted after the base ID.
        view_id: Optional URL segment appended after the table ID.

    Returns:
        The expected ``Document`` with source ``DocumentSource.AIRTABLE``.

    Raises:
        RuntimeError: if ``AIRTABLE_TEST_BASE_ID`` or
            ``AIRTABLE_TEST_TABLE_ID`` is unset or empty.
    """
    required_env = {
        name: os.environ.get(name)
        for name in ("AIRTABLE_TEST_BASE_ID", "AIRTABLE_TEST_TABLE_ID")
    }
    missing_vars = [name for name, value in required_env.items() if not value]
    if missing_vars:
        raise RuntimeError(
            f"Required environment variables not set: {', '.join(missing_vars)}. "
            "These variables are required to run Airtable connector tests."
        )

    # URL layout: base / [share] / table / [view]; falsy optional parts are skipped.
    url_parts = (
        "https://airtable.com",
        required_env["AIRTABLE_TEST_BASE_ID"],
        share_id,
        required_env["AIRTABLE_TEST_TABLE_ID"],
        view_id,
    )
    link_base = "/".join(part for part in url_parts if part)
    record_link = f"{link_base}/{id}"

    divider = "------------------------"
    sections = []
    if not all_fields_as_metadata:
        # Title and description are section content only in the default mode.
        sections = [
            Section(
                text=f"{label}:\n{divider}\n{value}\n{divider}",
                link=record_link,
            )
            for label, value in (("Title", title), ("Description", description))
        ]
    sections.extend(
        Section(
            text=f"Attachment:\n{divider}\n{attachment_text}\n{divider}",
            link=attachment_link,
        )
        for attachment_text, attachment_link in attachments or []
    )

    metadata: dict[str, str | list[str]] = {
        # "Category" is omitted: links to other records are skipped for now.
        "Assignee": assignee,
        "Submitted by": submitted_by,
        "Priority": priority,
        "Status": status,
        "Created time": created_time,
        "ID": ticket_id,
        "Status last changed": status_last_changed,
    }
    if days_since_status_change is not None:
        metadata["Days since status change"] = str(days_since_status_change)
    if all_fields_as_metadata:
        metadata["Title"] = title
        metadata["Description"] = description

    table_name = os.environ.get("AIRTABLE_TEST_TABLE_NAME", "")
    return Document(
        id=f"airtable__{id}",
        sections=sections,
        source=DocumentSource.AIRTABLE,
        semantic_identifier=f"{table_name}: {title}",
        metadata=metadata,
        doc_updated_at=None,
        primary_owners=None,
        secondary_owners=None,
        title=None,
        from_ingestion_api=False,
        additional_info=None,
    )
|
|
|
|
|
|
def compare_documents(
    actual_docs: list[Document], expected_docs: list[Document]
) -> None:
    """Assert that actual and expected documents match, ignoring order.

    Documents are paired by ``id``; each pair is compared field by field and
    then section by section (text and link, in order).

    Args:
        actual_docs: Documents produced by the code under test.
        expected_docs: Documents the test expects.

    Raises:
        AssertionError: if either list contains duplicate IDs, the ID sets
            differ, or any compared field or section differs.
    """
    actual_docs_dict = {doc.id: doc for doc in actual_docs}
    expected_docs_dict = {doc.id: doc for doc in expected_docs}

    # Keying by ID collapses repeated IDs, so a duplicated document would
    # otherwise go unnoticed by the key comparison below.
    assert len(actual_docs_dict) == len(
        actual_docs
    ), "Duplicate document IDs in actual documents"
    assert len(expected_docs_dict) == len(
        expected_docs
    ), "Duplicate document IDs in expected documents"

    assert actual_docs_dict.keys() == expected_docs_dict.keys(), "Document ID mismatch"

    # (attribute name, label used in the failure message)
    compared_fields = (
        ("source", "Source"),
        ("semantic_identifier", "Semantic identifier"),
        ("metadata", "Metadata"),
        ("doc_updated_at", "Updated at"),
        ("primary_owners", "Primary owners"),
        ("secondary_owners", "Secondary owners"),
        ("title", "Title"),
        ("from_ingestion_api", "Ingestion API flag"),
        ("additional_info", "Additional info"),
    )

    for doc_id, actual in actual_docs_dict.items():
        expected = expected_docs_dict[doc_id]

        for attr, label in compared_fields:
            assert getattr(actual, attr) == getattr(
                expected, attr
            ), f"{label} mismatch for document {doc_id}"

        # Compare sections
        assert len(actual.sections) == len(
            expected.sections
        ), f"Number of sections mismatch for document {doc_id}"
        for i, (actual_section, expected_section) in enumerate(
            zip(actual.sections, expected.sections)
        ):
            assert (
                actual_section.text == expected_section.text
            ), f"Section {i} text mismatch for document {doc_id}"
            assert (
                actual_section.link == expected_section.link
            ), f"Section {i} link mismatch for document {doc_id}"
|
|
|
|
|
|
def test_airtable_connector_basic(
    mock_get_unstructured_api_key: MagicMock, airtable_config: AirtableConfig
) -> None:
    """Test the connector with ``treat_all_non_attachment_fields_as_metadata=False``.

    In this mode the expected documents carry the title and description as
    sections, and both rows of the test table are expected to yield a
    document in a single batch.
    """
    connector = AirtableConnector(
        base_id=airtable_config.base_id,
        table_name_or_id=airtable_config.table_identifier,
        treat_all_non_attachment_fields_as_metadata=False,
    )
    connector.load_credentials(
        {
            "airtable_access_token": airtable_config.access_token,
        }
    )

    # Exactly one batch is expected; a second `next` must exhaust the generator.
    doc_batch_generator = connector.load_from_state()
    doc_batch = next(doc_batch_generator)
    with pytest.raises(StopIteration):
        next(doc_batch_generator)

    assert len(doc_batch) == 2

    expected_docs = [
        create_test_document(
            id="rec8BnxDLyWeegOuO",
            title="Slow Internet",
            description="The internet connection is very slow.",
            priority="Medium",
            status="In Progress",
            ticket_id="2",
            created_time="2024-12-24T21:02:49.000Z",
            status_last_changed="2024-12-24T21:02:49.000Z",
            days_since_status_change=0,
            assignee="Chris Weaver (chris@onyx.app)",
            submitted_by="Chris Weaver (chris@onyx.app)",
            all_fields_as_metadata=False,
            view_id=BASE_VIEW_ID,
        ),
        create_test_document(
            id="reccSlIA4pZEFxPBg",
            title="Printer Issue",
            description="The office printer is not working.",
            priority="High",
            status="Open",
            ticket_id="1",
            created_time="2024-12-24T21:02:49.000Z",
            status_last_changed="2024-12-24T21:02:49.000Z",
            days_since_status_change=0,
            assignee="Chris Weaver (chris@onyx.app)",
            submitted_by="Chris Weaver (chris@onyx.app)",
            attachments=[
                (
                    "Test.pdf:\ntesting!!!",
                    # The attachment link is hard-coded rather than derived from the environment.
                    "https://airtable.com/appCXJqDFS4gea8tn/tblRxFQsTlBBZdRY1/viwVUEJjWPd8XYjh8/reccSlIA4pZEFxPBg/fld1u21zkJACIvAEF/attlj2UBWNEDZngCc?blocks=hide",
                )
            ],
            all_fields_as_metadata=False,
            view_id=BASE_VIEW_ID,
        ),
    ]

    # Compare documents using the utility function
    compare_documents(doc_batch, expected_docs)
|
|
|
|
|
|
def test_airtable_connector_all_metadata(
    mock_get_unstructured_api_key: MagicMock, airtable_config: AirtableConfig
) -> None:
    """Test the connector with ``treat_all_non_attachment_fields_as_metadata=True``.

    Only the row with an attachment is expected to produce a document; its
    title and description are expected in metadata rather than as sections.
    """
    connector = AirtableConnector(
        base_id=airtable_config.base_id,
        table_name_or_id=airtable_config.table_identifier,
        treat_all_non_attachment_fields_as_metadata=True,
    )
    credentials = {"airtable_access_token": airtable_config.access_token}
    connector.load_credentials(credentials)

    # A single batch is expected; the generator must be exhausted after it.
    batches = connector.load_from_state()
    only_batch = next(batches)
    with pytest.raises(StopIteration):
        next(batches)

    # NOTE: one of the rows has no attachments -> no content -> no document
    assert len(only_batch) == 1

    printer_attachment = (
        "Test.pdf:\ntesting!!!",
        # hard code link for now
        "https://airtable.com/appCXJqDFS4gea8tn/tblRxFQsTlBBZdRY1/viwVUEJjWPd8XYjh8/reccSlIA4pZEFxPBg/fld1u21zkJACIvAEF/attlj2UBWNEDZngCc?blocks=hide",
    )
    printer_issue = create_test_document(
        id="reccSlIA4pZEFxPBg",
        title="Printer Issue",
        description="The office printer is not working.",
        priority="High",
        status="Open",
        # Link to another record is skipped for now
        # category="Software Development",
        ticket_id="1",
        created_time="2024-12-24T21:02:49.000Z",
        status_last_changed="2024-12-24T21:02:49.000Z",
        days_since_status_change=0,
        assignee="Chris Weaver (chris@onyx.app)",
        submitted_by="Chris Weaver (chris@onyx.app)",
        attachments=[printer_attachment],
        all_fields_as_metadata=True,
        view_id=BASE_VIEW_ID,
    )

    # Compare documents using the utility function
    compare_documents(only_batch, [printer_issue])
|
|
|
|
|
|
def test_airtable_connector_with_share_and_view(
    mock_get_unstructured_api_key: MagicMock, airtable_config: AirtableConfig
) -> None:
    """Test that share_id and view_id supplied to the connector appear in generated URLs."""
    share_id = "shrkfjEzDmLaDtK83"

    connector = AirtableConnector(
        base_id=airtable_config.base_id,
        table_name_or_id=airtable_config.table_identifier,
        treat_all_non_attachment_fields_as_metadata=False,
        share_id=share_id,
        view_id=BASE_VIEW_ID,
    )
    credentials = {"airtable_access_token": airtable_config.access_token}
    connector.load_credentials(credentials)

    # A single batch is expected; the generator must be exhausted after it.
    batches = connector.load_from_state()
    only_batch = next(batches)
    with pytest.raises(StopIteration):
        next(batches)

    assert len(only_batch) == 2

    # Field values that are identical for both expected records.
    shared_fields = {
        "created_time": "2024-12-24T21:02:49.000Z",
        "status_last_changed": "2024-12-24T21:02:49.000Z",
        "days_since_status_change": 0,
        "assignee": "Chris Weaver (chris@onyx.app)",
        "submitted_by": "Chris Weaver (chris@onyx.app)",
        "all_fields_as_metadata": False,
        "share_id": share_id,
        "view_id": BASE_VIEW_ID,
    }

    slow_internet = create_test_document(
        id="rec8BnxDLyWeegOuO",
        title="Slow Internet",
        description="The internet connection is very slow.",
        priority="Medium",
        status="In Progress",
        ticket_id="2",
        **shared_fields,
    )

    # The attachment link includes the share ID ahead of the table ID.
    attachment_link = (
        f"https://airtable.com/{airtable_config.base_id}/{share_id}/"
        f"{os.environ['AIRTABLE_TEST_TABLE_ID']}/{BASE_VIEW_ID}/reccSlIA4pZEFxPBg/"
        "fld1u21zkJACIvAEF/attlj2UBWNEDZngCc?blocks=hide"
    )
    printer_issue = create_test_document(
        id="reccSlIA4pZEFxPBg",
        title="Printer Issue",
        description="The office printer is not working.",
        priority="High",
        status="Open",
        ticket_id="1",
        attachments=[("Test.pdf:\ntesting!!!", attachment_link)],
        **shared_fields,
    )

    # Compare documents using the utility function
    compare_documents(only_batch, [slow_internet, printer_issue])
|