mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-03-17 13:22:42 +01:00
parent
965f9e98bf
commit
6ead9510a4
@ -48,6 +48,8 @@ env:
|
||||
# Gitbook
|
||||
GITBOOK_SPACE_ID: ${{ secrets.GITBOOK_SPACE_ID }}
|
||||
GITBOOK_API_KEY: ${{ secrets.GITBOOK_API_KEY }}
|
||||
# Notion
|
||||
NOTION_INTEGRATION_TOKEN: ${{ secrets.NOTION_INTEGRATION_TOKEN }}
|
||||
|
||||
jobs:
|
||||
connectors-check:
|
||||
|
@ -1,4 +1,3 @@
|
||||
import time
|
||||
from collections.abc import Generator
|
||||
from dataclasses import dataclass
|
||||
from dataclasses import fields
|
||||
@ -32,6 +31,7 @@ from onyx.utils.logger import setup_logger
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
_NOTION_PAGE_SIZE = 100
|
||||
_NOTION_CALL_TIMEOUT = 30 # 30 seconds
|
||||
|
||||
|
||||
@ -537,9 +537,9 @@ class NotionConnector(LoadConnector, PollConnector):
|
||||
"""
|
||||
filtered_pages: list[NotionPage] = []
|
||||
for page in pages:
|
||||
compare_time = time.mktime(
|
||||
time.strptime(page[filter_field], "%Y-%m-%dT%H:%M:%S.000Z")
|
||||
)
|
||||
# Parse ISO 8601 timestamp and convert to UTC epoch time
|
||||
timestamp = page[filter_field].replace(".000Z", "+00:00")
|
||||
compare_time = datetime.fromisoformat(timestamp).timestamp()
|
||||
if compare_time > start and compare_time <= end:
|
||||
filtered_pages += [NotionPage(**page)]
|
||||
return filtered_pages
|
||||
@ -578,7 +578,7 @@ class NotionConnector(LoadConnector, PollConnector):
|
||||
|
||||
query_dict = {
|
||||
"filter": {"property": "object", "value": "page"},
|
||||
"page_size": self.batch_size,
|
||||
"page_size": _NOTION_PAGE_SIZE,
|
||||
}
|
||||
while True:
|
||||
db_res = self._search_notion(query_dict)
|
||||
@ -604,7 +604,7 @@ class NotionConnector(LoadConnector, PollConnector):
|
||||
return
|
||||
|
||||
query_dict = {
|
||||
"page_size": self.batch_size,
|
||||
"page_size": _NOTION_PAGE_SIZE,
|
||||
"sort": {"timestamp": "last_edited_time", "direction": "descending"},
|
||||
"filter": {"property": "object", "value": "page"},
|
||||
}
|
||||
|
128
backend/tests/daily/connectors/notion/test_notion_connector.py
Normal file
128
backend/tests/daily/connectors/notion/test_notion_connector.py
Normal file
@ -0,0 +1,128 @@
|
||||
import os
|
||||
import time
|
||||
|
||||
import pytest
|
||||
|
||||
from onyx.configs.constants import DocumentSource
|
||||
from onyx.connectors.notion.connector import NotionConnector
|
||||
|
||||
|
||||
@pytest.fixture
def notion_connector() -> NotionConnector:
    """Return a NotionConnector loaded with the integration token from the environment.

    The token is read from the ``NOTION_INTEGRATION_TOKEN`` environment
    variable; a missing variable raises ``KeyError``.
    """
    notion = NotionConnector()
    # Read the token only after the connector exists, then hand it over
    credentials = {
        "notion_integration_token": os.environ["NOTION_INTEGRATION_TOKEN"],
    }
    notion.load_credentials(credentials)
    return notion
|
||||
|
||||
|
||||
def test_notion_connector_basic(notion_connector: NotionConnector) -> None:
    """Poll a real Notion workspace and validate the five known pages.

    Uses a Notion workspace under the onyx-test.com domain.

    Only the first batch yielded by ``poll_source`` is inspected. It must
    contain exactly five documents: Root, Child1, Child2 (which also carries
    a database section), table-entry01 and Child-table-entry01.
    """
    notion_url_prefix = "https://www.notion.so/"

    batches = notion_connector.poll_source(0, time.time())

    # Only the first batch of documents is examined
    first_batch = next(batches)
    assert (
        len(first_batch) == 5
    ), "Expected exactly 5 documents (root, two children, table entry, and table entry child)"

    # Index the batch by semantic identifier; a later duplicate overrides an earlier one
    docs_by_identifier = {doc.semantic_identifier: doc for doc in first_batch}
    root_doc = docs_by_identifier.get("Root")
    child1_doc = docs_by_identifier.get("Child1")
    child2_doc = docs_by_identifier.get("Child2")
    table_entry_doc = docs_by_identifier.get("table-entry01")
    table_entry_child_doc = docs_by_identifier.get("Child-table-entry01")

    assert root_doc is not None, "Root document not found"
    assert child1_doc is not None, "Child1 document not found"
    assert child2_doc is not None, "Child2 document not found"
    assert table_entry_doc is not None, "Table entry document not found"
    assert table_entry_child_doc is not None, "Table entry child document not found"

    # (document, expected text of first section, expected number of sections)
    # Child2 has two sections: one for content, one for its database.
    expectations = [
        (root_doc, "\nroot", 1),
        (child1_doc, "\nchild1", 1),
        (child2_doc, "\nchild2", 2),
        (table_entry_doc, "\ntable-entry01", 1),
        (table_entry_child_doc, "\nchild-table-entry01", 1),
    ]

    for doc, expected_text, expected_section_count in expectations:
        # Document structure
        assert doc.id is not None
        assert doc.source == DocumentSource.NOTION

        # Section count
        assert len(doc.sections) == expected_section_count
        content_section, *extra_sections = doc.sections

        # Content specific checks on the first section
        assert content_section.text == expected_text
        assert content_section.link is not None
        assert content_section.link.startswith(notion_url_prefix)

        # Any further section (the database section of Child2) only needs
        # some non-blank content and a Notion link
        for extra_section in extra_sections:
            assert extra_section.text.strip() != ""
            assert extra_section.link is not None
            assert extra_section.link.startswith(notion_url_prefix)
|
Loading…
x
Reference in New Issue
Block a user