Small Notion tweaks (#4244)

* Small Notion tweaks

* Add comment
This commit is contained in:
Chris Weaver 2025-03-10 08:51:12 -07:00 committed by GitHub
parent 965f9e98bf
commit 6ead9510a4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 136 additions and 6 deletions

View File

@ -48,6 +48,8 @@ env:
# Gitbook
GITBOOK_SPACE_ID: ${{ secrets.GITBOOK_SPACE_ID }}
GITBOOK_API_KEY: ${{ secrets.GITBOOK_API_KEY }}
# Notion
NOTION_INTEGRATION_TOKEN: ${{ secrets.NOTION_INTEGRATION_TOKEN }}
jobs:
connectors-check:

View File

@ -1,4 +1,3 @@
import time
from collections.abc import Generator
from dataclasses import dataclass
from dataclasses import fields
@ -32,6 +31,7 @@ from onyx.utils.logger import setup_logger
logger = setup_logger()
_NOTION_PAGE_SIZE = 100
_NOTION_CALL_TIMEOUT = 30 # 30 seconds
@ -537,9 +537,9 @@ class NotionConnector(LoadConnector, PollConnector):
"""
filtered_pages: list[NotionPage] = []
for page in pages:
compare_time = time.mktime(
time.strptime(page[filter_field], "%Y-%m-%dT%H:%M:%S.000Z")
)
# Parse ISO 8601 timestamp and convert to UTC epoch time
timestamp = page[filter_field].replace(".000Z", "+00:00")
compare_time = datetime.fromisoformat(timestamp).timestamp()
if compare_time > start and compare_time <= end:
filtered_pages += [NotionPage(**page)]
return filtered_pages
@ -578,7 +578,7 @@ class NotionConnector(LoadConnector, PollConnector):
query_dict = {
"filter": {"property": "object", "value": "page"},
"page_size": self.batch_size,
"page_size": _NOTION_PAGE_SIZE,
}
while True:
db_res = self._search_notion(query_dict)
@ -604,7 +604,7 @@ class NotionConnector(LoadConnector, PollConnector):
return
query_dict = {
"page_size": self.batch_size,
"page_size": _NOTION_PAGE_SIZE,
"sort": {"timestamp": "last_edited_time", "direction": "descending"},
"filter": {"property": "object", "value": "page"},
}

View File

@ -0,0 +1,128 @@
import os
import time
import pytest
from onyx.configs.constants import DocumentSource
from onyx.connectors.notion.connector import NotionConnector
@pytest.fixture
def notion_connector() -> NotionConnector:
    """Provide a NotionConnector whose credentials come from the environment.

    The integration token is read from the ``NOTION_INTEGRATION_TOKEN``
    environment variable; indexing ``os.environ`` raises ``KeyError`` when
    the variable is not set.
    """
    notion = NotionConnector()
    credentials = {
        "notion_integration_token": os.environ["NOTION_INTEGRATION_TOKEN"],
    }
    notion.load_credentials(credentials)
    return notion
def test_notion_connector_basic(notion_connector: NotionConnector) -> None:
    """Check the documents the NotionConnector yields for a real Notion page.

    Uses a Notion workspace under the onyx-test.com domain.
    """
    # Poll everything from epoch 0 up to "now"
    batches = notion_connector.poll_source(0, time.time())

    # Only the first batch is inspected
    first_batch = next(batches)
    assert (
        len(first_batch) == 5
    ), "Expected exactly 5 documents (root, two children, table entry, and table entry child)"

    # Look the expected documents up by semantic identifier; if an identifier
    # occurs more than once, the last document in the batch is the one kept
    docs_by_identifier = {doc.semantic_identifier: doc for doc in first_batch}
    root_doc = docs_by_identifier.get("Root")
    child1_doc = docs_by_identifier.get("Child1")
    child2_doc = docs_by_identifier.get("Child2")
    table_entry_doc = docs_by_identifier.get("table-entry01")
    table_entry_child_doc = docs_by_identifier.get("Child-table-entry01")

    assert root_doc is not None, "Root document not found"
    assert child1_doc is not None, "Child1 document not found"
    assert child2_doc is not None, "Child2 document not found"
    assert table_entry_doc is not None, "Table entry document not found"
    assert table_entry_child_doc is not None, "Table entry child document not found"

    # --- Root: identity, source, single section with exact text and link ---
    assert root_doc.id is not None
    assert root_doc.source == DocumentSource.NOTION
    assert len(root_doc.sections) == 1
    root_section = root_doc.sections[0]
    assert root_section.text == "\nroot"
    assert root_section.link is not None
    assert root_section.link.startswith("https://www.notion.so/")

    # --- Child1: identity, source, single section with exact text and link ---
    assert child1_doc.id is not None
    assert child1_doc.source == DocumentSource.NOTION
    assert len(child1_doc.sections) == 1
    child1_section = child1_doc.sections[0]
    assert child1_section.text == "\nchild1"
    assert child1_section.link is not None
    assert child1_section.link.startswith("https://www.notion.so/")

    # --- Child2: also embeds a database, hence two sections ---
    assert child2_doc.id is not None
    assert child2_doc.source == DocumentSource.NOTION
    assert len(child2_doc.sections) == 2  # One for content, one for database
    child2_section, child2_db_section = child2_doc.sections[0], child2_doc.sections[1]
    # Page content section
    assert child2_section.text == "\nchild2"
    assert child2_section.link is not None
    assert child2_section.link.startswith("https://www.notion.so/")
    # Database section only needs to be non-blank
    assert child2_db_section.text.strip() != ""  # Should contain some database content
    assert child2_db_section.link is not None
    assert child2_db_section.link.startswith("https://www.notion.so/")

    # --- Table entry: identity, source, single section with exact text and link ---
    assert table_entry_doc.id is not None
    assert table_entry_doc.source == DocumentSource.NOTION
    assert len(table_entry_doc.sections) == 1
    table_entry_section = table_entry_doc.sections[0]
    assert table_entry_section.text == "\ntable-entry01"
    assert table_entry_section.link is not None
    assert table_entry_section.link.startswith("https://www.notion.so/")

    # --- Table entry child: identity, source, single section with exact text and link ---
    assert table_entry_child_doc.id is not None
    assert table_entry_child_doc.source == DocumentSource.NOTION
    assert len(table_entry_child_doc.sections) == 1
    table_entry_child_section = table_entry_child_doc.sections[0]
    assert table_entry_child_section.text == "\nchild-table-entry01"
    assert table_entry_child_section.link is not None
    assert table_entry_child_section.link.startswith("https://www.notion.so/")