mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-03-17 13:22:42 +01:00
parent
965f9e98bf
commit
6ead9510a4
@ -48,6 +48,8 @@ env:
|
|||||||
# Gitbook
|
# Gitbook
|
||||||
GITBOOK_SPACE_ID: ${{ secrets.GITBOOK_SPACE_ID }}
|
GITBOOK_SPACE_ID: ${{ secrets.GITBOOK_SPACE_ID }}
|
||||||
GITBOOK_API_KEY: ${{ secrets.GITBOOK_API_KEY }}
|
GITBOOK_API_KEY: ${{ secrets.GITBOOK_API_KEY }}
|
||||||
|
# Notion
|
||||||
|
NOTION_INTEGRATION_TOKEN: ${{ secrets.NOTION_INTEGRATION_TOKEN }}
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
connectors-check:
|
connectors-check:
|
||||||
|
@ -1,4 +1,3 @@
|
|||||||
import time
|
|
||||||
from collections.abc import Generator
|
from collections.abc import Generator
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from dataclasses import fields
|
from dataclasses import fields
|
||||||
@ -32,6 +31,7 @@ from onyx.utils.logger import setup_logger
|
|||||||
|
|
||||||
logger = setup_logger()
|
logger = setup_logger()
|
||||||
|
|
||||||
|
_NOTION_PAGE_SIZE = 100
|
||||||
_NOTION_CALL_TIMEOUT = 30 # 30 seconds
|
_NOTION_CALL_TIMEOUT = 30 # 30 seconds
|
||||||
|
|
||||||
|
|
||||||
@ -537,9 +537,9 @@ class NotionConnector(LoadConnector, PollConnector):
|
|||||||
"""
|
"""
|
||||||
filtered_pages: list[NotionPage] = []
|
filtered_pages: list[NotionPage] = []
|
||||||
for page in pages:
|
for page in pages:
|
||||||
compare_time = time.mktime(
|
# Parse ISO 8601 timestamp and convert to UTC epoch time
|
||||||
time.strptime(page[filter_field], "%Y-%m-%dT%H:%M:%S.000Z")
|
timestamp = page[filter_field].replace(".000Z", "+00:00")
|
||||||
)
|
compare_time = datetime.fromisoformat(timestamp).timestamp()
|
||||||
if compare_time > start and compare_time <= end:
|
if compare_time > start and compare_time <= end:
|
||||||
filtered_pages += [NotionPage(**page)]
|
filtered_pages += [NotionPage(**page)]
|
||||||
return filtered_pages
|
return filtered_pages
|
||||||
@ -578,7 +578,7 @@ class NotionConnector(LoadConnector, PollConnector):
|
|||||||
|
|
||||||
query_dict = {
|
query_dict = {
|
||||||
"filter": {"property": "object", "value": "page"},
|
"filter": {"property": "object", "value": "page"},
|
||||||
"page_size": self.batch_size,
|
"page_size": _NOTION_PAGE_SIZE,
|
||||||
}
|
}
|
||||||
while True:
|
while True:
|
||||||
db_res = self._search_notion(query_dict)
|
db_res = self._search_notion(query_dict)
|
||||||
@ -604,7 +604,7 @@ class NotionConnector(LoadConnector, PollConnector):
|
|||||||
return
|
return
|
||||||
|
|
||||||
query_dict = {
|
query_dict = {
|
||||||
"page_size": self.batch_size,
|
"page_size": _NOTION_PAGE_SIZE,
|
||||||
"sort": {"timestamp": "last_edited_time", "direction": "descending"},
|
"sort": {"timestamp": "last_edited_time", "direction": "descending"},
|
||||||
"filter": {"property": "object", "value": "page"},
|
"filter": {"property": "object", "value": "page"},
|
||||||
}
|
}
|
||||||
|
128
backend/tests/daily/connectors/notion/test_notion_connector.py
Normal file
128
backend/tests/daily/connectors/notion/test_notion_connector.py
Normal file
@ -0,0 +1,128 @@
|
|||||||
|
import os
|
||||||
|
import time
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from onyx.configs.constants import DocumentSource
|
||||||
|
from onyx.connectors.notion.connector import NotionConnector
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def notion_connector() -> NotionConnector:
    """Return a NotionConnector loaded with the integration token.

    The token is read from the ``NOTION_INTEGRATION_TOKEN`` environment
    variable; a missing variable surfaces as a ``KeyError``.
    """
    notion = NotionConnector()
    # Looked up after construction, matching the original evaluation order.
    token = os.environ["NOTION_INTEGRATION_TOKEN"]
    notion.load_credentials({"notion_integration_token": token})
    return notion
|
||||||
|
|
||||||
|
|
||||||
|
def test_notion_connector_basic(notion_connector: NotionConnector) -> None:
    """Poll a real Notion workspace and validate the first document batch.

    The workspace lives under the onyx-test.com domain. The first batch is
    expected to hold five pages identified as "Root", "Child1", "Child2"
    (which also carries a database section), "table-entry01" and
    "Child-table-entry01".
    """
    notion_link_prefix = "https://www.notion.so/"

    # Only the first batch yielded by the poll is inspected.
    batches = notion_connector.poll_source(0, time.time())
    first_batch = next(batches)
    assert (
        len(first_batch) == 5
    ), "Expected exactly 5 documents (root, two children, table entry, and table entry child)"

    # Index by semantic identifier; if two documents share an identifier the
    # later one wins, same as a sequential if/elif assignment would.
    by_identifier = {doc.semantic_identifier: doc for doc in first_batch}
    root_doc = by_identifier.get("Root")
    child1_doc = by_identifier.get("Child1")
    child2_doc = by_identifier.get("Child2")
    table_entry_doc = by_identifier.get("table-entry01")
    table_entry_child_doc = by_identifier.get("Child-table-entry01")

    # Presence is asserted for every page before any content is examined.
    assert root_doc is not None, "Root document not found"
    assert child1_doc is not None, "Child1 document not found"
    assert child2_doc is not None, "Child2 document not found"
    assert table_entry_doc is not None, "Table entry document not found"
    assert table_entry_child_doc is not None, "Table entry child document not found"

    # (document, expected text of its first section, expected section count)
    expectations = (
        (root_doc, "\nroot", 1),
        (child1_doc, "\nchild1", 1),
        (child2_doc, "\nchild2", 2),  # One for content, one for database
        (table_entry_doc, "\ntable-entry01", 1),
        (table_entry_child_doc, "\nchild-table-entry01", 1),
    )
    for doc, expected_text, expected_section_count in expectations:
        # Document structure
        assert doc.id is not None
        assert doc.source == DocumentSource.NOTION
        assert len(doc.sections) == expected_section_count

        # Content specific checks on the leading section
        content_section = doc.sections[0]
        assert content_section.text == expected_text
        assert content_section.link is not None
        assert content_section.link.startswith(notion_link_prefix)

        # Any trailing section (only Child2's database section here) must
        # contain some text and link back to Notion.
        for extra_section in doc.sections[1:]:
            assert extra_section.text.strip() != ""
            assert extra_section.link is not None
            assert extra_section.link.startswith(notion_link_prefix)
|
Loading…
x
Reference in New Issue
Block a user