mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-09-20 21:27:03 +02:00
Fix Notion recursive
This commit is contained in:
@@ -90,9 +90,7 @@ class NotionConnector(LoadConnector, PollConnector):
|
|||||||
# NOTE: this also removes all benefits of polling, since we need to traverse
|
# NOTE: this also removes all benefits of polling, since we need to traverse
|
||||||
# all pages regardless of whether they are updated. If the notion workspace is
|
# all pages regardless of whether they are updated. If the notion workspace is
|
||||||
# very large, this may not be practical.
|
# very large, this may not be practical.
|
||||||
self.recursive_index_enabled = (
|
self.recursive_index_enabled = recursive_index_enabled or self.root_page_id
|
||||||
recursive_index_enabled or self.root_page_id is not None
|
|
||||||
)
|
|
||||||
|
|
||||||
@retry(tries=3, delay=1, backoff=2)
|
@retry(tries=3, delay=1, backoff=2)
|
||||||
def _fetch_blocks(self, block_id: str, cursor: str | None = None) -> dict[str, Any]:
|
def _fetch_blocks(self, block_id: str, cursor: str | None = None) -> dict[str, Any]:
|
||||||
@@ -289,12 +287,15 @@ class NotionConnector(LoadConnector, PollConnector):
|
|||||||
if self.recursive_index_enabled and all_child_page_ids:
|
if self.recursive_index_enabled and all_child_page_ids:
|
||||||
# NOTE: checking if page_id is in self.indexed_pages to prevent extra
|
# NOTE: checking if page_id is in self.indexed_pages to prevent extra
|
||||||
# calls to `_fetch_page` for pages we've already indexed
|
# calls to `_fetch_page` for pages we've already indexed
|
||||||
all_child_pages = [
|
for child_page_batch_ids in batch_generator(
|
||||||
self._fetch_page(page_id)
|
all_child_page_ids, batch_size=INDEX_BATCH_SIZE
|
||||||
for page_id in all_child_page_ids
|
):
|
||||||
if page_id not in self.indexed_pages
|
child_page_batch = [
|
||||||
]
|
self._fetch_page(page_id)
|
||||||
yield from self._read_pages(all_child_pages)
|
for page_id in child_page_batch_ids
|
||||||
|
if page_id not in self.indexed_pages
|
||||||
|
]
|
||||||
|
yield from self._read_pages(child_page_batch)
|
||||||
|
|
||||||
@retry(tries=3, delay=1, backoff=2)
|
@retry(tries=3, delay=1, backoff=2)
|
||||||
def _search_notion(self, query_dict: dict[str, Any]) -> NotionSearchResponse:
|
def _search_notion(self, query_dict: dict[str, Any]) -> NotionSearchResponse:
|
||||||
|
Reference in New Issue
Block a user