mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-06-27 00:11:01 +02:00
Enable database reading in recursive notion crawl
This commit is contained in:
parent
174f54473e
commit
68b23b6339
@ -117,6 +117,50 @@ class NotionConnector(LoadConnector, PollConnector):
|
|||||||
raise e
|
raise e
|
||||||
return NotionPage(**res.json())
|
return NotionPage(**res.json())
|
||||||
|
|
||||||
|
@retry(tries=3, delay=1, backoff=2)
|
||||||
|
def _fetch_database(
|
||||||
|
self, database_id: str, cursor: str | None = None
|
||||||
|
) -> dict[str, Any]:
|
||||||
|
"""Fetch a database from it's ID via the Notion API."""
|
||||||
|
logger.debug(f"Fetching database for ID '{database_id}'")
|
||||||
|
block_url = f"https://api.notion.com/v1/databases/{database_id}/query"
|
||||||
|
body = None if not cursor else {"start_cursor": cursor}
|
||||||
|
res = requests.post(block_url, headers=self.headers, json=body)
|
||||||
|
try:
|
||||||
|
res.raise_for_status()
|
||||||
|
except Exception as e:
|
||||||
|
logger.exception(f"Error fetching database - {res.json()}")
|
||||||
|
raise e
|
||||||
|
return res.json()
|
||||||
|
|
||||||
|
def _read_pages_from_database(self, database_id: str) -> list[str]:
|
||||||
|
"""Returns a list of all page IDs in the database"""
|
||||||
|
result_pages: list[str] = []
|
||||||
|
cursor = None
|
||||||
|
while True:
|
||||||
|
data = self._fetch_database(database_id, cursor)
|
||||||
|
|
||||||
|
for result in data["results"]:
|
||||||
|
obj_id = result["id"]
|
||||||
|
obj_type = result["object"]
|
||||||
|
if obj_type == "page":
|
||||||
|
logger.debug(
|
||||||
|
f"Found page with ID '{obj_id}' in database '{database_id}'"
|
||||||
|
)
|
||||||
|
result_pages.append(result["id"])
|
||||||
|
elif obj_type == "database":
|
||||||
|
logger.debug(
|
||||||
|
f"Found database with ID '{obj_id}' in database '{database_id}'"
|
||||||
|
)
|
||||||
|
result_pages.extend(self._read_pages_from_database(obj_id))
|
||||||
|
|
||||||
|
if data["next_cursor"] is None:
|
||||||
|
break
|
||||||
|
|
||||||
|
cursor = data["next_cursor"]
|
||||||
|
|
||||||
|
return result_pages
|
||||||
|
|
||||||
def _read_blocks(
|
def _read_blocks(
|
||||||
self, page_block_id: str
|
self, page_block_id: str
|
||||||
) -> tuple[list[tuple[str, str]], list[str]]:
|
) -> tuple[list[tuple[str, str]], list[str]]:
|
||||||
@ -145,12 +189,17 @@ class NotionConnector(LoadConnector, PollConnector):
|
|||||||
if result_type == "child_page":
|
if result_type == "child_page":
|
||||||
child_pages.append(result_block_id)
|
child_pages.append(result_block_id)
|
||||||
else:
|
else:
|
||||||
|
logger.debug(f"Entering sub-block: {result_block_id}")
|
||||||
subblock_result_lines, subblock_child_pages = self._read_blocks(
|
subblock_result_lines, subblock_child_pages = self._read_blocks(
|
||||||
result_block_id
|
result_block_id
|
||||||
)
|
)
|
||||||
|
logger.debug(f"Finished sub-block: {result_block_id}")
|
||||||
result_lines.extend(subblock_result_lines)
|
result_lines.extend(subblock_result_lines)
|
||||||
child_pages.extend(subblock_child_pages)
|
child_pages.extend(subblock_child_pages)
|
||||||
|
|
||||||
|
if result_type == "child_database" and self.recursive_index_enabled:
|
||||||
|
child_pages.extend(self._read_pages_from_database(result_block_id))
|
||||||
|
|
||||||
cur_result_text = "\n".join(cur_result_text_arr)
|
cur_result_text = "\n".join(cur_result_text_arr)
|
||||||
if cur_result_text:
|
if cur_result_text:
|
||||||
result_lines.append((cur_result_text, result_block_id))
|
result_lines.append((cur_result_text, result_block_id))
|
||||||
|
Loading…
x
Reference in New Issue
Block a user