mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-10-06 18:14:35 +02:00
Add some more docstrings
This commit is contained in:
@@ -17,6 +17,7 @@ from danswer.connectors.models import Section
|
|||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class NotionPage:
|
class NotionPage:
|
||||||
|
"""Represents a Notion Page object"""
|
||||||
id: str
|
id: str
|
||||||
created_time: str
|
created_time: str
|
||||||
last_edited_time: str
|
last_edited_time: str
|
||||||
@@ -33,6 +34,7 @@ class NotionPage:
|
|||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class NotionSearchResponse:
|
class NotionSearchResponse:
|
||||||
|
"""Represents the response from the Notion Search API"""
|
||||||
results: List[Dict[str, Any]]
|
results: List[Dict[str, Any]]
|
||||||
next_cursor: Optional[str]
|
next_cursor: Optional[str]
|
||||||
has_more: bool = False
|
has_more: bool = False
|
||||||
@@ -62,13 +64,14 @@ class NotionConnector(LoadConnector, PollConnector):
|
|||||||
}
|
}
|
||||||
|
|
||||||
def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
|
def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
|
||||||
|
"""Applies integration token to headers"""
|
||||||
self.headers[
|
self.headers[
|
||||||
"Authorization"
|
"Authorization"
|
||||||
] = f'Bearer {credentials["notion_integration_token"]}'
|
] = f'Bearer {credentials["notion_integration_token"]}'
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def _read_blocks(self, block_id: str, num_tabs: int = 0) -> str:
|
def _read_blocks(self, block_id: str, num_tabs: int = 0) -> str:
|
||||||
"""Read a block."""
|
"""Reads blocks for a page"""
|
||||||
done = False
|
done = False
|
||||||
result_lines_arr = []
|
result_lines_arr = []
|
||||||
cur_block_id = block_id
|
cur_block_id = block_id
|
||||||
@@ -115,7 +118,7 @@ class NotionConnector(LoadConnector, PollConnector):
|
|||||||
return result_lines
|
return result_lines
|
||||||
|
|
||||||
def _read_pages(self, pages: List[NotionPage]) -> List[Document]:
|
def _read_pages(self, pages: List[NotionPage]) -> List[Document]:
|
||||||
"""Read a page."""
|
"""Reads pages for rich text content and generates Documents"""
|
||||||
docs_batch = []
|
docs_batch = []
|
||||||
for page in pages:
|
for page in pages:
|
||||||
page_text = self._read_blocks(page.id)
|
page_text = self._read_blocks(page.id)
|
||||||
@@ -138,7 +141,7 @@ class NotionConnector(LoadConnector, PollConnector):
|
|||||||
return docs_batch
|
return docs_batch
|
||||||
|
|
||||||
def _search_notion(self, query_dict: Dict[str, Any]) -> NotionSearchResponse:
|
def _search_notion(self, query_dict: Dict[str, Any]) -> NotionSearchResponse:
|
||||||
"""Get all the pages from a Notion database."""
|
"""Search for pages from a Notion database."""
|
||||||
res = requests.post(
|
res = requests.post(
|
||||||
"https://api.notion.com/v1/search",
|
"https://api.notion.com/v1/search",
|
||||||
headers=self.headers,
|
headers=self.headers,
|
||||||
@@ -148,15 +151,10 @@ class NotionConnector(LoadConnector, PollConnector):
|
|||||||
return NotionSearchResponse(**res.json())
|
return NotionSearchResponse(**res.json())
|
||||||
|
|
||||||
def load_from_state(self) -> GenerateDocumentsOutput:
|
def load_from_state(self) -> GenerateDocumentsOutput:
|
||||||
"""Load data from the input directory.
|
"""Loads all page data from a Notion workspace.
|
||||||
|
|
||||||
Args:
|
|
||||||
page_ids (List[str]): List of page ids to load.
|
|
||||||
database_id (str): Database_id from which to load page ids.
|
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
List[Document]: List of documents.
|
List[Document]: List of documents.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
query_dict = {
|
query_dict = {
|
||||||
"filter": {"property": "object", "value": "page"},
|
"filter": {"property": "object", "value": "page"},
|
||||||
@@ -178,6 +176,16 @@ class NotionConnector(LoadConnector, PollConnector):
|
|||||||
end: SecondsSinceUnixEpoch,
|
end: SecondsSinceUnixEpoch,
|
||||||
filter_field: str = "last_edited_time",
|
filter_field: str = "last_edited_time",
|
||||||
) -> List[NotionPage]:
|
) -> List[NotionPage]:
|
||||||
|
"""A helper function to filter out pages outside of a time
|
||||||
|
range. This functionality doesn't yet exist in the Notion Search API,
|
||||||
|
but when it does, this approach can be deprecated.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
pages (List[Dict]) - Pages to filter
|
||||||
|
start (float) - start epoch time to filter from
|
||||||
|
end (float) - end epoch time to filter to
|
||||||
|
filter_field (str) - the attribute on the page to apply the filter
|
||||||
|
"""
|
||||||
filtered_pages = []
|
filtered_pages = []
|
||||||
for page in pages:
|
for page in pages:
|
||||||
compare_time = time.mktime(
|
compare_time = time.mktime(
|
||||||
|
Reference in New Issue
Block a user