diff --git a/backend/onyx/connectors/gitbook/connector.py b/backend/onyx/connectors/gitbook/connector.py index 68ee812cc8ac..fb76427e8dad 100644 --- a/backend/onyx/connectors/gitbook/connector.py +++ b/backend/onyx/connectors/gitbook/connector.py @@ -229,16 +229,20 @@ class GitbookConnector(LoadConnector, PollConnector): try: content = self.client.get(f"/spaces/{self.space_id}/content") - pages = content.get("pages", []) - + pages: list[dict[str, Any]] = content.get("pages", []) current_batch: list[Document] = [] - for page in pages: - updated_at = datetime.fromisoformat(page["updatedAt"]) + while pages: + page = pages.pop(0) + + updated_at_raw = page.get("updatedAt") + if updated_at_raw is None: + # if updatedAt is not present, that means the page has never been edited + continue + + updated_at = datetime.fromisoformat(updated_at_raw) if start and updated_at < start: - if current_batch: - yield current_batch - return + continue if end and updated_at > end: continue @@ -250,6 +254,8 @@ class GitbookConnector(LoadConnector, PollConnector): yield current_batch current_batch = [] + pages.extend(page.get("pages", [])) + if current_batch: yield current_batch