Small gitbook tweaks

This commit is contained in:
Weves 2025-03-09 14:08:00 -07:00 committed by Chris Weaver
parent 06dcc28d05
commit 7f7621d7c0
2 changed files with 35 additions and 12 deletions

View File

@@ -228,10 +228,15 @@ class GitbookConnector(LoadConnector, PollConnector):
raise ConnectorMissingCredentialError("GitBook")
try:
content = self.client.get(f"/spaces/{self.space_id}/content")
content = self.client.get(f"/spaces/{self.space_id}/content/pages")
pages: list[dict[str, Any]] = content.get("pages", [])
current_batch: list[Document] = []
logger.info(f"Found {len(pages)} root pages.")
logger.info(
f"First 20 Page Ids: {[page.get('id', 'Unknown') for page in pages[:20]]}"
)
while pages:
page = pages.pop(0)

View File

@@ -20,29 +20,32 @@ def gitbook_connector() -> GitbookConnector:
return connector
NUM_PAGES = 3
def test_gitbook_connector_basic(gitbook_connector: GitbookConnector) -> None:
doc_batch_generator = gitbook_connector.load_from_state()
# Get first batch of documents
doc_batch = next(doc_batch_generator)
assert len(doc_batch) > 0
assert len(doc_batch) == NUM_PAGES
# Verify first document structure
doc = doc_batch[0]
main_doc = doc_batch[0]
# Basic document properties
assert doc.id.startswith("gitbook-")
assert doc.semantic_identifier == "Acme Corp Internal Handbook"
assert doc.source == DocumentSource.GITBOOK
assert main_doc.id.startswith("gitbook-")
assert main_doc.semantic_identifier == "Acme Corp Internal Handbook"
assert main_doc.source == DocumentSource.GITBOOK
# Metadata checks
assert "path" in doc.metadata
assert "type" in doc.metadata
assert "kind" in doc.metadata
assert "path" in main_doc.metadata
assert "type" in main_doc.metadata
assert "kind" in main_doc.metadata
# Section checks
assert len(doc.sections) == 1
section = doc.sections[0]
assert len(main_doc.sections) == 1
section = main_doc.sections[0]
# Content specific checks
content = section.text
@@ -74,8 +77,23 @@ def test_gitbook_connector_basic(gitbook_connector: GitbookConnector) -> None:
assert section.link # Should have a URL
nested1 = doc_batch[1]
assert nested1.id.startswith("gitbook-")
assert nested1.semantic_identifier == "Nested1"
assert len(nested1.sections) == 1
# extra newlines at the end, remove them to make test easier
assert nested1.sections[0].text.strip() == "nested1"
assert nested1.source == DocumentSource.GITBOOK
nested2 = doc_batch[2]
assert nested2.id.startswith("gitbook-")
assert nested2.semantic_identifier == "Nested2"
assert len(nested2.sections) == 1
assert nested2.sections[0].text.strip() == "nested2"
assert nested2.source == DocumentSource.GITBOOK
# Time-based polling test
current_time = time.time()
poll_docs = gitbook_connector.poll_source(0, current_time)
poll_batch = next(poll_docs)
assert len(poll_batch) > 0
assert len(poll_batch) == NUM_PAGES