mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-03-17 13:22:42 +01:00
Small gitbook tweaks
This commit is contained in:
parent
06dcc28d05
commit
7f7621d7c0
@ -228,10 +228,15 @@ class GitbookConnector(LoadConnector, PollConnector):
|
||||
raise ConnectorMissingCredentialError("GitBook")
|
||||
|
||||
try:
|
||||
content = self.client.get(f"/spaces/{self.space_id}/content")
|
||||
content = self.client.get(f"/spaces/{self.space_id}/content/pages")
|
||||
pages: list[dict[str, Any]] = content.get("pages", [])
|
||||
current_batch: list[Document] = []
|
||||
|
||||
logger.info(f"Found {len(pages)} root pages.")
|
||||
logger.info(
|
||||
f"First 20 Page Ids: {[page.get('id', 'Unknown') for page in pages[:20]]}"
|
||||
)
|
||||
|
||||
while pages:
|
||||
page = pages.pop(0)
|
||||
|
||||
|
@ -20,29 +20,32 @@ def gitbook_connector() -> GitbookConnector:
|
||||
return connector
|
||||
|
||||
|
||||
NUM_PAGES = 3
|
||||
|
||||
|
||||
def test_gitbook_connector_basic(gitbook_connector: GitbookConnector) -> None:
|
||||
doc_batch_generator = gitbook_connector.load_from_state()
|
||||
|
||||
# Get first batch of documents
|
||||
doc_batch = next(doc_batch_generator)
|
||||
assert len(doc_batch) > 0
|
||||
assert len(doc_batch) == NUM_PAGES
|
||||
|
||||
# Verify first document structure
|
||||
doc = doc_batch[0]
|
||||
main_doc = doc_batch[0]
|
||||
|
||||
# Basic document properties
|
||||
assert doc.id.startswith("gitbook-")
|
||||
assert doc.semantic_identifier == "Acme Corp Internal Handbook"
|
||||
assert doc.source == DocumentSource.GITBOOK
|
||||
assert main_doc.id.startswith("gitbook-")
|
||||
assert main_doc.semantic_identifier == "Acme Corp Internal Handbook"
|
||||
assert main_doc.source == DocumentSource.GITBOOK
|
||||
|
||||
# Metadata checks
|
||||
assert "path" in doc.metadata
|
||||
assert "type" in doc.metadata
|
||||
assert "kind" in doc.metadata
|
||||
assert "path" in main_doc.metadata
|
||||
assert "type" in main_doc.metadata
|
||||
assert "kind" in main_doc.metadata
|
||||
|
||||
# Section checks
|
||||
assert len(doc.sections) == 1
|
||||
section = doc.sections[0]
|
||||
assert len(main_doc.sections) == 1
|
||||
section = main_doc.sections[0]
|
||||
|
||||
# Content specific checks
|
||||
content = section.text
|
||||
@ -74,8 +77,23 @@ def test_gitbook_connector_basic(gitbook_connector: GitbookConnector) -> None:
|
||||
|
||||
assert section.link # Should have a URL
|
||||
|
||||
nested1 = doc_batch[1]
|
||||
assert nested1.id.startswith("gitbook-")
|
||||
assert nested1.semantic_identifier == "Nested1"
|
||||
assert len(nested1.sections) == 1
|
||||
# extra newlines at the end, remove them to make test easier
|
||||
assert nested1.sections[0].text.strip() == "nested1"
|
||||
assert nested1.source == DocumentSource.GITBOOK
|
||||
|
||||
nested2 = doc_batch[2]
|
||||
assert nested2.id.startswith("gitbook-")
|
||||
assert nested2.semantic_identifier == "Nested2"
|
||||
assert len(nested2.sections) == 1
|
||||
assert nested2.sections[0].text.strip() == "nested2"
|
||||
assert nested2.source == DocumentSource.GITBOOK
|
||||
|
||||
# Time-based polling test
|
||||
current_time = time.time()
|
||||
poll_docs = gitbook_connector.poll_source(0, current_time)
|
||||
poll_batch = next(poll_docs)
|
||||
assert len(poll_batch) > 0
|
||||
assert len(poll_batch) == NUM_PAGES
|
||||
|
Loading…
x
Reference in New Issue
Block a user