mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-07-15 07:33:35 +02:00
Small gitbook tweaks
This commit is contained in:
@ -228,10 +228,15 @@ class GitbookConnector(LoadConnector, PollConnector):
|
|||||||
raise ConnectorMissingCredentialError("GitBook")
|
raise ConnectorMissingCredentialError("GitBook")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
content = self.client.get(f"/spaces/{self.space_id}/content")
|
content = self.client.get(f"/spaces/{self.space_id}/content/pages")
|
||||||
pages: list[dict[str, Any]] = content.get("pages", [])
|
pages: list[dict[str, Any]] = content.get("pages", [])
|
||||||
current_batch: list[Document] = []
|
current_batch: list[Document] = []
|
||||||
|
|
||||||
|
logger.info(f"Found {len(pages)} root pages.")
|
||||||
|
logger.info(
|
||||||
|
f"First 20 Page Ids: {[page.get('id', 'Unknown') for page in pages[:20]]}"
|
||||||
|
)
|
||||||
|
|
||||||
while pages:
|
while pages:
|
||||||
page = pages.pop(0)
|
page = pages.pop(0)
|
||||||
|
|
||||||
|
@ -20,29 +20,32 @@ def gitbook_connector() -> GitbookConnector:
|
|||||||
return connector
|
return connector
|
||||||
|
|
||||||
|
|
||||||
|
NUM_PAGES = 3
|
||||||
|
|
||||||
|
|
||||||
def test_gitbook_connector_basic(gitbook_connector: GitbookConnector) -> None:
|
def test_gitbook_connector_basic(gitbook_connector: GitbookConnector) -> None:
|
||||||
doc_batch_generator = gitbook_connector.load_from_state()
|
doc_batch_generator = gitbook_connector.load_from_state()
|
||||||
|
|
||||||
# Get first batch of documents
|
# Get first batch of documents
|
||||||
doc_batch = next(doc_batch_generator)
|
doc_batch = next(doc_batch_generator)
|
||||||
assert len(doc_batch) > 0
|
assert len(doc_batch) == NUM_PAGES
|
||||||
|
|
||||||
# Verify first document structure
|
# Verify first document structure
|
||||||
doc = doc_batch[0]
|
main_doc = doc_batch[0]
|
||||||
|
|
||||||
# Basic document properties
|
# Basic document properties
|
||||||
assert doc.id.startswith("gitbook-")
|
assert main_doc.id.startswith("gitbook-")
|
||||||
assert doc.semantic_identifier == "Acme Corp Internal Handbook"
|
assert main_doc.semantic_identifier == "Acme Corp Internal Handbook"
|
||||||
assert doc.source == DocumentSource.GITBOOK
|
assert main_doc.source == DocumentSource.GITBOOK
|
||||||
|
|
||||||
# Metadata checks
|
# Metadata checks
|
||||||
assert "path" in doc.metadata
|
assert "path" in main_doc.metadata
|
||||||
assert "type" in doc.metadata
|
assert "type" in main_doc.metadata
|
||||||
assert "kind" in doc.metadata
|
assert "kind" in main_doc.metadata
|
||||||
|
|
||||||
# Section checks
|
# Section checks
|
||||||
assert len(doc.sections) == 1
|
assert len(main_doc.sections) == 1
|
||||||
section = doc.sections[0]
|
section = main_doc.sections[0]
|
||||||
|
|
||||||
# Content specific checks
|
# Content specific checks
|
||||||
content = section.text
|
content = section.text
|
||||||
@ -74,8 +77,23 @@ def test_gitbook_connector_basic(gitbook_connector: GitbookConnector) -> None:
|
|||||||
|
|
||||||
assert section.link # Should have a URL
|
assert section.link # Should have a URL
|
||||||
|
|
||||||
|
nested1 = doc_batch[1]
|
||||||
|
assert nested1.id.startswith("gitbook-")
|
||||||
|
assert nested1.semantic_identifier == "Nested1"
|
||||||
|
assert len(nested1.sections) == 1
|
||||||
|
# extra newlines at the end, remove them to make test easier
|
||||||
|
assert nested1.sections[0].text.strip() == "nested1"
|
||||||
|
assert nested1.source == DocumentSource.GITBOOK
|
||||||
|
|
||||||
|
nested2 = doc_batch[2]
|
||||||
|
assert nested2.id.startswith("gitbook-")
|
||||||
|
assert nested2.semantic_identifier == "Nested2"
|
||||||
|
assert len(nested2.sections) == 1
|
||||||
|
assert nested2.sections[0].text.strip() == "nested2"
|
||||||
|
assert nested2.source == DocumentSource.GITBOOK
|
||||||
|
|
||||||
# Time-based polling test
|
# Time-based polling test
|
||||||
current_time = time.time()
|
current_time = time.time()
|
||||||
poll_docs = gitbook_connector.poll_source(0, current_time)
|
poll_docs = gitbook_connector.poll_source(0, current_time)
|
||||||
poll_batch = next(poll_docs)
|
poll_batch = next(poll_docs)
|
||||||
assert len(poll_batch) > 0
|
assert len(poll_batch) == NUM_PAGES
|
||||||
|
Reference in New Issue
Block a user