Confluence fixes (#4220)

* Confluence fixes

* Small tweak

* Address greptile comments
This commit is contained in:
Chris Weaver 2025-03-06 12:57:07 -08:00 committed by GitHub
parent facc8cc2fa
commit a7a374dc81
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 21 additions and 4 deletions

View File

@@ -240,7 +240,7 @@ class ConfluenceConnector(
 # Extract basic page information
 page_id = page["id"]
 page_title = page["title"]
-page_url = f"{self.wiki_base}/wiki{page['_links']['webui']}"
+page_url = f"{self.wiki_base}{page['_links']['webui']}"
 # Get the page content
 page_content = extract_text_from_confluence_html(

View File

@@ -464,12 +464,29 @@ def index_doc_batch(
 ),
 )
-successful_doc_ids = {record.document_id for record in insertion_records}
-if successful_doc_ids != set(updatable_ids):
+all_returned_doc_ids = (
+{record.document_id for record in insertion_records}
+.union(
+{
+record.failed_document.document_id
+for record in vector_db_write_failures
+if record.failed_document
+}
+)
+.union(
+{
+record.failed_document.document_id
+for record in embedding_failures
+if record.failed_document
+}
+)
+)
+if all_returned_doc_ids != set(updatable_ids):
 raise RuntimeError(
 f"Some documents were not successfully indexed. "
 f"Updatable IDs: {updatable_ids}, "
-f"Successful IDs: {successful_doc_ids}"
+f"Returned IDs: {all_returned_doc_ids}. "
+"This should never happen."
 )
 last_modified_ids = []