Confluence fixes (#4220)

* Confluence fixes

* Small tweak

* Address greptile comments
This commit is contained in:
Chris Weaver 2025-03-06 12:57:07 -08:00 committed by GitHub
parent facc8cc2fa
commit a7a374dc81
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 21 additions and 4 deletions

View File

@ -240,7 +240,7 @@ class ConfluenceConnector(
# Extract basic page information
page_id = page["id"]
page_title = page["title"]
page_url = f"{self.wiki_base}/wiki{page['_links']['webui']}"
page_url = f"{self.wiki_base}{page['_links']['webui']}"
# Get the page content
page_content = extract_text_from_confluence_html(

View File

@ -464,12 +464,29 @@ def index_doc_batch(
),
)
successful_doc_ids = {record.document_id for record in insertion_records}
if successful_doc_ids != set(updatable_ids):
all_returned_doc_ids = (
{record.document_id for record in insertion_records}
.union(
{
record.failed_document.document_id
for record in vector_db_write_failures
if record.failed_document
}
)
.union(
{
record.failed_document.document_id
for record in embedding_failures
if record.failed_document
}
)
)
if all_returned_doc_ids != set(updatable_ids):
raise RuntimeError(
f"Some documents were not successfully indexed. "
f"Updatable IDs: {updatable_ids}, "
f"Successful IDs: {successful_doc_ids}"
f"Returned IDs: {all_returned_doc_ids}. "
"This should never happen."
)
last_modified_ids = []