From 34fc84298d66a963009460b5c72652027d57e1b3 Mon Sep 17 00:00:00 2001 From: Weves Date: Thu, 6 Mar 2025 11:42:18 -0800 Subject: [PATCH] Confluence fixes --- backend/onyx/connectors/confluence/connector.py | 2 +- backend/onyx/indexing/indexing_pipeline.py | 15 ++++++++++++--- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/backend/onyx/connectors/confluence/connector.py b/backend/onyx/connectors/confluence/connector.py index 3116bfb35..00d5d2168 100644 --- a/backend/onyx/connectors/confluence/connector.py +++ b/backend/onyx/connectors/confluence/connector.py @@ -240,7 +240,7 @@ class ConfluenceConnector( # Extract basic page information page_id = page["id"] page_title = page["title"] - page_url = f"{self.wiki_base}/wiki{page['_links']['webui']}" + page_url = f"{self.wiki_base}{page['_links']['webui']}" # Get the page content page_content = extract_text_from_confluence_html( diff --git a/backend/onyx/indexing/indexing_pipeline.py b/backend/onyx/indexing/indexing_pipeline.py index fe95f2a9b..c0b6dbc71 100644 --- a/backend/onyx/indexing/indexing_pipeline.py +++ b/backend/onyx/indexing/indexing_pipeline.py @@ -464,12 +464,21 @@ def index_doc_batch( ), ) - successful_doc_ids = {record.document_id for record in insertion_records} - if successful_doc_ids != set(updatable_ids): + all_returned_doc_ids = { + record.document_id for record in insertion_records + }.union( + { + record.failed_document.document_id + for record in vector_db_write_failures + if record.failed_document + } + ) + if all_returned_doc_ids != set(updatable_ids): raise RuntimeError( f"Some documents were not successfully indexed. " f"Updatable IDs: {updatable_ids}, " - f"Successful IDs: {successful_doc_ids}" + f"Returned IDs: {all_returned_doc_ids}. " + "This should never happen." ) last_modified_ids = []