mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-08-03 13:43:18 +02:00
Improved indexing (#3594)
* nit
* k
* add steps
* main util functions
* functioning fully
* quick nit
* k
* typing fix
* k
* address comments
This commit is contained in:
@@ -25,6 +25,7 @@ from onyx.db.enums import ConnectorCredentialPairStatus
|
||||
from onyx.db.index_attempt import mock_successful_index_attempt
|
||||
from onyx.db.search_settings import get_current_search_settings
|
||||
from onyx.document_index.factory import get_default_document_index
|
||||
+from onyx.document_index.interfaces import IndexBatchParams
|
||||
from onyx.indexing.indexing_pipeline import index_doc_batch_prepare
|
||||
from onyx.indexing.models import ChunkEmbedding
|
||||
from onyx.indexing.models import DocMetadataAwareIndexChunk
|
||||
@@ -86,6 +87,7 @@ def _create_indexable_chunks(
|
||||
access=default_public_access,
|
||||
document_sets=set(),
|
||||
boost=DEFAULT_BOOST,
|
||||
+large_chunk_id=None,
|
||||
)
|
||||
chunks.append(chunk)
|
||||
|
||||
@@ -217,7 +219,15 @@ def seed_initial_documents(
|
||||
# as we just sent over the Vespa schema and there is a slight delay
|
||||
|
||||
index_with_retries = retry_builder(tries=15)(document_index.index)
|
||||
-index_with_retries(chunks=chunks, fresh_index=cohere_enabled)
+index_with_retries(
+    chunks=chunks,
+    index_batch_params=IndexBatchParams(
+        doc_id_to_previous_chunk_cnt={},
+        doc_id_to_new_chunk_cnt={},
+        large_chunks_enabled=False,
+        tenant_id=tenant_id,
+    ),
+)
|
||||
|
||||
# Mock a run for the UI even though it did not actually call out to anything
|
||||
mock_successful_index_attempt(
|
||||
|
Reference in New Issue
Block a user