Mirror of https://github.com/danswer-ai/danswer.git (synced 2025-07-07 21:20:39 +02:00)
Improved indexing (#3594)
* nit
* k
* add steps
* main util functions
* functioning fully
* quick nit
* k
* typing fix
* k
* address comments
onyx/document_index/document_index_utils.py

@@ -1,12 +1,13 @@
 import math
 import uuid
+from uuid import UUID

 from sqlalchemy.orm import Session

-from onyx.context.search.models import InferenceChunk
 from onyx.db.search_settings import get_current_search_settings
 from onyx.db.search_settings import get_secondary_search_settings
-from onyx.indexing.models import IndexChunk
+from onyx.document_index.interfaces import EnrichedDocumentIndexingInfo
+from onyx.indexing.models import DocMetadataAwareIndexChunk


 DEFAULT_BATCH_SIZE = 30
@@ -36,25 +37,118 @@ def translate_boost_count_to_multiplier(boost: int) -> float:
     return 2 / (1 + math.exp(-1 * boost / 3))


-def get_uuid_from_chunk(
-    chunk: IndexChunk | InferenceChunk, mini_chunk_ind: int = 0
-) -> uuid.UUID:
-    doc_str = (
-        chunk.document_id
-        if isinstance(chunk, InferenceChunk)
-        else chunk.source_document.id
-    )
-    # Web parsing URL duplicate catching
-    if doc_str and doc_str[-1] == "/":
-        doc_str = doc_str[:-1]
-    unique_identifier_string = "_".join(
-        [doc_str, str(chunk.chunk_id), str(mini_chunk_ind)]
-    )
-    if chunk.large_chunk_reference_ids:
-        unique_identifier_string += "_large" + "_".join(
-            [
-                str(referenced_chunk_id)
-                for referenced_chunk_id in chunk.large_chunk_reference_ids
-            ]
-        )
-    return uuid.uuid5(uuid.NAMESPACE_X500, unique_identifier_string)
+def assemble_document_chunk_info(
+    enriched_document_info_list: list[EnrichedDocumentIndexingInfo],
+    tenant_id: str | None,
+    large_chunks_enabled: bool,
+) -> list[UUID]:
+    doc_chunk_ids = []
+
+    for enriched_document_info in enriched_document_info_list:
+        for chunk_index in range(
+            enriched_document_info.chunk_start_index,
+            enriched_document_info.chunk_end_index,
+        ):
+            if not enriched_document_info.old_version:
+                doc_chunk_ids.append(
+                    get_uuid_from_chunk_info(
+                        document_id=enriched_document_info.doc_id,
+                        chunk_id=chunk_index,
+                        tenant_id=tenant_id,
+                    )
+                )
+            else:
+                doc_chunk_ids.append(
+                    get_uuid_from_chunk_info_old(
+                        document_id=enriched_document_info.doc_id,
+                        chunk_id=chunk_index,
+                    )
+                )
+
+            if large_chunks_enabled and chunk_index % 4 == 0:
+                large_chunk_id = int(chunk_index / 4)
+                large_chunk_reference_ids = [
+                    large_chunk_id + i
+                    for i in range(4)
+                    if large_chunk_id + i < enriched_document_info.chunk_end_index
+                ]
+                if enriched_document_info.old_version:
+                    doc_chunk_ids.append(
+                        get_uuid_from_chunk_info_old(
+                            document_id=enriched_document_info.doc_id,
+                            chunk_id=large_chunk_id,
+                            large_chunk_reference_ids=large_chunk_reference_ids,
+                        )
+                    )
+                else:
+                    doc_chunk_ids.append(
+                        get_uuid_from_chunk_info(
+                            document_id=enriched_document_info.doc_id,
+                            chunk_id=large_chunk_id,
+                            tenant_id=tenant_id,
+                            large_chunk_id=large_chunk_id,
+                        )
+                    )
+
+    return doc_chunk_ids
+
+
+def get_uuid_from_chunk_info(
+    *,
+    document_id: str,
+    chunk_id: int,
+    tenant_id: str | None,
+    large_chunk_id: int | None = None,
+) -> UUID:
+    doc_str = document_id
+
+    # Web parsing URL duplicate catching
+    if doc_str and doc_str[-1] == "/":
+        doc_str = doc_str[:-1]
+
+    chunk_index = (
+        "large_" + str(large_chunk_id) if large_chunk_id is not None else str(chunk_id)
+    )
+    unique_identifier_string = "_".join([doc_str, chunk_index])
+    if tenant_id:
+        unique_identifier_string += "_" + tenant_id
+
+    return uuid.uuid5(uuid.NAMESPACE_X500, unique_identifier_string)
+
+
+def get_uuid_from_chunk_info_old(
+    *, document_id: str, chunk_id: int, large_chunk_reference_ids: list[int] = []
+) -> UUID:
+    doc_str = document_id
+
+    # Web parsing URL duplicate catching
+    if doc_str and doc_str[-1] == "/":
+        doc_str = doc_str[:-1]
+    unique_identifier_string = "_".join([doc_str, str(chunk_id), "0"])
+    if large_chunk_reference_ids:
+        unique_identifier_string += "_large" + "_".join(
+            [
+                str(referenced_chunk_id)
+                for referenced_chunk_id in large_chunk_reference_ids
+            ]
+        )
+    return uuid.uuid5(uuid.NAMESPACE_X500, unique_identifier_string)
+
+
+def get_uuid_from_chunk(chunk: DocMetadataAwareIndexChunk) -> uuid.UUID:
+    return get_uuid_from_chunk_info(
+        document_id=chunk.source_document.id,
+        chunk_id=chunk.chunk_id,
+        tenant_id=chunk.tenant_id,
+        large_chunk_id=chunk.large_chunk_id,
+    )
+
+
+def get_uuid_from_chunk_old(
+    chunk: DocMetadataAwareIndexChunk, large_chunk_reference_ids: list[int] = []
+) -> UUID:
+    return get_uuid_from_chunk_info_old(
+        document_id=chunk.source_document.id,
+        chunk_id=chunk.chunk_id,
+        large_chunk_reference_ids=large_chunk_reference_ids,
+    )
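Because the new chunk UUIDs are derived only from the document id, chunk index, and tenant, they can be recomputed later (for example at deletion time) without querying Vespa. A minimal sketch of the two ID schemes, using made-up document and tenant values:

import uuid

# Hypothetical inputs, for illustration only.
document_id = "https://example.com/some/page/"
tenant_id = "tenant_123"
chunk_id = 2

# Mirror of the trailing-slash handling above: strip a single trailing "/".
doc_str = document_id[:-1] if document_id and document_id[-1] == "/" else document_id

# New-style ID (get_uuid_from_chunk_info): "<doc>_<chunk>" plus an optional "_<tenant>".
new_style = uuid.uuid5(uuid.NAMESPACE_X500, f"{doc_str}_{chunk_id}_{tenant_id}")

# Old-style ID (get_uuid_from_chunk_info_old): "<doc>_<chunk>_0", no tenant component.
old_style = uuid.uuid5(uuid.NAMESPACE_X500, f"{doc_str}_{chunk_id}_0")

print(new_style, old_style)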
onyx/document_index/interfaces.py

@@ -35,6 +35,38 @@ class VespaChunkRequest:
         return None


+@dataclass
+class IndexBatchParams:
+    """
+    Information necessary for efficiently indexing a batch of documents
+    """
+
+    doc_id_to_previous_chunk_cnt: dict[str, int | None]
+    doc_id_to_new_chunk_cnt: dict[str, int]
+    tenant_id: str | None
+    large_chunks_enabled: bool
+
+
+@dataclass
+class MinimalDocumentIndexingInfo:
+    """
+    Minimal information necessary for indexing a document
+    """
+
+    doc_id: str
+    chunk_start_index: int
+
+
+@dataclass
+class EnrichedDocumentIndexingInfo(MinimalDocumentIndexingInfo):
+    """
+    Enriched information necessary for indexing a document, including version and chunk range.
+    """
+
+    old_version: bool
+    chunk_end_index: int
+
+
 @dataclass
 class DocumentMetadata:
     """
@@ -148,7 +180,7 @@ class Indexable(abc.ABC):
     def index(
         self,
         chunks: list[DocMetadataAwareIndexChunk],
-        fresh_index: bool = False,
+        index_batch_params: IndexBatchParams,
     ) -> set[DocumentInsertionRecord]:
         """
         Takes a list of document chunks and indexes them in the document index
@@ -166,14 +198,11 @@ class Indexable(abc.ABC):
         only needs to index chunks into the PRIMARY index. Do not update the secondary index here,
         it is done automatically outside of this code.

-        NOTE: The fresh_index parameter, when set to True, assumes no documents have been previously
-        indexed for the given index/tenant. This can be used to optimize the indexing process for
-        new or empty indices.
-
         Parameters:
         - chunks: Document chunks with all of the information needed for indexing to the document
           index.
-        - fresh_index: Boolean indicating whether this is a fresh index with no existing documents.
+        - tenant_id: The tenant id of the user whose chunks are being indexed
+        - large_chunks_enabled: Whether large chunks are enabled

         Returns:
             List of document ids which map to unique documents and are used for deduping chunks
@@ -185,7 +214,7 @@ class Indexable(abc.ABC):

 class Deletable(abc.ABC):
     """
-    Class must implement the ability to delete document by their unique document ids.
+    Class must implement the ability to delete document by a given unique document id.
     """

     @abc.abstractmethod
@@ -198,16 +227,6 @@ class Deletable(abc.ABC):
         """
         raise NotImplementedError

-    @abc.abstractmethod
-    def delete(self, doc_ids: list[str]) -> None:
-        """
-        Given a list of document ids, hard delete them from the document index
-
-        Parameters:
-        - doc_ids: list of document ids as specified by the connector
-        """
-        raise NotImplementedError
-

 class Updatable(abc.ABC):
     """
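For reference, a sketch of how a caller might populate the new IndexBatchParams dataclass; the document ids and chunk counts here are invented:

from onyx.document_index.interfaces import IndexBatchParams

# Hypothetical batch: "doc_a" previously had 8 chunks and now produces 5;
# "doc_b" has no recorded chunk count (old chunk-ID scheme), hence None.
index_batch_params = IndexBatchParams(
    doc_id_to_previous_chunk_cnt={"doc_a": 8, "doc_b": None},
    doc_id_to_new_chunk_cnt={"doc_a": 5, "doc_b": 3},
    tenant_id=None,
    large_chunks_enabled=False,
)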
onyx/document_index/vespa/deletion.py

@@ -1,11 +1,9 @@
 import concurrent.futures
+from uuid import UUID

 import httpx
 from retry import retry

-from onyx.document_index.vespa.chunk_retrieval import (
-    get_all_vespa_ids_for_document_id,
-)
 from onyx.document_index.vespa_constants import DOCUMENT_ID_ENDPOINT
 from onyx.document_index.vespa_constants import NUM_THREADS
 from onyx.utils.logger import setup_logger
@@ -22,29 +20,21 @@ def _retryable_http_delete(http_client: httpx.Client, url: str) -> None:
     res.raise_for_status()


 @retry(tries=3, delay=1, backoff=2)
-def _delete_vespa_doc_chunks(
-    document_id: str, index_name: str, http_client: httpx.Client
-) -> None:
-    doc_chunk_ids = get_all_vespa_ids_for_document_id(
-        document_id=document_id,
-        index_name=index_name,
-        get_large_chunks=True,
-    )
-
-    for chunk_id in doc_chunk_ids:
-        try:
-            _retryable_http_delete(
-                http_client,
-                f"{DOCUMENT_ID_ENDPOINT.format(index_name=index_name)}/{chunk_id}",
-            )
-        except httpx.HTTPStatusError as e:
-            logger.error(f"Failed to delete chunk, details: {e.response.text}")
-            raise
+def _delete_vespa_chunk(
+    doc_chunk_id: UUID, index_name: str, http_client: httpx.Client
+) -> None:
+    try:
+        _retryable_http_delete(
+            http_client,
+            f"{DOCUMENT_ID_ENDPOINT.format(index_name=index_name)}/{doc_chunk_id}",
+        )
+    except httpx.HTTPStatusError as e:
+        logger.error(f"Failed to delete chunk, details: {e.response.text}")
+        raise


-def delete_vespa_docs(
-    document_ids: list[str],
+def delete_vespa_chunks(
+    doc_chunk_ids: list[UUID],
     index_name: str,
     http_client: httpx.Client,
     executor: concurrent.futures.ThreadPoolExecutor | None = None,
@@ -56,13 +46,13 @@ def delete_vespa_docs(
         executor = concurrent.futures.ThreadPoolExecutor(max_workers=NUM_THREADS)

     try:
-        doc_deletion_future = {
+        chunk_deletion_future = {
             executor.submit(
-                _delete_vespa_doc_chunks, doc_id, index_name, http_client
-            ): doc_id
-            for doc_id in document_ids
+                _delete_vespa_chunk, doc_chunk_id, index_name, http_client
+            ): doc_chunk_id
+            for doc_chunk_id in doc_chunk_ids
         }
-        for future in concurrent.futures.as_completed(doc_deletion_future):
+        for future in concurrent.futures.as_completed(chunk_deletion_future):
             # Will raise exception if the deletion raised an exception
             future.result()
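A usage sketch of the reworked deletion helper: callers now pass precomputed chunk UUIDs rather than document ids. The index name and chunk range below are placeholders:

import httpx

from onyx.document_index.document_index_utils import get_uuid_from_chunk_info
from onyx.document_index.vespa.deletion import delete_vespa_chunks

# Placeholder values: stale chunks 5..7 of a document that shrank to 5 chunks.
doc_chunk_ids = [
    get_uuid_from_chunk_info(document_id="doc_a", chunk_id=i, tenant_id=None)
    for i in range(5, 8)
]

with httpx.Client() as http_client:
    delete_vespa_chunks(
        doc_chunk_ids=doc_chunk_ids,
        index_name="danswer_chunk",  # assumed default index name
        http_client=http_client,
    )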
onyx/document_index/vespa/index.py

@@ -25,8 +25,12 @@ from onyx.configs.chat_configs import VESPA_SEARCHER_THREADS
 from onyx.configs.constants import KV_REINDEX_KEY
 from onyx.context.search.models import IndexFilters
 from onyx.context.search.models import InferenceChunkUncleaned
+from onyx.document_index.document_index_utils import assemble_document_chunk_info
 from onyx.document_index.interfaces import DocumentIndex
 from onyx.document_index.interfaces import DocumentInsertionRecord
+from onyx.document_index.interfaces import EnrichedDocumentIndexingInfo
+from onyx.document_index.interfaces import IndexBatchParams
+from onyx.document_index.interfaces import MinimalDocumentIndexingInfo
 from onyx.document_index.interfaces import UpdateRequest
 from onyx.document_index.interfaces import VespaChunkRequest
 from onyx.document_index.interfaces import VespaDocumentFields
@@ -38,12 +42,10 @@ from onyx.document_index.vespa.chunk_retrieval import (
     parallel_visit_api_retrieval,
 )
 from onyx.document_index.vespa.chunk_retrieval import query_vespa
-from onyx.document_index.vespa.deletion import delete_vespa_docs
+from onyx.document_index.vespa.deletion import delete_vespa_chunks
 from onyx.document_index.vespa.indexing_utils import batch_index_vespa_chunks
+from onyx.document_index.vespa.indexing_utils import check_for_final_chunk_existence
 from onyx.document_index.vespa.indexing_utils import clean_chunk_id_copy
-from onyx.document_index.vespa.indexing_utils import (
-    get_existing_documents_from_chunks,
-)
 from onyx.document_index.vespa.shared_utils.utils import get_vespa_http_client
 from onyx.document_index.vespa.shared_utils.utils import (
     replace_invalid_doc_id_characters,
@@ -307,12 +309,18 @@ class VespaIndex(DocumentIndex):
     def index(
         self,
         chunks: list[DocMetadataAwareIndexChunk],
-        fresh_index: bool = False,
+        index_batch_params: IndexBatchParams,
     ) -> set[DocumentInsertionRecord]:
         """Receive a list of chunks from a batch of documents and index the chunks into Vespa along
         with updating the associated permissions. Assumes that a document will not be split into
         multiple chunk batches calling this function multiple times, otherwise only the last set of
         chunks will be kept"""
+
+        doc_id_to_previous_chunk_cnt = index_batch_params.doc_id_to_previous_chunk_cnt
+        doc_id_to_new_chunk_cnt = index_batch_params.doc_id_to_new_chunk_cnt
+        tenant_id = index_batch_params.tenant_id
+        large_chunks_enabled = index_batch_params.large_chunks_enabled
+
         # IMPORTANT: This must be done one index at a time, do not use secondary index here
         cleaned_chunks = [clean_chunk_id_copy(chunk) for chunk in chunks]

@@ -324,30 +332,55 @@ class VespaIndex(DocumentIndex):
             concurrent.futures.ThreadPoolExecutor(max_workers=NUM_THREADS) as executor,
             get_vespa_http_client() as http_client,
         ):
-            if not fresh_index:
-                # Check for existing documents, existing documents need to have all of their chunks deleted
-                # prior to indexing as the document size (num chunks) may have shrunk
-                first_chunks = [
-                    chunk for chunk in cleaned_chunks if chunk.chunk_id == 0
-                ]
-                for chunk_batch in batch_generator(first_chunks, BATCH_SIZE):
-                    existing_docs.update(
-                        get_existing_documents_from_chunks(
-                            chunks=chunk_batch,
-                            index_name=self.index_name,
-                            http_client=http_client,
-                            executor=executor,
-                        )
-                    )
-
-                for doc_id_batch in batch_generator(existing_docs, BATCH_SIZE):
-                    delete_vespa_docs(
-                        document_ids=doc_id_batch,
-                        index_name=self.index_name,
-                        http_client=http_client,
-                        executor=executor,
-                    )
-
+            # We require the start and end index for each document in order to
+            # know precisely which chunks to delete. This information exists for
+            # documents that have `chunk_count` in the database, but not for
+            # `old_version` documents.
+
+            enriched_doc_infos: list[EnrichedDocumentIndexingInfo] = []
+            for document_id, _ in doc_id_to_previous_chunk_cnt.items():
+                last_indexed_chunk = doc_id_to_previous_chunk_cnt.get(document_id, None)
+                # If the document has no `chunk_count` in the database, we know that it
+                # has the old chunk ID system and we must check for the final chunk index
+                is_old_version = False
+                if last_indexed_chunk is None:
+                    is_old_version = True
+                    minimal_doc_info = MinimalDocumentIndexingInfo(
+                        doc_id=document_id,
+                        chunk_start_index=doc_id_to_new_chunk_cnt.get(document_id, 0),
+                    )
+                    last_indexed_chunk = check_for_final_chunk_existence(
+                        minimal_doc_info=minimal_doc_info,
+                        start_index=doc_id_to_new_chunk_cnt[document_id],
+                        index_name=self.index_name,
+                        http_client=http_client,
+                    )
+
+                enriched_doc_info = EnrichedDocumentIndexingInfo(
+                    doc_id=document_id,
+                    chunk_start_index=doc_id_to_new_chunk_cnt.get(document_id, 0),
+                    chunk_end_index=last_indexed_chunk,
+                    old_version=is_old_version,
+                )
+                enriched_doc_infos.append(enriched_doc_info)
+
+            # Now, for each doc, we know exactly where to start and end our deletion
+            # So let's generate the chunk IDs for each chunk to delete
+            chunks_to_delete = assemble_document_chunk_info(
+                enriched_document_info_list=enriched_doc_infos,
+                tenant_id=tenant_id,
+                large_chunks_enabled=large_chunks_enabled,
+            )
+
+            # Delete old Vespa documents
+            for doc_chunk_ids_batch in batch_generator(chunks_to_delete, BATCH_SIZE):
+                delete_vespa_chunks(
+                    doc_chunk_ids=doc_chunk_ids_batch,
+                    index_name=self.index_name,
+                    http_client=http_client,
+                    executor=executor,
+                )
+
             for chunk_batch in batch_generator(cleaned_chunks, BATCH_SIZE):
                 batch_index_vespa_chunks(
                     chunks=chunk_batch,
@@ -588,24 +621,6 @@ class VespaIndex(DocumentIndex):

         return total_chunks_updated

-    def delete(self, doc_ids: list[str]) -> None:
-        logger.info(f"Deleting {len(doc_ids)} documents from Vespa")
-
-        doc_ids = [replace_invalid_doc_id_characters(doc_id) for doc_id in doc_ids]
-
-        # NOTE: using `httpx` here since `requests` doesn't support HTTP2. This is beneficial for
-        # indexing / updates / deletes since we have to make a large volume of requests.
-        with get_vespa_http_client() as http_client:
-            index_names = [self.index_name]
-            if self.secondary_index_name:
-                index_names.append(self.secondary_index_name)
-
-            for index_name in index_names:
-                delete_vespa_docs(
-                    document_ids=doc_ids, index_name=index_name, http_client=http_client
-                )
-        return
-
     def delete_single(self, doc_id: str) -> int:
         """Possibly faster overall than the delete method due to using a single
         delete call with a selection query."""
onyx/document_index/vespa/indexing_utils.py

@@ -1,5 +1,6 @@
 import concurrent.futures
 import json
+import uuid
 from datetime import datetime
 from datetime import timezone
 from http import HTTPStatus
@@ -11,6 +12,8 @@ from onyx.connectors.cross_connector_utils.miscellaneous_utils import (
     get_experts_stores_representations,
 )
 from onyx.document_index.document_index_utils import get_uuid_from_chunk
+from onyx.document_index.document_index_utils import get_uuid_from_chunk_info_old
+from onyx.document_index.interfaces import MinimalDocumentIndexingInfo
 from onyx.document_index.vespa.shared_utils.utils import remove_invalid_unicode_chars
 from onyx.document_index.vespa.shared_utils.utils import (
     replace_invalid_doc_id_characters,
@@ -48,14 +51,9 @@ logger = setup_logger()


 @retry(tries=3, delay=1, backoff=2)
-def _does_document_exist(
-    doc_chunk_id: str,
-    index_name: str,
-    http_client: httpx.Client,
+def _does_doc_chunk_exist(
+    doc_chunk_id: uuid.UUID, index_name: str, http_client: httpx.Client
 ) -> bool:
-    """Returns whether the document already exists and the users/group whitelists
-    Specifically in this case, document refers to a vespa document which is equivalent to a Onyx
-    chunk. This checks for whether the chunk exists already in the index"""
     doc_url = f"{DOCUMENT_ID_ENDPOINT.format(index_name=index_name)}/{doc_chunk_id}"
     doc_fetch_response = http_client.get(doc_url)
     if doc_fetch_response.status_code == 404:
@@ -98,8 +96,8 @@ def get_existing_documents_from_chunks(
     try:
         chunk_existence_future = {
             executor.submit(
-                _does_document_exist,
-                str(get_uuid_from_chunk(chunk)),
+                _does_doc_chunk_exist,
+                get_uuid_from_chunk(chunk),
                 index_name,
                 http_client,
             ): chunk
@@ -248,3 +246,22 @@ def clean_chunk_id_copy(
         }
     )
     return clean_chunk
+
+
+def check_for_final_chunk_existence(
+    minimal_doc_info: MinimalDocumentIndexingInfo,
+    start_index: int,
+    index_name: str,
+    http_client: httpx.Client,
+) -> int:
+    index = start_index
+    while True:
+        doc_chunk_id = get_uuid_from_chunk_info_old(
+            document_id=minimal_doc_info.doc_id,
+            chunk_id=index,
+            large_chunk_reference_ids=[],
+        )
+        if not _does_doc_chunk_exist(doc_chunk_id, index_name, http_client):
+            return index
+
+        index += 1
onyx/document_index/vespa_constants.py

@@ -35,6 +35,8 @@ DOCUMENT_ID_ENDPOINT = (
     f"{VESPA_APP_CONTAINER_URL}/document/v1/default/{{index_name}}/docid"
 )

+# the default document id endpoint is http://localhost:8080/document/v1/default/danswer_chunk/docid
+
 SEARCH_ENDPOINT = f"{VESPA_APP_CONTAINER_URL}/search/"

 NUM_THREADS = (
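For orientation, the endpoint template expands into the per-chunk URLs used by the delete and existence checks above. The base URL matches the default noted in the new comment; the chunk UUID is a placeholder:

# Assumed default; in the codebase this value comes from VESPA_APP_CONTAINER_URL.
VESPA_APP_CONTAINER_URL = "http://localhost:8080"
DOCUMENT_ID_ENDPOINT = (
    f"{VESPA_APP_CONTAINER_URL}/document/v1/default/{{index_name}}/docid"
)

doc_chunk_id = "11111111-2222-5333-8444-555555555555"  # placeholder chunk UUID
url = f"{DOCUMENT_ID_ENDPOINT.format(index_name='danswer_chunk')}/{doc_chunk_id}"
print(url)  # http://localhost:8080/document/v1/default/danswer_chunk/docid/11111111-...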