mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-10-04 12:58:42 +02:00
Vespa edge case ID does not follow expected format (#541)
This commit is contained in:
@@ -104,7 +104,13 @@ def _get_vespa_chunk_ids_by_document_id(
     while True:
         results = requests.get(SEARCH_ENDPOINT, params=params).json()
         hits = results["root"].get("children", [])
-        doc_chunk_ids.extend([hit["id"].split("::")[1] for hit in hits])
+
+        # Temporary logging to catch the rare index out of bounds issue
+        problematic_ids = [hit["id"] for hit in hits if len(hit["id"].split("::")) < 2]
+        if problematic_ids:
+            logger.error(f'IDs without "::" {problematic_ids}')
+
+        doc_chunk_ids.extend([hit["id"].split("::", 1)[-1] for hit in hits])
         params["offset"] += hits_per_page  # type: ignore
 
         if len(hits) < hits_per_page:
Reference in New Issue
Block a user