mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-09-19 12:03:54 +02:00
Fix Vespa Issue where Documents with no Content could be retrieved via Vector Search (#448)
This commit is contained in:
@@ -314,6 +314,17 @@ def _query_vespa(query_params: Mapping[str, str | int]) -> list[InferenceChunk]:
|
||||
response.raise_for_status()
|
||||
|
||||
hits = response.json()["root"].get("children", [])
|
||||
|
||||
for hit in hits:
|
||||
if hit["fields"].get(CONTENT) is None:
|
||||
logger.error(
|
||||
f"Vespa Index with Vespa ID {hit['id']} has no contents. "
|
||||
f"This is invalid because the vector is not meaningful and keywordsearch cannot "
|
||||
f"fetch this document"
|
||||
)
|
||||
|
||||
filtered_hits = [hit for hit in hits if hit["fields"].get(CONTENT) is not None]
|
||||
|
||||
inference_chunks = [
|
||||
InferenceChunk.from_dict(
|
||||
dict(
|
||||
@@ -330,7 +341,7 @@ def _query_vespa(query_params: Mapping[str, str | int]) -> list[InferenceChunk]:
|
||||
},
|
||||
)
|
||||
)
|
||||
- for hit in hits
|
||||
+ for hit in filtered_hits
|
||||
]
|
||||
|
||||
return inference_chunks
|
||||
|
Reference in New Issue
Block a user