diff --git a/backend/danswer/document_index/vespa/index.py b/backend/danswer/document_index/vespa/index.py index ecf7df7b7..2da76f34d 100644 --- a/backend/danswer/document_index/vespa/index.py +++ b/backend/danswer/document_index/vespa/index.py @@ -605,7 +605,7 @@ def _vespa_hit_to_inference_chunk( section_continuation=fields[SECTION_CONTINUATION], document_id=fields[DOCUMENT_ID], source_type=fields[SOURCE_TYPE], - title=fields[TITLE], + title=fields.get(TITLE), semantic_identifier=fields[SEMANTIC_IDENTIFIER], boost=fields.get(BOOST, 1), recency_bias=fields.get("matchfeatures", {}).get(RECENCY_BIAS, 1.0), @@ -614,7 +614,7 @@ def _vespa_hit_to_inference_chunk( primary_owners=fields.get(PRIMARY_OWNERS), secondary_owners=fields.get(SECONDARY_OWNERS), metadata=metadata, - metadata_suffix=fields.get(METADATA_SUFFIX) or "", + metadata_suffix=fields.get(METADATA_SUFFIX), match_highlights=match_highlights, updated_at=updated_at, ) diff --git a/backend/danswer/search/models.py b/backend/danswer/search/models.py index 53f98fa66..a94b9f63d 100644 --- a/backend/danswer/search/models.py +++ b/backend/danswer/search/models.py @@ -190,11 +190,12 @@ class InferenceChunk(BaseChunk): class InferenceChunkUncleaned(InferenceChunk): - title: str # Separate from Semantic Identifier though often same - metadata_suffix: str + title: str | None # Separate from Semantic Identifier though often same + metadata_suffix: str | None def to_inference_chunk(self) -> InferenceChunk: # Create a dict of all fields except 'title' and 'metadata_suffix' + # Assumes the cleaning has already been applied and just needs to translate to the right type inference_chunk_data = { k: v for k, v in self.dict().items()