Fix Title for docs without a title (#1827)

This commit is contained in:
Yuhong Sun 2024-07-14 13:51:11 -07:00 committed by GitHub
parent 56b175f597
commit da31da33e7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 5 additions and 4 deletions

View File

@@ -605,7 +605,7 @@ def _vespa_hit_to_inference_chunk(
section_continuation=fields[SECTION_CONTINUATION],
document_id=fields[DOCUMENT_ID],
source_type=fields[SOURCE_TYPE],
title=fields[TITLE],
title=fields.get(TITLE),
semantic_identifier=fields[SEMANTIC_IDENTIFIER],
boost=fields.get(BOOST, 1),
recency_bias=fields.get("matchfeatures", {}).get(RECENCY_BIAS, 1.0),
@@ -614,7 +614,7 @@ def _vespa_hit_to_inference_chunk(
primary_owners=fields.get(PRIMARY_OWNERS),
secondary_owners=fields.get(SECONDARY_OWNERS),
metadata=metadata,
metadata_suffix=fields.get(METADATA_SUFFIX) or "",
metadata_suffix=fields.get(METADATA_SUFFIX),
match_highlights=match_highlights,
updated_at=updated_at,
)

View File

@@ -190,11 +190,12 @@ class InferenceChunk(BaseChunk):
class InferenceChunkUncleaned(InferenceChunk):
title: str # Separate from Semantic Identifier though often same
metadata_suffix: str
title: str | None # Separate from Semantic Identifier though often same
metadata_suffix: str | None
def to_inference_chunk(self) -> InferenceChunk:
# Create a dict of all fields except 'title' and 'metadata_suffix'
# Assumes the cleaning has already been applied and just needs to translate to the right type
inference_chunk_data = {
k: v
for k, v in self.dict().items()