Title Truncation Logic (#1828)

This commit is contained in:
Yuhong Sun 2024-07-14 13:54:36 -07:00 committed by GitHub
parent da31da33e7
commit f63d0ca3ad
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -110,7 +110,7 @@ def chunk_document(
tokenizer = get_default_tokenizer()
title = document.get_title_for_document_index()
-title_prefix = f"{title}{RETURN_SEPARATOR}"[:MAX_CHUNK_TITLE_LEN] if title else ""
+title_prefix = f"{title[:MAX_CHUNK_TITLE_LEN]}{RETURN_SEPARATOR}" if title else ""
title_tokens = len(tokenizer.tokenize(title_prefix))
metadata_suffix = ""