# Patch summary (commentary only; everything from the first "diff --git" header onward is unchanged).
#
# backend/onyx/connectors/models.py, class DocumentBase:
#   Replaces get_content() with get_text_content(). Both join the sections' text
#   with a single space; the new version additionally skips every section whose
#   `text` is falsy (the added `if section.text` filter), so only sections that
#   actually carry text contribute to the joined string.
#
# backend/onyx/indexing/chunker.py, class Chunker:
#   Inside the `if self.enable_contextual_rag:` branch, switches the call from
#   document.get_content() to document.get_text_content(). The returned string is
#   tokenized and its token count stored in doc_token_count.
#
# NOTE(review): get_content() is removed rather than aliased. This patch updates
#   only the one call site in chunker.py; confirm no other callers remain.
# NOTE(review): this copy of the patch appears to have had its line breaks
#   collapsed into spaces; the original line structure would need to be restored
#   before it can be applied.
diff --git a/backend/onyx/connectors/models.py b/backend/onyx/connectors/models.py index c1e88f9a8..bd3e9aee5 100644 --- a/backend/onyx/connectors/models.py +++ b/backend/onyx/connectors/models.py @@ -164,8 +164,8 @@ class DocumentBase(BaseModel): attributes.append(k + INDEX_SEPARATOR + v) return attributes - def get_content(self) -> str: - return " ".join([section.text for section in self.sections]) + def get_text_content(self) -> str: + return " ".join([section.text for section in self.sections if section.text]) class Document(DocumentBase): diff --git a/backend/onyx/indexing/chunker.py b/backend/onyx/indexing/chunker.py index faeb8a9cb..2448c5733 100644 --- a/backend/onyx/indexing/chunker.py +++ b/backend/onyx/indexing/chunker.py @@ -477,7 +477,7 @@ class Chunker: single_chunk_fits = True doc_token_count = 0 if self.enable_contextual_rag: - doc_content = document.get_content() + doc_content = document.get_text_content() tokenized_doc = self.tokenizer.tokenize(doc_content) doc_token_count = len(tokenized_doc)