mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-09-27 20:38:32 +02:00
Fix chunker (#2014)
This commit is contained in:
@@ -41,7 +41,10 @@ ChunkFunc = Callable[[Document], list[DocAwareChunk]]
|
||||
|
||||
|
||||
def extract_blurb(text: str, blurb_splitter: "SentenceSplitter") -> str:
    """Return the first piece of ``text`` as produced by ``blurb_splitter``.

    Args:
        text: The text to take the blurb from.
        blurb_splitter: Object whose ``split_text(text)`` returns a sequence
            of string pieces.

    Returns:
        The first piece returned by ``blurb_splitter.split_text(text)``, or
        an empty string when the splitter returns no pieces.
    """
    texts = blurb_splitter.split_text(text)
    # Guard before indexing: an empty result would otherwise raise IndexError.
    if not texts:
        return ""
    return texts[0]
|
||||
|
||||
|
||||
def chunk_large_section(
|
||||
|
@@ -181,7 +181,7 @@ def index_doc_batch(
|
||||
)
|
||||
|
||||
logger.debug("Starting chunking")
|
||||
# The first chunk additionally contains the Title of the Document
|
||||
# The embedder is needed here to get the correct tokenizer
|
||||
chunks: list[DocAwareChunk] = [
|
||||
chunk
|
||||
for document in updatable_docs
|
||||
|
Reference in New Issue
Block a user