mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-10-10 21:26:01 +02:00
Handle Empty Titles (#1891)
This commit is contained in:
@@ -100,6 +100,7 @@ class DefaultIndexingEmbedder(IndexingEmbedder):
         # Drop any None or empty strings
         chunk_titles_list = [title for title in chunk_titles if title]
 
+        if chunk_titles_list:
             title_embeddings = self.embedding_model.encode(
                 chunk_titles_list, text_type=EmbedTextType.PASSAGE
             )
@@ -112,6 +112,10 @@ class EmbeddingModel:
         text_type: EmbedTextType,
         batch_size: int = BATCH_SIZE_ENCODE_CHUNKS,
     ) -> list[list[float]]:
+        if not texts:
+            logger.warning("No texts to be embedded")
+            return []
+
         if self.provider_type:
             embed_request = EmbedRequest(
                 model_name=self.model_name,
@@ -284,6 +284,9 @@ def calc_sim_scores(query: str, docs: list[str]) -> list[list[float]]:
 async def process_embed_request(
     embed_request: EmbedRequest,
 ) -> EmbedResponse:
+    if not embed_request.texts:
+        raise HTTPException(status_code=400, detail="No texts to be embedded")
+
     try:
         if embed_request.text_type == EmbedTextType.QUERY:
             prefix = embed_request.manual_query_prefix
@@ -315,6 +318,11 @@ async def process_rerank_request(embed_request: RerankRequest) -> RerankResponse
     if INDEXING_ONLY:
         raise RuntimeError("Indexing model server should not call intent endpoint")
 
+    if not embed_request.documents or not embed_request.query:
+        raise HTTPException(
+            status_code=400, detail="No documents or query to be reranked"
+        )
+
     try:
         sim_scores = calc_sim_scores(
             query=embed_request.query, docs=embed_request.documents
Reference in New Issue
Block a user