Add answers to search (#2020)

pablodanswer
2024-08-04 23:02:55 -07:00
committed by GitHub
parent 76b7792e69
commit 6d67d472cd
47 changed files with 806 additions and 445 deletions


@@ -1,6 +1,7 @@
from pydantic import BaseModel
from danswer.configs.constants import DocumentSource
from danswer.search.enums import LLMEvaluationType
from danswer.search.enums import SearchType
from danswer.search.models import ChunkContext
from danswer.search.models import RetrievalDetails
@@ -21,9 +22,9 @@ class DocumentSearchRequest(ChunkContext):
search_type: SearchType
retrieval_options: RetrievalDetails
recency_bias_multiplier: float = 1.0
evaluation_type: LLMEvaluationType
# Forcibly skip (or run) the step; if None, the system defaults are used
skip_rerank: bool | None = None
skip_llm_chunk_filter: bool | None = None
class BasicCreateChatMessageRequest(ChunkContext):
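
For context, a minimal sketch of how a caller might populate the revised DocumentSearchRequest: the explicit skip_rerank / skip_llm_chunk_filter toggles give way to a single evaluation_type field. The message field, the enum members (SEMANTIC, BASIC), and the RetrievalDetails defaults used below are illustrative assumptions, not taken from this diff.

from danswer.search.enums import LLMEvaluationType
from danswer.search.enums import SearchType
from danswer.search.models import RetrievalDetails
from ee.danswer.server.query_and_chat.models import DocumentSearchRequest

# Illustrative request; names not shown in this diff are assumptions.
request = DocumentSearchRequest(
    message="How do I rotate an API key?",   # assumed field
    search_type=SearchType.SEMANTIC,         # assumed enum member
    retrieval_options=RetrievalDetails(),    # assumed defaults
    recency_bias_multiplier=1.0,
    # Replaces the old skip flags: have the LLM judge section relevance,
    # or select a "skip" evaluation type to bypass that step entirely.
    evaluation_type=LLMEvaluationType.BASIC,  # assumed enum member
)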


@@ -1,3 +1,5 @@
from typing import cast
from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException
@@ -9,7 +11,9 @@ from danswer.configs.danswerbot_configs import DANSWER_BOT_TARGET_CHUNK_PERCENTA
from danswer.danswerbot.slack.handlers.handle_standard_answers import (
oneoff_standard_answers,
)
from danswer.db.chat import translate_db_search_doc_to_server_search_doc
from danswer.db.engine import get_session
from danswer.db.models import SearchDoc
from danswer.db.models import User
from danswer.db.persona import get_persona_by_id
from danswer.llm.answering.prompts.citations_prompt import (
@@ -27,6 +31,7 @@ from danswer.search.models import SearchRequest
from danswer.search.pipeline import SearchPipeline
from danswer.search.utils import dedupe_documents
from danswer.search.utils import drop_llm_indices
from danswer.search.utils import relevant_documents_to_indices
from danswer.utils.logger import setup_logger
from ee.danswer.server.query_and_chat.models import DocumentSearchRequest
from ee.danswer.server.query_and_chat.models import StandardAnswerRequest
@@ -63,7 +68,7 @@ def handle_search_request(
offset=search_request.retrieval_options.offset,
limit=search_request.retrieval_options.limit,
skip_rerank=search_request.skip_rerank,
skip_llm_chunk_filter=search_request.skip_llm_chunk_filter,
evaluation_type=search_request.evaluation_type,
chunks_above=search_request.chunks_above,
chunks_below=search_request.chunks_below,
full_doc=search_request.full_doc,
@@ -75,8 +80,7 @@ def handle_search_request(
bypass_acl=False,
)
top_sections = search_pipeline.reranked_sections
# If using surrounding context or full doc, this will be empty
relevant_section_indices = search_pipeline.relevant_section_indices
relevance_sections = search_pipeline.section_relevance
top_docs = [
SavedSearchDocWithContent(
document_id=section.center_chunk.document_id,
@@ -105,19 +109,26 @@ def handle_search_request(
# Deduping happens at the last step to avoid harming quality by dropping content early on
deduped_docs = top_docs
dropped_inds = None
if search_request.retrieval_options.dedupe_docs:
deduped_docs, dropped_inds = dedupe_documents(top_docs)
llm_indices = relevant_documents_to_indices(
relevance_sections=relevance_sections,
search_docs=[
translate_db_search_doc_to_server_search_doc(cast(SearchDoc, doc))
for doc in deduped_docs
],
)
if dropped_inds:
relevant_section_indices = drop_llm_indices(
llm_indices=relevant_section_indices,
llm_indices = drop_llm_indices(
llm_indices=llm_indices,
search_docs=deduped_docs,
dropped_indices=dropped_inds,
)
return DocumentSearchResponse(
top_documents=deduped_docs, llm_indices=relevant_section_indices
)
return DocumentSearchResponse(top_documents=deduped_docs, llm_indices=llm_indices)
@basic_router.post("/answer-with-quote")
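
Since the endpoint now derives the LLM-relevance indices itself from section_relevance (instead of reading precomputed relevant_section_indices), here is a minimal, self-contained sketch of that index bookkeeping. The helper names echo relevant_documents_to_indices and drop_llm_indices from the diff, but the simplified signatures and bodies are illustrative assumptions, not Danswer's actual implementations (the real drop_llm_indices also receives the search docs).

# Stand-in sketch: map LLM relevance judgments onto document positions,
# then shift those positions when deduplication removes documents.

def relevant_documents_to_indices(
    relevant_ids: set[str], document_ids: list[str]
) -> list[int]:
    # Positions of the documents the LLM judged relevant.
    return [i for i, doc_id in enumerate(document_ids) if doc_id in relevant_ids]


def drop_llm_indices(
    llm_indices: list[int], dropped_indices: list[int]
) -> list[int]:
    # Discard indices that point at dropped documents and shift the rest
    # down by the number of dropped documents that preceded them.
    dropped = set(dropped_indices)
    remapped = []
    for idx in llm_indices:
        if idx in dropped:
            continue
        remapped.append(idx - sum(1 for d in dropped if d < idx))
    return remapped


# Example: docs at positions 1 and 3 are relevant; dedup drops position 1,
# so only the doc formerly at position 3 survives, now at position 2.
print(relevant_documents_to_indices({"b", "d"}, ["a", "b", "c", "d"]))  # [1, 3]
print(drop_llm_indices([1, 3], [1]))                                    # [2]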