mirror of https://github.com/danswer-ai/danswer.git

Add retrieved_document_ids to QueryEvent
@@ -0,0 +1,31 @@
+"""Added retrieved docs to query event
+
+Revision ID: 9d97fecfab7f
+Revises: ffc707a226b4
+Create Date: 2023-10-20 12:22:31.930449
+
+"""
+from alembic import op
+import sqlalchemy as sa
+from sqlalchemy.dialects import postgresql
+
+# revision identifiers, used by Alembic.
+revision = "9d97fecfab7f"
+down_revision = "ffc707a226b4"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    op.add_column(
+        "query_event",
+        sa.Column(
+            "retrieved_document_ids",
+            postgresql.ARRAY(sa.String()),
+            nullable=True,
+        ),
+    )
+
+
+def downgrade() -> None:
+    op.drop_column("query_event", "retrieved_document_ids")
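The migration above only adds a nullable array column, so it is non-destructive for an existing `query_event` table; on PostgreSQL it roughly corresponds to `ALTER TABLE query_event ADD COLUMN retrieved_document_ids VARCHAR[]`, and `downgrade()` drops the column again. A minimal sketch of applying it programmatically, assuming the project's standard Alembic setup and an `alembic.ini` at the expected location (both assumptions, not shown in this diff):

# Sketch only: apply pending migrations up to and including 9d97fecfab7f.
# Equivalent to running `alembic upgrade head` from a shell.
from alembic import command
from alembic.config import Config

def upgrade_to_head(config_path: str = "alembic.ini") -> None:
    # config_path is an assumption; point it at the project's alembic.ini
    alembic_cfg = Config(config_path)
    command.upgrade(alembic_cfg, "head")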
@@ -94,16 +94,18 @@ def update_document_hidden(db_session: Session, document_id: str, hidden: bool)


 def create_query_event(
+    db_session: Session,
     query: str,
     selected_flow: SearchType | None,
     llm_answer: str | None,
     user_id: UUID | None,
-    db_session: Session,
+    retrieved_document_ids: list[str] | None = None,
 ) -> int:
     query_event = QueryEvent(
         query=query,
         selected_search_flow=selected_flow,
         llm_answer=llm_answer,
+        retrieved_document_ids=retrieved_document_ids,
         user_id=user_id,
     )
     db_session.add(query_event)
@@ -113,10 +115,10 @@ def create_query_event(


 def update_query_event_feedback(
+    db_session: Session,
     feedback: QAFeedbackType,
     query_id: int,
     user_id: UUID | None,
-    db_session: Session,
 ) -> None:
     query_event = fetch_query_event_by_id(query_id, db_session)

@@ -124,7 +126,21 @@ def update_query_event_feedback(
         raise ValueError("User trying to give feedback on a query run by another user.")

     query_event.feedback = feedback
+    db_session.commit()
+
+
+def update_query_event_retrieved_documents(
+    db_session: Session,
+    retrieved_document_ids: list[str],
+    query_id: int,
+    user_id: UUID | None,
+) -> None:
+    query_event = fetch_query_event_by_id(query_id, db_session)
+
+    if user_id != query_event.user_id:
+        raise ValueError("User trying to update docs on a query run by another user.")
+
+    query_event.retrieved_document_ids = retrieved_document_ids
     db_session.commit()


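Taken together, the two helpers are meant to be used in sequence: create the event first, then attach the retrieved document IDs once retrieval has finished. A hedged usage sketch (the session setup and the literal document IDs are placeholders, not code from this commit):

# Illustrative flow only; `db_session` is assumed to be an open SQLAlchemy Session.
query_event_id = create_query_event(
    db_session=db_session,
    query="how do I configure connectors?",
    selected_flow=None,
    llm_answer=None,
    user_id=None,
)
# ... run retrieval ...
update_query_event_retrieved_documents(
    db_session=db_session,
    retrieved_document_ids=["example-doc-1", "example-doc-2"],  # placeholder IDs
    query_id=query_event_id,
    user_id=None,  # must match the user who created the event
)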
@@ -346,6 +346,12 @@ class QueryEvent(Base):
         Enum(SearchType), nullable=True
     )
     llm_answer: Mapped[str | None] = mapped_column(Text, default=None)
+    # Document IDs of the top context documents retrieved for the query (if any)
+    # NOTE: not using a foreign key to enable easy deletion of documents without
+    # needing to adjust `QueryEvent` rows
+    retrieved_document_ids: Mapped[list[str] | None] = mapped_column(
+        postgresql.ARRAY(String), nullable=True
+    )
     feedback: Mapped[QAFeedbackType | None] = mapped_column(
         Enum(QAFeedbackType), nullable=True
     )
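Because the new column is a plain `postgresql.ARRAY(String)` with no foreign key, deleting a document never touches existing `QueryEvent` rows; the stored IDs simply become dangling strings. One way such a column can be queried, sketched with SQLAlchemy's PostgreSQL array operators (the query itself is an assumption, not part of this commit):

from sqlalchemy import select

# Find query events whose retrieved set contains a given document ID
# (translates to the PostgreSQL `@>` containment operator).
stmt = select(QueryEvent).where(
    QueryEvent.retrieved_document_ids.contains(["some-document-id"])
)
events = db_session.scalars(stmt).all()  # db_session: an open Session (assumed)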
@@ -9,6 +9,7 @@ from danswer.configs.app_configs import QA_TIMEOUT
 from danswer.configs.constants import IGNORE_FOR_QA
 from danswer.datastores.document_index import get_default_document_index
 from danswer.db.feedback import create_query_event
+from danswer.db.feedback import update_query_event_retrieved_documents
 from danswer.db.models import User
 from danswer.direct_qa.exceptions import OpenAIKeyMissing
 from danswer.direct_qa.exceptions import UnknownModelError
@@ -55,7 +56,9 @@ def answer_qa_query(

     query_event_id = create_query_event(
         query=query,
-        selected_flow=SearchType.KEYWORD,
+        selected_flow=SearchType.KEYWORD
+        if question.use_keyword
+        else SearchType.SEMANTIC,
         llm_answer=None,
         user_id=user.id if user is not None else None,
         db_session=db_session,
@@ -97,13 +100,22 @@ def answer_qa_query(
         query_event_id=query_event_id,
     )

+    top_docs = chunks_to_search_docs(ranked_chunks)
+    unranked_top_docs = chunks_to_search_docs(unranked_chunks)
+    update_query_event_retrieved_documents(
+        db_session=db_session,
+        retrieved_document_ids=[doc.document_id for doc in top_docs],
+        query_id=query_event_id,
+        user_id=user_id,
+    )
+
     if disable_generative_answer:
         logger.debug("Skipping QA because generative AI is disabled")
         return QAResponse(
             answer=None,
             quotes=None,
-            top_ranked_docs=chunks_to_search_docs(ranked_chunks),
-            lower_ranked_docs=chunks_to_search_docs(unranked_chunks),
+            top_ranked_docs=top_docs,
+            lower_ranked_docs=unranked_top_docs,
             # set flow as search so frontend doesn't ask the user if they want
             # to run QA over more documents
             predicted_flow=QueryFlow.SEARCH,
@@ -119,8 +131,8 @@ def answer_qa_query(
         return QAResponse(
             answer=None,
             quotes=None,
-            top_ranked_docs=chunks_to_search_docs(ranked_chunks),
-            lower_ranked_docs=chunks_to_search_docs(unranked_chunks),
+            top_ranked_docs=top_docs,
+            lower_ranked_docs=unranked_top_docs,
             predicted_flow=predicted_flow,
             predicted_search=predicted_search,
             error_msg=str(e),
@@ -162,8 +174,8 @@ def answer_qa_query(
         return QAResponse(
             answer=d_answer.answer if d_answer else None,
             quotes=quotes.quotes if quotes else None,
-            top_ranked_docs=chunks_to_search_docs(ranked_chunks),
-            lower_ranked_docs=chunks_to_search_docs(unranked_chunks),
+            top_ranked_docs=top_docs,
+            lower_ranked_docs=unranked_top_docs,
             predicted_flow=predicted_flow,
             predicted_search=predicted_search,
             eval_res_valid=True if valid else False,
@@ -174,8 +186,8 @@ def answer_qa_query(
     return QAResponse(
         answer=d_answer.answer if d_answer else None,
         quotes=quotes.quotes if quotes else None,
-        top_ranked_docs=chunks_to_search_docs(ranked_chunks),
-        lower_ranked_docs=chunks_to_search_docs(unranked_chunks),
+        top_ranked_docs=top_docs,
+        lower_ranked_docs=unranked_top_docs,
         predicted_flow=predicted_flow,
         predicted_search=predicted_search,
         error_msg=error_msg,
@@ -20,6 +20,7 @@ from danswer.db.engine import get_session
 from danswer.db.feedback import create_doc_retrieval_feedback
 from danswer.db.feedback import create_query_event
 from danswer.db.feedback import update_query_event_feedback
+from danswer.db.feedback import update_query_event_retrieved_documents
 from danswer.db.models import User
 from danswer.direct_qa.answer_question import answer_qa_query
 from danswer.direct_qa.exceptions import OpenAIKeyMissing
@@ -165,6 +166,12 @@ def semantic_search(

     top_docs = chunks_to_search_docs(ranked_chunks)
     other_top_docs = chunks_to_search_docs(unranked_chunks)
+    update_query_event_retrieved_documents(
+        db_session=db_session,
+        retrieved_document_ids=[doc.document_id for doc in top_docs],
+        query_id=query_event_id,
+        user_id=user_id,
+    )

     return SearchResponse(
         top_ranked_docs=top_docs,
@@ -203,6 +210,13 @@ def keyword_search(
     )

     top_docs = chunks_to_search_docs(ranked_chunks)
+    update_query_event_retrieved_documents(
+        db_session=db_session,
+        retrieved_document_ids=[doc.document_id for doc in top_docs],
+        query_id=query_event_id,
+        user_id=user_id,
+    )
+
     return SearchResponse(
         top_ranked_docs=top_docs, lower_ranked_docs=None, query_event_id=query_event_id
     )
@@ -349,6 +363,7 @@ def stream_direct_qa(
             if question.use_keyword
             else SearchType.SEMANTIC,
             llm_answer=answer_so_far,
+            retrieved_document_ids=[doc.document_id for doc in top_docs],
             user_id=user_id,
             db_session=db_session,
         )
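In the streaming endpoint the hunk above passes the document IDs straight into `create_query_event` (the event is created with `llm_answer=answer_so_far`), so no follow-up call to `update_query_event_retrieved_documents` is needed there. A quick way to sanity-check what was stored, as a sketch that assumes an open SQLAlchemy session and a known `query_event_id`:

# Sketch only: load the recorded event and inspect its retrieved document IDs.
event = db_session.get(QueryEvent, query_event_id)
if event is not None:
    print(event.retrieved_document_ids)  # list[str] of document IDs, or None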