mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-03-29 11:12:02 +01:00
rename of documents to verified_reranked_documents
This commit is contained in:
parent
d53dd1e356
commit
732861a940
@ -20,12 +20,12 @@ def format_initial_sub_answers(
|
||||
logger.info(f"--------{now_start}--------INGEST ANSWERS---")
|
||||
documents = []
|
||||
context_documents = []
|
||||
cited_docs = []
|
||||
cited_documents = []
|
||||
answer_results = state.answer_results if hasattr(state, "answer_results") else []
|
||||
for answer_result in answer_results:
|
||||
documents.extend(answer_result.documents)
|
||||
documents.extend(answer_result.verified_reranked_documents)
|
||||
context_documents.extend(answer_result.context_documents)
|
||||
cited_docs.extend(answer_result.cited_docs)
|
||||
cited_documents.extend(answer_result.cited_documents)
|
||||
now_end = datetime.now()
|
||||
|
||||
logger.debug(
|
||||
@ -35,9 +35,9 @@ def format_initial_sub_answers(
|
||||
return DecompAnswersUpdate(
|
||||
# Deduping is done by the documents operator for the main graph
|
||||
# so we might not need to dedup here
|
||||
documents=dedup_inference_sections(documents, []),
|
||||
verified_reranked_documents=dedup_inference_sections(documents, []),
|
||||
context_documents=dedup_inference_sections(context_documents, []),
|
||||
cited_documents=dedup_inference_sections(cited_docs, []),
|
||||
cited_documents=dedup_inference_sections(cited_documents, []),
|
||||
sub_question_results=answer_results,
|
||||
log_messages=[
|
||||
f"{now_start} -- Main - Ingest initial processed sub questions, Time taken: {now_end - now_start}"
|
||||
|
@ -18,9 +18,9 @@ def format_sub_answer(state: AnswerQuestionState) -> AnswerQuestionOutput:
|
||||
verified_high_quality=state.answer_quality,
|
||||
answer=state.answer,
|
||||
expanded_retrieval_results=state.expanded_retrieval_results,
|
||||
documents=state.documents,
|
||||
verified_reranked_documents=state.verified_reranked_documents,
|
||||
context_documents=state.context_documents,
|
||||
cited_docs=state.cited_docs,
|
||||
cited_documents=state.cited_documents,
|
||||
sub_question_retrieval_stats=state.sub_question_retrieval_stats,
|
||||
)
|
||||
],
|
||||
|
@ -44,7 +44,7 @@ def generate_sub_answer(
|
||||
|
||||
agent_search_config = cast(AgentSearchConfig, config["metadata"]["config"])
|
||||
question = state.question
|
||||
state.documents
|
||||
state.verified_reranked_documents
|
||||
level, question_nr = parse_question_id(state.question_id)
|
||||
context_docs = state.context_documents[:AGENT_MAX_ANSWER_CONTEXT_DOCS]
|
||||
persona_contextualized_prompt = get_persona_agent_prompt_expressions(
|
||||
@ -107,7 +107,7 @@ def generate_sub_answer(
|
||||
)
|
||||
|
||||
answer_citation_ids = get_answer_citation_ids(answer_str)
|
||||
cited_docs = [
|
||||
cited_documents = [
|
||||
context_docs[id] for id in answer_citation_ids if id < len(context_docs)
|
||||
]
|
||||
|
||||
@ -121,7 +121,7 @@ def generate_sub_answer(
|
||||
|
||||
return QAGenerationUpdate(
|
||||
answer=answer_str,
|
||||
cited_docs=cited_docs,
|
||||
cited_documents=cited_documents,
|
||||
log_messages=[
|
||||
get_langgraph_node_log_string(
|
||||
graph_component="initial - generate individual sub answer",
|
||||
|
@ -18,7 +18,7 @@ def ingest_retrieved_documents(
|
||||
|
||||
return RetrievalIngestionUpdate(
|
||||
expanded_retrieval_results=state.expanded_retrieval_result.expanded_queries_results,
|
||||
documents=state.expanded_retrieval_result.reranked_documents,
|
||||
verified_reranked_documents=state.expanded_retrieval_result.verified_reranked_documents,
|
||||
context_documents=state.expanded_retrieval_result.context_documents,
|
||||
sub_question_retrieval_stats=sub_question_retrieval_stats,
|
||||
)
|
||||
|
@ -25,13 +25,15 @@ class QACheckUpdate(LoggerUpdate, BaseModel):
|
||||
class QAGenerationUpdate(LoggerUpdate, BaseModel):
|
||||
answer: str = ""
|
||||
log_messages: list[str] = []
|
||||
cited_docs: Annotated[list[InferenceSection], dedup_inference_sections] = []
|
||||
cited_documents: Annotated[list[InferenceSection], dedup_inference_sections] = []
|
||||
# answer_stat: AnswerStats
|
||||
|
||||
|
||||
class RetrievalIngestionUpdate(LoggerUpdate, BaseModel):
|
||||
expanded_retrieval_results: list[QueryResult] = []
|
||||
documents: Annotated[list[InferenceSection], dedup_inference_sections] = []
|
||||
verified_reranked_documents: Annotated[
|
||||
list[InferenceSection], dedup_inference_sections
|
||||
] = []
|
||||
context_documents: Annotated[list[InferenceSection], dedup_inference_sections] = []
|
||||
sub_question_retrieval_stats: AgentChunkStats = AgentChunkStats()
|
||||
|
||||
|
@ -67,13 +67,13 @@ def generate_initial_answer(
|
||||
question = agent_a_config.search_request.query
|
||||
prompt_enrichment_components = get_prompt_enrichment_components(agent_a_config)
|
||||
|
||||
sub_questions_cited_docs = state.cited_documents
|
||||
sub_questions_cited_documents = state.cited_documents
|
||||
all_original_question_documents = state.all_original_question_documents
|
||||
|
||||
consolidated_context_docs: list[InferenceSection] = sub_questions_cited_docs
|
||||
consolidated_context_docs: list[InferenceSection] = sub_questions_cited_documents
|
||||
counter = 0
|
||||
for original_doc_number, original_doc in enumerate(all_original_question_documents):
|
||||
if original_doc_number not in sub_questions_cited_docs:
|
||||
if original_doc_number not in sub_questions_cited_documents:
|
||||
if (
|
||||
counter <= AGENT_MIN_ORIG_QUESTION_DOCS
|
||||
or len(consolidated_context_docs) < AGENT_MAX_ANSWER_CONTEXT_DOCS
|
||||
|
@ -21,19 +21,19 @@ def format_initial_sub_answers(
|
||||
|
||||
documents = []
|
||||
context_documents = []
|
||||
cited_docs = []
|
||||
cited_documents = []
|
||||
answer_results = state.answer_results if hasattr(state, "answer_results") else []
|
||||
for answer_result in answer_results:
|
||||
documents.extend(answer_result.documents)
|
||||
documents.extend(answer_result.verified_reranked_documents)
|
||||
context_documents.extend(answer_result.context_documents)
|
||||
cited_docs.extend(answer_result.cited_docs)
|
||||
cited_documents.extend(answer_result.cited_documents)
|
||||
|
||||
return DecompAnswersUpdate(
|
||||
# Deduping is done by the documents operator for the main graph
|
||||
# so we might not need to dedup here
|
||||
documents=dedup_inference_sections(documents, []),
|
||||
verified_reranked_documents=dedup_inference_sections(documents, []),
|
||||
context_documents=dedup_inference_sections(context_documents, []),
|
||||
cited_documents=dedup_inference_sections(cited_docs, []),
|
||||
cited_documents=dedup_inference_sections(cited_documents, []),
|
||||
sub_question_results=answer_results,
|
||||
log_messages=[
|
||||
get_langgraph_node_log_string(
|
||||
|
@ -1,20 +1,8 @@
|
||||
from pydantic import BaseModel
|
||||
|
||||
from onyx.agents.agent_search.shared_graph_utils.models import AgentChunkStats
|
||||
from onyx.agents.agent_search.shared_graph_utils.models import QueryResult
|
||||
from onyx.context.search.models import InferenceSection
|
||||
|
||||
### Models ###
|
||||
|
||||
|
||||
class AnswerRetrievalStats(BaseModel):
|
||||
answer_retrieval_stats: dict[str, float | int]
|
||||
|
||||
|
||||
class QuestionAnswerResults(BaseModel):
|
||||
question: str
|
||||
answer: str
|
||||
quality: str
|
||||
expanded_retrieval_results: list[QueryResult]
|
||||
documents: list[InferenceSection]
|
||||
sub_question_retrieval_stats: list[AgentChunkStats]
|
||||
|
@ -69,16 +69,16 @@ def generate_refined_answer(
|
||||
prompt_enrichment_components.persona_prompts.contextualized_prompt
|
||||
)
|
||||
|
||||
initial_documents = state.documents
|
||||
initial_documents = state.verified_reranked_documents
|
||||
refined_documents = state.refined_documents
|
||||
sub_questions_cited_docs = state.cited_documents
|
||||
sub_questions_cited_documents = state.cited_documents
|
||||
all_original_question_documents = state.all_original_question_documents
|
||||
|
||||
consolidated_context_docs: list[InferenceSection] = sub_questions_cited_docs
|
||||
consolidated_context_docs: list[InferenceSection] = sub_questions_cited_documents
|
||||
|
||||
counter = 0
|
||||
for original_doc_number, original_doc in enumerate(all_original_question_documents):
|
||||
if original_doc_number not in sub_questions_cited_docs:
|
||||
if original_doc_number not in sub_questions_cited_documents:
|
||||
if (
|
||||
counter <= AGENT_MIN_ORIG_QUESTION_DOCS
|
||||
or len(consolidated_context_docs)
|
||||
|
@ -22,12 +22,12 @@ def ingest_refined_answers(
|
||||
documents = []
|
||||
answer_results = state.answer_results if hasattr(state, "answer_results") else []
|
||||
for answer_result in answer_results:
|
||||
documents.extend(answer_result.documents)
|
||||
documents.extend(answer_result.verified_reranked_documents)
|
||||
|
||||
return DecompAnswersUpdate(
|
||||
# Deduping is done by the documents operator for the main graph
|
||||
# so we might not need to dedup here
|
||||
documents=dedup_inference_sections(documents, []),
|
||||
verified_reranked_documents=dedup_inference_sections(documents, []),
|
||||
sub_question_results=answer_results,
|
||||
log_messages=[
|
||||
get_langgraph_node_log_string(
|
||||
|
@ -102,7 +102,9 @@ class RequireRefinedAnswerUpdate(LoggerUpdate):
|
||||
|
||||
|
||||
class DecompAnswersUpdate(LoggerUpdate):
|
||||
documents: Annotated[list[InferenceSection], dedup_inference_sections] = []
|
||||
verified_reranked_documents: Annotated[
|
||||
list[InferenceSection], dedup_inference_sections
|
||||
] = []
|
||||
context_documents: Annotated[list[InferenceSection], dedup_inference_sections] = []
|
||||
cited_documents: Annotated[
|
||||
list[InferenceSection], dedup_inference_sections
|
||||
|
@ -1,18 +1,8 @@
|
||||
from pydantic import BaseModel
|
||||
|
||||
from onyx.agents.agent_search.shared_graph_utils.models import AgentChunkStats
|
||||
from onyx.context.search.models import InferenceSection
|
||||
|
||||
### Models ###
|
||||
|
||||
|
||||
class AnswerRetrievalStats(BaseModel):
|
||||
answer_retrieval_stats: dict[str, float | int]
|
||||
|
||||
|
||||
class QuestionAnswerResults(BaseModel):
|
||||
question: str
|
||||
answer: str
|
||||
quality: str
|
||||
documents: list[InferenceSection]
|
||||
sub_question_retrieval_stats: AgentChunkStats
|
||||
|
@ -7,6 +7,6 @@ from onyx.context.search.models import InferenceSection
|
||||
|
||||
class ExpandedRetrievalResult(BaseModel):
|
||||
expanded_queries_results: list[QueryResult] = []
|
||||
reranked_documents: list[InferenceSection] = []
|
||||
verified_reranked_documents: list[InferenceSection] = []
|
||||
context_documents: list[InferenceSection] = []
|
||||
sub_question_retrieval_stats: AgentChunkStats = AgentChunkStats()
|
||||
|
@ -79,7 +79,7 @@ def format_results(
|
||||
return ExpandedRetrievalUpdate(
|
||||
expanded_retrieval_result=ExpandedRetrievalResult(
|
||||
expanded_queries_results=state.expanded_retrieval_results,
|
||||
reranked_documents=reranked_documents,
|
||||
verified_reranked_documents=reranked_documents,
|
||||
context_documents=state.reranked_documents,
|
||||
sub_question_retrieval_stats=sub_question_retrieval_stats,
|
||||
),
|
||||
|
@ -16,7 +16,7 @@ def kickoff_verification(
|
||||
state: ExpandedRetrievalState,
|
||||
config: RunnableConfig,
|
||||
) -> Command[Literal["verify_documents"]]:
|
||||
documents = state.retrieved_documents
|
||||
retrieved_documents = state.retrieved_documents
|
||||
verification_question = state.question
|
||||
|
||||
sub_question_id = state.sub_question_id
|
||||
@ -26,13 +26,13 @@ def kickoff_verification(
|
||||
Send(
|
||||
node="verify_documents",
|
||||
arg=DocVerificationInput(
|
||||
doc_to_verify=doc,
|
||||
retrieved_document_to_verify=document,
|
||||
question=verification_question,
|
||||
base_search=False,
|
||||
sub_question_id=sub_question_id,
|
||||
log_messages=[],
|
||||
),
|
||||
)
|
||||
for doc in documents
|
||||
for document in retrieved_documents
|
||||
],
|
||||
)
|
||||
|
@ -31,8 +31,8 @@ def verify_documents(
|
||||
"""
|
||||
|
||||
question = state.question
|
||||
doc_to_verify = state.doc_to_verify
|
||||
document_content = doc_to_verify.combined_content
|
||||
retrieved_document_to_verify = state.retrieved_document_to_verify
|
||||
document_content = retrieved_document_to_verify.combined_content
|
||||
|
||||
agent_a_config = cast(AgentSearchConfig, config["metadata"]["config"])
|
||||
fast_llm = agent_a_config.fast_llm
|
||||
@ -53,7 +53,7 @@ def verify_documents(
|
||||
|
||||
verified_documents = []
|
||||
if isinstance(response.content, str) and "yes" in response.content.lower():
|
||||
verified_documents.append(doc_to_verify)
|
||||
verified_documents.append(retrieved_document_to_verify)
|
||||
|
||||
return DocVerificationUpdate(
|
||||
verified_documents=verified_documents,
|
||||
|
@ -81,7 +81,7 @@ class ExpandedRetrievalState(
|
||||
|
||||
|
||||
class DocVerificationInput(ExpandedRetrievalInput):
|
||||
doc_to_verify: InferenceSection
|
||||
retrieved_document_to_verify: InferenceSection
|
||||
|
||||
|
||||
class RetrievalInput(ExpandedRetrievalInput):
|
||||
|
@ -105,9 +105,9 @@ class QuestionAnswerResults(BaseModel):
|
||||
answer: str
|
||||
verified_high_quality: bool
|
||||
expanded_retrieval_results: list[QueryResult]
|
||||
documents: list[InferenceSection]
|
||||
verified_reranked_documents: list[InferenceSection]
|
||||
context_documents: list[InferenceSection]
|
||||
cited_docs: list[InferenceSection]
|
||||
cited_documents: list[InferenceSection]
|
||||
sub_question_retrieval_stats: AgentChunkStats
|
||||
|
||||
|
||||
|
@ -1018,7 +1018,7 @@ def log_agent_sub_question_results(
|
||||
sub_question = sub_question_answer_result.question
|
||||
sub_answer = sub_question_answer_result.answer
|
||||
sub_document_results = _create_citation_format_list(
|
||||
sub_question_answer_result.documents
|
||||
sub_question_answer_result.verified_reranked_documents
|
||||
)
|
||||
|
||||
sub_question_object = AgentSubQuestion(
|
||||
|
Loading…
x
Reference in New Issue
Block a user