Rename `documents` to `verified_reranked_documents`

This commit is contained in:
joachim-danswer 2025-01-31 12:16:17 -08:00 committed by Evan Lohn
parent d53dd1e356
commit 732861a940
19 changed files with 44 additions and 62 deletions

View File

@@ -20,12 +20,12 @@ def format_initial_sub_answers(
logger.info(f"--------{now_start}--------INGEST ANSWERS---")
documents = []
context_documents = []
cited_docs = []
cited_documents = []
answer_results = state.answer_results if hasattr(state, "answer_results") else []
for answer_result in answer_results:
documents.extend(answer_result.documents)
documents.extend(answer_result.verified_reranked_documents)
context_documents.extend(answer_result.context_documents)
cited_docs.extend(answer_result.cited_docs)
cited_documents.extend(answer_result.cited_documents)
now_end = datetime.now()
logger.debug(
@@ -35,9 +35,9 @@ def format_initial_sub_answers(
return DecompAnswersUpdate(
# Deduping is done by the documents operator for the main graph
# so we might not need to dedup here
documents=dedup_inference_sections(documents, []),
verified_reranked_documents=dedup_inference_sections(documents, []),
context_documents=dedup_inference_sections(context_documents, []),
cited_documents=dedup_inference_sections(cited_docs, []),
cited_documents=dedup_inference_sections(cited_documents, []),
sub_question_results=answer_results,
log_messages=[
f"{now_start} -- Main - Ingest initial processed sub questions, Time taken: {now_end - now_start}"

View File

@@ -18,9 +18,9 @@ def format_sub_answer(state: AnswerQuestionState) -> AnswerQuestionOutput:
verified_high_quality=state.answer_quality,
answer=state.answer,
expanded_retrieval_results=state.expanded_retrieval_results,
documents=state.documents,
verified_reranked_documents=state.verified_reranked_documents,
context_documents=state.context_documents,
cited_docs=state.cited_docs,
cited_documents=state.cited_documents,
sub_question_retrieval_stats=state.sub_question_retrieval_stats,
)
],

View File

@@ -44,7 +44,7 @@ def generate_sub_answer(
agent_search_config = cast(AgentSearchConfig, config["metadata"]["config"])
question = state.question
state.documents
state.verified_reranked_documents
level, question_nr = parse_question_id(state.question_id)
context_docs = state.context_documents[:AGENT_MAX_ANSWER_CONTEXT_DOCS]
persona_contextualized_prompt = get_persona_agent_prompt_expressions(
@@ -107,7 +107,7 @@ def generate_sub_answer(
)
answer_citation_ids = get_answer_citation_ids(answer_str)
cited_docs = [
cited_documents = [
context_docs[id] for id in answer_citation_ids if id < len(context_docs)
]
@@ -121,7 +121,7 @@ def generate_sub_answer(
return QAGenerationUpdate(
answer=answer_str,
cited_docs=cited_docs,
cited_documents=cited_documents,
log_messages=[
get_langgraph_node_log_string(
graph_component="initial - generate individual sub answer",

View File

@@ -18,7 +18,7 @@ def ingest_retrieved_documents(
return RetrievalIngestionUpdate(
expanded_retrieval_results=state.expanded_retrieval_result.expanded_queries_results,
documents=state.expanded_retrieval_result.reranked_documents,
verified_reranked_documents=state.expanded_retrieval_result.verified_reranked_documents,
context_documents=state.expanded_retrieval_result.context_documents,
sub_question_retrieval_stats=sub_question_retrieval_stats,
)

View File

@@ -25,13 +25,15 @@ class QACheckUpdate(LoggerUpdate, BaseModel):
class QAGenerationUpdate(LoggerUpdate, BaseModel):
answer: str = ""
log_messages: list[str] = []
cited_docs: Annotated[list[InferenceSection], dedup_inference_sections] = []
cited_documents: Annotated[list[InferenceSection], dedup_inference_sections] = []
# answer_stat: AnswerStats
class RetrievalIngestionUpdate(LoggerUpdate, BaseModel):
expanded_retrieval_results: list[QueryResult] = []
documents: Annotated[list[InferenceSection], dedup_inference_sections] = []
verified_reranked_documents: Annotated[
list[InferenceSection], dedup_inference_sections
] = []
context_documents: Annotated[list[InferenceSection], dedup_inference_sections] = []
sub_question_retrieval_stats: AgentChunkStats = AgentChunkStats()

View File

@@ -67,13 +67,13 @@ def generate_initial_answer(
question = agent_a_config.search_request.query
prompt_enrichment_components = get_prompt_enrichment_components(agent_a_config)
sub_questions_cited_docs = state.cited_documents
sub_questions_cited_documents = state.cited_documents
all_original_question_documents = state.all_original_question_documents
consolidated_context_docs: list[InferenceSection] = sub_questions_cited_docs
consolidated_context_docs: list[InferenceSection] = sub_questions_cited_documents
counter = 0
for original_doc_number, original_doc in enumerate(all_original_question_documents):
if original_doc_number not in sub_questions_cited_docs:
if original_doc_number not in sub_questions_cited_documents:
if (
counter <= AGENT_MIN_ORIG_QUESTION_DOCS
or len(consolidated_context_docs) < AGENT_MAX_ANSWER_CONTEXT_DOCS

View File

@@ -21,19 +21,19 @@ def format_initial_sub_answers(
documents = []
context_documents = []
cited_docs = []
cited_documents = []
answer_results = state.answer_results if hasattr(state, "answer_results") else []
for answer_result in answer_results:
documents.extend(answer_result.documents)
documents.extend(answer_result.verified_reranked_documents)
context_documents.extend(answer_result.context_documents)
cited_docs.extend(answer_result.cited_docs)
cited_documents.extend(answer_result.cited_documents)
return DecompAnswersUpdate(
# Deduping is done by the documents operator for the main graph
# so we might not need to dedup here
documents=dedup_inference_sections(documents, []),
verified_reranked_documents=dedup_inference_sections(documents, []),
context_documents=dedup_inference_sections(context_documents, []),
cited_documents=dedup_inference_sections(cited_docs, []),
cited_documents=dedup_inference_sections(cited_documents, []),
sub_question_results=answer_results,
log_messages=[
get_langgraph_node_log_string(

View File

@@ -1,20 +1,8 @@
from pydantic import BaseModel
from onyx.agents.agent_search.shared_graph_utils.models import AgentChunkStats
from onyx.agents.agent_search.shared_graph_utils.models import QueryResult
from onyx.context.search.models import InferenceSection
### Models ###
class AnswerRetrievalStats(BaseModel):
answer_retrieval_stats: dict[str, float | int]
class QuestionAnswerResults(BaseModel):
question: str
answer: str
quality: str
expanded_retrieval_results: list[QueryResult]
documents: list[InferenceSection]
sub_question_retrieval_stats: list[AgentChunkStats]

View File

@@ -69,16 +69,16 @@ def generate_refined_answer(
prompt_enrichment_components.persona_prompts.contextualized_prompt
)
initial_documents = state.documents
initial_documents = state.verified_reranked_documents
refined_documents = state.refined_documents
sub_questions_cited_docs = state.cited_documents
sub_questions_cited_documents = state.cited_documents
all_original_question_documents = state.all_original_question_documents
consolidated_context_docs: list[InferenceSection] = sub_questions_cited_docs
consolidated_context_docs: list[InferenceSection] = sub_questions_cited_documents
counter = 0
for original_doc_number, original_doc in enumerate(all_original_question_documents):
if original_doc_number not in sub_questions_cited_docs:
if original_doc_number not in sub_questions_cited_documents:
if (
counter <= AGENT_MIN_ORIG_QUESTION_DOCS
or len(consolidated_context_docs)

View File

@@ -22,12 +22,12 @@ def ingest_refined_answers(
documents = []
answer_results = state.answer_results if hasattr(state, "answer_results") else []
for answer_result in answer_results:
documents.extend(answer_result.documents)
documents.extend(answer_result.verified_reranked_documents)
return DecompAnswersUpdate(
# Deduping is done by the documents operator for the main graph
# so we might not need to dedup here
documents=dedup_inference_sections(documents, []),
verified_reranked_documents=dedup_inference_sections(documents, []),
sub_question_results=answer_results,
log_messages=[
get_langgraph_node_log_string(

View File

@@ -102,7 +102,9 @@ class RequireRefinedAnswerUpdate(LoggerUpdate):
class DecompAnswersUpdate(LoggerUpdate):
documents: Annotated[list[InferenceSection], dedup_inference_sections] = []
verified_reranked_documents: Annotated[
list[InferenceSection], dedup_inference_sections
] = []
context_documents: Annotated[list[InferenceSection], dedup_inference_sections] = []
cited_documents: Annotated[
list[InferenceSection], dedup_inference_sections

View File

@@ -1,18 +1,8 @@
from pydantic import BaseModel
from onyx.agents.agent_search.shared_graph_utils.models import AgentChunkStats
from onyx.context.search.models import InferenceSection
### Models ###
class AnswerRetrievalStats(BaseModel):
answer_retrieval_stats: dict[str, float | int]
class QuestionAnswerResults(BaseModel):
question: str
answer: str
quality: str
documents: list[InferenceSection]
sub_question_retrieval_stats: AgentChunkStats

View File

@@ -7,6 +7,6 @@ from onyx.context.search.models import InferenceSection
class ExpandedRetrievalResult(BaseModel):
expanded_queries_results: list[QueryResult] = []
reranked_documents: list[InferenceSection] = []
verified_reranked_documents: list[InferenceSection] = []
context_documents: list[InferenceSection] = []
sub_question_retrieval_stats: AgentChunkStats = AgentChunkStats()

View File

@@ -79,7 +79,7 @@ def format_results(
return ExpandedRetrievalUpdate(
expanded_retrieval_result=ExpandedRetrievalResult(
expanded_queries_results=state.expanded_retrieval_results,
reranked_documents=reranked_documents,
verified_reranked_documents=reranked_documents,
context_documents=state.reranked_documents,
sub_question_retrieval_stats=sub_question_retrieval_stats,
),

View File

@@ -16,7 +16,7 @@ def kickoff_verification(
state: ExpandedRetrievalState,
config: RunnableConfig,
) -> Command[Literal["verify_documents"]]:
documents = state.retrieved_documents
retrieved_documents = state.retrieved_documents
verification_question = state.question
sub_question_id = state.sub_question_id
@@ -26,13 +26,13 @@
Send(
node="verify_documents",
arg=DocVerificationInput(
doc_to_verify=doc,
retrieved_document_to_verify=document,
question=verification_question,
base_search=False,
sub_question_id=sub_question_id,
log_messages=[],
),
)
for doc in documents
for document in retrieved_documents
],
)

View File

@@ -31,8 +31,8 @@ def verify_documents(
"""
question = state.question
doc_to_verify = state.doc_to_verify
document_content = doc_to_verify.combined_content
retrieved_document_to_verify = state.retrieved_document_to_verify
document_content = retrieved_document_to_verify.combined_content
agent_a_config = cast(AgentSearchConfig, config["metadata"]["config"])
fast_llm = agent_a_config.fast_llm
@@ -53,7 +53,7 @@ def verify_documents(
verified_documents = []
if isinstance(response.content, str) and "yes" in response.content.lower():
verified_documents.append(doc_to_verify)
verified_documents.append(retrieved_document_to_verify)
return DocVerificationUpdate(
verified_documents=verified_documents,

View File

@@ -81,7 +81,7 @@ class ExpandedRetrievalState(
class DocVerificationInput(ExpandedRetrievalInput):
doc_to_verify: InferenceSection
retrieved_document_to_verify: InferenceSection
class RetrievalInput(ExpandedRetrievalInput):

View File

@@ -105,9 +105,9 @@ class QuestionAnswerResults(BaseModel):
answer: str
verified_high_quality: bool
expanded_retrieval_results: list[QueryResult]
documents: list[InferenceSection]
verified_reranked_documents: list[InferenceSection]
context_documents: list[InferenceSection]
cited_docs: list[InferenceSection]
cited_documents: list[InferenceSection]
sub_question_retrieval_stats: AgentChunkStats

View File

@@ -1018,7 +1018,7 @@ def log_agent_sub_question_results(
sub_question = sub_question_answer_result.question
sub_answer = sub_question_answer_result.answer
sub_document_results = _create_citation_format_list(
sub_question_answer_result.documents
sub_question_answer_result.verified_reranked_documents
)
sub_question_object = AgentSubQuestion(