Rename `documents` to `verified_reranked_documents`

This commit is contained in:
joachim-danswer 2025-01-31 12:16:17 -08:00 committed by Evan Lohn
parent d53dd1e356
commit 732861a940
19 changed files with 44 additions and 62 deletions

View File

@@ -20,12 +20,12 @@ def format_initial_sub_answers(
logger.info(f"--------{now_start}--------INGEST ANSWERS---")
documents = []
context_documents = []
cited_docs = []
cited_documents = []
answer_results = state.answer_results if hasattr(state, "answer_results") else []
for answer_result in answer_results:
documents.extend(answer_result.documents)
documents.extend(answer_result.verified_reranked_documents)
context_documents.extend(answer_result.context_documents)
cited_docs.extend(answer_result.cited_docs)
cited_documents.extend(answer_result.cited_documents)
now_end = datetime.now()
logger.debug(
@@ -35,9 +35,9 @@ def format_initial_sub_answers(
return DecompAnswersUpdate(
# Deduping is done by the documents operator for the main graph
# so we might not need to dedup here
documents=dedup_inference_sections(documents, []),
verified_reranked_documents=dedup_inference_sections(documents, []),
context_documents=dedup_inference_sections(context_documents, []),
cited_documents=dedup_inference_sections(cited_docs, []),
cited_documents=dedup_inference_sections(cited_documents, []),
sub_question_results=answer_results,
log_messages=[
f"{now_start} -- Main - Ingest initial processed sub questions, Time taken: {now_end - now_start}"

View File

@@ -18,9 +18,9 @@ def format_sub_answer(state: AnswerQuestionState) -> AnswerQuestionOutput:
verified_high_quality=state.answer_quality,
answer=state.answer,
expanded_retrieval_results=state.expanded_retrieval_results,
documents=state.documents,
verified_reranked_documents=state.verified_reranked_documents,
context_documents=state.context_documents,
cited_docs=state.cited_docs,
cited_documents=state.cited_documents,
sub_question_retrieval_stats=state.sub_question_retrieval_stats,
)
],

View File

@@ -44,7 +44,7 @@ def generate_sub_answer(
agent_search_config = cast(AgentSearchConfig, config["metadata"]["config"])
question = state.question
state.documents
state.verified_reranked_documents
level, question_nr = parse_question_id(state.question_id)
context_docs = state.context_documents[:AGENT_MAX_ANSWER_CONTEXT_DOCS]
persona_contextualized_prompt = get_persona_agent_prompt_expressions(
@@ -107,7 +107,7 @@ def generate_sub_answer(
)
answer_citation_ids = get_answer_citation_ids(answer_str)
cited_docs = [
cited_documents = [
context_docs[id] for id in answer_citation_ids if id < len(context_docs)
]
@@ -121,7 +121,7 @@ def generate_sub_answer(
return QAGenerationUpdate(
answer=answer_str,
cited_docs=cited_docs,
cited_documents=cited_documents,
log_messages=[
get_langgraph_node_log_string(
graph_component="initial - generate individual sub answer",

View File

@@ -18,7 +18,7 @@ def ingest_retrieved_documents(
return RetrievalIngestionUpdate(
expanded_retrieval_results=state.expanded_retrieval_result.expanded_queries_results,
documents=state.expanded_retrieval_result.reranked_documents,
verified_reranked_documents=state.expanded_retrieval_result.verified_reranked_documents,
context_documents=state.expanded_retrieval_result.context_documents,
sub_question_retrieval_stats=sub_question_retrieval_stats,
)

View File

@@ -25,13 +25,15 @@ class QACheckUpdate(LoggerUpdate, BaseModel):
class QAGenerationUpdate(LoggerUpdate, BaseModel):
answer: str = ""
log_messages: list[str] = []
cited_docs: Annotated[list[InferenceSection], dedup_inference_sections] = []
cited_documents: Annotated[list[InferenceSection], dedup_inference_sections] = []
# answer_stat: AnswerStats
class RetrievalIngestionUpdate(LoggerUpdate, BaseModel):
expanded_retrieval_results: list[QueryResult] = []
documents: Annotated[list[InferenceSection], dedup_inference_sections] = []
verified_reranked_documents: Annotated[
list[InferenceSection], dedup_inference_sections
] = []
context_documents: Annotated[list[InferenceSection], dedup_inference_sections] = []
sub_question_retrieval_stats: AgentChunkStats = AgentChunkStats()

View File

@@ -67,13 +67,13 @@ def generate_initial_answer(
question = agent_a_config.search_request.query
prompt_enrichment_components = get_prompt_enrichment_components(agent_a_config)
sub_questions_cited_docs = state.cited_documents
sub_questions_cited_documents = state.cited_documents
all_original_question_documents = state.all_original_question_documents
consolidated_context_docs: list[InferenceSection] = sub_questions_cited_docs
consolidated_context_docs: list[InferenceSection] = sub_questions_cited_documents
counter = 0
for original_doc_number, original_doc in enumerate(all_original_question_documents):
if original_doc_number not in sub_questions_cited_docs:
if original_doc_number not in sub_questions_cited_documents:
if (
counter <= AGENT_MIN_ORIG_QUESTION_DOCS
or len(consolidated_context_docs) < AGENT_MAX_ANSWER_CONTEXT_DOCS

View File

@@ -21,19 +21,19 @@ def format_initial_sub_answers(
documents = []
context_documents = []
cited_docs = []
cited_documents = []
answer_results = state.answer_results if hasattr(state, "answer_results") else []
for answer_result in answer_results:
documents.extend(answer_result.documents)
documents.extend(answer_result.verified_reranked_documents)
context_documents.extend(answer_result.context_documents)
cited_docs.extend(answer_result.cited_docs)
cited_documents.extend(answer_result.cited_documents)
return DecompAnswersUpdate(
# Deduping is done by the documents operator for the main graph
# so we might not need to dedup here
documents=dedup_inference_sections(documents, []),
verified_reranked_documents=dedup_inference_sections(documents, []),
context_documents=dedup_inference_sections(context_documents, []),
cited_documents=dedup_inference_sections(cited_docs, []),
cited_documents=dedup_inference_sections(cited_documents, []),
sub_question_results=answer_results,
log_messages=[
get_langgraph_node_log_string(

View File

@@ -1,20 +1,8 @@
from pydantic import BaseModel
from onyx.agents.agent_search.shared_graph_utils.models import AgentChunkStats
from onyx.agents.agent_search.shared_graph_utils.models import QueryResult
from onyx.context.search.models import InferenceSection
### Models ###
class AnswerRetrievalStats(BaseModel):
answer_retrieval_stats: dict[str, float | int]
class QuestionAnswerResults(BaseModel):
question: str
answer: str
quality: str
expanded_retrieval_results: list[QueryResult]
documents: list[InferenceSection]
sub_question_retrieval_stats: list[AgentChunkStats]

View File

@@ -69,16 +69,16 @@ def generate_refined_answer(
prompt_enrichment_components.persona_prompts.contextualized_prompt
)
initial_documents = state.documents
initial_documents = state.verified_reranked_documents
refined_documents = state.refined_documents
sub_questions_cited_docs = state.cited_documents
sub_questions_cited_documents = state.cited_documents
all_original_question_documents = state.all_original_question_documents
consolidated_context_docs: list[InferenceSection] = sub_questions_cited_docs
consolidated_context_docs: list[InferenceSection] = sub_questions_cited_documents
counter = 0
for original_doc_number, original_doc in enumerate(all_original_question_documents):
if original_doc_number not in sub_questions_cited_docs:
if original_doc_number not in sub_questions_cited_documents:
if (
counter <= AGENT_MIN_ORIG_QUESTION_DOCS
or len(consolidated_context_docs)

View File

@@ -22,12 +22,12 @@ def ingest_refined_answers(
documents = []
answer_results = state.answer_results if hasattr(state, "answer_results") else []
for answer_result in answer_results:
documents.extend(answer_result.documents)
documents.extend(answer_result.verified_reranked_documents)
return DecompAnswersUpdate(
# Deduping is done by the documents operator for the main graph
# so we might not need to dedup here
documents=dedup_inference_sections(documents, []),
verified_reranked_documents=dedup_inference_sections(documents, []),
sub_question_results=answer_results,
log_messages=[
get_langgraph_node_log_string(

View File

@@ -102,7 +102,9 @@ class RequireRefinedAnswerUpdate(LoggerUpdate):
class DecompAnswersUpdate(LoggerUpdate):
documents: Annotated[list[InferenceSection], dedup_inference_sections] = []
verified_reranked_documents: Annotated[
list[InferenceSection], dedup_inference_sections
] = []
context_documents: Annotated[list[InferenceSection], dedup_inference_sections] = []
cited_documents: Annotated[
list[InferenceSection], dedup_inference_sections

View File

@@ -1,18 +1,8 @@
from pydantic import BaseModel
from onyx.agents.agent_search.shared_graph_utils.models import AgentChunkStats
from onyx.context.search.models import InferenceSection
### Models ###
class AnswerRetrievalStats(BaseModel):
answer_retrieval_stats: dict[str, float | int]
class QuestionAnswerResults(BaseModel):
question: str
answer: str
quality: str
documents: list[InferenceSection]
sub_question_retrieval_stats: AgentChunkStats

View File

@@ -7,6 +7,6 @@ from onyx.context.search.models import InferenceSection
class ExpandedRetrievalResult(BaseModel):
expanded_queries_results: list[QueryResult] = []
reranked_documents: list[InferenceSection] = []
verified_reranked_documents: list[InferenceSection] = []
context_documents: list[InferenceSection] = []
sub_question_retrieval_stats: AgentChunkStats = AgentChunkStats()

View File

@@ -79,7 +79,7 @@ def format_results(
return ExpandedRetrievalUpdate(
expanded_retrieval_result=ExpandedRetrievalResult(
expanded_queries_results=state.expanded_retrieval_results,
reranked_documents=reranked_documents,
verified_reranked_documents=reranked_documents,
context_documents=state.reranked_documents,
sub_question_retrieval_stats=sub_question_retrieval_stats,
),

View File

@@ -16,7 +16,7 @@ def kickoff_verification(
state: ExpandedRetrievalState,
config: RunnableConfig,
) -> Command[Literal["verify_documents"]]:
documents = state.retrieved_documents
retrieved_documents = state.retrieved_documents
verification_question = state.question
sub_question_id = state.sub_question_id
@@ -26,13 +26,13 @@
Send(
node="verify_documents",
arg=DocVerificationInput(
doc_to_verify=doc,
retrieved_document_to_verify=document,
question=verification_question,
base_search=False,
sub_question_id=sub_question_id,
log_messages=[],
),
)
for doc in documents
for document in retrieved_documents
],
)

View File

@@ -31,8 +31,8 @@ def verify_documents(
"""
question = state.question
doc_to_verify = state.doc_to_verify
document_content = doc_to_verify.combined_content
retrieved_document_to_verify = state.retrieved_document_to_verify
document_content = retrieved_document_to_verify.combined_content
agent_a_config = cast(AgentSearchConfig, config["metadata"]["config"])
fast_llm = agent_a_config.fast_llm
@@ -53,7 +53,7 @@ def verify_documents(
verified_documents = []
if isinstance(response.content, str) and "yes" in response.content.lower():
verified_documents.append(doc_to_verify)
verified_documents.append(retrieved_document_to_verify)
return DocVerificationUpdate(
verified_documents=verified_documents,

View File

@@ -81,7 +81,7 @@ class ExpandedRetrievalState(
class DocVerificationInput(ExpandedRetrievalInput):
doc_to_verify: InferenceSection
retrieved_document_to_verify: InferenceSection
class RetrievalInput(ExpandedRetrievalInput):

View File

@@ -105,9 +105,9 @@ class QuestionAnswerResults(BaseModel):
answer: str
verified_high_quality: bool
expanded_retrieval_results: list[QueryResult]
documents: list[InferenceSection]
verified_reranked_documents: list[InferenceSection]
context_documents: list[InferenceSection]
cited_docs: list[InferenceSection]
cited_documents: list[InferenceSection]
sub_question_retrieval_stats: AgentChunkStats

View File

@@ -1018,7 +1018,7 @@ def log_agent_sub_question_results(
sub_question = sub_question_answer_result.question
sub_answer = sub_question_answer_result.answer
sub_document_results = _create_citation_format_list(
sub_question_answer_result.documents
sub_question_answer_result.verified_reranked_documents
)
sub_question_object = AgentSubQuestion(