mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-10-10 13:15:18 +02:00
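Rename `documents` / `reranked_documents` to `verified_reranked_documents` (also `cited_docs` to `cited_documents` and `doc_to_verify` to `retrieved_document_to_verify`)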
This commit is contained in:
committed by
Evan Lohn
parent
d53dd1e356
commit
732861a940
@@ -20,12 +20,12 @@ def format_initial_sub_answers(
|
|||||||
logger.info(f"--------{now_start}--------INGEST ANSWERS---")
|
logger.info(f"--------{now_start}--------INGEST ANSWERS---")
|
||||||
documents = []
|
documents = []
|
||||||
context_documents = []
|
context_documents = []
|
||||||
cited_docs = []
|
cited_documents = []
|
||||||
answer_results = state.answer_results if hasattr(state, "answer_results") else []
|
answer_results = state.answer_results if hasattr(state, "answer_results") else []
|
||||||
for answer_result in answer_results:
|
for answer_result in answer_results:
|
||||||
documents.extend(answer_result.documents)
|
documents.extend(answer_result.verified_reranked_documents)
|
||||||
context_documents.extend(answer_result.context_documents)
|
context_documents.extend(answer_result.context_documents)
|
||||||
cited_docs.extend(answer_result.cited_docs)
|
cited_documents.extend(answer_result.cited_documents)
|
||||||
now_end = datetime.now()
|
now_end = datetime.now()
|
||||||
|
|
||||||
logger.debug(
|
logger.debug(
|
||||||
@@ -35,9 +35,9 @@ def format_initial_sub_answers(
|
|||||||
return DecompAnswersUpdate(
|
return DecompAnswersUpdate(
|
||||||
# Deduping is done by the documents operator for the main graph
|
# Deduping is done by the documents operator for the main graph
|
||||||
# so we might not need to dedup here
|
# so we might not need to dedup here
|
||||||
documents=dedup_inference_sections(documents, []),
|
verified_reranked_documents=dedup_inference_sections(documents, []),
|
||||||
context_documents=dedup_inference_sections(context_documents, []),
|
context_documents=dedup_inference_sections(context_documents, []),
|
||||||
cited_documents=dedup_inference_sections(cited_docs, []),
|
cited_documents=dedup_inference_sections(cited_documents, []),
|
||||||
sub_question_results=answer_results,
|
sub_question_results=answer_results,
|
||||||
log_messages=[
|
log_messages=[
|
||||||
f"{now_start} -- Main - Ingest initial processed sub questions, Time taken: {now_end - now_start}"
|
f"{now_start} -- Main - Ingest initial processed sub questions, Time taken: {now_end - now_start}"
|
||||||
|
@@ -18,9 +18,9 @@ def format_sub_answer(state: AnswerQuestionState) -> AnswerQuestionOutput:
|
|||||||
verified_high_quality=state.answer_quality,
|
verified_high_quality=state.answer_quality,
|
||||||
answer=state.answer,
|
answer=state.answer,
|
||||||
expanded_retrieval_results=state.expanded_retrieval_results,
|
expanded_retrieval_results=state.expanded_retrieval_results,
|
||||||
documents=state.documents,
|
verified_reranked_documents=state.verified_reranked_documents,
|
||||||
context_documents=state.context_documents,
|
context_documents=state.context_documents,
|
||||||
cited_docs=state.cited_docs,
|
cited_documents=state.cited_documents,
|
||||||
sub_question_retrieval_stats=state.sub_question_retrieval_stats,
|
sub_question_retrieval_stats=state.sub_question_retrieval_stats,
|
||||||
)
|
)
|
||||||
],
|
],
|
||||||
|
@@ -44,7 +44,7 @@ def generate_sub_answer(
|
|||||||
|
|
||||||
agent_search_config = cast(AgentSearchConfig, config["metadata"]["config"])
|
agent_search_config = cast(AgentSearchConfig, config["metadata"]["config"])
|
||||||
question = state.question
|
question = state.question
|
||||||
state.documents
|
state.verified_reranked_documents
|
||||||
level, question_nr = parse_question_id(state.question_id)
|
level, question_nr = parse_question_id(state.question_id)
|
||||||
context_docs = state.context_documents[:AGENT_MAX_ANSWER_CONTEXT_DOCS]
|
context_docs = state.context_documents[:AGENT_MAX_ANSWER_CONTEXT_DOCS]
|
||||||
persona_contextualized_prompt = get_persona_agent_prompt_expressions(
|
persona_contextualized_prompt = get_persona_agent_prompt_expressions(
|
||||||
@@ -107,7 +107,7 @@ def generate_sub_answer(
|
|||||||
)
|
)
|
||||||
|
|
||||||
answer_citation_ids = get_answer_citation_ids(answer_str)
|
answer_citation_ids = get_answer_citation_ids(answer_str)
|
||||||
cited_docs = [
|
cited_documents = [
|
||||||
context_docs[id] for id in answer_citation_ids if id < len(context_docs)
|
context_docs[id] for id in answer_citation_ids if id < len(context_docs)
|
||||||
]
|
]
|
||||||
|
|
||||||
@@ -121,7 +121,7 @@ def generate_sub_answer(
|
|||||||
|
|
||||||
return QAGenerationUpdate(
|
return QAGenerationUpdate(
|
||||||
answer=answer_str,
|
answer=answer_str,
|
||||||
cited_docs=cited_docs,
|
cited_documents=cited_documents,
|
||||||
log_messages=[
|
log_messages=[
|
||||||
get_langgraph_node_log_string(
|
get_langgraph_node_log_string(
|
||||||
graph_component="initial - generate individual sub answer",
|
graph_component="initial - generate individual sub answer",
|
||||||
|
@@ -18,7 +18,7 @@ def ingest_retrieved_documents(
|
|||||||
|
|
||||||
return RetrievalIngestionUpdate(
|
return RetrievalIngestionUpdate(
|
||||||
expanded_retrieval_results=state.expanded_retrieval_result.expanded_queries_results,
|
expanded_retrieval_results=state.expanded_retrieval_result.expanded_queries_results,
|
||||||
documents=state.expanded_retrieval_result.reranked_documents,
|
verified_reranked_documents=state.expanded_retrieval_result.verified_reranked_documents,
|
||||||
context_documents=state.expanded_retrieval_result.context_documents,
|
context_documents=state.expanded_retrieval_result.context_documents,
|
||||||
sub_question_retrieval_stats=sub_question_retrieval_stats,
|
sub_question_retrieval_stats=sub_question_retrieval_stats,
|
||||||
)
|
)
|
||||||
|
@@ -25,13 +25,15 @@ class QACheckUpdate(LoggerUpdate, BaseModel):
|
|||||||
class QAGenerationUpdate(LoggerUpdate, BaseModel):
|
class QAGenerationUpdate(LoggerUpdate, BaseModel):
|
||||||
answer: str = ""
|
answer: str = ""
|
||||||
log_messages: list[str] = []
|
log_messages: list[str] = []
|
||||||
cited_docs: Annotated[list[InferenceSection], dedup_inference_sections] = []
|
cited_documents: Annotated[list[InferenceSection], dedup_inference_sections] = []
|
||||||
# answer_stat: AnswerStats
|
# answer_stat: AnswerStats
|
||||||
|
|
||||||
|
|
||||||
class RetrievalIngestionUpdate(LoggerUpdate, BaseModel):
|
class RetrievalIngestionUpdate(LoggerUpdate, BaseModel):
|
||||||
expanded_retrieval_results: list[QueryResult] = []
|
expanded_retrieval_results: list[QueryResult] = []
|
||||||
documents: Annotated[list[InferenceSection], dedup_inference_sections] = []
|
verified_reranked_documents: Annotated[
|
||||||
|
list[InferenceSection], dedup_inference_sections
|
||||||
|
] = []
|
||||||
context_documents: Annotated[list[InferenceSection], dedup_inference_sections] = []
|
context_documents: Annotated[list[InferenceSection], dedup_inference_sections] = []
|
||||||
sub_question_retrieval_stats: AgentChunkStats = AgentChunkStats()
|
sub_question_retrieval_stats: AgentChunkStats = AgentChunkStats()
|
||||||
|
|
||||||
|
@@ -67,13 +67,13 @@ def generate_initial_answer(
|
|||||||
question = agent_a_config.search_request.query
|
question = agent_a_config.search_request.query
|
||||||
prompt_enrichment_components = get_prompt_enrichment_components(agent_a_config)
|
prompt_enrichment_components = get_prompt_enrichment_components(agent_a_config)
|
||||||
|
|
||||||
sub_questions_cited_docs = state.cited_documents
|
sub_questions_cited_documents = state.cited_documents
|
||||||
all_original_question_documents = state.all_original_question_documents
|
all_original_question_documents = state.all_original_question_documents
|
||||||
|
|
||||||
consolidated_context_docs: list[InferenceSection] = sub_questions_cited_docs
|
consolidated_context_docs: list[InferenceSection] = sub_questions_cited_documents
|
||||||
counter = 0
|
counter = 0
|
||||||
for original_doc_number, original_doc in enumerate(all_original_question_documents):
|
for original_doc_number, original_doc in enumerate(all_original_question_documents):
|
||||||
if original_doc_number not in sub_questions_cited_docs:
|
if original_doc_number not in sub_questions_cited_documents:
|
||||||
if (
|
if (
|
||||||
counter <= AGENT_MIN_ORIG_QUESTION_DOCS
|
counter <= AGENT_MIN_ORIG_QUESTION_DOCS
|
||||||
or len(consolidated_context_docs) < AGENT_MAX_ANSWER_CONTEXT_DOCS
|
or len(consolidated_context_docs) < AGENT_MAX_ANSWER_CONTEXT_DOCS
|
||||||
|
@@ -21,19 +21,19 @@ def format_initial_sub_answers(
|
|||||||
|
|
||||||
documents = []
|
documents = []
|
||||||
context_documents = []
|
context_documents = []
|
||||||
cited_docs = []
|
cited_documents = []
|
||||||
answer_results = state.answer_results if hasattr(state, "answer_results") else []
|
answer_results = state.answer_results if hasattr(state, "answer_results") else []
|
||||||
for answer_result in answer_results:
|
for answer_result in answer_results:
|
||||||
documents.extend(answer_result.documents)
|
documents.extend(answer_result.verified_reranked_documents)
|
||||||
context_documents.extend(answer_result.context_documents)
|
context_documents.extend(answer_result.context_documents)
|
||||||
cited_docs.extend(answer_result.cited_docs)
|
cited_documents.extend(answer_result.cited_documents)
|
||||||
|
|
||||||
return DecompAnswersUpdate(
|
return DecompAnswersUpdate(
|
||||||
# Deduping is done by the documents operator for the main graph
|
# Deduping is done by the documents operator for the main graph
|
||||||
# so we might not need to dedup here
|
# so we might not need to dedup here
|
||||||
documents=dedup_inference_sections(documents, []),
|
verified_reranked_documents=dedup_inference_sections(documents, []),
|
||||||
context_documents=dedup_inference_sections(context_documents, []),
|
context_documents=dedup_inference_sections(context_documents, []),
|
||||||
cited_documents=dedup_inference_sections(cited_docs, []),
|
cited_documents=dedup_inference_sections(cited_documents, []),
|
||||||
sub_question_results=answer_results,
|
sub_question_results=answer_results,
|
||||||
log_messages=[
|
log_messages=[
|
||||||
get_langgraph_node_log_string(
|
get_langgraph_node_log_string(
|
||||||
|
@@ -1,20 +1,8 @@
|
|||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
|
||||||
from onyx.agents.agent_search.shared_graph_utils.models import AgentChunkStats
|
|
||||||
from onyx.agents.agent_search.shared_graph_utils.models import QueryResult
|
|
||||||
from onyx.context.search.models import InferenceSection
|
|
||||||
|
|
||||||
### Models ###
|
### Models ###
|
||||||
|
|
||||||
|
|
||||||
class AnswerRetrievalStats(BaseModel):
|
class AnswerRetrievalStats(BaseModel):
|
||||||
answer_retrieval_stats: dict[str, float | int]
|
answer_retrieval_stats: dict[str, float | int]
|
||||||
|
|
||||||
|
|
||||||
class QuestionAnswerResults(BaseModel):
|
|
||||||
question: str
|
|
||||||
answer: str
|
|
||||||
quality: str
|
|
||||||
expanded_retrieval_results: list[QueryResult]
|
|
||||||
documents: list[InferenceSection]
|
|
||||||
sub_question_retrieval_stats: list[AgentChunkStats]
|
|
||||||
|
@@ -69,16 +69,16 @@ def generate_refined_answer(
|
|||||||
prompt_enrichment_components.persona_prompts.contextualized_prompt
|
prompt_enrichment_components.persona_prompts.contextualized_prompt
|
||||||
)
|
)
|
||||||
|
|
||||||
initial_documents = state.documents
|
initial_documents = state.verified_reranked_documents
|
||||||
refined_documents = state.refined_documents
|
refined_documents = state.refined_documents
|
||||||
sub_questions_cited_docs = state.cited_documents
|
sub_questions_cited_documents = state.cited_documents
|
||||||
all_original_question_documents = state.all_original_question_documents
|
all_original_question_documents = state.all_original_question_documents
|
||||||
|
|
||||||
consolidated_context_docs: list[InferenceSection] = sub_questions_cited_docs
|
consolidated_context_docs: list[InferenceSection] = sub_questions_cited_documents
|
||||||
|
|
||||||
counter = 0
|
counter = 0
|
||||||
for original_doc_number, original_doc in enumerate(all_original_question_documents):
|
for original_doc_number, original_doc in enumerate(all_original_question_documents):
|
||||||
if original_doc_number not in sub_questions_cited_docs:
|
if original_doc_number not in sub_questions_cited_documents:
|
||||||
if (
|
if (
|
||||||
counter <= AGENT_MIN_ORIG_QUESTION_DOCS
|
counter <= AGENT_MIN_ORIG_QUESTION_DOCS
|
||||||
or len(consolidated_context_docs)
|
or len(consolidated_context_docs)
|
||||||
|
@@ -22,12 +22,12 @@ def ingest_refined_answers(
|
|||||||
documents = []
|
documents = []
|
||||||
answer_results = state.answer_results if hasattr(state, "answer_results") else []
|
answer_results = state.answer_results if hasattr(state, "answer_results") else []
|
||||||
for answer_result in answer_results:
|
for answer_result in answer_results:
|
||||||
documents.extend(answer_result.documents)
|
documents.extend(answer_result.verified_reranked_documents)
|
||||||
|
|
||||||
return DecompAnswersUpdate(
|
return DecompAnswersUpdate(
|
||||||
# Deduping is done by the documents operator for the main graph
|
# Deduping is done by the documents operator for the main graph
|
||||||
# so we might not need to dedup here
|
# so we might not need to dedup here
|
||||||
documents=dedup_inference_sections(documents, []),
|
verified_reranked_documents=dedup_inference_sections(documents, []),
|
||||||
sub_question_results=answer_results,
|
sub_question_results=answer_results,
|
||||||
log_messages=[
|
log_messages=[
|
||||||
get_langgraph_node_log_string(
|
get_langgraph_node_log_string(
|
||||||
|
@@ -102,7 +102,9 @@ class RequireRefinedAnswerUpdate(LoggerUpdate):
|
|||||||
|
|
||||||
|
|
||||||
class DecompAnswersUpdate(LoggerUpdate):
|
class DecompAnswersUpdate(LoggerUpdate):
|
||||||
documents: Annotated[list[InferenceSection], dedup_inference_sections] = []
|
verified_reranked_documents: Annotated[
|
||||||
|
list[InferenceSection], dedup_inference_sections
|
||||||
|
] = []
|
||||||
context_documents: Annotated[list[InferenceSection], dedup_inference_sections] = []
|
context_documents: Annotated[list[InferenceSection], dedup_inference_sections] = []
|
||||||
cited_documents: Annotated[
|
cited_documents: Annotated[
|
||||||
list[InferenceSection], dedup_inference_sections
|
list[InferenceSection], dedup_inference_sections
|
||||||
|
@@ -1,18 +1,8 @@
|
|||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
|
||||||
from onyx.agents.agent_search.shared_graph_utils.models import AgentChunkStats
|
|
||||||
from onyx.context.search.models import InferenceSection
|
|
||||||
|
|
||||||
### Models ###
|
### Models ###
|
||||||
|
|
||||||
|
|
||||||
class AnswerRetrievalStats(BaseModel):
|
class AnswerRetrievalStats(BaseModel):
|
||||||
answer_retrieval_stats: dict[str, float | int]
|
answer_retrieval_stats: dict[str, float | int]
|
||||||
|
|
||||||
|
|
||||||
class QuestionAnswerResults(BaseModel):
|
|
||||||
question: str
|
|
||||||
answer: str
|
|
||||||
quality: str
|
|
||||||
documents: list[InferenceSection]
|
|
||||||
sub_question_retrieval_stats: AgentChunkStats
|
|
||||||
|
@@ -7,6 +7,6 @@ from onyx.context.search.models import InferenceSection
|
|||||||
|
|
||||||
class ExpandedRetrievalResult(BaseModel):
|
class ExpandedRetrievalResult(BaseModel):
|
||||||
expanded_queries_results: list[QueryResult] = []
|
expanded_queries_results: list[QueryResult] = []
|
||||||
reranked_documents: list[InferenceSection] = []
|
verified_reranked_documents: list[InferenceSection] = []
|
||||||
context_documents: list[InferenceSection] = []
|
context_documents: list[InferenceSection] = []
|
||||||
sub_question_retrieval_stats: AgentChunkStats = AgentChunkStats()
|
sub_question_retrieval_stats: AgentChunkStats = AgentChunkStats()
|
||||||
|
@@ -79,7 +79,7 @@ def format_results(
|
|||||||
return ExpandedRetrievalUpdate(
|
return ExpandedRetrievalUpdate(
|
||||||
expanded_retrieval_result=ExpandedRetrievalResult(
|
expanded_retrieval_result=ExpandedRetrievalResult(
|
||||||
expanded_queries_results=state.expanded_retrieval_results,
|
expanded_queries_results=state.expanded_retrieval_results,
|
||||||
reranked_documents=reranked_documents,
|
verified_reranked_documents=reranked_documents,
|
||||||
context_documents=state.reranked_documents,
|
context_documents=state.reranked_documents,
|
||||||
sub_question_retrieval_stats=sub_question_retrieval_stats,
|
sub_question_retrieval_stats=sub_question_retrieval_stats,
|
||||||
),
|
),
|
||||||
|
@@ -16,7 +16,7 @@ def kickoff_verification(
|
|||||||
state: ExpandedRetrievalState,
|
state: ExpandedRetrievalState,
|
||||||
config: RunnableConfig,
|
config: RunnableConfig,
|
||||||
) -> Command[Literal["verify_documents"]]:
|
) -> Command[Literal["verify_documents"]]:
|
||||||
documents = state.retrieved_documents
|
retrieved_documents = state.retrieved_documents
|
||||||
verification_question = state.question
|
verification_question = state.question
|
||||||
|
|
||||||
sub_question_id = state.sub_question_id
|
sub_question_id = state.sub_question_id
|
||||||
@@ -26,13 +26,13 @@ def kickoff_verification(
|
|||||||
Send(
|
Send(
|
||||||
node="verify_documents",
|
node="verify_documents",
|
||||||
arg=DocVerificationInput(
|
arg=DocVerificationInput(
|
||||||
doc_to_verify=doc,
|
retrieved_document_to_verify=document,
|
||||||
question=verification_question,
|
question=verification_question,
|
||||||
base_search=False,
|
base_search=False,
|
||||||
sub_question_id=sub_question_id,
|
sub_question_id=sub_question_id,
|
||||||
log_messages=[],
|
log_messages=[],
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
for doc in documents
|
for document in retrieved_documents
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
@@ -31,8 +31,8 @@ def verify_documents(
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
question = state.question
|
question = state.question
|
||||||
doc_to_verify = state.doc_to_verify
|
retrieved_document_to_verify = state.retrieved_document_to_verify
|
||||||
document_content = doc_to_verify.combined_content
|
document_content = retrieved_document_to_verify.combined_content
|
||||||
|
|
||||||
agent_a_config = cast(AgentSearchConfig, config["metadata"]["config"])
|
agent_a_config = cast(AgentSearchConfig, config["metadata"]["config"])
|
||||||
fast_llm = agent_a_config.fast_llm
|
fast_llm = agent_a_config.fast_llm
|
||||||
@@ -53,7 +53,7 @@ def verify_documents(
|
|||||||
|
|
||||||
verified_documents = []
|
verified_documents = []
|
||||||
if isinstance(response.content, str) and "yes" in response.content.lower():
|
if isinstance(response.content, str) and "yes" in response.content.lower():
|
||||||
verified_documents.append(doc_to_verify)
|
verified_documents.append(retrieved_document_to_verify)
|
||||||
|
|
||||||
return DocVerificationUpdate(
|
return DocVerificationUpdate(
|
||||||
verified_documents=verified_documents,
|
verified_documents=verified_documents,
|
||||||
|
@@ -81,7 +81,7 @@ class ExpandedRetrievalState(
|
|||||||
|
|
||||||
|
|
||||||
class DocVerificationInput(ExpandedRetrievalInput):
|
class DocVerificationInput(ExpandedRetrievalInput):
|
||||||
doc_to_verify: InferenceSection
|
retrieved_document_to_verify: InferenceSection
|
||||||
|
|
||||||
|
|
||||||
class RetrievalInput(ExpandedRetrievalInput):
|
class RetrievalInput(ExpandedRetrievalInput):
|
||||||
|
@@ -105,9 +105,9 @@ class QuestionAnswerResults(BaseModel):
|
|||||||
answer: str
|
answer: str
|
||||||
verified_high_quality: bool
|
verified_high_quality: bool
|
||||||
expanded_retrieval_results: list[QueryResult]
|
expanded_retrieval_results: list[QueryResult]
|
||||||
documents: list[InferenceSection]
|
verified_reranked_documents: list[InferenceSection]
|
||||||
context_documents: list[InferenceSection]
|
context_documents: list[InferenceSection]
|
||||||
cited_docs: list[InferenceSection]
|
cited_documents: list[InferenceSection]
|
||||||
sub_question_retrieval_stats: AgentChunkStats
|
sub_question_retrieval_stats: AgentChunkStats
|
||||||
|
|
||||||
|
|
||||||
|
@@ -1018,7 +1018,7 @@ def log_agent_sub_question_results(
|
|||||||
sub_question = sub_question_answer_result.question
|
sub_question = sub_question_answer_result.question
|
||||||
sub_answer = sub_question_answer_result.answer
|
sub_answer = sub_question_answer_result.answer
|
||||||
sub_document_results = _create_citation_format_list(
|
sub_document_results = _create_citation_format_list(
|
||||||
sub_question_answer_result.documents
|
sub_question_answer_result.verified_reranked_documents
|
||||||
)
|
)
|
||||||
|
|
||||||
sub_question_object = AgentSubQuestion(
|
sub_question_object = AgentSubQuestion(
|
||||||
|
Reference in New Issue
Block a user