taking out Extraction for now

This commit is contained in:
joachim-danswer
2025-01-24 15:37:48 -08:00
committed by Evan Lohn
parent fc60fd0322
commit aa8cb44a33
9 changed files with 135 additions and 38 deletions

View File

@@ -27,7 +27,7 @@ def answer_check(state: AnswerQuestionState, config: RunnableConfig) -> QACheckU
return QACheckUpdate(
answer_quality=SUB_CHECK_NO,
log_messages=[
f"{now_end} -- Answer check SQ-{level}-{question_num} - unknown answer, Time taken: {now_end - now_start}"
f"{now_start} -- Answer check SQ-{level}-{question_num} - unknown answer, Time taken: {now_end - now_start}"
],
)
msg = [
@@ -53,7 +53,7 @@ def answer_check(state: AnswerQuestionState, config: RunnableConfig) -> QACheckU
return QACheckUpdate(
answer_quality=quality_str,
log_messages=[
f"""{now_end} -- Answer check SQ-{level}-{question_num} - Answer quality: {quality_str},
f"""{now_start} -- Answer check SQ-{level}-{question_num} - Answer quality: {quality_str},
Time taken: {now_end - now_start}"""
],
)

View File

@@ -32,9 +32,14 @@ from onyx.agents.agent_search.deep_search_a.main.nodes.agent_search_start import
from onyx.agents.agent_search.deep_search_a.main.nodes.answer_comparison import (
answer_comparison,
)
from onyx.agents.agent_search.deep_search_a.main.nodes.entity_term_extraction_llm import (
entity_term_extraction_llm,
)
from onyx.agents.agent_search.deep_search_a.main.nodes.direct_llm_handling import (
direct_llm_handling,
)
from onyx.agents.agent_search.deep_search_a.main.nodes.generate_initial_answer import (
generate_initial_answer,
)
@@ -197,10 +202,10 @@ def main_graph_builder(test_mode: bool = False) -> StateGraph:
action=initial_answer_quality_check,
)
graph.add_node(
node="entity_term_extraction_llm",
action=entity_term_extraction_llm,
)
# graph.add_node(
# node="entity_term_extraction_llm",
# action=entity_term_extraction_llm,
# )
graph.add_node(
node="refined_answer_decision",
action=refined_answer_decision,
@@ -259,10 +264,10 @@ def main_graph_builder(test_mode: bool = False) -> StateGraph:
end_key="base_raw_search_subgraph",
)
graph.add_edge(
start_key="agent_search_start",
end_key="entity_term_extraction_llm",
)
# graph.add_edge(
# start_key="agent_search_start",
# end_key="entity_term_extraction_llm",
# )
graph.add_edge(
start_key="agent_search_start",
@@ -319,8 +324,12 @@ def main_graph_builder(test_mode: bool = False) -> StateGraph:
end_key="initial_answer_quality_check",
)
# graph.add_edge(
# start_key=["initial_answer_quality_check", "entity_term_extraction_llm"],
# end_key="refined_answer_decision",
# )
graph.add_edge(
start_key=["initial_answer_quality_check", "entity_term_extraction_llm"],
start_key="initial_answer_quality_check",
end_key="refined_answer_decision",
)

View File

@@ -60,6 +60,6 @@ def agent_search_start(
return ExploratorySearchUpdate(
exploratory_search_results=exploratory_search_results,
log_messages=[
f"--------{now_end}--{now_end - now_start}--------EXPLORATORY SEARCH END---"
f"{now_start} -- Main - Exploratory Search, Time taken: {now_end - now_start}"
],
)

View File

@@ -4,7 +4,6 @@ from datetime import datetime
from typing import cast
from langchain_core.messages import HumanMessage
from langchain_core.messages import merge_message_runs
from langchain_core.runnables import RunnableConfig
from onyx.agents.agent_search.deep_search_a.main.operations import logger
@@ -32,12 +31,15 @@ def entity_term_extraction_llm(
now_start = datetime.now()
logger.debug(f"--------{now_start}--------GENERATE ENTITIES & TERMS---")
logger.debug(
f"--------{now_start}--------GAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
)
agent_a_config = cast(AgentSearchConfig, config["metadata"]["config"])
if not agent_a_config.allow_refinement:
now_end = datetime.now()
return EntityTermExtractionUpdate(
entity_retlation_term_extractions=EntityRelationshipTermExtraction(
entity_relation_term_extractions=EntityRelationshipTermExtraction(
entities=[],
relationships=[],
terms=[],
@@ -64,14 +66,11 @@ def entity_term_extraction_llm(
]
fast_llm = agent_a_config.fast_llm
# Grader
llm_response_list = list(
fast_llm.stream(
prompt=msg,
)
llm_response = fast_llm.invoke(
prompt=msg,
)
llm_response = merge_message_runs(llm_response_list, chunk_separator="")[0].content
cleaned_response = re.sub(r"```json\n|\n```", "", llm_response)
cleaned_response = re.sub(r"```json\n|\n```", "", str(llm_response.content))
parsed_response = json.loads(cleaned_response)
entities = []
@@ -117,14 +116,17 @@ def entity_term_extraction_llm(
logger.debug(
f"--------{now_end}--{now_end - now_start}--------ENTITY TERM EXTRACTION END---"
)
logger.debug(
f"--------{now_end}--------GBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB"
)
return EntityTermExtractionUpdate(
entity_retlation_term_extractions=EntityRelationshipTermExtraction(
entity_relation_term_extractions=EntityRelationshipTermExtraction(
entities=entities,
relationships=relationships,
terms=terms,
),
log_messages=[
f"{now_end} -- Main - ETR Extraction, Time taken: {now_end - now_start}"
f"{now_start} -- Main - ETR Extraction, Time taken: {now_end - now_start}"
],
)

View File

@@ -19,9 +19,7 @@ from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
)
from onyx.agents.agent_search.shared_graph_utils.prompts import DEEP_DECOMPOSE_PROMPT
from onyx.agents.agent_search.shared_graph_utils.utils import dispatch_separated
from onyx.agents.agent_search.shared_graph_utils.utils import (
format_entity_term_extraction,
)
from onyx.agents.agent_search.shared_graph_utils.utils import format_docs
from onyx.agents.agent_search.shared_graph_utils.utils import make_question_id
from onyx.tools.models import ToolCallKickoff
@@ -52,11 +50,13 @@ def refined_sub_question_creation(
base_answer = state.initial_answer
history = build_history_prompt(agent_a_config.prompt_builder)
# get the entity term extraction dict and properly format it
entity_retlation_term_extractions = state.entity_retlation_term_extractions
# entity_retlation_term_extractions = state.entity_relation_term_extractions
entity_term_extraction_str = format_entity_term_extraction(
entity_retlation_term_extractions
)
# entity_term_extraction_str = format_entity_term_extraction(
# entity_retlation_term_extractions
# )
docs_str = format_docs(state.all_original_question_documents[:10])
initial_question_answers = state.decomp_answer_results
@@ -73,7 +73,7 @@ def refined_sub_question_creation(
content=DEEP_DECOMPOSE_PROMPT.format(
question=question,
history=history,
entity_term_extraction_str=entity_term_extraction_str,
docs_str=docs_str,
base_answer=base_answer,
answered_sub_questions="\n - ".join(addressed_question_list),
failed_sub_questions="\n - ".join(failed_question_list),

View File

@@ -117,7 +117,7 @@ class ExpandedRetrievalUpdate(LoggerUpdate):
class EntityTermExtractionUpdate(LoggerUpdate):
entity_retlation_term_extractions: EntityRelationshipTermExtraction = (
entity_relation_term_extractions: EntityRelationshipTermExtraction = (
EntityRelationshipTermExtraction()
)

View File

@@ -52,7 +52,7 @@ class AgentSearchConfig:
db_session: Session | None = None
# Whether to perform initial search to inform decomposition
perform_initial_search_path_decision: bool = True
# perform_initial_search_path_decision: bool = True
# Whether to perform initial search to inform decomposition
perform_initial_search_decomposition: bool = True

View File

@@ -138,7 +138,7 @@ def run_graph(
input: BasicInput | MainInput_a,
) -> AnswerStream:
# TODO: add these to the environment
config.perform_initial_search_path_decision = False
# config.perform_initial_search_path_decision = False
config.perform_initial_search_decomposition = True
config.allow_refinement = True
@@ -212,7 +212,8 @@ if __name__ == "__main__":
# query="What are the guiding principles behind the development of cockroachDB",
# query="What are the temperatures in Munich, Hawaii, and New York?",
# query="When was Washington born?",
query="What is Onyx?",
# query="What is Onyx?",
query="What is the difference between astronomy and astrology?",
)
# Joachim custom persona
@@ -222,7 +223,7 @@ if __name__ == "__main__":
)
# search_request.persona = get_persona_by_id(1, None, db_session)
config.use_persistence = True
config.perform_initial_search_path_decision = False
# config.perform_initial_search_path_decision = False
config.perform_initial_search_decomposition = True
if GRAPH_NAME == "a":
input = MainInput_a(

View File

@@ -138,7 +138,7 @@ BASE_CHECK_PROMPT = """ \n
VERIFIER_PROMPT = """
You are supposed to judge whether a document text contains data or information that is potentially relevant
for a question. It does not have to be fully relevant, but check whether it has some information that
could help to address the question.
would help - possibly in conjunction with other documents - to address the question.
Here is a document text that you can take as a fact:
--
@@ -147,8 +147,7 @@ DOCUMENT INFORMATION:
--
Do you think that this document text is useful and relevant to answer the following question?
(Other documents may supply additional information, so do not worry if the provided information
is not enough to answer the question, but it needs to be relevant to the question.)
--
QUESTION:
{question}
@@ -295,6 +294,92 @@ DEEP_DECOMPOSE_PROMPT = """ \n
Your role is to generate 2-4 new sub-questions that would help to answer the initial question,
considering:
1) The initial question
2) The initial answer that was found to be unsatisfactory
3) The sub-questions that were answered
4) The sub-questions that were suggested but not answered
5) A sample of the TYPE of documents that may be in the database in order to inform
you what type of entities, relationships, and terms you may want to consider asking about.
(But do not build the questions strictly on these documents! They are only examples!
Take them as illustrations.)
The individual questions should be answerable by a good RAG system.
So a good idea would be to use the sub-questions to resolve ambiguities and/or to separate the
question for different entities that may be involved in the original question, but in a way that does
not duplicate questions that were already tried.
Additional Guidelines:
- The sub-questions should be specific to the question and provide richer context for the question,
resolve ambiguities, or address shortcomings of the initial answer
- Each sub-question - when answered - should be relevant for the answer to the original question
- The sub-questions should be free from comparisons, ambiguities, judgements, aggregations, or any
other complications that may require extra context.
- The sub-questions MUST have the full context of the original question so that it can be executed by
a RAG system independently without the original question available
(Example:
- initial question: "What is the capital of France?"
- bad sub-question: "What is the name of the river there?"
- good sub-question: "What is the name of the river that flows through Paris?"
- For each sub-question, please also provide a search term that can be used to retrieve relevant
documents from a document store.
- Consider specifically the sub-questions that were suggested but not answered. This is a sign that they are not
answerable with the available context, and you should not ask similar questions.
\n\n
Here is the initial question:
\n ------- \n
{question}
\n ------- \n
{history}
Here is the initial sub-optimal answer:
\n ------- \n
{base_answer}
\n ------- \n
Here are the sub-questions that were answered:
\n ------- \n
{answered_sub_questions}
\n ------- \n
Here are the sub-questions that were suggested but not answered:
\n ------- \n
{failed_sub_questions}
\n ------- \n
And here are some reference documents that show you what type of entities, relationships,
and terms you may want to consider to ask about as relevant to your initial question.
\n ------- \n
{docs_str}
\n ------- \n
Please generate the list of good, fully contextualized sub-questions that would help to address the
main question.
Specifically pay attention also to the entities, relationships and terms extracted, as these indicate what type of
objects/relationships/terms you can ask about! Do not ask about entities, terms or relationships that are not
mentioned in the 'entities, relationships and terms' section.
Again, please find questions that are NOT overlapping too much with the already answered
sub-questions or those that already were suggested and failed.
In other words - what can we try in addition to what has been tried so far?
Generate the list of questions separated by one new line like this:
<sub-question 1>
<sub-question 2>
<sub-question 3>
...
"""
DEEP_DECOMPOSE_PROMPT_WITH_ENTITIES = """ \n
An initial user question needs to be answered. An initial answer has been provided but it wasn't quite
good enough. Also, some sub-questions had been answered and this information has been used to provide
the initial answer. Some other sub-questions may have been suggested based on little knowledge, but they
were not directly answerable. Also, some entities, relationships and terms are given to you so that
you have an idea of what the available data looks like.
Your role is to generate 2-4 new sub-questions that would help to answer the initial question,
considering:
1) The initial question
2) The initial answer that was found to be unsatisfactory
3) The sub-questions that were answered