commit aa8cb44a33
parent fc60fd0322
committed by Evan Lohn

    taking out Extraction for now
@@ -27,7 +27,7 @@ def answer_check(state: AnswerQuestionState, config: RunnableConfig) -> QACheckUpdate:
         return QACheckUpdate(
             answer_quality=SUB_CHECK_NO,
             log_messages=[
-                f"{now_end} -- Answer check SQ-{level}-{question_num} - unknown answer, Time taken: {now_end - now_start}"
+                f"{now_start} -- Answer check SQ-{level}-{question_num} - unknown answer, Time taken: {now_end - now_start}"
             ],
         )
     msg = [
@@ -53,7 +53,7 @@ def answer_check(state: AnswerQuestionState, config: RunnableConfig) -> QACheckUpdate:
     return QACheckUpdate(
         answer_quality=quality_str,
         log_messages=[
-            f"""{now_end} -- Answer check SQ-{level}-{question_num} - Answer quality: {quality_str},
+            f"""{now_start} -- Answer check SQ-{level}-{question_num} - Answer quality: {quality_str},
             Time taken: {now_end - now_start}"""
         ],
     )
@@ -32,9 +32,14 @@ from onyx.agents.agent_search.deep_search_a.main.nodes.agent_search_start import
 from onyx.agents.agent_search.deep_search_a.main.nodes.answer_comparison import (
     answer_comparison,
 )
 from onyx.agents.agent_search.deep_search_a.main.nodes.entity_term_extraction_llm import (
     entity_term_extraction_llm,
 )
+from onyx.agents.agent_search.deep_search_a.main.nodes.direct_llm_handling import (
+    direct_llm_handling,
+)
 from onyx.agents.agent_search.deep_search_a.main.nodes.generate_initial_answer import (
     generate_initial_answer,
 )
@@ -197,10 +202,10 @@ def main_graph_builder(test_mode: bool = False) -> StateGraph:
         action=initial_answer_quality_check,
     )

-    graph.add_node(
-        node="entity_term_extraction_llm",
-        action=entity_term_extraction_llm,
-    )
+    # graph.add_node(
+    #     node="entity_term_extraction_llm",
+    #     action=entity_term_extraction_llm,
+    # )
     graph.add_node(
         node="refined_answer_decision",
         action=refined_answer_decision,
@@ -259,10 +264,10 @@ def main_graph_builder(test_mode: bool = False) -> StateGraph:
         end_key="base_raw_search_subgraph",
     )

-    graph.add_edge(
-        start_key="agent_search_start",
-        end_key="entity_term_extraction_llm",
-    )
+    # graph.add_edge(
+    #     start_key="agent_search_start",
+    #     end_key="entity_term_extraction_llm",
+    # )

     graph.add_edge(
         start_key="agent_search_start",
@@ -319,8 +324,12 @@ def main_graph_builder(test_mode: bool = False) -> StateGraph:
         end_key="initial_answer_quality_check",
     )

+    # graph.add_edge(
+    #     start_key=["initial_answer_quality_check", "entity_term_extraction_llm"],
+    #     end_key="refined_answer_decision",
+    # )
     graph.add_edge(
-        start_key=["initial_answer_quality_check", "entity_term_extraction_llm"],
+        start_key="initial_answer_quality_check",
         end_key="refined_answer_decision",
     )

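The hunks above rewire the main graph once the extraction step is dropped: the entity_term_extraction_llm node and its incoming edge are commented out, and the fan-in edge into refined_answer_decision shrinks from a list of start keys to a single key. A minimal sketch of this add_node/add_edge pattern, assuming LangGraph's StateGraph API, with an illustrative state type and placeholder node actions rather than the repository's real ones:

```python
# Minimal sketch of the node/edge wiring pattern used above. Assumes the
# `langgraph` package; the state schema and node bodies are illustrative.
from typing import TypedDict

from langgraph.graph import END, START, StateGraph


class MainState(TypedDict):
    answer_quality: str


def initial_answer_quality_check(state: MainState) -> MainState:
    # placeholder node action
    return state


def refined_answer_decision(state: MainState) -> MainState:
    # placeholder node action
    return state


graph = StateGraph(MainState)
graph.add_node(node="initial_answer_quality_check", action=initial_answer_quality_check)
graph.add_node(node="refined_answer_decision", action=refined_answer_decision)

graph.add_edge(start_key=START, end_key="initial_answer_quality_check")
# With entity_term_extraction_llm removed, this edge takes a single start_key
# instead of a list such as
# ["initial_answer_quality_check", "entity_term_extraction_llm"].
graph.add_edge(start_key="initial_answer_quality_check", end_key="refined_answer_decision")
graph.add_edge(start_key="refined_answer_decision", end_key=END)

compiled = graph.compile()
```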
@@ -60,6 +60,6 @@ def agent_search_start(
     return ExploratorySearchUpdate(
         exploratory_search_results=exploratory_search_results,
         log_messages=[
-            f"--------{now_end}--{now_end - now_start}--------EXPLORATORY SEARCH END---"
+            f"{now_start} -- Main - Exploratory Search, Time taken: {now_end - now_start}"
         ],
     )
@@ -4,7 +4,6 @@ from datetime import datetime
 from typing import cast

 from langchain_core.messages import HumanMessage
-from langchain_core.messages import merge_message_runs
 from langchain_core.runnables import RunnableConfig

 from onyx.agents.agent_search.deep_search_a.main.operations import logger
@@ -32,12 +31,15 @@ def entity_term_extraction_llm(
     now_start = datetime.now()

     logger.debug(f"--------{now_start}--------GENERATE ENTITIES & TERMS---")
+    logger.debug(
+        f"--------{now_start}--------GAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
+    )

     agent_a_config = cast(AgentSearchConfig, config["metadata"]["config"])
     if not agent_a_config.allow_refinement:
         now_end = datetime.now()
         return EntityTermExtractionUpdate(
-            entity_retlation_term_extractions=EntityRelationshipTermExtraction(
+            entity_relation_term_extractions=EntityRelationshipTermExtraction(
                 entities=[],
                 relationships=[],
                 terms=[],
@@ -64,14 +66,11 @@ def entity_term_extraction_llm(
     ]
     fast_llm = agent_a_config.fast_llm
     # Grader
-    llm_response_list = list(
-        fast_llm.stream(
-            prompt=msg,
-        )
-    )
-    llm_response = merge_message_runs(llm_response_list, chunk_separator="")[0].content
+    llm_response = fast_llm.invoke(
+        prompt=msg,
+    )

-    cleaned_response = re.sub(r"```json\n|\n```", "", llm_response)
+    cleaned_response = re.sub(r"```json\n|\n```", "", str(llm_response.content))
     parsed_response = json.loads(cleaned_response)

     entities = []
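This hunk swaps the stream-and-merge call for a single blocking call, then strips a possible Markdown json code fence from the reply before json.loads. A small sketch contrasting the two access patterns, assuming a LangChain-style chat model (the repository's own fast_llm wrapper is called with prompt=msg above; the positional form and helper names below are illustrative):

```python
# Minimal sketch contrasting the old stream-and-merge pattern with the new
# single invoke() call, assuming a langchain_core chat model. The fence
# cleanup mirrors the cleaned_response handling in the diff above.
import json
import re

from langchain_core.language_models.chat_models import BaseChatModel
from langchain_core.messages import HumanMessage, merge_message_runs


def extract_json_old(llm: BaseChatModel, msg: list[HumanMessage]) -> dict:
    # Old pattern: collect streamed chunks, then merge them into one message.
    chunks = list(llm.stream(msg))
    merged = merge_message_runs(chunks, chunk_separator="")[0].content
    cleaned = re.sub(r"```json\n|\n```", "", str(merged))
    return json.loads(cleaned)


def extract_json_new(llm: BaseChatModel, msg: list[HumanMessage]) -> dict:
    # New pattern: one blocking call; the response object carries .content.
    response = llm.invoke(msg)
    cleaned = re.sub(r"```json\n|\n```", "", str(response.content))
    return json.loads(cleaned)
```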
@@ -117,14 +116,17 @@ def entity_term_extraction_llm(
     logger.debug(
         f"--------{now_end}--{now_end - now_start}--------ENTITY TERM EXTRACTION END---"
     )
+    logger.debug(
+        f"--------{now_end}--------GBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB"
+    )

     return EntityTermExtractionUpdate(
-        entity_retlation_term_extractions=EntityRelationshipTermExtraction(
+        entity_relation_term_extractions=EntityRelationshipTermExtraction(
             entities=entities,
             relationships=relationships,
             terms=terms,
         ),
         log_messages=[
-            f"{now_end} -- Main - ETR Extraction, Time taken: {now_end - now_start}"
+            f"{now_start} -- Main - ETR Extraction, Time taken: {now_end - now_start}"
         ],
     )
@@ -19,9 +19,7 @@ from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
 )
 from onyx.agents.agent_search.shared_graph_utils.prompts import DEEP_DECOMPOSE_PROMPT
 from onyx.agents.agent_search.shared_graph_utils.utils import dispatch_separated
-from onyx.agents.agent_search.shared_graph_utils.utils import (
-    format_entity_term_extraction,
-)
+from onyx.agents.agent_search.shared_graph_utils.utils import format_docs
 from onyx.agents.agent_search.shared_graph_utils.utils import make_question_id
 from onyx.tools.models import ToolCallKickoff

@@ -52,11 +50,13 @@ def refined_sub_question_creation(
     base_answer = state.initial_answer
     history = build_history_prompt(agent_a_config.prompt_builder)
     # get the entity term extraction dict and properly format it
-    entity_retlation_term_extractions = state.entity_retlation_term_extractions
+    # entity_retlation_term_extractions = state.entity_relation_term_extractions

-    entity_term_extraction_str = format_entity_term_extraction(
-        entity_retlation_term_extractions
-    )
+    # entity_term_extraction_str = format_entity_term_extraction(
+    #     entity_retlation_term_extractions
+    # )

+    docs_str = format_docs(state.all_original_question_documents[:10])
+
     initial_question_answers = state.decomp_answer_results

@@ -73,7 +73,7 @@ def refined_sub_question_creation(
         content=DEEP_DECOMPOSE_PROMPT.format(
             question=question,
             history=history,
-            entity_term_extraction_str=entity_term_extraction_str,
+            docs_str=docs_str,
             base_answer=base_answer,
             answered_sub_questions="\n - ".join(addressed_question_list),
             failed_sub_questions="\n - ".join(failed_question_list),
@@ -117,7 +117,7 @@ class ExpandedRetrievalUpdate(LoggerUpdate):


 class EntityTermExtractionUpdate(LoggerUpdate):
-    entity_retlation_term_extractions: EntityRelationshipTermExtraction = (
+    entity_relation_term_extractions: EntityRelationshipTermExtraction = (
         EntityRelationshipTermExtraction()
     )

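The rename above fixes the misspelled entity_retlation_term_extractions field on the state update. A minimal sketch of the resulting shape, using stand-in Pydantic base models since LoggerUpdate and EntityRelationshipTermExtraction are defined elsewhere; only the field name, annotation, and default mirror the diff, and the element types are assumptions:

```python
# Minimal sketch of the state-update shape after the rename. The base classes
# and the list element types are stand-ins, not the repository's definitions.
from pydantic import BaseModel


class EntityRelationshipTermExtraction(BaseModel):
    entities: list[str] = []
    relationships: list[str] = []
    terms: list[str] = []


class LoggerUpdate(BaseModel):
    log_messages: list[str] = []


class EntityTermExtractionUpdate(LoggerUpdate):
    entity_relation_term_extractions: EntityRelationshipTermExtraction = (
        EntityRelationshipTermExtraction()
    )
```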
@@ -52,7 +52,7 @@ class AgentSearchConfig:
     db_session: Session | None = None

     # Whether to perform initial search to inform decomposition
-    perform_initial_search_path_decision: bool = True
+    # perform_initial_search_path_decision: bool = True

     # Whether to perform initial search to inform decomposition
     perform_initial_search_decomposition: bool = True
@@ -138,7 +138,7 @@ def run_graph(
     input: BasicInput | MainInput_a,
 ) -> AnswerStream:
     # TODO: add these to the environment
-    config.perform_initial_search_path_decision = False
+    # config.perform_initial_search_path_decision = False
     config.perform_initial_search_decomposition = True
     config.allow_refinement = True

@@ -212,7 +212,8 @@ if __name__ == "__main__":
         # query="What are the guiding principles behind the development of cockroachDB",
         # query="What are the temperatures in Munich, Hawaii, and New York?",
         # query="When was Washington born?",
-        query="What is Onyx?",
+        # query="What is Onyx?",
+        query="What is the difference between astronomy and astrology?",
     )
     # Joachim custom persona

@@ -222,7 +223,7 @@ if __name__ == "__main__":
     )
     # search_request.persona = get_persona_by_id(1, None, db_session)
     config.use_persistence = True
-    config.perform_initial_search_path_decision = False
+    # config.perform_initial_search_path_decision = False
     config.perform_initial_search_decomposition = True
     if GRAPH_NAME == "a":
         input = MainInput_a(
@@ -138,7 +138,7 @@ BASE_CHECK_PROMPT = """ \n
 VERIFIER_PROMPT = """
 You are supposed to judge whether a document text contains data or information that is potentially relevant
 for a question. It does not have to be fully relevant, but check whether it has some information that
-could help to address the question.
+would help - possibly in conjunction with other documents - to address the question.

 Here is a document text that you can take as a fact:
 --
@@ -147,8 +147,7 @@ DOCUMENT INFORMATION:
 --

 Do you think that this document text is useful and relevant to answer the following question?
-(Other documents may supply additional information, so do not worry if the provided information
-is not enough to answer the question, but it needs to be relevant to the question.)
 --
 QUESTION:
 {question}
@@ -295,6 +294,92 @@ DEEP_DECOMPOSE_PROMPT = """ \n
 Your role is to generate 2-4 new sub-questions that would help to answer the initial question,
 considering:

+1) The initial question
+2) The initial answer that was found to be unsatisfactory
+3) The sub-questions that were answered
+4) The sub-questions that were suggested but not answered
+5) A sample of the TYPE of documents that may be in the database in order to inform
+you what type of entities, relationships, and terms you may want to consider asking about.
+(But do not build the questions strictly on these documents! They are only examples!
+Take them as illustrations.)
+
+The individual questions should be answerable by a good RAG system.
+So a good idea would be to use the sub-questions to resolve ambiguities and/or to separate the
+question for different entities that may be involved in the original question, but in a way that does
+not duplicate questions that were already tried.
+
+Additional Guidelines:
+- The sub-questions should be specific to the question and provide richer context for the question,
+resolve ambiguities, or address shortcomings of the initial answer
+- Each sub-question - when answered - should be relevant for the answer to the original question
+- The sub-questions should be free from comparisons, ambiguities, judgements, aggregations, or any
+other complications that may require extra context.
+- The sub-questions MUST have the full context of the original question so that they can be executed by
+a RAG system independently without the original question available
+(Example:
+- initial question: "What is the capital of France?"
+- bad sub-question: "What is the name of the river there?"
+- good sub-question: "What is the name of the river that flows through Paris?")
+- For each sub-question, please also provide a search term that can be used to retrieve relevant
+documents from a document store.
+- Consider specifically the sub-questions that were suggested but not answered. This is a sign that they are not
+answerable with the available context, and you should not ask similar questions.
+\n\n
+Here is the initial question:
+\n ------- \n
+{question}
+\n ------- \n
+{history}
+
+Here is the initial sub-optimal answer:
+\n ------- \n
+{base_answer}
+\n ------- \n
+
+Here are the sub-questions that were answered:
+\n ------- \n
+{answered_sub_questions}
+\n ------- \n
+
+Here are the sub-questions that were suggested but not answered:
+\n ------- \n
+{failed_sub_questions}
+\n ------- \n
+
+And here are some reference documents that show you what type of entities, relationships,
+and terms you may want to consider to ask about as relevant to your initial question.
+\n ------- \n
+{docs_str}
+\n ------- \n
+
+Please generate the list of good, fully contextualized sub-questions that would help to address the
+main question.
+
+Specifically pay attention also to the entities, relationships and terms extracted, as these indicate what type of
+objects/relationships/terms you can ask about! Do not ask about entities, terms or relationships that are not
+mentioned in the 'entities, relationships and terms' section.
+
+Again, please find questions that are NOT overlapping too much with the already answered
+sub-questions or those that already were suggested and failed.
+In other words - what can we try in addition to what has been tried so far?
+
+Generate the list of questions separated by one new line like this:
+<sub-question 1>
+<sub-question 2>
+<sub-question 3>
+...
+"""
+
+DEEP_DECOMPOSE_PROMPT_WITH_ENTITIES = """ \n
+An initial user question needs to be answered. An initial answer has been provided but it wasn't quite
+good enough. Also, some sub-questions had been answered and this information has been used to provide
+the initial answer. Some other sub-questions may have been suggested based on little knowledge, but they
+were not directly answerable. Also, some entities, relationships and terms are given to you so that
+you have an idea of what the available data looks like.
+
+Your role is to generate 2-4 new sub-questions that would help to answer the initial question,
+considering:
+
 1) The initial question
 2) The initial answer that was found to be unsatisfactory
 3) The sub-questions that were answered
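The new DEEP_DECOMPOSE_PROMPT asks the model for 2-4 fully contextualized sub-questions, returned one per line, and is filled with the question, history, base answer, answered and failed sub-questions, and a sample of documents. A minimal sketch of filling such a template and splitting the reply back into individual questions; the helpers below are illustrative, not the repository's implementation, and assume the placeholders shown in the prompt:

```python
# Minimal sketch: fill the decomposition prompt and split the newline-separated
# reply into sub-questions. Placeholder names match the prompt above; the
# helpers themselves are illustrative.
def build_decompose_prompt(
    template: str,
    question: str,
    history: str,
    base_answer: str,
    answered: list[str],
    failed: list[str],
    docs_str: str,
) -> str:
    return template.format(
        question=question,
        history=history,
        base_answer=base_answer,
        answered_sub_questions="\n - ".join(answered),
        failed_sub_questions="\n - ".join(failed),
        docs_str=docs_str,
    )


def parse_sub_questions(llm_output: str) -> list[str]:
    # The prompt requests one sub-question per line, so split on newlines
    # and drop empty lines.
    return [line.strip() for line in llm_output.splitlines() if line.strip()]
```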