mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-09-20 13:05:49 +02:00
taking out Extraction for now
This commit is contained in:
committed by
Evan Lohn
parent
fc60fd0322
commit
aa8cb44a33
@@ -27,7 +27,7 @@ def answer_check(state: AnswerQuestionState, config: RunnableConfig) -> QACheckU
|
||||
return QACheckUpdate(
|
||||
answer_quality=SUB_CHECK_NO,
|
||||
log_messages=[
|
||||
f"{now_end} -- Answer check SQ-{level}-{question_num} - unknown answer, Time taken: {now_end - now_start}"
|
||||
f"{now_start} -- Answer check SQ-{level}-{question_num} - unknown answer, Time taken: {now_end - now_start}"
|
||||
],
|
||||
)
|
||||
msg = [
|
||||
@@ -53,7 +53,7 @@ def answer_check(state: AnswerQuestionState, config: RunnableConfig) -> QACheckU
|
||||
return QACheckUpdate(
|
||||
answer_quality=quality_str,
|
||||
log_messages=[
|
||||
f"""{now_end} -- Answer check SQ-{level}-{question_num} - Answer quality: {quality_str},
|
||||
f"""{now_start} -- Answer check SQ-{level}-{question_num} - Answer quality: {quality_str},
|
||||
Time taken: {now_end - now_start}"""
|
||||
],
|
||||
)
|
||||
|
@@ -32,9 +32,14 @@ from onyx.agents.agent_search.deep_search_a.main.nodes.agent_search_start import
|
||||
from onyx.agents.agent_search.deep_search_a.main.nodes.answer_comparison import (
|
||||
answer_comparison,
|
||||
)
|
||||
|
||||
from onyx.agents.agent_search.deep_search_a.main.nodes.entity_term_extraction_llm import (
|
||||
entity_term_extraction_llm,
|
||||
)
|
||||
from onyx.agents.agent_search.deep_search_a.main.nodes.direct_llm_handling import (
|
||||
direct_llm_handling,
|
||||
|
||||
)
|
||||
from onyx.agents.agent_search.deep_search_a.main.nodes.generate_initial_answer import (
|
||||
generate_initial_answer,
|
||||
)
|
||||
@@ -197,10 +202,10 @@ def main_graph_builder(test_mode: bool = False) -> StateGraph:
|
||||
action=initial_answer_quality_check,
|
||||
)
|
||||
|
||||
graph.add_node(
|
||||
node="entity_term_extraction_llm",
|
||||
action=entity_term_extraction_llm,
|
||||
)
|
||||
# graph.add_node(
|
||||
# node="entity_term_extraction_llm",
|
||||
# action=entity_term_extraction_llm,
|
||||
# )
|
||||
graph.add_node(
|
||||
node="refined_answer_decision",
|
||||
action=refined_answer_decision,
|
||||
@@ -259,10 +264,10 @@ def main_graph_builder(test_mode: bool = False) -> StateGraph:
|
||||
end_key="base_raw_search_subgraph",
|
||||
)
|
||||
|
||||
graph.add_edge(
|
||||
start_key="agent_search_start",
|
||||
end_key="entity_term_extraction_llm",
|
||||
)
|
||||
# graph.add_edge(
|
||||
# start_key="agent_search_start",
|
||||
# end_key="entity_term_extraction_llm",
|
||||
# )
|
||||
|
||||
graph.add_edge(
|
||||
start_key="agent_search_start",
|
||||
@@ -319,8 +324,12 @@ def main_graph_builder(test_mode: bool = False) -> StateGraph:
|
||||
end_key="initial_answer_quality_check",
|
||||
)
|
||||
|
||||
# graph.add_edge(
|
||||
# start_key=["initial_answer_quality_check", "entity_term_extraction_llm"],
|
||||
# end_key="refined_answer_decision",
|
||||
# )
|
||||
graph.add_edge(
|
||||
start_key=["initial_answer_quality_check", "entity_term_extraction_llm"],
|
||||
start_key="initial_answer_quality_check",
|
||||
end_key="refined_answer_decision",
|
||||
)
|
||||
|
||||
|
@@ -60,6 +60,6 @@ def agent_search_start(
|
||||
return ExploratorySearchUpdate(
|
||||
exploratory_search_results=exploratory_search_results,
|
||||
log_messages=[
|
||||
f"--------{now_end}--{now_end - now_start}--------EXPLORATORY SEARCH END---"
|
||||
f"{now_start} -- Main - Exploratory Search, Time taken: {now_end - now_start}"
|
||||
],
|
||||
)
|
||||
|
@@ -4,7 +4,6 @@ from datetime import datetime
|
||||
from typing import cast
|
||||
|
||||
from langchain_core.messages import HumanMessage
|
||||
from langchain_core.messages import merge_message_runs
|
||||
from langchain_core.runnables import RunnableConfig
|
||||
|
||||
from onyx.agents.agent_search.deep_search_a.main.operations import logger
|
||||
@@ -32,12 +31,15 @@ def entity_term_extraction_llm(
|
||||
now_start = datetime.now()
|
||||
|
||||
logger.debug(f"--------{now_start}--------GENERATE ENTITIES & TERMS---")
|
||||
logger.debug(
|
||||
f"--------{now_start}--------GAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
|
||||
)
|
||||
|
||||
agent_a_config = cast(AgentSearchConfig, config["metadata"]["config"])
|
||||
if not agent_a_config.allow_refinement:
|
||||
now_end = datetime.now()
|
||||
return EntityTermExtractionUpdate(
|
||||
entity_retlation_term_extractions=EntityRelationshipTermExtraction(
|
||||
entity_relation_term_extractions=EntityRelationshipTermExtraction(
|
||||
entities=[],
|
||||
relationships=[],
|
||||
terms=[],
|
||||
@@ -64,14 +66,11 @@ def entity_term_extraction_llm(
|
||||
]
|
||||
fast_llm = agent_a_config.fast_llm
|
||||
# Grader
|
||||
llm_response_list = list(
|
||||
fast_llm.stream(
|
||||
prompt=msg,
|
||||
)
|
||||
llm_response = fast_llm.invoke(
|
||||
prompt=msg,
|
||||
)
|
||||
llm_response = merge_message_runs(llm_response_list, chunk_separator="")[0].content
|
||||
|
||||
cleaned_response = re.sub(r"```json\n|\n```", "", llm_response)
|
||||
cleaned_response = re.sub(r"```json\n|\n```", "", str(llm_response.content))
|
||||
parsed_response = json.loads(cleaned_response)
|
||||
|
||||
entities = []
|
||||
@@ -117,14 +116,17 @@ def entity_term_extraction_llm(
|
||||
logger.debug(
|
||||
f"--------{now_end}--{now_end - now_start}--------ENTITY TERM EXTRACTION END---"
|
||||
)
|
||||
logger.debug(
|
||||
f"--------{now_end}--------GBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB"
|
||||
)
|
||||
|
||||
return EntityTermExtractionUpdate(
|
||||
entity_retlation_term_extractions=EntityRelationshipTermExtraction(
|
||||
entity_relation_term_extractions=EntityRelationshipTermExtraction(
|
||||
entities=entities,
|
||||
relationships=relationships,
|
||||
terms=terms,
|
||||
),
|
||||
log_messages=[
|
||||
f"{now_end} -- Main - ETR Extraction, Time taken: {now_end - now_start}"
|
||||
f"{now_start} -- Main - ETR Extraction, Time taken: {now_end - now_start}"
|
||||
],
|
||||
)
|
||||
|
@@ -19,9 +19,7 @@ from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
|
||||
)
|
||||
from onyx.agents.agent_search.shared_graph_utils.prompts import DEEP_DECOMPOSE_PROMPT
|
||||
from onyx.agents.agent_search.shared_graph_utils.utils import dispatch_separated
|
||||
from onyx.agents.agent_search.shared_graph_utils.utils import (
|
||||
format_entity_term_extraction,
|
||||
)
|
||||
from onyx.agents.agent_search.shared_graph_utils.utils import format_docs
|
||||
from onyx.agents.agent_search.shared_graph_utils.utils import make_question_id
|
||||
from onyx.tools.models import ToolCallKickoff
|
||||
|
||||
@@ -52,11 +50,13 @@ def refined_sub_question_creation(
|
||||
base_answer = state.initial_answer
|
||||
history = build_history_prompt(agent_a_config.prompt_builder)
|
||||
# get the entity term extraction dict and properly format it
|
||||
entity_retlation_term_extractions = state.entity_retlation_term_extractions
|
||||
# entity_retlation_term_extractions = state.entity_relation_term_extractions
|
||||
|
||||
entity_term_extraction_str = format_entity_term_extraction(
|
||||
entity_retlation_term_extractions
|
||||
)
|
||||
# entity_term_extraction_str = format_entity_term_extraction(
|
||||
# entity_retlation_term_extractions
|
||||
# )
|
||||
|
||||
docs_str = format_docs(state.all_original_question_documents[:10])
|
||||
|
||||
initial_question_answers = state.decomp_answer_results
|
||||
|
||||
@@ -73,7 +73,7 @@ def refined_sub_question_creation(
|
||||
content=DEEP_DECOMPOSE_PROMPT.format(
|
||||
question=question,
|
||||
history=history,
|
||||
entity_term_extraction_str=entity_term_extraction_str,
|
||||
docs_str=docs_str,
|
||||
base_answer=base_answer,
|
||||
answered_sub_questions="\n - ".join(addressed_question_list),
|
||||
failed_sub_questions="\n - ".join(failed_question_list),
|
||||
|
@@ -117,7 +117,7 @@ class ExpandedRetrievalUpdate(LoggerUpdate):
|
||||
|
||||
|
||||
class EntityTermExtractionUpdate(LoggerUpdate):
|
||||
entity_retlation_term_extractions: EntityRelationshipTermExtraction = (
|
||||
entity_relation_term_extractions: EntityRelationshipTermExtraction = (
|
||||
EntityRelationshipTermExtraction()
|
||||
)
|
||||
|
||||
|
@@ -52,7 +52,7 @@ class AgentSearchConfig:
|
||||
db_session: Session | None = None
|
||||
|
||||
# Whether to perform initial search to inform decomposition
|
||||
perform_initial_search_path_decision: bool = True
|
||||
# perform_initial_search_path_decision: bool = True
|
||||
|
||||
# Whether to perform initial search to inform decomposition
|
||||
perform_initial_search_decomposition: bool = True
|
||||
|
@@ -138,7 +138,7 @@ def run_graph(
|
||||
input: BasicInput | MainInput_a,
|
||||
) -> AnswerStream:
|
||||
# TODO: add these to the environment
|
||||
config.perform_initial_search_path_decision = False
|
||||
# config.perform_initial_search_path_decision = False
|
||||
config.perform_initial_search_decomposition = True
|
||||
config.allow_refinement = True
|
||||
|
||||
@@ -212,7 +212,8 @@ if __name__ == "__main__":
|
||||
# query="What are the guiding principles behind the development of cockroachDB",
|
||||
# query="What are the temperatures in Munich, Hawaii, and New York?",
|
||||
# query="When was Washington born?",
|
||||
query="What is Onyx?",
|
||||
# query="What is Onyx?",
|
||||
query="What is the difference between astronomy and astrology?",
|
||||
)
|
||||
# Joachim custom persona
|
||||
|
||||
@@ -222,7 +223,7 @@ if __name__ == "__main__":
|
||||
)
|
||||
# search_request.persona = get_persona_by_id(1, None, db_session)
|
||||
config.use_persistence = True
|
||||
config.perform_initial_search_path_decision = False
|
||||
# config.perform_initial_search_path_decision = False
|
||||
config.perform_initial_search_decomposition = True
|
||||
if GRAPH_NAME == "a":
|
||||
input = MainInput_a(
|
||||
|
@@ -138,7 +138,7 @@ BASE_CHECK_PROMPT = """ \n
|
||||
VERIFIER_PROMPT = """
|
||||
You are supposed to judge whether a document text contains data or information that is potentially relevant
|
||||
for a question. It does not have to be fully relevant, but check whether it has some information that
|
||||
could help to address the question.
|
||||
would help - possibly in conjunction with other documents - to address the question.
|
||||
|
||||
Here is a document text that you can take as a fact:
|
||||
--
|
||||
@@ -147,8 +147,7 @@ DOCUMENT INFORMATION:
|
||||
--
|
||||
|
||||
Do you think that this document text is useful and relevant to answer the following question?
|
||||
(Other documents may supply additional information, so do not worry if the provided information
|
||||
is not enough to answer the question, but it needs to be relevant to the question.)
|
||||
|
||||
--
|
||||
QUESTION:
|
||||
{question}
|
||||
@@ -295,6 +294,92 @@ DEEP_DECOMPOSE_PROMPT = """ \n
|
||||
Your role is to generate 2-4 new sub-questions that would help to answer the initial question,
|
||||
considering:
|
||||
|
||||
1) The initial question
|
||||
2) The initial answer that was found to be unsatisfactory
|
||||
3) The sub-questions that were answered
|
||||
4) The sub-questions that were suggested but not answered
|
||||
5) A sample of the TYPE of documents that may be in the database in order to inform
|
||||
you what type of entities, relationships, and terms you may want to consider asking about.
|
||||
(But do not build the questions strictly on these documents! They are only examples!
|
||||
Take them as illustrations.)
|
||||
|
||||
The individual questions should be answerable by a good RAG system.
|
||||
So a good idea would be to use the sub-questions to resolve ambiguities and/or to separate the
|
||||
question for different entities that may be involved in the original question, but in a way that does
|
||||
not duplicate questions that were already tried.
|
||||
|
||||
Additional Guidelines:
|
||||
- The sub-questions should be specific to the question and provide richer context for the question,
|
||||
resolve ambiguities, or address shortcomings of the initial answer
|
||||
- Each sub-question - when answered - should be relevant for the answer to the original question
|
||||
- The sub-questions should be free from comparisons, ambiguities, judgements, aggregations, or any
|
||||
other complications that may require extra context.
|
||||
- The sub-questions MUST have the full context of the original question so that it can be executed by
|
||||
a RAG system independently without the original question available
|
||||
(Example:
|
||||
- initial question: "What is the capital of France?"
|
||||
- bad sub-question: "What is the name of the river there?"
|
||||
- good sub-question: "What is the name of the river that flows through Paris?"
|
||||
- For each sub-question, please also provide a search term that can be used to retrieve relevant
|
||||
documents from a document store.
|
||||
- Consider specifically the sub-questions that were suggested but not answered. This is a sign that they are not
|
||||
answerable with the available context, and you should not ask similar questions.
|
||||
\n\n
|
||||
Here is the initial question:
|
||||
\n ------- \n
|
||||
{question}
|
||||
\n ------- \n
|
||||
{history}
|
||||
|
||||
Here is the initial sub-optimal answer:
|
||||
\n ------- \n
|
||||
{base_answer}
|
||||
\n ------- \n
|
||||
|
||||
Here are the sub-questions that were answered:
|
||||
\n ------- \n
|
||||
{answered_sub_questions}
|
||||
\n ------- \n
|
||||
|
||||
Here are the sub-questions that were suggested but not answered:
|
||||
\n ------- \n
|
||||
{failed_sub_questions}
|
||||
\n ------- \n
|
||||
|
||||
And here are some reference documents that show you what type of entities, relationships,
|
||||
and terms you may want to consider to ask about as relevant to your initial question.
|
||||
\n ------- \n
|
||||
{docs_str}
|
||||
\n ------- \n
|
||||
|
||||
Please generate the list of good, fully contextualized sub-questions that would help to address the
|
||||
main question.
|
||||
|
||||
Specifically pay attention also to the entities, relationships and terms extracted, as these indicate what type of
|
||||
objects/relationships/terms you can ask about! Do not ask about entities, terms or relationships that are not
|
||||
mentioned in the 'entities, relationships and terms' section.
|
||||
|
||||
Again, please find questions that are NOT overlapping too much with the already answered
|
||||
sub-questions or those that already were suggested and failed.
|
||||
In other words - what can we try in addition to what has been tried so far?
|
||||
|
||||
Generate the list of questions separated by one new line like this:
|
||||
<sub-question 1>
|
||||
<sub-question 2>
|
||||
<sub-question 3>
|
||||
...
|
||||
"""
|
||||
|
||||
DEEP_DECOMPOSE_PROMPT_WITH_ENTITIES = """ \n
|
||||
An initial user question needs to be answered. An initial answer has been provided but it wasn't quite
|
||||
good enough. Also, some sub-questions had been answered and this information has been used to provide
|
||||
the initial answer. Some other subquestions may have been suggested based on little knowledge, but they
|
||||
were not directly answerable. Also, some entities, relationships and terms are given to you so that
|
||||
you have an idea of what the available data looks like.
|
||||
|
||||
Your role is to generate 2-4 new sub-questions that would help to answer the initial question,
|
||||
considering:
|
||||
|
||||
1) The initial question
|
||||
2) The initial answer that was found to be unsatisfactory
|
||||
3) The sub-questions that were answered
|
||||
|
Reference in New Issue
Block a user