mirror of https://github.com/danswer-ai/danswer.git
history summary fix

- adjusted prompt
- adjusted citation removal
- length cutoff by words, not characters

committed by Evan Lohn
parent 95fcc0019c
commit d5661baf98
@@ -16,9 +16,6 @@ from onyx.agents.agent_search.deep_search.main.operations import (
 )
 from onyx.agents.agent_search.deep_search.main.operations import get_query_info
 from onyx.agents.agent_search.deep_search.main.operations import logger
-from onyx.agents.agent_search.deep_search.main.operations import (
-    remove_document_citations,
-)
 from onyx.agents.agent_search.deep_search.main.states import (
     InitialAnswerUpdate,
 )

@@ -49,6 +46,9 @@ from onyx.agents.agent_search.shared_graph_utils.utils import (
     get_langgraph_node_log_string,
 )
 from onyx.agents.agent_search.shared_graph_utils.utils import relevance_from_docs
+from onyx.agents.agent_search.shared_graph_utils.utils import (
+    remove_document_citations,
+)
 from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
 from onyx.chat.models import AgentAnswerPiece
 from onyx.chat.models import ExtendedToolResponse

@@ -12,9 +12,6 @@ from onyx.agents.agent_search.deep_search.main.models import (
 )
 from onyx.agents.agent_search.deep_search.main.operations import get_query_info
 from onyx.agents.agent_search.deep_search.main.operations import logger
-from onyx.agents.agent_search.deep_search.main.operations import (
-    remove_document_citations,
-)
 from onyx.agents.agent_search.deep_search.main.states import MainState
 from onyx.agents.agent_search.deep_search.main.states import (
     RefinedAnswerUpdate,

@@ -48,6 +45,9 @@ from onyx.agents.agent_search.shared_graph_utils.utils import (
 )
 from onyx.agents.agent_search.shared_graph_utils.utils import parse_question_id
 from onyx.agents.agent_search.shared_graph_utils.utils import relevance_from_docs
+from onyx.agents.agent_search.shared_graph_utils.utils import (
+    remove_document_citations,
+)
 from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
 from onyx.chat.models import AgentAnswerPiece
 from onyx.chat.models import ExtendedToolResponse

@@ -1,4 +1,3 @@
-import re
 from collections.abc import Callable
 
 from langgraph.types import StreamWriter

@@ -18,27 +17,6 @@ from onyx.utils.logger import setup_logger
 logger = setup_logger()
 
 
-def remove_document_citations(text: str) -> str:
-    """
-    Removes citation expressions of format '[[D1]]()' from text.
-    The number after D can vary.
-
-    Args:
-        text: Input text containing citations
-
-    Returns:
-        Text with citations removed
-    """
-    # Pattern explanation:
-    # \[\[D\d+\]\]\(\) matches:
-    # \[\[ - literal [[ characters
-    # D - literal D character
-    # \d+ - one or more digits
-    # \]\] - literal ]] characters
-    # \(\) - literal () characters
-    return re.sub(r"\[\[(?:D|Q)\d+\]\]\(\)", "", text)
-
-
 def dispatch_subquestion(
     level: int, writer: StreamWriter
 ) -> Callable[[str, int], None]:

@@ -11,8 +11,9 @@ from onyx.agents.agent_search.shared_graph_utils.utils import (
     get_persona_agent_prompt_expressions,
 )
 from onyx.agents.agent_search.shared_graph_utils.utils import get_today_prompt
+from onyx.agents.agent_search.shared_graph_utils.utils import remove_document_citations
 from onyx.agents.agent_search.shared_graph_utils.utils import summarize_history
-from onyx.configs.agent_configs import AGENT_MAX_STATIC_HISTORY_CHAR_LENGTH
+from onyx.configs.agent_configs import AGENT_MAX_STATIC_HISTORY_WORD_LENGTH
 from onyx.context.search.models import InferenceSection
 from onyx.llm.interfaces import LLMConfig
 from onyx.llm.utils import get_max_input_tokens

@@ -109,8 +110,8 @@ def build_history_prompt(config: AgentSearchConfig, question: str) -> str:
         else:
             continue
     history = "\n".join(history_components)
-
-    if len(history) > AGENT_MAX_STATIC_HISTORY_CHAR_LENGTH:
+    history = remove_document_citations(history)
+    if len(history.split()) > AGENT_MAX_STATIC_HISTORY_WORD_LENGTH:
         history = summarize_history(history, question, persona_base, model)
 
     return HISTORY_PROMPT.format(history=history) if history else ""

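For illustration only (not code from this commit): build_history_prompt now counts whitespace-separated words instead of characters before deciding whether to summarize. A minimal standalone sketch of the new check, using the 2000-word default from agent_configs; the sample history string is hypothetical:

    # str.split() with no arguments splits on any run of whitespace,
    # so this threshold counts words rather than characters.
    AGENT_MAX_STATIC_HISTORY_WORD_LENGTH = 2000  # default from agent_configs

    history = " ".join(["token"] * 2500)  # hypothetical 2500-word history

    if len(history.split()) > AGENT_MAX_STATIC_HISTORY_WORD_LENGTH:
        print("over the word limit -- summarize_history would be invoked")
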
@@ -1160,7 +1160,8 @@ Please answer with a simple 'yes' or 'no'.
 
 HISTORY_CONTEXT_SUMMARY_PROMPT = """\n
 {persona_specification}
-You need to summarize the key parts of the history of a conversation between a user and an agent. The
+Your task now is to summarize the key parts of the history of a conversation between a user and an agent. The
 summary has two purposes:
 1) providing the suitable context for a new question, and
 2) To capture the key information that was discussed and that the user may have a follow-up question about.

@@ -343,8 +343,12 @@ def get_answer_citation_ids(answer_str: str) -> list[int]:
 def summarize_history(
     history: str, question: str, persona_specification: str, model: LLM
 ) -> str:
-    history_context_prompt = HISTORY_CONTEXT_SUMMARY_PROMPT.format(
-        persona_specification=persona_specification, question=question, history=history
+    history_context_prompt = remove_document_citations(
+        HISTORY_CONTEXT_SUMMARY_PROMPT.format(
+            persona_specification=persona_specification,
+            question=question,
+            history=history,
+        )
     )
 
     history_response = model.invoke(history_context_prompt)

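For illustration only (not code from this commit): summarize_history now strips citation markers from the fully formatted prompt rather than formatting an untouched template, so markers carried in the history (or any other field) never reach the summarization LLM. A hedged sketch of that call order with a stand-in template; only remove_document_citations and the keyword arguments come from the diff:

    from onyx.agents.agent_search.shared_graph_utils.utils import remove_document_citations

    # Stand-in for HISTORY_CONTEXT_SUMMARY_PROMPT.
    PROMPT_TEMPLATE = "{persona_specification}\nQuestion: {question}\nHistory:\n{history}"

    def build_summary_prompt(history: str, question: str, persona_specification: str) -> str:
        # Citations are removed after formatting, i.e. from the complete prompt text.
        return remove_document_citations(
            PROMPT_TEMPLATE.format(
                persona_specification=persona_specification,
                question=question,
                history=history,
            )
        )
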
@@ -399,3 +403,24 @@ def get_langgraph_node_log_string(
     duration = datetime.now() - node_start_time
     results_str = "" if result is None else f" -- Result: {result}"
     return f"{node_start_time} -- {graph_component} - {node_name} -- Time taken: {duration}{results_str}"
+
+
+def remove_document_citations(text: str) -> str:
+    """
+    Removes citation expressions of format '[[D1]]()' from text.
+    The number after D can vary.
+
+    Args:
+        text: Input text containing citations
+
+    Returns:
+        Text with citations removed
+    """
+    # Pattern explanation:
+    # \[\[D\d+\]\]\(\) matches:
+    # \[\[ - literal [[ characters
+    # D - literal D character
+    # \d+ - one or more digits
+    # \]\] - literal ]] characters
+    # \(\) - literal () characters
+    return re.sub(r"\[\[(?:D|Q)?\d+\]\](?:\([^)]*\))?", "", text)

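For illustration only (not code from this commit): the relocated remove_document_citations also broadens the regex, while the pattern-explanation comments above still describe the narrower original. The old pattern matched only empty-link citations such as [[D1]]() or [[Q2]](); the new one makes the D/Q prefix and the link target optional. A small standalone comparison (the sample string is hypothetical):

    import re

    OLD_PATTERN = r"\[\[(?:D|Q)\d+\]\]\(\)"             # removed in this commit
    NEW_PATTERN = r"\[\[(?:D|Q)?\d+\]\](?:\([^)]*\))?"  # added in this commit

    sample = "See [[D1]]() and [[Q2]](https://example.com/doc), also [[3]]."

    print(re.sub(OLD_PATTERN, "", sample))  # leaves [[Q2]](https://example.com/doc) and [[3]] behind
    print(re.sub(NEW_PATTERN, "", sample))  # strips all three citation markers
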
@@ -11,7 +11,7 @@ AGENT_DEFAULT_NUM_DOCS_FOR_REFINED_DECOMPOSITION = 5
 AGENT_DEFAULT_EXPLORATORY_SEARCH_RESULTS = 3
 AGENT_DEFAULT_MIN_ORIG_QUESTION_DOCS = 3
 AGENT_DEFAULT_MAX_ANSWER_CONTEXT_DOCS = 10
-AGENT_DEFAULT_MAX_STATIC_HISTORY_CHAR_LENGTH = 10000
+AGENT_DEFAULT_MAX_STATIC_HISTORY_WORD_LENGTH = 2000
 
 #####
 # Agent Configs

@@ -72,9 +72,9 @@ AGENT_MAX_ANSWER_CONTEXT_DOCS = int(
 ) # 8
 
 
-AGENT_MAX_STATIC_HISTORY_CHAR_LENGTH = int(
-    os.environ.get("AGENT_MAX_STATIC_HISTORY_CHAR_LENGTH")
-    or AGENT_DEFAULT_MAX_STATIC_HISTORY_CHAR_LENGTH
-) # 10000
+AGENT_MAX_STATIC_HISTORY_WORD_LENGTH = int(
+    os.environ.get("AGENT_MAX_STATIC_HISTORY_WORD_LENGTH")
+    or AGENT_DEFAULT_MAX_STATIC_HISTORY_WORD_LENGTH
+) # 2000
 
 GRAPH_VERSION_NAME: str = "a"

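For illustration only (not code from this commit): the environment override follows the renamed setting, with the same resolution order shown above (explicit env var wins, otherwise the new 2000-word default):

    import os

    AGENT_DEFAULT_MAX_STATIC_HISTORY_WORD_LENGTH = 2000

    AGENT_MAX_STATIC_HISTORY_WORD_LENGTH = int(
        os.environ.get("AGENT_MAX_STATIC_HISTORY_WORD_LENGTH")
        or AGENT_DEFAULT_MAX_STATIC_HISTORY_WORD_LENGTH
    )

    print(AGENT_MAX_STATIC_HISTORY_WORD_LENGTH)  # 2000 unless the env var is set

Deployments that previously set AGENT_MAX_STATIC_HISTORY_CHAR_LENGTH should switch to the word-based variable, since the old name is no longer read.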
|