mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-04-13 22:39:36 +02:00
history summary fix
- adjusted prompt - adjusted citation removal - length cutoff by words, not characters
This commit is contained in:
parent
95fcc0019c
commit
d5661baf98
@ -16,9 +16,6 @@ from onyx.agents.agent_search.deep_search.main.operations import (
|
||||
)
|
||||
from onyx.agents.agent_search.deep_search.main.operations import get_query_info
|
||||
from onyx.agents.agent_search.deep_search.main.operations import logger
|
||||
from onyx.agents.agent_search.deep_search.main.operations import (
|
||||
remove_document_citations,
|
||||
)
|
||||
from onyx.agents.agent_search.deep_search.main.states import (
|
||||
InitialAnswerUpdate,
|
||||
)
|
||||
@ -49,6 +46,9 @@ from onyx.agents.agent_search.shared_graph_utils.utils import (
|
||||
get_langgraph_node_log_string,
|
||||
)
|
||||
from onyx.agents.agent_search.shared_graph_utils.utils import relevance_from_docs
|
||||
from onyx.agents.agent_search.shared_graph_utils.utils import (
|
||||
remove_document_citations,
|
||||
)
|
||||
from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
|
||||
from onyx.chat.models import AgentAnswerPiece
|
||||
from onyx.chat.models import ExtendedToolResponse
|
||||
|
@ -12,9 +12,6 @@ from onyx.agents.agent_search.deep_search.main.models import (
|
||||
)
|
||||
from onyx.agents.agent_search.deep_search.main.operations import get_query_info
|
||||
from onyx.agents.agent_search.deep_search.main.operations import logger
|
||||
from onyx.agents.agent_search.deep_search.main.operations import (
|
||||
remove_document_citations,
|
||||
)
|
||||
from onyx.agents.agent_search.deep_search.main.states import MainState
|
||||
from onyx.agents.agent_search.deep_search.main.states import (
|
||||
RefinedAnswerUpdate,
|
||||
@ -48,6 +45,9 @@ from onyx.agents.agent_search.shared_graph_utils.utils import (
|
||||
)
|
||||
from onyx.agents.agent_search.shared_graph_utils.utils import parse_question_id
|
||||
from onyx.agents.agent_search.shared_graph_utils.utils import relevance_from_docs
|
||||
from onyx.agents.agent_search.shared_graph_utils.utils import (
|
||||
remove_document_citations,
|
||||
)
|
||||
from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
|
||||
from onyx.chat.models import AgentAnswerPiece
|
||||
from onyx.chat.models import ExtendedToolResponse
|
||||
|
@ -1,4 +1,3 @@
|
||||
import re
|
||||
from collections.abc import Callable
|
||||
|
||||
from langgraph.types import StreamWriter
|
||||
@ -18,27 +17,6 @@ from onyx.utils.logger import setup_logger
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
def remove_document_citations(text: str) -> str:
    """
    Removes citation expressions of format '[[D1]]()' or '[[Q1]]()' from text.

    A citation is the letter D or Q followed by one or more digits, wrapped
    in double square brackets and immediately followed by an empty pair of
    parentheses. Whitespace around a removed citation is left untouched.

    Args:
        text: Input text containing citations

    Returns:
        Text with citations removed
    """
    # Pattern explanation:
    # \[\[(?:D|Q)\d+\]\]\(\) matches:
    #   \[\[     - literal [[ characters
    #   (?:D|Q)  - literal D or Q character
    #   \d+      - one or more digits
    #   \]\]     - literal ]] characters
    #   \(\)     - literal () characters
    return re.sub(r"\[\[(?:D|Q)\d+\]\]\(\)", "", text)
|
||||
|
||||
|
||||
def dispatch_subquestion(
|
||||
level: int, writer: StreamWriter
|
||||
) -> Callable[[str, int], None]:
|
||||
|
@ -11,8 +11,9 @@ from onyx.agents.agent_search.shared_graph_utils.utils import (
|
||||
get_persona_agent_prompt_expressions,
|
||||
)
|
||||
from onyx.agents.agent_search.shared_graph_utils.utils import get_today_prompt
|
||||
from onyx.agents.agent_search.shared_graph_utils.utils import remove_document_citations
|
||||
from onyx.agents.agent_search.shared_graph_utils.utils import summarize_history
|
||||
from onyx.configs.agent_configs import AGENT_MAX_STATIC_HISTORY_CHAR_LENGTH
|
||||
from onyx.configs.agent_configs import AGENT_MAX_STATIC_HISTORY_WORD_LENGTH
|
||||
from onyx.context.search.models import InferenceSection
|
||||
from onyx.llm.interfaces import LLMConfig
|
||||
from onyx.llm.utils import get_max_input_tokens
|
||||
@ -109,8 +110,8 @@ def build_history_prompt(config: AgentSearchConfig, question: str) -> str:
|
||||
else:
|
||||
continue
|
||||
history = "\n".join(history_components)
|
||||
|
||||
if len(history) > AGENT_MAX_STATIC_HISTORY_CHAR_LENGTH:
|
||||
history = remove_document_citations(history)
|
||||
if len(history.split()) > AGENT_MAX_STATIC_HISTORY_WORD_LENGTH:
|
||||
history = summarize_history(history, question, persona_base, model)
|
||||
|
||||
return HISTORY_PROMPT.format(history=history) if history else ""
|
||||
|
@ -1160,7 +1160,8 @@ Please answer with a simple 'yes' or 'no'.
|
||||
|
||||
HISTORY_CONTEXT_SUMMARY_PROMPT = """\n
|
||||
{persona_specification}
|
||||
You need to summarize the key parts of the history of a conversation between a user and an agent. The
|
||||
|
||||
Your task now is to summarize the key parts of the history of a conversation between a user and an agent. The
|
||||
summary has two purposes:
|
||||
1) providing the suitable context for a new question, and
|
||||
2) To capture the key information that was discussed and that the user may have a follow-up question about.
|
||||
|
@ -343,8 +343,12 @@ def get_answer_citation_ids(answer_str: str) -> list[int]:
|
||||
def summarize_history(
|
||||
history: str, question: str, persona_specification: str, model: LLM
|
||||
) -> str:
|
||||
history_context_prompt = HISTORY_CONTEXT_SUMMARY_PROMPT.format(
|
||||
persona_specification=persona_specification, question=question, history=history
|
||||
history_context_prompt = remove_document_citations(
|
||||
HISTORY_CONTEXT_SUMMARY_PROMPT.format(
|
||||
persona_specification=persona_specification,
|
||||
question=question,
|
||||
history=history,
|
||||
)
|
||||
)
|
||||
|
||||
history_response = model.invoke(history_context_prompt)
|
||||
@ -399,3 +403,24 @@ def get_langgraph_node_log_string(
|
||||
duration = datetime.now() - node_start_time
|
||||
results_str = "" if result is None else f" -- Result: {result}"
|
||||
return f"{node_start_time} -- {graph_component} - {node_name} -- Time taken: {duration}{results_str}"
|
||||
|
||||
|
||||
def remove_document_citations(text: str) -> str:
    """
    Removes citation expressions such as '[[D1]]()', '[[Q2]]', '[[3]]' or
    '[[D1]](link)' from text.

    A citation is one or more digits, optionally prefixed by the letter D or
    Q, wrapped in double square brackets. A parenthesized link target that
    directly follows the closing brackets (empty or not) is removed with it.
    Whitespace around a removed citation is left untouched.

    Args:
        text: Input text containing citations

    Returns:
        Text with citations removed
    """
    # Pattern explanation:
    # \[\[(?:D|Q)?\d+\]\](?:\([^)]*\))? matches:
    #   \[\[            - literal [[ characters
    #   (?:D|Q)?        - optional literal D or Q character
    #   \d+             - one or more digits
    #   \]\]            - literal ]] characters
    #   (?:\([^)]*\))?  - optional (...) group holding any characters except ')'
    return re.sub(r"\[\[(?:D|Q)?\d+\]\](?:\([^)]*\))?", "", text)
|
||||
|
@ -11,7 +11,7 @@ AGENT_DEFAULT_NUM_DOCS_FOR_REFINED_DECOMPOSITION = 5
|
||||
AGENT_DEFAULT_EXPLORATORY_SEARCH_RESULTS = 3
|
||||
AGENT_DEFAULT_MIN_ORIG_QUESTION_DOCS = 3
|
||||
AGENT_DEFAULT_MAX_ANSWER_CONTEXT_DOCS = 10
|
||||
AGENT_DEFAULT_MAX_STATIC_HISTORY_CHAR_LENGTH = 10000
|
||||
AGENT_DEFAULT_MAX_STATIC_HISTORY_WORD_LENGTH = 2000
|
||||
|
||||
#####
|
||||
# Agent Configs
|
||||
@ -72,9 +72,9 @@ AGENT_MAX_ANSWER_CONTEXT_DOCS = int(
|
||||
) # 8
|
||||
|
||||
|
||||
AGENT_MAX_STATIC_HISTORY_CHAR_LENGTH = int(
|
||||
os.environ.get("AGENT_MAX_STATIC_HISTORY_CHAR_LENGTH")
|
||||
or AGENT_DEFAULT_MAX_STATIC_HISTORY_CHAR_LENGTH
|
||||
) # 10000
|
||||
AGENT_MAX_STATIC_HISTORY_WORD_LENGTH = int(
|
||||
os.environ.get("AGENT_MAX_STATIC_HISTORY_WORD_LENGTH")
|
||||
or AGENT_DEFAULT_MAX_STATIC_HISTORY_WORD_LENGTH
|
||||
) # 2000
|
||||
|
||||
GRAPH_VERSION_NAME: str = "a"
|
||||
|
Loading…
x
Reference in New Issue
Block a user