increased logging

This commit is contained in:
joachim-danswer 2025-01-28 09:36:07 -08:00 committed by Evan Lohn
parent 325892a21c
commit 12d1186888
24 changed files with 70 additions and 39 deletions

View File

@ -23,6 +23,7 @@ from onyx.agents.agent_search.shared_graph_utils.utils import parse_question_id
from onyx.chat.models import AgentAnswerPiece
from onyx.chat.models import StreamStopInfo
from onyx.chat.models import StreamStopReason
from onyx.configs.agent_configs import AGENT_MAX_ANSWER_CONTEXT_DOCS
from onyx.utils.logger import setup_logger
logger = setup_logger()
@ -32,13 +33,13 @@ def answer_generation(
state: AnswerQuestionState, config: RunnableConfig
) -> QAGenerationUpdate:
now_start = datetime.now()
logger.debug(f"--------{now_start}--------START ANSWER GENERATION---")
logger.info(f"--------{now_start}--------START ANSWER GENERATION---")
agent_search_config = cast(AgentSearchConfig, config["metadata"]["config"])
question = state.question
docs = state.documents
level, question_nr = parse_question_id(state.question_id)
context_docs = state.context_documents
context_docs = state.context_documents[:AGENT_MAX_ANSWER_CONTEXT_DOCS]
persona = get_persona_expressions(agent_search_config.search_request.persona)
if len(context_docs) == 0:
@ -99,6 +100,10 @@ def answer_generation(
dispatch_custom_event("stream_finished", stop_event)
now_end = datetime.now()
logger.info(
f"{now_start} -- Answer generation SQ-{level} - Q{question_nr} - Time taken: {now_end - now_start}"
)
return QAGenerationUpdate(
answer=answer_str,
cited_docs=cited_docs,

View File

@ -13,7 +13,7 @@ def ingest_initial_base_retrieval(
) -> ExpandedRetrievalUpdate:
now_start = datetime.now()
logger.debug(f"--------{now_start}--------INGEST INITIAL RETRIEVAL---")
logger.info(f"--------{now_start}--------INGEST INITIAL RETRIEVAL---")
sub_question_retrieval_stats = (
state.base_expanded_retrieval_result.sub_question_retrieval_stats

View File

@ -80,6 +80,9 @@ def doc_reranking(
# TODO: stream deduped docs here, or decide to use search tool ranking/verification
now_end = datetime.now()
logger.info(
f"{now_start} -- Expanded Retrieval - Reranking - Time taken: {now_end - now_start}"
)
return DocRerankingUpdate(
reranked_documents=[
doc for doc in reranked_documents if type(doc) == InferenceSection

View File

@ -96,6 +96,9 @@ def doc_retrieval(state: RetrievalInput, config: RunnableConfig) -> DocRetrieval
query_info=query_info,
)
now_end = datetime.now()
logger.info(
f"{now_start} -- Expanded Retrieval - Retrieval - Time taken: {now_end - now_start}"
)
return DocRetrievalUpdate(
expanded_retrieval_results=[expanded_retrieval_result],
retrieved_documents=retrieved_docs,

View File

@ -8,6 +8,7 @@ from langchain_core.runnables.config import RunnableConfig
from onyx.agents.agent_search.deep_search_a.expanded_retrieval.operations import (
dispatch_subquery,
)
from onyx.agents.agent_search.deep_search_a.expanded_retrieval.operations import logger
from onyx.agents.agent_search.deep_search_a.expanded_retrieval.states import (
ExpandedRetrievalInput,
)
@ -60,6 +61,9 @@ def expand_queries(
rewritten_queries = llm_response.split("\n")
now_end = datetime.now()
logger.info(
f"{now_start} -- Expanded Retrieval - Query Expansion - Time taken: {now_end - now_start}"
)
return QueryExpansionUpdate(
expanded_queries=rewritten_queries,
log_messages=[

View File

@ -39,6 +39,7 @@ def parallelize_initial_sub_question_answering(
log_messages=[
f"{now_start} -- Main Edge - Parallelize Initial Sub-question Answering"
],
request_start_time=[],
),
)
for question_nr, question in enumerate(state.initial_decomp_questions)

View File

@ -61,7 +61,7 @@ def generate_initial_answer(
) -> InitialAnswerUpdate:
now_start = datetime.now()
logger.debug(f"--------{now_start}--------GENERATE INITIAL---")
logger.info(f"--------{now_start}--------GENERATE INITIAL---")
agent_a_config = cast(AgentSearchConfig, config["metadata"]["config"])
question = agent_a_config.search_request.query
@ -244,10 +244,6 @@ def generate_initial_answer(
now_end = datetime.now()
logger.debug(
f"--------{now_end}--{now_end - now_start}--------INITIAL AGENT ANSWER END---\n\n"
)
agent_base_end_time = datetime.now()
agent_base_metrics = AgentBaseMetrics(
@ -269,6 +265,10 @@ def generate_initial_answer(
duration__s=(agent_base_end_time - state.agent_start_time).total_seconds(),
)
logger.info(
f"{now_start} -- Main - Initial Answer generation, Time taken: {now_end - now_start}"
)
return InitialAnswerUpdate(
initial_answer=answer,
initial_agent_stats=initial_agent_stats,

View File

@ -13,7 +13,7 @@ def ingest_initial_base_retrieval(
) -> ExpandedRetrievalUpdate:
now_start = datetime.now()
logger.debug(f"--------{now_start}--------INGEST INITIAL RETRIEVAL---")
logger.info(f"--------{now_start}--------INGEST INITIAL RETRIEVAL---")
sub_question_retrieval_stats = (
state.base_expanded_retrieval_result.sub_question_retrieval_stats

View File

@ -15,7 +15,7 @@ def ingest_initial_sub_question_answers(
) -> DecompAnswersUpdate:
now_start = datetime.now()
logger.debug(f"--------{now_start}--------INGEST ANSWERS---")
logger.info(f"--------{now_start}--------INGEST ANSWERS---")
documents = []
context_documents = []
cited_docs = []

View File

@ -35,7 +35,7 @@ def initial_sub_question_creation(
) -> BaseDecompUpdate:
now_start = datetime.now()
logger.debug(f"--------{now_start}--------BASE DECOMP START---")
logger.info(f"--------{now_start}--------BASE DECOMP START---")
agent_a_config = cast(AgentSearchConfig, config["metadata"]["config"])
question = agent_a_config.search_request.query
@ -117,7 +117,9 @@ def initial_sub_question_creation(
now_end = datetime.now()
logger.debug(f"--------{now_end}--{now_end - now_start}--------BASE DECOMP END---")
logger.info(
f"{now_start} -- INITIAL SUBQUESTION ANSWERING - Base Decomposition, Time taken: {now_end - now_start}"
)
return BaseDecompUpdate(
initial_decomp_questions=decomp_list,

View File

@ -17,7 +17,7 @@ from onyx.db.chat import log_agent_sub_question_results
def agent_logging(state: MainState, config: RunnableConfig) -> MainOutput:
now_start = datetime.now()
logger.debug(f"--------{now_start}--------LOGGING NODE---")
logger.info(f"--------{now_start}--------LOGGING NODE---")
agent_start_time = state.agent_start_time
agent_base_end_time = state.agent_base_end_time
@ -109,6 +109,16 @@ def agent_logging(state: MainState, config: RunnableConfig) -> MainOutput:
],
)
logger.debug(f"--------{now_end}--{now_end - now_start}--------LOGGING NODE END---")
logger.info(f"--------{now_end}--{now_end - now_start}--------LOGGING NODE END---")
for log_message in state.log_messages:
logger.info(log_message)
logger.info("")
if state.agent_base_metrics:
logger.info(f"Initial loop: {state.agent_base_metrics.duration__s}")
if state.agent_refined_metrics:
logger.info(f"Refined loop: {state.agent_refined_metrics.duration__s}")
return main_output

View File

@ -18,7 +18,7 @@ def agent_path_decision(state: MainState, config: RunnableConfig) -> RoutingDeci
# agent_a_config.perform_initial_search_path_decision
# )
logger.debug(f"--------{now_start}--------DECIDING TO SEARCH OR GO TO LLM---")
logger.info(f"--------{now_start}--------DECIDING TO SEARCH OR GO TO LLM---")
routing = "agent_search"

View File

@ -20,7 +20,7 @@ def agent_search_start(
) -> ExploratorySearchUpdate:
now_start = datetime.now()
logger.debug(f"--------{now_start}--------EXPLORATORY SEARCH START---")
logger.info(f"--------{now_start}--------EXPLORATORY SEARCH START---")
agent_a_config = cast(AgentSearchConfig, config["metadata"]["config"])
question = agent_a_config.search_request.query

View File

@ -21,7 +21,7 @@ def answer_comparison(state: MainState, config: RunnableConfig) -> AnswerCompari
initial_answer = state.initial_answer
refined_answer = state.refined_answer
logger.debug(f"--------{now_start}--------ANSWER COMPARISON STARTED--")
logger.info(f"--------{now_start}--------ANSWER COMPARISON STARTED--")
answer_comparison_prompt = ANSWER_COMPARISON_PROMPT.format(
question=question, initial_answer=initial_answer, refined_answer=refined_answer
@ -48,8 +48,8 @@ def answer_comparison(state: MainState, config: RunnableConfig) -> AnswerCompari
now_end = datetime.now()
logger.debug(
f"--------{now_end}--{now_end - now_start}--------ANSWER COMPARISON COMPLETED---"
logger.info(
f"{now_start} -- MAIN - Answer comparison, Time taken: {now_end - now_start}"
)
return AnswerComparison(

View File

@ -25,7 +25,7 @@ def direct_llm_handling(
question = agent_a_config.search_request.query
persona = get_persona_expressions(agent_a_config.search_request.persona)
logger.debug(f"--------{now_start}--------LLM HANDLING START---")
logger.info(f"--------{now_start}--------LLM HANDLING START---")
model = agent_a_config.fast_llm
@ -62,7 +62,7 @@ def direct_llm_handling(
now_end = datetime.now()
logger.debug(f"--------{now_end}--{now_end - now_start}--------LLM HANDLING END---")
logger.info(f"--------{now_end}--{now_end - now_start}--------LLM HANDLING END---")
return InitialAnswerUpdate(
initial_answer=answer,

View File

@ -30,7 +30,7 @@ def entity_term_extraction_llm(
) -> EntityTermExtractionUpdate:
now_start = datetime.now()
logger.debug(f"--------{now_start}--------GENERATE ENTITIES & TERMS---")
logger.info(f"--------{now_start}--------GENERATE ENTITIES & TERMS---")
agent_a_config = cast(AgentSearchConfig, config["metadata"]["config"])
if not agent_a_config.allow_refinement:
@ -110,8 +110,8 @@ def entity_term_extraction_llm(
now_end = datetime.now()
logger.debug(
f"--------{now_end}--{now_end - now_start}--------ENTITY TERM EXTRACTION END---"
logger.info(
f"{now_start} -- MAIN - Entity term extraction, Time taken: {now_end - now_start}"
)
return EntityTermExtractionUpdate(

View File

@ -21,7 +21,7 @@ def generate_initial_base_search_only_answer(
) -> InitialAnswerBASEUpdate:
now_start = datetime.now()
logger.debug(f"--------{now_start}--------GENERATE INITIAL BASE ANSWER---")
logger.info(f"--------{now_start}--------GENERATE INITIAL BASE ANSWER---")
agent_a_config = cast(AgentSearchConfig, config["metadata"]["config"])
question = agent_a_config.search_request.query

View File

@ -54,7 +54,7 @@ def generate_refined_answer(
) -> RefinedAnswerUpdate:
now_start = datetime.now()
logger.debug(f"--------{now_start}--------GENERATE REFINED ANSWER---")
logger.info(f"--------{now_start}--------GENERATE REFINED ANSWER---")
agent_a_config = cast(AgentSearchConfig, config["metadata"]["config"])
question = agent_a_config.search_request.query
@ -307,8 +307,8 @@ def generate_refined_answer(
now_end = datetime.now()
logger.debug(
f"--------{now_end}--{now_end - now_start}--------REFINED ANSWER UPDATE END---"
logger.info(
f"{now_start} -- MAIN - Generate refined answer, Time taken: {now_end - now_start}"
)
return RefinedAnswerUpdate(
@ -317,4 +317,7 @@ def generate_refined_answer(
refined_agent_stats=refined_agent_stats,
agent_refined_end_time=agent_refined_end_time,
agent_refined_metrics=agent_refined_metrics,
log_messages=[
f"{now_start} -- MAIN - Generate refined answer, Time taken: {now_end - now_start}"
],
)

View File

@ -15,7 +15,7 @@ def ingest_refined_answers(
) -> DecompAnswersUpdate:
now_start = datetime.now()
logger.debug(f"--------{now_start}--------INGEST FOLLOW UP ANSWERS---")
logger.info(f"--------{now_start}--------INGEST FOLLOW UP ANSWERS---")
documents = []
answer_results = state.answer_results if hasattr(state, "answer_results") else []

View File

@ -16,7 +16,7 @@ def refined_answer_decision(
) -> RequireRefinedAnswerUpdate:
now_start = datetime.now()
logger.debug(f"--------{now_start}--------REFINED ANSWER DECISION---")
logger.info(f"--------{now_start}--------REFINED ANSWER DECISION---")
agent_a_config = cast(AgentSearchConfig, config["metadata"]["config"])
if "?" in agent_a_config.search_request.query:
@ -28,8 +28,8 @@ def refined_answer_decision(
now_end = datetime.now()
logger.debug(
f"--------{now_end}--{now_end - now_start}--------REFINED ANSWER DECISION END---"
logger.info(
f"{now_start} -- MAIN - Refined answer decision, Time taken: {now_end - now_start}"
)
log_messages = [
f"{now_start} -- Main - Refined answer decision: {decision}, Time taken: {now_end - now_start}"

View File

@ -46,7 +46,7 @@ def refined_sub_question_creation(
now_start = datetime.now()
logger.debug(f"--------{now_start}--------FOLLOW UP DECOMPOSE---")
logger.info(f"--------{now_start}--------FOLLOW UP DECOMPOSE---")
agent_refined_start_time = datetime.now()
@ -108,8 +108,8 @@ def refined_sub_question_creation(
now_end = datetime.now()
logger.debug(
f"--------{now_end}--{now_end - now_start}--------FOLLOW UP DECOMPOSE END---"
logger.info(
f"{now_start} -- MAIN - Refined sub question creation, Time taken: {now_end - now_start}"
)
return FollowUpSubQuestionsUpdate(

View File

@ -82,7 +82,7 @@ class InitialAnswerUpdate(LoggerUpdate):
agent_base_metrics: AgentBaseMetrics | None = None
class RefinedAnswerUpdate(RefinedAgentEndStats):
class RefinedAnswerUpdate(RefinedAgentEndStats, LoggerUpdate):
refined_answer: str = ""
refined_agent_stats: RefinedAgentStats | None = None
refined_answer_quality: bool = False

View File

@ -214,7 +214,7 @@ if __name__ == "__main__":
# query="When was Washington born?",
# query="What is Onyx?",
# query="What is the difference between astronomy and astrology?",
query="Do a search to tell me what is the difference between astronomy and astrology?",
query="Do a search to tell me what is the difference between astronomy and astrology?",
)
# Joachim custom persona

View File

@ -105,7 +105,7 @@ except ValueError:
)
AGENT_MAX_ANSWER_CONTEXT_DOCS_OS: int | str = os.environ.get(
"AGENT_MAX_ANSWER_CONTEXT_DOCS", "30"
"AGENT_MAX_ANSWER_CONTEXT_DOCS", "10"
)
try:
@ -116,7 +116,7 @@ except ValueError:
)
AGENT_MAX_STATIC_HISTORY_CHAR_LENGTH_OS: int | str = os.environ.get(
"AGENT_MAX_STATIC_HISTORY_CHAR_LENGTH_OS", "100"
"AGENT_MAX_STATIC_HISTORY_CHAR_LENGTH_OS", "10000"
)
try: