fixed citations when sections are selected (#3914)

* removed some dead code and fixed citation handling when a search request is made with specific sections selected

* fix black formatting issue
This commit is contained in:
evan-danswer 2025-02-05 14:16:07 -08:00 committed by GitHub
parent b469a7eff4
commit 29f5f4edfa
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 12 additions and 48 deletions

View File

@ -7,6 +7,7 @@ from langgraph.types import StreamWriter
from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
from onyx.chat.models import LlmDoc from onyx.chat.models import LlmDoc
from onyx.chat.models import OnyxContext
from onyx.chat.stream_processing.answer_response_handler import AnswerResponseHandler from onyx.chat.stream_processing.answer_response_handler import AnswerResponseHandler
from onyx.chat.stream_processing.answer_response_handler import CitationResponseHandler from onyx.chat.stream_processing.answer_response_handler import CitationResponseHandler
from onyx.chat.stream_processing.answer_response_handler import ( from onyx.chat.stream_processing.answer_response_handler import (
@ -23,7 +24,7 @@ def process_llm_stream(
should_stream_answer: bool, should_stream_answer: bool,
writer: StreamWriter, writer: StreamWriter,
final_search_results: list[LlmDoc] | None = None, final_search_results: list[LlmDoc] | None = None,
displayed_search_results: list[LlmDoc] | None = None, displayed_search_results: list[OnyxContext] | list[LlmDoc] | None = None,
) -> AIMessageChunk: ) -> AIMessageChunk:
tool_call_chunk = AIMessageChunk(content="") tool_call_chunk = AIMessageChunk(content="")

View File

@ -9,6 +9,7 @@ from onyx.agents.agent_search.basic.states import BasicState
from onyx.agents.agent_search.basic.utils import process_llm_stream from onyx.agents.agent_search.basic.utils import process_llm_stream
from onyx.agents.agent_search.models import GraphConfig from onyx.agents.agent_search.models import GraphConfig
from onyx.chat.models import LlmDoc from onyx.chat.models import LlmDoc
from onyx.chat.models import OnyxContexts
from onyx.tools.tool_implementations.search.search_tool import ( from onyx.tools.tool_implementations.search.search_tool import (
SEARCH_DOC_CONTENT_ID, SEARCH_DOC_CONTENT_ID,
) )
@ -50,13 +51,11 @@ def basic_use_tool_response(
if yield_item.id == FINAL_CONTEXT_DOCUMENTS_ID: if yield_item.id == FINAL_CONTEXT_DOCUMENTS_ID:
final_search_results = cast(list[LlmDoc], yield_item.response) final_search_results = cast(list[LlmDoc], yield_item.response)
elif yield_item.id == SEARCH_DOC_CONTENT_ID: elif yield_item.id == SEARCH_DOC_CONTENT_ID:
search_contexts = yield_item.response.contexts search_contexts = cast(OnyxContexts, yield_item.response).contexts
for doc in search_contexts: for doc in search_contexts:
if doc.document_id not in initial_search_results: if doc.document_id not in initial_search_results:
initial_search_results.append(doc) initial_search_results.append(doc)
initial_search_results = cast(list[LlmDoc], initial_search_results)
new_tool_call_chunk = AIMessageChunk(content="") new_tool_call_chunk = AIMessageChunk(content="")
if not agent_config.behavior.skip_gen_ai_answer_generation: if not agent_config.behavior.skip_gen_ai_answer_generation:
stream = llm.stream( stream = llm.stream(
@ -70,7 +69,9 @@ def basic_use_tool_response(
True, True,
writer, writer,
final_search_results=final_search_results, final_search_results=final_search_results,
displayed_search_results=initial_search_results, # when the search tool is called with specific doc ids, initial search
# results are not output. But, we still want i.e. citations to be processed.
displayed_search_results=initial_search_results or final_search_results,
) )
return BasicOutput(tool_call_chunk=new_tool_call_chunk) return BasicOutput(tool_call_chunk=new_tool_call_chunk)

View File

@ -183,6 +183,7 @@ class Answer:
citations_by_subquestion: dict[ citations_by_subquestion: dict[
SubQuestionKey, list[CitationInfo] SubQuestionKey, list[CitationInfo]
] = defaultdict(list) ] = defaultdict(list)
basic_subq_key = SubQuestionKey(level=BASIC_KEY[0], question_num=BASIC_KEY[1])
for packet in self.processed_streamed_output: for packet in self.processed_streamed_output:
if isinstance(packet, CitationInfo): if isinstance(packet, CitationInfo):
if packet.level_question_num is not None and packet.level is not None: if packet.level_question_num is not None and packet.level is not None:
@ -192,7 +193,7 @@ class Answer:
) )
].append(packet) ].append(packet)
elif packet.level is None: elif packet.level is None:
citations_by_subquestion[BASIC_SQ_KEY].append(packet) citations_by_subquestion[basic_subq_key].append(packet)
return citations_by_subquestion return citations_by_subquestion
def is_cancelled(self) -> bool: def is_cancelled(self) -> bool:

View File

@ -3,6 +3,7 @@ from collections.abc import Sequence
from pydantic import BaseModel from pydantic import BaseModel
from onyx.chat.models import LlmDoc from onyx.chat.models import LlmDoc
from onyx.chat.models import OnyxContext
from onyx.context.search.models import InferenceChunk from onyx.context.search.models import InferenceChunk
@ -11,7 +12,7 @@ class DocumentIdOrderMapping(BaseModel):
def map_document_id_order( def map_document_id_order(
chunks: Sequence[InferenceChunk | LlmDoc], one_indexed: bool = True chunks: Sequence[InferenceChunk | LlmDoc | OnyxContext], one_indexed: bool = True
) -> DocumentIdOrderMapping: ) -> DocumentIdOrderMapping:
order_mapping = {} order_mapping = {}
current = 1 if one_indexed else 0 current = 1 if one_indexed else 0

View File

@ -743,7 +743,7 @@ def upload_files_for_chat(
# to re-extract it every time we send a message # to re-extract it every time we send a message
if file_type == ChatFileType.DOC: if file_type == ChatFileType.DOC:
extracted_text = extract_file_text( extracted_text = extract_file_text(
file=file_content_io, # use the bytes we already read file=file_content_io, # use the bytes we already read
file_name=file.filename or "", file_name=file.filename or "",
) )
text_file_id = str(uuid.uuid4()) text_file_id = str(uuid.uuid4())

View File

@ -7,7 +7,6 @@ from typing import cast
from sqlalchemy.orm import Session from sqlalchemy.orm import Session
from onyx.chat.chat_utils import llm_doc_from_inference_section from onyx.chat.chat_utils import llm_doc_from_inference_section
from onyx.chat.llm_response_handler import LLMCall
from onyx.chat.models import AnswerStyleConfig from onyx.chat.models import AnswerStyleConfig
from onyx.chat.models import ContextualPruningConfig from onyx.chat.models import ContextualPruningConfig
from onyx.chat.models import DocumentPruningConfig from onyx.chat.models import DocumentPruningConfig
@ -371,41 +370,6 @@ class SearchTool(Tool):
prompt_config=self.prompt_config, prompt_config=self.prompt_config,
) )
"""Other utility functions"""
@classmethod
def get_search_result(
cls, llm_call: LLMCall
) -> tuple[list[LlmDoc], list[LlmDoc]] | None:
"""
Returns the final search results and a map of docs to their original search rank (which is what is displayed to user)
"""
if not llm_call.tool_call_info:
return None
final_search_results = []
initial_search_results = []
for yield_item in llm_call.tool_call_info:
if (
isinstance(yield_item, ToolResponse)
and yield_item.id == FINAL_CONTEXT_DOCUMENTS_ID
):
final_search_results = cast(list[LlmDoc], yield_item.response)
elif (
isinstance(yield_item, ToolResponse)
and yield_item.id == SEARCH_DOC_CONTENT_ID
):
search_contexts = yield_item.response.contexts
# original_doc_search_rank = 1
for doc in search_contexts:
if doc.document_id not in initial_search_results:
initial_search_results.append(doc)
initial_search_results = cast(list[LlmDoc], initial_search_results)
return final_search_results, initial_search_results
# Allows yielding the same responses as a SearchTool without being a SearchTool. # Allows yielding the same responses as a SearchTool without being a SearchTool.
# SearchTool passed in to allow for access to SearchTool properties. # SearchTool passed in to allow for access to SearchTool properties.

View File

@ -1,6 +1,5 @@
import json import json
import pytest
import requests import requests
from onyx.configs.constants import MessageType from onyx.configs.constants import MessageType
@ -66,9 +65,6 @@ def test_send_message_simple_with_history(reset: None) -> None:
assert found_doc["metadata"]["document_id"] == doc.id assert found_doc["metadata"]["document_id"] == doc.id
@pytest.mark.xfail(
reason="agent search broke this",
)
def test_using_reference_docs_with_simple_with_history_api_flow(reset: None) -> None: def test_using_reference_docs_with_simple_with_history_api_flow(reset: None) -> None:
# Creating an admin user (first user created is automatically an admin) # Creating an admin user (first user created is automatically an admin)
admin_user: DATestUser = UserManager.create(name="admin_user") admin_user: DATestUser = UserManager.create(name="admin_user")