fixed citations when sections selected (#3914)

* removed some dead code and fixed citations when a search request is made with sections selected
* fix black formatting issue
commit 29f5f4edfa (parent b469a7eff4)
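The substance of the fix is a fallback in how citation processing picks its display documents: when a search request pins specific sections (document ids), the search tool never emits initial search results, so the code now falls back to the final context documents. A minimal sketch of the pattern, using stand-in values rather than the repo's types:

    # Sketch of the fallback this commit introduces (stand-in values only).
    initial_search_results: list[str] = []  # empty when sections were pre-selected
    final_search_results = ["doc-a", "doc-b"]

    # An empty list is falsy, so `or` falls through to the final results,
    # and citations can still be resolved against real documents.
    displayed = initial_search_results or final_search_results
    assert displayed == ["doc-a", "doc-b"]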
@@ -7,6 +7,7 @@ from langgraph.types import StreamWriter
 from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
 from onyx.chat.models import LlmDoc
+from onyx.chat.models import OnyxContext
 from onyx.chat.stream_processing.answer_response_handler import AnswerResponseHandler
 from onyx.chat.stream_processing.answer_response_handler import CitationResponseHandler
 from onyx.chat.stream_processing.answer_response_handler import (

@@ -23,7 +24,7 @@ def process_llm_stream(
     should_stream_answer: bool,
     writer: StreamWriter,
     final_search_results: list[LlmDoc] | None = None,
-    displayed_search_results: list[LlmDoc] | None = None,
+    displayed_search_results: list[OnyxContext] | list[LlmDoc] | None = None,
 ) -> AIMessageChunk:
     tool_call_chunk = AIMessageChunk(content="")
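Note that the widened `displayed_search_results` annotation is a union of two list types, not a list of a union: a caller passes either an all-`OnyxContext` list or an all-`LlmDoc` list. A self-contained illustration of that distinction, with hypothetical stand-in classes rather than the repo's models:

    from dataclasses import dataclass

    @dataclass
    class ContextStandIn:  # hypothetical stand-in for OnyxContext
        document_id: str

    @dataclass
    class DocStandIn:  # hypothetical stand-in for LlmDoc
        document_id: str

    def handle(items: list[ContextStandIn] | list[DocStandIn] | None) -> list[str]:
        # Either an all-context list or an all-doc list is accepted whole;
        # list[ContextStandIn | DocStandIn] would instead permit mixing.
        return [item.document_id for item in items or []]

    print(handle([ContextStandIn("x"), ContextStandIn("y")]))  # ['x', 'y']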
@@ -9,6 +9,7 @@ from onyx.agents.agent_search.basic.states import BasicState
 from onyx.agents.agent_search.basic.utils import process_llm_stream
 from onyx.agents.agent_search.models import GraphConfig
 from onyx.chat.models import LlmDoc
+from onyx.chat.models import OnyxContexts
 from onyx.tools.tool_implementations.search.search_tool import (
     SEARCH_DOC_CONTENT_ID,
 )

@@ -50,13 +51,11 @@ def basic_use_tool_response(
         if yield_item.id == FINAL_CONTEXT_DOCUMENTS_ID:
             final_search_results = cast(list[LlmDoc], yield_item.response)
         elif yield_item.id == SEARCH_DOC_CONTENT_ID:
-            search_contexts = yield_item.response.contexts
+            search_contexts = cast(OnyxContexts, yield_item.response).contexts
             for doc in search_contexts:
                 if doc.document_id not in initial_search_results:
                     initial_search_results.append(doc)

-    initial_search_results = cast(list[LlmDoc], initial_search_results)
-
     new_tool_call_chunk = AIMessageChunk(content="")
     if not agent_config.behavior.skip_gen_ai_answer_generation:
         stream = llm.stream(

@@ -70,7 +69,9 @@ def basic_use_tool_response(
             True,
             writer,
             final_search_results=final_search_results,
-            displayed_search_results=initial_search_results,
+            # when the search tool is called with specific doc ids, initial search
+            # results are not output. But, we still want i.e. citations to be processed.
+            displayed_search_results=initial_search_results or final_search_results,
         )

     return BasicOutput(tool_call_chunk=new_tool_call_chunk)
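The `cast(OnyxContexts, yield_item.response)` added above only narrows the static type of the tool response; `typing.cast` does nothing at runtime. Conversely, the deleted `cast(list[LlmDoc], initial_search_results)` was misleading for the same reason: the list actually holds context objects, and the cast would not have converted them. A small standalone demonstration, where the class is a hypothetical stand-in for `onyx.chat.models.OnyxContexts`:

    from typing import cast

    class FakeOnyxContexts:  # hypothetical stand-in, not the repo's model
        def __init__(self, contexts: list[str]) -> None:
            self.contexts = contexts

    response: object = FakeOnyxContexts(["section 1", "section 2"])
    # cast() performs no conversion or check; it only tells the type
    # checker what `response` is, so the attribute access type-checks.
    contexts = cast(FakeOnyxContexts, response).contexts
    assert contexts == ["section 1", "section 2"]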
@@ -183,6 +183,7 @@ class Answer:
         citations_by_subquestion: dict[
             SubQuestionKey, list[CitationInfo]
         ] = defaultdict(list)
+        basic_subq_key = SubQuestionKey(level=BASIC_KEY[0], question_num=BASIC_KEY[1])
         for packet in self.processed_streamed_output:
             if isinstance(packet, CitationInfo):
                 if packet.level_question_num is not None and packet.level is not None:

@@ -192,7 +193,7 @@ class Answer:
                         )
                     ].append(packet)
                 elif packet.level is None:
-                    citations_by_subquestion[BASIC_SQ_KEY].append(packet)
+                    citations_by_subquestion[basic_subq_key].append(packet)
         return citations_by_subquestion

     def is_cancelled(self) -> bool:
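This hunk swaps the module-level `BASIC_SQ_KEY` constant for a locally built `SubQuestionKey`. For the `defaultdict` lookup to work, the key type must be hashable and compare by value; a frozen dataclass is one way to get both. A sketch with a hypothetical stand-in for the repo's `SubQuestionKey` (the field values here are assumptions):

    from collections import defaultdict
    from dataclasses import dataclass

    @dataclass(frozen=True)
    class SubQuestionKeySketch:  # stand-in; the real SubQuestionKey lives in onyx
        level: int
        question_num: int

    citations_by_subquestion: dict[SubQuestionKeySketch, list[str]] = defaultdict(list)
    basic_key = SubQuestionKeySketch(level=0, question_num=0)
    citations_by_subquestion[basic_key].append("citation-1")

    # Equal field values hash equally, so a freshly built key finds the entry.
    assert citations_by_subquestion[SubQuestionKeySketch(0, 0)] == ["citation-1"]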
@@ -3,6 +3,7 @@ from collections.abc import Sequence
 from pydantic import BaseModel

 from onyx.chat.models import LlmDoc
+from onyx.chat.models import OnyxContext
 from onyx.context.search.models import InferenceChunk

@@ -11,7 +12,7 @@ class DocumentIdOrderMapping(BaseModel):

 def map_document_id_order(
-    chunks: Sequence[InferenceChunk | LlmDoc], one_indexed: bool = True
+    chunks: Sequence[InferenceChunk | LlmDoc | OnyxContext], one_indexed: bool = True
 ) -> DocumentIdOrderMapping:
     order_mapping = {}
     current = 1 if one_indexed else 0
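`map_document_id_order` now also accepts `OnyxContext` items, which works because all three accepted types expose a `document_id` attribute. Judging from the context lines above (`order_mapping = {}`, `current = 1 if one_indexed else 0`), the function plausibly assigns each distinct document id its first-seen rank. A hedged reconstruction under that assumption, not the repo's exact code:

    from collections.abc import Sequence

    def map_document_id_order_sketch(
        chunks: Sequence, one_indexed: bool = True
    ) -> dict[str, int]:
        # Assign each distinct document_id its first-seen position,
        # starting from 1 (user-facing citation numbers) or 0.
        order_mapping: dict[str, int] = {}
        current = 1 if one_indexed else 0
        for chunk in chunks:
            if chunk.document_id not in order_mapping:
                order_mapping[chunk.document_id] = current
                current += 1
        return order_mapping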
@@ -743,7 +743,7 @@ def upload_files_for_chat(
         # to re-extract it every time we send a message
         if file_type == ChatFileType.DOC:
             extracted_text = extract_file_text(
                 file=file_content_io,  # use the bytes we already read
                 file_name=file.filename or "",
             )
             text_file_id = str(uuid.uuid4())
@@ -7,7 +7,6 @@ from typing import cast
 from sqlalchemy.orm import Session

 from onyx.chat.chat_utils import llm_doc_from_inference_section
-from onyx.chat.llm_response_handler import LLMCall
 from onyx.chat.models import AnswerStyleConfig
 from onyx.chat.models import ContextualPruningConfig
 from onyx.chat.models import DocumentPruningConfig

@@ -371,41 +370,6 @@ class SearchTool(Tool):
             prompt_config=self.prompt_config,
         )

-    """Other utility functions"""
-
-    @classmethod
-    def get_search_result(
-        cls, llm_call: LLMCall
-    ) -> tuple[list[LlmDoc], list[LlmDoc]] | None:
-        """
-        Returns the final search results and a map of docs to their original search rank (which is what is displayed to user)
-        """
-        if not llm_call.tool_call_info:
-            return None
-
-        final_search_results = []
-        initial_search_results = []
-
-        for yield_item in llm_call.tool_call_info:
-            if (
-                isinstance(yield_item, ToolResponse)
-                and yield_item.id == FINAL_CONTEXT_DOCUMENTS_ID
-            ):
-                final_search_results = cast(list[LlmDoc], yield_item.response)
-            elif (
-                isinstance(yield_item, ToolResponse)
-                and yield_item.id == SEARCH_DOC_CONTENT_ID
-            ):
-                search_contexts = yield_item.response.contexts
-                # original_doc_search_rank = 1
-                for doc in search_contexts:
-                    if doc.document_id not in initial_search_results:
-                        initial_search_results.append(doc)
-
-                initial_search_results = cast(list[LlmDoc], initial_search_results)
-
-        return final_search_results, initial_search_results
-
-
 # Allows yielding the same responses as a SearchTool without being a SearchTool.
 # SearchTool passed in to allow for access to SearchTool properties.
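The two hunks in this file are linked: deleting the unused `get_search_result` classmethod (the "dead code" from the commit message) removes what appears to be the only remaining reference to `LLMCall`, which is why the `from onyx.chat.llm_response_handler import LLMCall` import goes away in the first hunk.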
@@ -1,6 +1,5 @@
 import json

-import pytest
 import requests

 from onyx.configs.constants import MessageType

@@ -66,9 +65,6 @@ def test_send_message_simple_with_history(reset: None) -> None:
     assert found_doc["metadata"]["document_id"] == doc.id


-@pytest.mark.xfail(
-    reason="agent search broke this",
-)
 def test_using_reference_docs_with_simple_with_history_api_flow(reset: None) -> None:
     # Creating an admin user (first user created is automatically an admin)
     admin_user: DATestUser = UserManager.create(name="admin_user")
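Dropping the `@pytest.mark.xfail` decorator (and with it the now-unused `pytest` import) puts this test back on the must-pass path. For reference, `xfail` marks a test as an expected failure rather than an error; a generic illustration, unrelated to the repo's test:

    import pytest

    @pytest.mark.xfail(reason="demonstration of an expected failure")
    def test_known_broken() -> None:
        # Reported as "xfailed" when it fails, "xpassed" if it unexpectedly
        # passes; either way the suite does not go red because of it.
        assert False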