danswer/backend/onyx/chat/answer.py
joachim-danswer 8750f14647 alignment & renaming of objects for initial (displayed) ranking and re-ranking/validation citations
- renamed post-reranking/validation citation information consistently to final_... (example: doc_id_to_rank_map -> final_doc_id_to_rank_map)
- changed and renamed objects containing initial ranking information (now: display_...) to be consistent with the final rankings (final_...); specifically, {} to [] for displayed_search_results
- for CitationInfo, changed citation_num from 'x-th citation in the response stream' to the initial display position of the doc [NOTE: test implications] (sketched below)
- changed tests:
    onyx/backend/tests/unit/onyx/chat/stream_processing/test_citation_processing.py
    onyx/backend/tests/unit/onyx/chat/stream_processing/test_citation_substitution.py
2025-01-05 15:44:34 -08:00
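
A minimal sketch of the CitationInfo change described above, assuming CitationInfo (from onyx.chat.models, imported in the file below) carries citation_num and document_id fields: a citation that happens to be emitted first in the response stream but points at the doc initially displayed at position 2 now carries citation_num=2 (its initial display position) rather than 1 (its emission order).

# Hypothetical illustration only, not part of answer.py:
citation = CitationInfo(
    citation_num=2,  # initial display position of the cited doc (previously: 1, the emission order)
    document_id="example-doc-id",  # assumed field/value for illustration
)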


from collections.abc import Callable
from collections.abc import Iterator
from uuid import uuid4

from langchain.schema.messages import BaseMessage
from langchain_core.messages import AIMessageChunk
from langchain_core.messages import ToolCall

from onyx.chat.llm_response_handler import LLMResponseHandlerManager
from onyx.chat.models import AnswerQuestionPossibleReturn
from onyx.chat.models import AnswerStyleConfig
from onyx.chat.models import CitationInfo
from onyx.chat.models import OnyxAnswerPiece
from onyx.chat.models import PromptConfig
from onyx.chat.prompt_builder.build import AnswerPromptBuilder
from onyx.chat.prompt_builder.build import default_build_system_message
from onyx.chat.prompt_builder.build import default_build_user_message
from onyx.chat.prompt_builder.build import LLMCall
from onyx.chat.stream_processing.answer_response_handler import (
    CitationResponseHandler,
)
from onyx.chat.stream_processing.answer_response_handler import (
    DummyAnswerResponseHandler,
)
from onyx.chat.stream_processing.utils import (
    map_document_id_order,
)
from onyx.chat.tool_handling.tool_response_handler import ToolResponseHandler
from onyx.file_store.utils import InMemoryChatFile
from onyx.llm.interfaces import LLM
from onyx.llm.models import PreviousMessage
from onyx.natural_language_processing.utils import get_tokenizer
from onyx.tools.force import ForceUseTool
from onyx.tools.models import ToolResponse
from onyx.tools.tool import Tool
from onyx.tools.tool_implementations.search.search_tool import SearchTool
from onyx.tools.tool_runner import ToolCallKickoff
from onyx.tools.utils import explicit_tool_calling_supported
from onyx.utils.logger import setup_logger

logger = setup_logger()

AnswerStream = Iterator[AnswerQuestionPossibleReturn | ToolCallKickoff | ToolResponse]

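# Orchestrates a single question/answer turn: builds the prompt, decides whether
# and how to run tools (forced, native tool-calling, or prompted for non-tool-calling
# LLMs), streams the LLM response through the tool/citation handlers, and exposes
# the processed stream plus accessors for the final answer text and citations.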
class Answer:
    def __init__(
        self,
        question: str,
        answer_style_config: AnswerStyleConfig,
        llm: LLM,
        prompt_config: PromptConfig,
        force_use_tool: ForceUseTool,
        message_history: list[PreviousMessage] | None = None,
        single_message_history: str | None = None,
        # newly passed in files to include as part of this question
        # TODO THIS NEEDS TO BE HANDLED
        latest_query_files: list[InMemoryChatFile] | None = None,
        files: list[InMemoryChatFile] | None = None,
        tools: list[Tool] | None = None,
        # NOTE: native tool-calling is only supported by OpenAI at the moment,
        # but that is the only provider we support it for anyway.
        # If set to True, never use the LLM's provided tool-calling functionality.
        skip_explicit_tool_calling: bool = False,
        # Returns the full document sections text from the search tool
        return_contexts: bool = False,
        skip_gen_ai_answer_generation: bool = False,
        is_connected: Callable[[], bool] | None = None,
    ) -> None:
        if single_message_history and message_history:
            raise ValueError(
                "Cannot provide both `message_history` and `single_message_history`"
            )

        self.question = question
        self.is_connected: Callable[[], bool] | None = is_connected

        self.latest_query_files = latest_query_files or []
        self.file_id_to_file = {file.file_id: file for file in (files or [])}

        self.tools = tools or []
        self.force_use_tool = force_use_tool

        self.message_history = message_history or []
        # used for QA flow where we only want to send a single message
        self.single_message_history = single_message_history

        self.answer_style_config = answer_style_config
        self.prompt_config = prompt_config

        self.llm = llm
        self.llm_tokenizer = get_tokenizer(
            provider_type=llm.config.model_provider,
            model_name=llm.config.model_name,
        )

        self._final_prompt: list[BaseMessage] | None = None

        self._streamed_output: list[str] | None = None
        self._processed_stream: (
            list[AnswerQuestionPossibleReturn | ToolResponse | ToolCallKickoff] | None
        ) = None

        self._return_contexts = return_contexts
        self.skip_gen_ai_answer_generation = skip_gen_ai_answer_generation
        self._is_cancelled = False

        self.using_tool_calling_llm = (
            explicit_tool_calling_supported(
                self.llm.config.model_provider, self.llm.config.model_name
            )
            and not skip_explicit_tool_calling
        )

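    # Resolve which tools are available for this call; if a tool is being
    # force-used, narrow the list down to just that tool.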
    def _get_tools_list(self) -> list[Tool]:
        if not self.force_use_tool.force_use:
            return self.tools

        tool = next(
            (t for t in self.tools if t.name == self.force_use_tool.tool_name), None
        )
        if tool is None:
            raise RuntimeError(f"Tool '{self.force_use_tool.tool_name}' not found")

        logger.info(
            f"Forcefully using tool='{tool.name}'"
            + (
                f" with args='{self.force_use_tool.args}'"
                if self.force_use_tool.args is not None
                else ""
            )
        )
        return [tool]

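    # Run a specific tool with pre-determined arguments by feeding a synthetic
    # tool-call chunk through the response handlers, then recurse into
    # _get_response with the resulting follow-up LLM call.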
    def _handle_specified_tool_call(
        self, llm_calls: list[LLMCall], tool: Tool, tool_args: dict
    ) -> AnswerStream:
        current_llm_call = llm_calls[-1]

        # make a dummy tool handler
        tool_handler = ToolResponseHandler([tool])

        dummy_tool_call_chunk = AIMessageChunk(content="")
        dummy_tool_call_chunk.tool_calls = [
            ToolCall(name=tool.name, args=tool_args, id=str(uuid4()))
        ]

        response_handler_manager = LLMResponseHandlerManager(
            tool_handler, DummyAnswerResponseHandler(), self.is_cancelled
        )
        yield from response_handler_manager.handle_llm_response(
            iter([dummy_tool_call_chunk])
        )

        new_llm_call = response_handler_manager.next_llm_call(current_llm_call)
        if new_llm_call:
            yield from self._get_response(llm_calls + [new_llm_call])
        else:
            raise RuntimeError("Tool call handler did not return a new LLM call")

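    # Core streaming loop: dispatch forced or prompted tool calls, wire up the
    # tool/citation handlers, stream the LLM response through them, and recurse
    # if the handlers produce a follow-up LLM call.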
    def _get_response(self, llm_calls: list[LLMCall]) -> AnswerStream:
        current_llm_call = llm_calls[-1]

        # handle the case where no decision has to be made; we simply run the tool
        if (
            current_llm_call.force_use_tool.force_use
            and current_llm_call.force_use_tool.args is not None
        ):
            tool_name, tool_args = (
                current_llm_call.force_use_tool.tool_name,
                current_llm_call.force_use_tool.args,
            )
            tool = next(
                (t for t in current_llm_call.tools if t.name == tool_name), None
            )
            if not tool:
                raise RuntimeError(f"Tool '{tool_name}' not found")

            yield from self._handle_specified_tool_call(llm_calls, tool, tool_args)
            return

        # special pre-logic for the non-tool-calling LLM case
        if not self.using_tool_calling_llm and current_llm_call.tools:
            chosen_tool_and_args = (
                ToolResponseHandler.get_tool_call_for_non_tool_calling_llm(
                    current_llm_call, self.llm
                )
            )
            if chosen_tool_and_args:
                tool, tool_args = chosen_tool_and_args
                yield from self._handle_specified_tool_call(llm_calls, tool, tool_args)
                return

        # if we're skipping gen ai answer generation, we should break out unless
        # we're forcing a tool call. If we don't, we might generate an answer,
        # which is a no-no!
        if (
            self.skip_gen_ai_answer_generation
            and not current_llm_call.force_use_tool.force_use
        ):
            return

        # set up "handlers" to listen to the LLM response stream,
        # feed back the processed results, handle tool call requests,
        # and figure out what the next LLM call should be
        tool_call_handler = ToolResponseHandler(current_llm_call.tools)

        final_search_results, displayed_search_results = SearchTool.get_search_result(
            current_llm_call
        ) or ([], [])

        # Quotes are no longer supported
        # answer_handler: AnswerResponseHandler
        # if self.answer_style_config.citation_config:
        #     answer_handler = CitationResponseHandler(
        #         context_docs=search_result,
        #         doc_id_to_rank_map=map_document_id_order(search_result),
        #     )
        # elif self.answer_style_config.quotes_config:
        #     answer_handler = QuotesResponseHandler(
        #         context_docs=search_result,
        #     )
        # else:
        #     raise ValueError("No answer style config provided")
        answer_handler = CitationResponseHandler(
            context_docs=final_search_results,
            final_doc_id_to_rank_map=map_document_id_order(final_search_results),
            display_doc_id_to_rank_map=map_document_id_order(displayed_search_results),
        )

        response_handler_manager = LLMResponseHandlerManager(
            tool_call_handler, answer_handler, self.is_cancelled
        )

        # DEBUG: good breakpoint
        stream = self.llm.stream(
            # For tool-calling LLMs, we want to insert the task prompt as part of this
            # flow; the LLM may choose not to call any tools and just generate the
            # answer, in which case the task prompt is needed.
            prompt=current_llm_call.prompt_builder.build(),
            tools=[tool.tool_definition() for tool in current_llm_call.tools] or None,
            tool_choice=(
                "required"
                if current_llm_call.tools and current_llm_call.force_use_tool.force_use
                else None
            ),
            structured_response_format=self.answer_style_config.structured_response_format,
        )
        yield from response_handler_manager.handle_llm_response(stream)

        new_llm_call = response_handler_manager.next_llm_call(current_llm_call)
        if new_llm_call:
            yield from self._get_response(llm_calls + [new_llm_call])

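    # Lazily run the full answer flow: build the initial prompt/LLM call, stream
    # the processed packets as they are produced, and cache them so subsequent
    # accesses replay the same stream without calling the LLM again.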
    @property
    def processed_streamed_output(self) -> AnswerStream:
        if self._processed_stream is not None:
            yield from self._processed_stream
            return

        prompt_builder = AnswerPromptBuilder(
            user_message=default_build_user_message(
                user_query=self.question,
                prompt_config=self.prompt_config,
                files=self.latest_query_files,
            ),
            message_history=self.message_history,
            llm_config=self.llm.config,
            single_message_history=self.single_message_history,
            raw_user_text=self.question,
        )
        prompt_builder.update_system_prompt(
            default_build_system_message(self.prompt_config)
        )
        llm_call = LLMCall(
            prompt_builder=prompt_builder,
            tools=self._get_tools_list(),
            force_use_tool=self.force_use_tool,
            files=self.latest_query_files,
            tool_call_info=[],
            using_tool_calling_llm=self.using_tool_calling_llm,
        )

        processed_stream = []
        for processed_packet in self._get_response([llm_call]):
            processed_stream.append(processed_packet)
            yield processed_packet

        self._processed_stream = processed_stream

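    # The full answer text, concatenated from the streamed OnyxAnswerPiece packets.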
    @property
    def llm_answer(self) -> str:
        answer = ""
        for packet in self.processed_streamed_output:
            if isinstance(packet, OnyxAnswerPiece) and packet.answer_piece:
                answer += packet.answer_piece
        return answer

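    # All CitationInfo packets emitted while streaming; per the change described in
    # the commit message above, each citation_num reflects the cited doc's initial
    # display position rather than the order in which citations appear in the stream.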
    @property
    def citations(self) -> list[CitationInfo]:
        citations: list[CitationInfo] = []
        for packet in self.processed_streamed_output:
            if isinstance(packet, CitationInfo):
                citations.append(packet)
        return citations

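    # Cancellation check passed to LLMResponseHandlerManager; once the is_connected
    # callback reports a disconnect, the flag is set and stays set.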
    def is_cancelled(self) -> bool:
        if self._is_cancelled:
            return True

        if self.is_connected is not None:
            if not self.is_connected():
                logger.debug("Answer stream has been cancelled")

            self._is_cancelled = not self.is_connected()

        return self._is_cancelled