danswer/backend/tests/unit/onyx/llm/test_chat_llm.py

from unittest.mock import patch

import litellm
import pytest
from langchain_core.messages import AIMessage
from langchain_core.messages import AIMessageChunk
from langchain_core.messages import HumanMessage
from litellm.types.utils import ChatCompletionDeltaToolCall
from litellm.types.utils import Delta
from litellm.types.utils import Function as LiteLLMFunction

from onyx.configs.app_configs import MOCK_LLM_RESPONSE
from onyx.llm.chat_llm import DefaultMultiLLM


def _create_delta(
    role: str | None = None,
    content: str | None = None,
    tool_calls: list[ChatCompletionDeltaToolCall] | None = None,
) -> Delta:
    delta = Delta(role=role, content=content)
    # NOTE: for some reason, if you pass tool_calls to the constructor, it doesn't actually
    # get set, so we have to do it this way
    delta.tool_calls = tool_calls
    return delta


@pytest.fixture
def default_multi_llm() -> DefaultMultiLLM:
    return DefaultMultiLLM(
        api_key="test_key",
        timeout=30,
        model_provider="openai",
        model_name="gpt-3.5-turbo",
    )


def test_multiple_tool_calls(default_multi_llm: DefaultMultiLLM) -> None:
    # Mock the litellm.completion function
    with patch("onyx.llm.chat_llm.litellm.completion") as mock_completion:
        # Create a mock response with multiple tool calls using litellm objects
        mock_response = litellm.ModelResponse(
            id="chatcmpl-123",
            choices=[
                litellm.Choices(
                    finish_reason="tool_calls",
                    index=0,
                    message=litellm.Message(
                        content=None,
                        role="assistant",
                        tool_calls=[
                            litellm.ChatCompletionMessageToolCall(
                                id="call_1",
                                function=LiteLLMFunction(
                                    name="get_weather",
                                    arguments='{"location": "New York"}',
                                ),
                                type="function",
                            ),
                            litellm.ChatCompletionMessageToolCall(
                                id="call_2",
                                function=LiteLLMFunction(
                                    name="get_time", arguments='{"timezone": "EST"}'
                                ),
                                type="function",
                            ),
                        ],
                    ),
                )
            ],
            model="gpt-3.5-turbo",
            usage=litellm.Usage(
                prompt_tokens=50, completion_tokens=30, total_tokens=80
            ),
        )
        mock_completion.return_value = mock_response

        # Define input messages
        messages = [HumanMessage(content="What's the weather and time in New York?")]

        # Define available tools
        tools = [
            {
                "type": "function",
                "function": {
                    "name": "get_weather",
                    "description": "Get the current weather for a location",
                    "parameters": {
                        "type": "object",
                        "properties": {"location": {"type": "string"}},
                        "required": ["location"],
                    },
                },
            },
            {
                "type": "function",
                "function": {
                    "name": "get_time",
                    "description": "Get the current time for a timezone",
                    "parameters": {
                        "type": "object",
                        "properties": {"timezone": {"type": "string"}},
                        "required": ["timezone"],
                    },
                },
            },
        ]

        # Call the _invoke_implementation method
        result = default_multi_llm.invoke(messages, tools)

        # Assert that the result is an AIMessage
        assert isinstance(result, AIMessage)

        # Assert that the content is None (as per the mock response)
        assert result.content == ""

        # Assert that there are two tool calls
        assert len(result.tool_calls) == 2

        # Assert the details of the first tool call
        assert result.tool_calls[0]["id"] == "call_1"
        assert result.tool_calls[0]["name"] == "get_weather"
        assert result.tool_calls[0]["args"] == {"location": "New York"}

        # Assert the details of the second tool call
        assert result.tool_calls[1]["id"] == "call_2"
        assert result.tool_calls[1]["name"] == "get_time"
        assert result.tool_calls[1]["args"] == {"timezone": "EST"}

        # Verify that litellm.completion was called with the correct arguments
        mock_completion.assert_called_once_with(
            model="openai/gpt-3.5-turbo",
            api_key="test_key",
            base_url=None,
            api_version=None,
            custom_llm_provider=None,
            messages=[
                {"role": "user", "content": "What's the weather and time in New York?"}
            ],
            tools=tools,
            tool_choice=None,
            stream=False,
            temperature=0.0,  # Default value from GEN_AI_TEMPERATURE
            timeout=30,
            parallel_tool_calls=False,
            mock_response=MOCK_LLM_RESPONSE,
        )


def test_multiple_tool_calls_streaming(default_multi_llm: DefaultMultiLLM) -> None:
    # Mock the litellm.completion function
    with patch("onyx.llm.chat_llm.litellm.completion") as mock_completion:
        # Create a mock response with multiple tool calls using litellm objects
        mock_response = [
            litellm.ModelResponse(
                id="chatcmpl-123",
                choices=[
                    litellm.Choices(
                        delta=_create_delta(
                            role="assistant",
                            tool_calls=[
                                ChatCompletionDeltaToolCall(
                                    id="call_1",
                                    function=LiteLLMFunction(
                                        name="get_weather", arguments='{"location": '
                                    ),
                                    type="function",
                                    index=0,
                                )
                            ],
                        ),
                        finish_reason=None,
                        index=0,
                    )
                ],
                model="gpt-3.5-turbo",
            ),
            litellm.ModelResponse(
                id="chatcmpl-123",
                choices=[
                    litellm.Choices(
                        delta=_create_delta(
                            tool_calls=[
                                ChatCompletionDeltaToolCall(
                                    id="",
                                    function=LiteLLMFunction(arguments='"New York"}'),
                                    type="function",
                                    index=0,
                                )
                            ]
                        ),
                        finish_reason=None,
                        index=0,
                    )
                ],
                model="gpt-3.5-turbo",
            ),
            litellm.ModelResponse(
                id="chatcmpl-123",
                choices=[
                    litellm.Choices(
                        delta=_create_delta(
                            tool_calls=[
                                ChatCompletionDeltaToolCall(
                                    id="call_2",
                                    function=LiteLLMFunction(
                                        name="get_time", arguments='{"timezone": "EST"}'
                                    ),
                                    type="function",
                                    index=1,
                                )
                            ]
                        ),
                        finish_reason="tool_calls",
                        index=0,
                    )
                ],
                model="gpt-3.5-turbo",
            ),
        ]
        mock_completion.return_value = mock_response

        # Define input messages and tools (same as in the non-streaming test)
        messages = [HumanMessage(content="What's the weather and time in New York?")]

        tools = [
            {
                "type": "function",
                "function": {
                    "name": "get_weather",
                    "description": "Get the current weather for a location",
                    "parameters": {
                        "type": "object",
                        "properties": {"location": {"type": "string"}},
                        "required": ["location"],
                    },
                },
            },
            {
                "type": "function",
                "function": {
                    "name": "get_time",
                    "description": "Get the current time for a timezone",
                    "parameters": {
                        "type": "object",
                        "properties": {"timezone": {"type": "string"}},
                        "required": ["timezone"],
                    },
                },
            },
        ]

        # Call the stream method
        stream_result = list(default_multi_llm.stream(messages, tools))

        # Assert that we received the correct number of chunks
        assert len(stream_result) == 3

        # Combine all chunks into a single AIMessage
        combined_result: AIMessage = AIMessageChunk(content="")
        for chunk in stream_result:
            combined_result += chunk  # type: ignore

        # Assert that the combined result matches our expectations
        assert isinstance(combined_result, AIMessage)
        assert combined_result.content == ""
        assert len(combined_result.tool_calls) == 2
        assert combined_result.tool_calls[0]["id"] == "call_1"
        assert combined_result.tool_calls[0]["name"] == "get_weather"
        assert combined_result.tool_calls[0]["args"] == {"location": "New York"}
        assert combined_result.tool_calls[1]["id"] == "call_2"
        assert combined_result.tool_calls[1]["name"] == "get_time"
        assert combined_result.tool_calls[1]["args"] == {"timezone": "EST"}

        # Verify that litellm.completion was called with the correct arguments
        mock_completion.assert_called_once_with(
            model="openai/gpt-3.5-turbo",
            api_key="test_key",
            base_url=None,
            api_version=None,
            custom_llm_provider=None,
            messages=[
                {"role": "user", "content": "What's the weather and time in New York?"}
            ],
            tools=tools,
            tool_choice=None,
            stream=True,
            temperature=0.0,  # Default value from GEN_AI_TEMPERATURE
            timeout=30,
            parallel_tool_calls=False,
            mock_response=MOCK_LLM_RESPONSE,
        )