E2e assistant tests (#3869)

* adding llm override logic

* update

* general cleanup

* fix various tests

* rm

* update

* update

* better comments

* k

* k

* update to pass tests

* clarify content

* improve timeout
Author: pablonyx
Date: 2025-02-01 12:05:53 -08:00
Committed by: GitHub
Parent: a82cac5361
Commit: 3c34ddcc4f
23 changed files with 405 additions and 76 deletions


@@ -617,3 +617,8 @@ POD_NAMESPACE = os.environ.get("POD_NAMESPACE")
 DEV_MODE = os.environ.get("DEV_MODE", "").lower() == "true"
 TEST_ENV = os.environ.get("TEST_ENV", "").lower() == "true"
+# Set to true to mock LLM responses for testing purposes
+MOCK_LLM_RESPONSE = (
+    os.environ.get("MOCK_LLM_RESPONSE") if os.environ.get("MOCK_LLM_RESPONSE") else None
+)
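Side note on the new setting (not part of the diff): despite the "Set to true" comment, MOCK_LLM_RESPONSE carries the mock response string itself, not a boolean, and os.environ.get already returns None for a missing key. An equivalent shorter form, which also treats an empty string as unset:

    MOCK_LLM_RESPONSE = os.environ.get("MOCK_LLM_RESPONSE") or None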


@@ -26,6 +26,7 @@ from langchain_core.messages.tool import ToolMessage
 from langchain_core.prompt_values import PromptValue
 from onyx.configs.app_configs import LOG_DANSWER_MODEL_INTERACTIONS
+from onyx.configs.app_configs import MOCK_LLM_RESPONSE
 from onyx.configs.model_configs import (
     DISABLE_LITELLM_STREAMING,
 )
@@ -387,6 +388,7 @@ class DefaultMultiLLM(LLM):
         try:
             return litellm.completion(
+                mock_response=MOCK_LLM_RESPONSE,
                 # model choice
                 model=f"{self.config.model_provider}/{self.config.deployment_name or self.config.model_name}",
                 # NOTE: have to pass in None instead of empty string for these
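For context, litellm's mock_response kwarg short-circuits the call: when it is a string, litellm returns it as the assistant message without contacting any provider, and when it is None the argument is ignored and a real request is made. A minimal standalone sketch of that behavior (model name and prompt are illustrative):

    import litellm

    # Returns a canned ModelResponse; no API key or network call needed.
    resp = litellm.completion(
        model="openai/gpt-4o-mini",
        messages=[{"role": "user", "content": "ping"}],
        mock_response="pong",  # None here would trigger a real call
    )
    print(resp.choices[0].message.content)  # -> "pong"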


@@ -37,6 +37,7 @@ from onyx.document_index.vespa.index import VespaIndex
 from onyx.indexing.models import IndexingSetting
 from onyx.key_value_store.factory import get_kv_store
 from onyx.key_value_store.interface import KvKeyNotFoundError
+from onyx.llm.llm_provider_options import OPEN_AI_MODEL_NAMES
 from onyx.natural_language_processing.search_nlp_models import EmbeddingModel
 from onyx.natural_language_processing.search_nlp_models import warm_up_bi_encoder
 from onyx.natural_language_processing.search_nlp_models import warm_up_cross_encoder
@@ -279,6 +280,7 @@ def setup_postgres(db_session: Session) -> None:
     if GEN_AI_API_KEY and fetch_default_provider(db_session) is None:
         # Only for dev flows
         logger.notice("Setting up default OpenAI LLM for dev.")
+        llm_model = GEN_AI_MODEL_VERSION or "gpt-4o-mini"
         fast_model = FAST_GEN_AI_MODEL_VERSION or "gpt-4o-mini"
         model_req = LLMProviderUpsertRequest(
@@ -292,8 +294,8 @@ def setup_postgres(db_session: Session) -> None:
             fast_default_model_name=fast_model,
             is_public=True,
             groups=[],
-            display_model_names=[llm_model, fast_model],
-            model_names=[llm_model, fast_model],
+            display_model_names=OPEN_AI_MODEL_NAMES,
+            model_names=OPEN_AI_MODEL_NAMES,
         )
         new_llm_provider = upsert_llm_provider(
             llm_provider=model_req, db_session=db_session
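The effect of the OPEN_AI_MODEL_NAMES switch is that the dev-default provider now advertises the full OpenAI model list from onyx.llm.llm_provider_options rather than only the two configured defaults. A hedged sketch of the difference, using an illustrative subset in place of the real list:

    llm_model = "gpt-4o-mini"
    fast_model = "gpt-4o-mini"

    # Before: only the configured defaults were selectable.
    before = [llm_model, fast_model]

    # After: the whole catalog is exposed (values illustrative,
    # the real list lives in onyx.llm.llm_provider_options).
    OPEN_AI_MODEL_NAMES = ["o1", "gpt-4o", "gpt-4o-mini", "gpt-4"]
    after = OPEN_AI_MODEL_NAMES

    assert set(before) <= set(after)  # the defaults stay available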


@@ -9,6 +9,7 @@ from litellm.types.utils import ChatCompletionDeltaToolCall
 from litellm.types.utils import Delta
 from litellm.types.utils import Function as LiteLLMFunction
+from onyx.configs.app_configs import MOCK_LLM_RESPONSE
 from onyx.llm.chat_llm import DefaultMultiLLM
@@ -143,6 +144,7 @@ def test_multiple_tool_calls(default_multi_llm: DefaultMultiLLM) -> None:
         temperature=0.0,  # Default value from GEN_AI_TEMPERATURE
         timeout=30,
         parallel_tool_calls=False,
+        mock_response=MOCK_LLM_RESPONSE,
     )
@@ -287,4 +289,5 @@ def test_multiple_tool_calls_streaming(default_multi_llm: DefaultMultiLLM) -> None:
         temperature=0.0,  # Default value from GEN_AI_TEMPERATURE
         timeout=30,
         parallel_tool_calls=False,
+        mock_response=MOCK_LLM_RESPONSE,
     )
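One practical consequence for the e2e tests: app_configs reads the environment at import time, so MOCK_LLM_RESPONSE must be set before the config module is first imported. A hedged sketch of pinning a deterministic answer for a test run (the canned string is illustrative):

    import os

    # Must happen before onyx.configs.app_configs is imported anywhere.
    os.environ["MOCK_LLM_RESPONSE"] = "This is a canned answer."

    from onyx.configs.app_configs import MOCK_LLM_RESPONSE

    assert MOCK_LLM_RESPONSE == "This is a canned answer."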