E2e assistant tests (#3869)

* adding llm override logic

* update

* general cleanup

* fix various tests

* rm

* update

* update

* better comments

* k

* k

* update to pass tests

* clarify content

* improve timeout
This commit is contained in:
pablonyx 2025-02-01 12:05:53 -08:00 committed by GitHub
parent a82cac5361
commit 3c34ddcc4f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
23 changed files with 405 additions and 76 deletions

View File

@ -8,6 +8,8 @@ on: push
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
GEN_AI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
MOCK_LLM_RESPONSE: true
jobs:
playwright-tests:

View File

@ -617,3 +617,8 @@ POD_NAMESPACE = os.environ.get("POD_NAMESPACE")
DEV_MODE = os.environ.get("DEV_MODE", "").lower() == "true"
TEST_ENV = os.environ.get("TEST_ENV", "").lower() == "true"
# Optional canned LLM reply for testing: any non-empty MOCK_LLM_RESPONSE value
# is kept verbatim as a string; an unset or empty variable yields None.
MOCK_LLM_RESPONSE = os.environ.get("MOCK_LLM_RESPONSE") or None

View File

@ -26,6 +26,7 @@ from langchain_core.messages.tool import ToolMessage
from langchain_core.prompt_values import PromptValue
from onyx.configs.app_configs import LOG_DANSWER_MODEL_INTERACTIONS
from onyx.configs.app_configs import MOCK_LLM_RESPONSE
from onyx.configs.model_configs import (
DISABLE_LITELLM_STREAMING,
)
@ -387,6 +388,7 @@ class DefaultMultiLLM(LLM):
try:
return litellm.completion(
mock_response=MOCK_LLM_RESPONSE,
# model choice
model=f"{self.config.model_provider}/{self.config.deployment_name or self.config.model_name}",
# NOTE: have to pass in None instead of empty string for these

View File

@ -37,6 +37,7 @@ from onyx.document_index.vespa.index import VespaIndex
from onyx.indexing.models import IndexingSetting
from onyx.key_value_store.factory import get_kv_store
from onyx.key_value_store.interface import KvKeyNotFoundError
from onyx.llm.llm_provider_options import OPEN_AI_MODEL_NAMES
from onyx.natural_language_processing.search_nlp_models import EmbeddingModel
from onyx.natural_language_processing.search_nlp_models import warm_up_bi_encoder
from onyx.natural_language_processing.search_nlp_models import warm_up_cross_encoder
@ -279,6 +280,7 @@ def setup_postgres(db_session: Session) -> None:
if GEN_AI_API_KEY and fetch_default_provider(db_session) is None:
# Only for dev flows
logger.notice("Setting up default OpenAI LLM for dev.")
llm_model = GEN_AI_MODEL_VERSION or "gpt-4o-mini"
fast_model = FAST_GEN_AI_MODEL_VERSION or "gpt-4o-mini"
model_req = LLMProviderUpsertRequest(
@ -292,8 +294,8 @@ def setup_postgres(db_session: Session) -> None:
fast_default_model_name=fast_model,
is_public=True,
groups=[],
display_model_names=[llm_model, fast_model],
model_names=[llm_model, fast_model],
display_model_names=OPEN_AI_MODEL_NAMES,
model_names=OPEN_AI_MODEL_NAMES,
)
new_llm_provider = upsert_llm_provider(
llm_provider=model_req, db_session=db_session

View File

@ -9,6 +9,7 @@ from litellm.types.utils import ChatCompletionDeltaToolCall
from litellm.types.utils import Delta
from litellm.types.utils import Function as LiteLLMFunction
from onyx.configs.app_configs import MOCK_LLM_RESPONSE
from onyx.llm.chat_llm import DefaultMultiLLM
@ -143,6 +144,7 @@ def test_multiple_tool_calls(default_multi_llm: DefaultMultiLLM) -> None:
temperature=0.0, # Default value from GEN_AI_TEMPERATURE
timeout=30,
parallel_tool_calls=False,
mock_response=MOCK_LLM_RESPONSE,
)
@ -287,4 +289,5 @@ def test_multiple_tool_calls_streaming(default_multi_llm: DefaultMultiLLM) -> No
temperature=0.0, # Default value from GEN_AI_TEMPERATURE
timeout=30,
parallel_tool_calls=False,
mock_response=MOCK_LLM_RESPONSE,
)

View File

@ -2,12 +2,13 @@ import { defineConfig, devices } from "@playwright/test";
export default defineConfig({
globalSetup: require.resolve("./tests/e2e/global-setup"),
timeout: 600000, // 10 minutes timeout
projects: [
{
name: "admin",
use: {
...devices["Desktop Chrome"],
viewport: { width: 1280, height: 720 },
storageState: "admin_auth.json",
},
testIgnore: ["**/codeUtils.test.ts"],

View File

@ -720,7 +720,6 @@ export function AssistantEditor({
name="description"
label="Description"
placeholder="Use this Assistant to help draft professional emails"
data-testid="assistant-description-input"
className="[&_input]:placeholder:text-text-muted/50"
/>

View File

@ -4,7 +4,7 @@ import { OnyxIcon } from "@/components/icons/icons";
export function ChatIntro({ selectedPersona }: { selectedPersona: Persona }) {
return (
<div className="flex flex-col items-center gap-6">
<div data-testid="chat-intro" className="flex flex-col items-center gap-6">
<div className="relative flex flex-col gap-y-4 w-fit mx-auto justify-center">
<div className="absolute z-10 items-center flex -left-12 top-1/2 -translate-y-1/2">
<AssistantIcon size={36} assistant={selectedPersona} />

View File

@ -297,6 +297,7 @@ export function ChatPage({
// 2. Selected assistant (assistant default in this chat session)
// 3. First pinned assistants (ordered list of pinned assistants)
// 4. Available assistants (ordered list of available assistants)
// Relevant test: `live_assistant.spec.ts`
const liveAssistant: Persona | undefined = useMemo(
() =>
alternativeAssistant ||

View File

@ -478,6 +478,7 @@ export function ChatInputBar({
onKeyDownCapture={handleKeyDown}
onChange={handleInputChange}
ref={textAreaRef}
id="onyx-chat-input-textarea"
className={`
m-0
w-full
@ -703,6 +704,7 @@ export function ChatInputBar({
</div>
<div className="flex my-auto">
<button
id="onyx-chat-input-send-button"
className={`cursor-pointer ${
chatState == "streaming" ||
chatState == "toolBuilding" ||

View File

@ -91,7 +91,10 @@ export default function LLMPopover({
return (
<Popover open={isOpen} onOpenChange={setIsOpen}>
<PopoverTrigger asChild>
<button className="focus:outline-none">
<button
className="focus:outline-none"
data-testid="llm-popover-trigger"
>
<ChatInputOption
minimize
toggle

View File

@ -402,7 +402,7 @@ export const AIMessage = ({
return (
<div
id="onyx-ai-message"
id={isComplete ? "onyx-ai-message" : undefined}
ref={trackedElementRef}
className={`py-5 ml-4 lg:px-5 relative flex `}
>

View File

@ -129,6 +129,7 @@ const SortableAssistant: React.FC<SortableAssistantProps> = ({
className="w-3 ml-[2px] mr-[2px] group-hover:visible invisible flex-none cursor-grab"
/>
<button
data-testid={`assistant-[${assistant.id}]`}
onClick={(e) => {
e.preventDefault();
if (!isDragging) {

View File

@ -133,6 +133,7 @@ export function UserDropdown({
onOpenChange={onOpenChange}
content={
<div
id="onyx-user-dropdown"
onClick={() => setUserInfoVisible(!userInfoVisible)}
className="flex relative cursor-pointer"
>

View File

@ -368,6 +368,13 @@ export interface LlmOverrideManager {
liveAssistant: Persona | null;
}
// Things to test
// 1. User override
// 2. User preference (defaults to system wide default if no preference set)
// 3. Current assistant
// 4. Current chat session
// 5. Live assistant
/*
LLM Override is as follows (i.e. this order)
- User override (explicitly set in the chat input bar)
@ -386,6 +393,8 @@ Changes take place as
- (uploadLLMOverride) User explicitly setting a model override (and we explicitly override and set the userSpecifiedOverride which we'll use in place of the user preferences unless overridden by an assistant)
If we have a live assistant, we should use that model override
Relevant test: `llm_ordering.spec.ts`.
*/
export function useLlmOverride(

View File

@ -1,54 +0,0 @@
import { test, expect } from "@playwright/test";
// Use pre-signed in "admin" storage state
test.use({
storageState: "admin_auth.json",
});
test("Chat workflow", async ({ page }) => {
// Initial setup
await page.goto("http://localhost:3000/chat", { timeout: 3000 });
// Interact with Art assistant
await page.locator("button").filter({ hasText: "Art" }).click();
await page.getByPlaceholder("Message Art assistant...").fill("Hi");
await page.keyboard.press("Enter");
await page.waitForTimeout(3000);
// Start a new chat
await page.getByRole("link", { name: "Start New Chat" }).click();
await page.waitForNavigation({ waitUntil: "networkidle" });
// Check for expected text
await expect(page.getByText("Assistant for generating")).toBeVisible();
// Interact with General assistant
await page.locator("button").filter({ hasText: "General" }).click();
// Check URL after clicking General assistant
await expect(page).toHaveURL("http://localhost:3000/chat?assistantId=-1", {
timeout: 5000,
});
// Create a new assistant
await page.getByRole("button", { name: "Explore Assistants" }).click();
await page.getByRole("button", { name: "Create" }).click();
await page.getByTestId("name").click();
await page.getByTestId("name").fill("Test Assistant");
await page.getByTestId("description").click();
await page.getByTestId("description").fill("Test Assistant Description");
await page.getByTestId("system_prompt").click();
await page.getByTestId("system_prompt").fill("Test Assistant Instructions");
await page.getByRole("button", { name: "Create" }).click();
// Verify new assistant creation
await expect(page.getByText("Test Assistant Description")).toBeVisible({
timeout: 5000,
});
// Start another new chat
await page.getByRole("link", { name: "Start New Chat" }).click();
await expect(page.getByText("Assistant with access to")).toBeVisible({
timeout: 5000,
});
});

View File

@ -0,0 +1,54 @@
import { test, expect } from "@playwright/test";
import { dragElementAbove, dragElementBelow } from "../utils/dragUtils";
import { loginAsRandomUser } from "../utils/auth";
test("Assistant Drag and Drop", async ({ page }) => {
  // Every sidebar assistant button carries a data-testid of the form `assistant-[<id>]`.
  const assistantSelector = '[data-testid^="assistant-["]';

  // Start from a clean session with a freshly signed-up user.
  await page.context().clearCookies();
  await loginAsRandomUser(page);

  // Navigate to the chat page
  await page.goto("http://localhost:3000/chat");

  // Reads the text of the first <p> inside each assistant button, in DOM order.
  // Buttons without a <p> contribute an empty string.
  const getAssistantOrder = async () => {
    const handles = await page.$$(assistantSelector);
    return Promise.all(
      handles.map(async (handle) => {
        const label = await handle.$("p");
        if (!label) {
          return "";
        }
        return label.textContent();
      })
    );
  };

  // Snapshot the order before any drag happens.
  const initialOrder = await getAssistantOrder();
  const assistants = page.locator(assistantSelector);

  // Drag the second assistant above the first: the two should swap places.
  await dragElementAbove(assistants.nth(1), assistants.nth(0), page);
  const orderAfterDragUp = await getAssistantOrder();
  expect(orderAfterDragUp[0]).toBe(initialOrder[1]);
  expect(orderAfterDragUp[1]).toBe(initialOrder[0]);

  // Drag involving the last assistant and the second slot.
  const lastIndex = (await assistants.count()) - 1;
  await page.waitForTimeout(3000);
  await dragElementBelow(assistants.nth(lastIndex), assistants.nth(1), page);

  // The assistant that was originally last must now sit in the second slot.
  const orderAfterDragDown = await getAssistantOrder();
  expect(orderAfterDragDown[1]).toBe(initialOrder[lastIndex]);

  // The new order must survive a page reload.
  await page.reload();
  const orderAfterRefresh = await getAssistantOrder();
  expect(orderAfterRefresh).toEqual(orderAfterDragDown);
});

View File

@ -0,0 +1,70 @@
import { test, expect } from "@playwright/test";
import { loginAsRandomUser } from "../utils/auth";
import {
navigateToAssistantInHistorySidebar,
sendMessage,
startNewChat,
switchModel,
} from "../utils/chatActions";
test("Chat workflow", async ({ page }) => {
  // Clear cookies and log in as a random user
  await page.context().clearCookies();
  await loginAsRandomUser(page);

  // Navigate to the chat page
  await page.goto("http://localhost:3000/chat");

  // Test interaction with the Art assistant
  await navigateToAssistantInHistorySidebar(
    page,
    "[-3]",
    "Assistant for generating"
  );
  await sendMessage(page, "Hi");

  // Start a new chat session
  await startNewChat(page);

  // Verify the presence of the expected text
  await expect(page.getByText("Assistant for generating")).toBeVisible();

  // Test interaction with the General assistant
  await navigateToAssistantInHistorySidebar(
    page,
    "[-1]",
    "Assistant with no search"
  );

  // Verify the URL after selecting the General assistant
  await expect(page).toHaveURL("http://localhost:3000/chat?assistantId=-1");

  // Test creation of a new assistant
  await page.getByRole("button", { name: "Explore Assistants" }).click();
  await page.getByRole("button", { name: "Create" }).click();
  await page.getByTestId("name").click();
  await page.getByTestId("name").fill("Test Assistant");
  await page.getByTestId("description").click();
  await page.getByTestId("description").fill("Test Assistant Description");
  await page.getByTestId("system_prompt").click();
  await page.getByTestId("system_prompt").fill("Test Assistant Instructions");
  await page.getByRole("button", { name: "Create" }).click();

  // Verify the successful creation of the new assistant
  await expect(page.getByText("Test Assistant Description")).toBeVisible({
    timeout: 5000,
  });

  // Start another new chat session
  await startNewChat(page);

  // Verify the presence of the default assistant text
  try {
    await expect(page.getByText("Assistant with access to")).toBeVisible({
      timeout: 5000,
    });
  } catch (error) {
    // Dump the page for debugging, then re-throw: without the re-throw this
    // assertion could never fail the test.
    console.error("Live Assistant final page content:");
    console.error(await page.content());
    throw error;
  }
});

View File

@ -0,0 +1,84 @@
import { test, expect } from "@playwright/test";
import { loginAsRandomUser } from "../utils/auth";
import {
navigateToAssistantInHistorySidebar,
sendMessage,
verifyCurrentModel,
switchModel,
startNewChat,
} from "../utils/chatActions";
test("LLM Ordering and Model Switching", async ({ page }) => {
  // Setup: Clear cookies and log in as a random user
  await page.context().clearCookies();
  await loginAsRandomUser(page);

  // Navigate to the chat page and verify URL
  await page.goto("http://localhost:3000/chat");
  await page.waitForSelector("#onyx-chat-input-textarea");
  // Use the retrying page-level matcher rather than a one-shot comparison of
  // page.url() (which is synchronous, so awaiting it did nothing).
  await expect(page).toHaveURL("http://localhost:3000/chat");

  // Configure user settings: Set default model to GPT 4 Turbo
  await page.locator("#onyx-user-dropdown").click();
  await page.getByText("User Settings").click();
  await page.getByRole("combobox").click();
  await page.getByLabel("GPT 4 Turbo", { exact: true }).click();
  await page.getByLabel("Close modal").click();
  await verifyCurrentModel(page, "GPT 4 Turbo");

  // Test Art Assistant: Should use its own model (GPT 4o)
  await navigateToAssistantInHistorySidebar(
    page,
    "[-3]",
    "Assistant for generating"
  );
  await sendMessage(page, "Sample message");
  await verifyCurrentModel(page, "GPT 4o");

  // Verify model persistence for Art Assistant
  await sendMessage(page, "Sample message");

  // Test new chat: Should use Art Assistant's model initially
  await startNewChat(page);
  await expect(page.getByText("Assistant for generating")).toBeVisible();
  await verifyCurrentModel(page, "GPT 4o");

  // Test another new chat: Should use user's default model (GPT 4 Turbo)
  await startNewChat(page);
  await verifyCurrentModel(page, "GPT 4 Turbo");

  // Test model switching within a chat
  await switchModel(page, "O1 Mini");
  await sendMessage(page, "Sample message");
  await verifyCurrentModel(page, "O1 Mini");

  // Create a custom assistant with a specific model
  await page.getByRole("button", { name: "Explore Assistants" }).click();
  await page.getByRole("button", { name: "Create" }).click();
  await page.waitForTimeout(2000);
  await page.getByTestId("name").fill("Sample Name");
  await page.getByTestId("description").fill("Sample Description");
  await page.getByTestId("system_prompt").fill("Sample Instructions");
  await page.getByRole("combobox").click();
  await page
    .getByLabel("GPT 4 Turbo (Preview)")
    .getByText("GPT 4 Turbo (Preview)")
    .click();
  await page.getByRole("button", { name: "Create" }).click();

  // Verify custom assistant uses its specified model
  await page.locator("#onyx-chat-input-textarea").fill("");
  await verifyCurrentModel(page, "GPT 4 Turbo (Preview)");

  // Ensure model persistence for custom assistant
  await sendMessage(page, "Sample message");
  await verifyCurrentModel(page, "GPT 4 Turbo (Preview)");

  // Switch back to Art Assistant and verify its model
  await navigateToAssistantInHistorySidebar(
    page,
    "[-3]",
    "Assistant for generating"
  );
  await verifyCurrentModel(page, "GPT 4o");
});

View File

@ -35,3 +35,40 @@ export async function loginAs(page: Page, userType: "admin" | "user") {
}
}
}
// Builds a throwaway email/password pair for signing up a fresh test user.
// The password is "P@ssw0rd_" + one uppercase letter + one special character
// + one digit + the same random base-36 suffix that is used in the email.
const generateRandomCredentials = () => {
  // Uniform integer in [0, bound).
  const pickIndex = (bound: number) => Math.floor(Math.random() * bound);

  // Up to 8 base-36 characters taken after the leading "0." of the random number.
  const suffix = Math.random().toString(36).slice(2, 10);

  const symbols = "!@#$%^&*()_+{}[]|:;<>,.?~";
  const symbol = symbols.charAt(pickIndex(symbols.length));
  const upper = String.fromCharCode(65 + pickIndex(26)); // 65 is "A"
  const digit = pickIndex(10);

  const email = `test_${suffix}@example.com`;
  const password = `P@ssw0rd_${upper}${symbol}${digit}${suffix}`;
  return { email, password };
};
// Signs up a brand-new user with random credentials through the signup form
// and waits for the redirect to the chat page. Returns the credentials used.
// Throws if the redirect to /chat does not happen.
export async function loginAsRandomUser(page: Page) {
  const credentials = generateRandomCredentials();

  await page.goto("http://localhost:3000/auth/signup");
  await page.fill("#email", credentials.email);
  await page.fill("#password", credentials.password);

  // Submit the signup form
  await page.click('button[type="submit"]');

  try {
    await page.waitForURL("http://localhost:3000/chat");
  } catch {
    // Record where the browser ended up before failing with a fixed message.
    console.log(`Timeout occurred. Current URL: ${page.url()}`);
    throw new Error("Failed to sign up and redirect to chat page");
  }

  return { email: credentials.email, password: credentials.password };
}

View File

@ -0,0 +1,48 @@
import { Page } from "@playwright/test";
import { expect } from "@playwright/test";
/**
 * Clicks the sidebar button whose test id is `assistant-${testId}` and waits
 * until `description` is visible on the page.
 *
 * @param page - Playwright page to drive.
 * @param testId - Suffix of the button's test id, e.g. `"[-3]"`.
 * @param description - Text expected to become visible after the click.
 * @throws Re-throws the assertion error after logging it and the body text.
 */
export async function navigateToAssistantInHistorySidebar(
  page: Page,
  testId: string,
  description: string
) {
  const sidebarEntry = page.getByTestId(`assistant-${testId}`);
  await sidebarEntry.click();

  const descriptionText = page.getByText(description);
  try {
    await expect(descriptionText).toBeVisible();
  } catch (error) {
    // Log the failure and the page's text for debugging, then fail the caller.
    console.error("Error in navigateToAssistantInHistorySidebar:", error);
    console.log("Page text:", await page.textContent("body"));
    throw error;
  }
}
/**
 * Types `message` into the chat input, clicks the send button, then waits for
 * an `#onyx-ai-message` element followed by a fixed two-second pause.
 *
 * NOTE(review): `waitForSelector` is satisfied by any matching element, so in
 * a chat that already has an AI message it may return before the new reply
 * exists; only the fixed pause covers that case. Confirm this is acceptable.
 */
export async function sendMessage(page: Page, message: string) {
  const chatInput = page.locator("#onyx-chat-input-textarea");
  await chatInput.click();
  await chatInput.fill(message);

  const sendButton = page.locator("#onyx-chat-input-send-button");
  await sendButton.click();

  await page.waitForSelector("#onyx-ai-message");
  await page.waitForTimeout(2000);
}
/**
 * Asserts that the text content of `#onyx-chat-input` contains `modelName`.
 * A one-second pause runs before the check and again after it.
 */
export async function verifyCurrentModel(page: Page, modelName: string) {
  const settleMs = 1000;

  await page.waitForTimeout(settleMs);
  const inputAreaText = await page.locator("#onyx-chat-input").textContent();
  expect(inputAreaText).toContain(modelName);
  await page.waitForTimeout(settleMs);
}
/**
 * Opens the LLM popover and clicks the button whose accessible name is exactly
 * `Logo ${modelName}`, then pauses for one second.
 */
export async function switchModel(page: Page, modelName: string) {
  const popoverTrigger = page.getByTestId("llm-popover-trigger");
  await popoverTrigger.click();

  const modelOption = page.getByRole("button", {
    name: `Logo ${modelName}`,
    exact: true,
  });
  await modelOption.click();

  await page.waitForTimeout(1000);
}
/**
 * Clicks the "Start New Chat" link and waits for the chat intro element
 * (`data-testid="chat-intro"`) to become visible.
 */
export async function startNewChat(page: Page) {
  const newChatLink = page.getByRole("link", { name: "Start New Chat" });
  await newChatLink.click();

  const chatIntro = page.locator('div[data-testid="chat-intro"]');
  await expect(chatIntro).toBeVisible();
}

View File

@ -0,0 +1,74 @@
import { Locator, Page } from "@playwright/test";
/**
 * Drags `sourceLocator` onto the top edge of `targetLocator` using raw mouse
 * events.
 *
 * The mouse is pressed at the horizontal centre of the source, three quarters
 * of the way down it, and released at the horizontal centre of the target, one
 * tenth of the way down it.
 *
 * @throws Error if either element has no bounding box.
 */
export async function dragElementAbove(
  sourceLocator: Locator,
  targetLocator: Locator,
  page: Page
) {
  const fromBox = await sourceLocator.boundingBox();
  const toBox = await targetLocator.boundingBox();
  if (fromBox === null || toBox === null) {
    throw new Error("Source/target bounding boxes not found.");
  }

  // Grab point: lower part of the source element.
  const grabX = fromBox.x + fromBox.width / 2;
  const grabY = fromBox.y + fromBox.height * 0.75;

  // Drop point: just inside the top of the target element.
  const dropX = toBox.x + toBox.width / 2;
  const dropY = toBox.y + toBox.height * 0.1;

  await page.mouse.move(grabX, grabY);
  await page.mouse.down();
  // Move in 20 intermediate steps rather than one jump.
  await page.mouse.move(dropX, dropY, { steps: 20 });
  await page.mouse.up();

  // Short pause for drag-and-drop transitions to finish.
  await page.waitForTimeout(200);
}
/**
 * Performs a downward drag between two elements using raw mouse events.
 *
 * NOTE(review): despite the parameter names, the mouse is pressed on
 * `targetLocator` (a quarter of the way down it) and released 50 pixels below
 * the bottom edge of `sourceLocator`. This looks inverted relative to the
 * function name, but existing specs may rely on it; confirm before swapping.
 *
 * @throws Error if either element has no bounding box.
 */
export async function dragElementBelow(
  sourceLocator: Locator,
  targetLocator: Locator,
  page: Page
) {
  // Read crosswise on purpose, to keep the existing behaviour (see note above).
  const pressBox = await targetLocator.boundingBox();
  const releaseBox = await sourceLocator.boundingBox();
  if (pressBox === null || releaseBox === null) {
    throw new Error("Source/target bounding boxes not found.");
  }

  // Press point: upper part of the pressed element.
  const pressX = pressBox.x + pressBox.width / 2;
  const pressY = pressBox.y + pressBox.height * 0.25;

  // Release point: 50 pixels past the bottom edge of the other element.
  const releaseX = releaseBox.x + releaseBox.width / 2;
  const releaseY = releaseBox.y + releaseBox.height + 50;

  await page.mouse.move(pressX, pressY);
  await page.mouse.down();
  // Move in 50 intermediate steps rather than one jump.
  await page.mouse.move(releaseX, releaseY, { steps: 50 });

  // Hold briefly before releasing so the drag is registered.
  await page.waitForTimeout(500);
  await page.mouse.up();

  // Pause for drag-and-drop transitions and animations to finish.
  await page.waitForTimeout(1000);
}

View File

@ -1,15 +0,0 @@
{
"cookies": [
{
"name": "fastapiusersauth",
"value": "n_EMYYKHn4tQbuPTEbtN1gJ6dQTGek9omJPhO2GhHoA",
"domain": "localhost",
"path": "/",
"expires": 1738801376.508558,
"httpOnly": true,
"secure": false,
"sameSite": "Lax"
}
],
"origins": []
}