Added search quality testing pipeline (#1774)

2025-07-03 11:11:45 +02:00 · 2024-07-06 11:51:50 -07:00
parent de4d8e9a65
commit ac14369716
11 changed files with 1123 additions and 115 deletions
--- a/backend/danswer/db/connector_credential_pair.py
+++ b/backend/danswer/db/connector_credential_pair.py
@ -152,7 +152,7 @@ def add_credential_to_connector(
    credential_id: int,
    cc_pair_name: str | None,
    is_public: bool,
-    user: User,
+    user: User | None,
    db_session: Session,
 ) -> StatusResponse[int]:
    connector = fetch_connector_by_id(connector_id, db_session)
--- a/backend/danswer/document_index/vespa/index.py
+++ b/backend/danswer/document_index/vespa/index.py
@ -119,6 +119,7 @@ def _does_document_exist(
    chunk. This checks for whether the chunk exists already in the index"""
    doc_url = f"{DOCUMENT_ID_ENDPOINT.format(index_name=index_name)}/{doc_chunk_id}"
    doc_fetch_response = http_client.get(doc_url)
+
    if doc_fetch_response.status_code == 404:
        return False

--- a/backend/scripts/save_load_state.py
+++ b/backend/scripts/save_load_state.py
@ -86,7 +86,9 @@ def load_vespa(filename: str) -> None:
            new_doc = json.loads(line.strip())
            doc_id = new_doc["update"].split("::")[-1]
            response = requests.post(
-                DOCUMENT_ID_ENDPOINT + "/" + doc_id, headers=headers, json=new_doc
+                DOCUMENT_ID_ENDPOINT + "/" + doc_id,
+                headers=headers,
+                json=new_doc,
            )
            response.raise_for_status()

--- a/backend/tests/regression/answer_quality/README.md
+++ b/backend/tests/regression/answer_quality/README.md
@ -0,0 +1,68 @@
+# Search Quality Test Script
+
+This Python script automates the process of running search quality tests for a backend system.
+
+## Features
+
+- Loads configuration from a YAML file
+- Sets up Docker environment
+- Manages environment variables
+- Switches to specified Git branch
+- Uploads test documents
+- Runs search quality tests using Relari
+- Cleans up Docker containers (optional)
+
+## Usage
+
+1. Ensure you have the required dependencies installed.
+2. Configure the `search_test_config.yaml` file with your settings.
+3. Navigate to the answer_quality folder:
+```
+cd danswer/backend/tests/regression/answer_quality
+```
+4. Run the script:
+```
+python search_quality_test.py
+```
+
+## Configuration
+
+Edit `search_test_config.yaml` to set:
+
+- output_folder
+    The parent folder under which a `test<suffix>` folder is created for each test run
+    Each of these folders contains the postgres/vespa data as well as the results for that test
+- zipped_documents_file
+    The path to the zip file containing the files you'd like to test against
+- questions_file
+    The path to the JSONL file (one JSON object per line, each with a "uid" and a "question" field) containing the questions you'd like to test with
+- branch
+    The Git branch to check out and pull before running the test; set it to null if you want it to just use the code as is
+- clean_up_docker_containers
+    Set this to true to automatically delete this test's docker containers, volumes and networks after the test
+- launch_web_ui
+    Set this to true if you want to use the UI during/after the testing process
+- use_cloud_gpu
+    Set to true or false depending on if you want to use the remote gpu
+    If this is true, model_server_ip and model_server_port must also be set
+- model_server_ip
+    This is the ip of the remote model server
+    Only need to set this if use_cloud_gpu is true   
+- model_server_port
+    This is the port of the remote model server
+    Only need to set this if use_cloud_gpu is true
+- existing_test_suffix
+    Use this if you would like to relaunch a previous test instance
+    Input the suffix of the test you'd like to re-launch 
+    (E.g. to use the data from folder "test_1234_5678" put "_1234_5678")
+    No new files will be uploaded and no questions will be asked automatically; run the individual scripts for that
+    Leave empty to run a new test
+- limit
+    Max number of questions you'd like to ask against the dataset
+    Set to null for no limit
+- llm
+    Fill this out according to the normal LLM seeding
+
+Docker daemon must be running for this to work. 
+
+Each script can also be run individually: `file_uploader.py` uploads additional docs and `relari.py` runs additional tests against the instance named by `existing_test_suffix`
--- a/backend/tests/regression/answer_quality/api_utils.py
+++ b/backend/tests/regression/answer_quality/api_utils.py
@ -0,0 +1,220 @@
+import requests
+from retry import retry
+
+from danswer.configs.constants import DocumentSource
+from danswer.configs.constants import MessageType
+from danswer.connectors.models import InputType
+from danswer.db.enums import IndexingStatus
+from danswer.one_shot_answer.models import DirectQARequest
+from danswer.one_shot_answer.models import ThreadMessage
+from danswer.search.models import IndexFilters
+from danswer.search.models import OptionalSearchSetting
+from danswer.search.models import RetrievalDetails
+from danswer.server.documents.models import ConnectorBase
+from tests.regression.answer_quality.cli_utils import (
+    get_api_server_host_port,
+)
+
+
+def _api_url_builder(run_suffix: str, api_path: str) -> str:
+    return f"http://localhost:{get_api_server_host_port(run_suffix)}" + api_path
+
+
+@retry(tries=5, delay=2, backoff=2)
+def get_answer_from_query(query: str, run_suffix: str) -> tuple[list[str], str]:
+    filters = IndexFilters(
+        source_type=None,
+        document_set=None,
+        time_cutoff=None,
+        tags=None,
+        access_control_list=None,
+    )
+
+    messages = [ThreadMessage(message=query, sender=None, role=MessageType.USER)]
+
+    new_message_request = DirectQARequest(
+        messages=messages,
+        prompt_id=0,
+        persona_id=0,
+        retrieval_options=RetrievalDetails(
+            run_search=OptionalSearchSetting.ALWAYS,
+            real_time=True,
+            filters=filters,
+            enable_auto_detect_filters=False,
+        ),
+        chain_of_thought=False,
+        return_contexts=True,
+    )
+
+    url = _api_url_builder(run_suffix, "/query/answer-with-quote/")
+    headers = {
+        "Content-Type": "application/json",
+    }
+
+    body = new_message_request.dict()
+    body["user"] = None
+    try:
+        response_json = requests.post(url, headers=headers, json=body).json()
+        content_list = [
+            context.get("content", "")
+            for context in response_json.get("contexts", {}).get("contexts", [])
+        ]
+        answer = response_json.get("answer")
+    except Exception as e:
+        print("Failed to answer the questions, trying again")
+        print(f"error: {str(e)}")
+        raise e
+
+    print("\nquery: ", query)
+    print("answer: ", answer)
+    print("content_list: ", content_list)
+
+    return content_list, answer
+
+
+def check_if_query_ready(run_suffix: str) -> bool:
+    url = _api_url_builder(run_suffix, "/manage/admin/connector/indexing-status/")
+    headers = {
+        "Content-Type": "application/json",
+    }
+
+    indexing_status_dict = requests.get(url, headers=headers).json()
+
+    ongoing_index_attempts = False
+    doc_count = 0
+    for index_attempt in indexing_status_dict:
+        status = index_attempt["last_status"]
+        if status == IndexingStatus.IN_PROGRESS or status == IndexingStatus.NOT_STARTED:
+            ongoing_index_attempts = True
+        doc_count += index_attempt["docs_indexed"]
+
+    if not doc_count:
+        print("No docs indexed, waiting for indexing to start")
+    elif ongoing_index_attempts:
+        print(
+            f"{doc_count} docs indexed but waiting for ongoing indexing jobs to finish..."
+        )
+
+    return doc_count > 0 and not ongoing_index_attempts
+
+
+def run_cc_once(run_suffix: str, connector_id: int, credential_id: int) -> None:
+    url = _api_url_builder(run_suffix, "/manage/admin/connector/run-once/")
+    headers = {
+        "Content-Type": "application/json",
+    }
+
+    body = {
+        "connector_id": connector_id,
+        "credential_ids": [credential_id],
+        "from_beginning": True,
+    }
+    print("body:", body)
+    response = requests.post(url, headers=headers, json=body)
+    if response.status_code == 200:
+        print("Connector created successfully:", response.json())
+    else:
+        print("Failed status_code:", response.status_code)
+        print("Failed text:", response.text)
+
+
+def create_cc_pair(run_suffix: str, connector_id: int, credential_id: int) -> None:
+    url = _api_url_builder(
+        run_suffix, f"/manage/connector/{connector_id}/credential/{credential_id}"
+    )
+    headers = {
+        "Content-Type": "application/json",
+    }
+
+    body = {"name": "zip_folder_contents", "is_public": True}
+    print("body:", body)
+    response = requests.put(url, headers=headers, json=body)
+    if response.status_code == 200:
+        print("Connector created successfully:", response.json())
+    else:
+        print("Failed status_code:", response.status_code)
+        print("Failed text:", response.text)
+
+
+def _get_existing_connector_names(run_suffix: str) -> list[str]:
+    url = _api_url_builder(run_suffix, "/manage/connector")
+    headers = {
+        "Content-Type": "application/json",
+    }
+    body = {
+        "credential_json": {},
+        "admin_public": True,
+    }
+    response = requests.get(url, headers=headers, json=body)
+    if response.status_code == 200:
+        connectors = response.json()
+        return [connector["name"] for connector in connectors]
+    else:
+        raise RuntimeError(response.__dict__)
+
+
+def create_connector(run_suffix: str, file_paths: list[str]) -> int:
+    url = _api_url_builder(run_suffix, "/manage/admin/connector")
+    headers = {
+        "Content-Type": "application/json",
+    }
+    connector_name = base_connector_name = "search_eval_connector"
+    existing_connector_names = _get_existing_connector_names(run_suffix)
+
+    count = 1
+    while connector_name in existing_connector_names:
+        connector_name = base_connector_name + "_" + str(count)
+        count += 1
+
+    connector = ConnectorBase(
+        name=connector_name,
+        source=DocumentSource.FILE,
+        input_type=InputType.LOAD_STATE,
+        connector_specific_config={"file_locations": file_paths},
+        refresh_freq=None,
+        prune_freq=None,
+        disabled=False,
+    )
+
+    body = connector.dict()
+    print("body:", body)
+    response = requests.post(url, headers=headers, json=body)
+    if response.status_code == 200:
+        print("Connector created successfully:", response.json())
+        return response.json()["id"]
+    else:
+        raise RuntimeError(response.__dict__)
+
+
+def create_credential(run_suffix: str) -> int:
+    url = _api_url_builder(run_suffix, "/manage/credential")
+    headers = {
+        "Content-Type": "application/json",
+    }
+    body = {
+        "credential_json": {},
+        "admin_public": True,
+    }
+    response = requests.post(url, headers=headers, json=body)
+    if response.status_code == 200:
+        print("credential created successfully:", response.json())
+        return response.json()["id"]
+    else:
+        raise RuntimeError(response.__dict__)
+
+
+@retry(tries=10, delay=2, backoff=2)
+def upload_file(run_suffix: str, zip_file_path: str) -> list[str]:
+    files = [
+        ("files", open(zip_file_path, "rb")),
+    ]
+
+    api_path = _api_url_builder(run_suffix, "/manage/admin/connector/file/upload")
+    try:
+        response = requests.post(api_path, files=files)
+        response.raise_for_status()  # Raises an HTTPError for bad responses
+        print("file uploaded successfully:", response.json())
+        return response.json()["file_paths"]
+    except Exception as e:
+        print("File upload failed, waiting for API server to come up and trying again")
+        raise e
--- a/backend/tests/regression/answer_quality/cli_utils.py
+++ b/backend/tests/regression/answer_quality/cli_utils.py
@ -0,0 +1,203 @@
+import json
+import os
+import subprocess
+
+from retry import retry
+
+
+def _run_command(command: str) -> tuple[str, str]:
+    process = subprocess.Popen(
+        command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE
+    )
+    stdout, stderr = process.communicate()
+    if process.returncode != 0:
+        raise RuntimeError(f"Command failed with error: {stderr.decode()}")
+    return stdout.decode(), stderr.decode()
+
+
+def get_current_commit_sha() -> str:
+    print("Getting current commit SHA...")
+    stdout, _ = _run_command("git rev-parse HEAD")
+    sha = stdout.strip()
+    print(f"Current commit SHA: {sha}")
+    return sha
+
+
+def switch_to_branch(branch: str) -> None:
+    print(f"Switching to branch: {branch}...")
+    _run_command(f"git checkout {branch}")
+    _run_command("git pull")
+    print(f"Successfully switched to branch: {branch}")
+    print("Repository updated successfully.")
+
+
+def manage_data_directories(suffix: str, base_path: str, use_cloud_gpu: bool) -> str:
+    # Use the user's home directory as the base path
+    target_path = os.path.join(os.path.expanduser(base_path), f"test{suffix}")
+    directories = {
+        "DANSWER_POSTGRES_DATA_DIR": os.path.join(target_path, "postgres/"),
+        "DANSWER_VESPA_DATA_DIR": os.path.join(target_path, "vespa/"),
+    }
+    if not use_cloud_gpu:
+        directories["DANSWER_INDEX_MODEL_CACHE_DIR"] = os.path.join(
+            target_path, "index_model_cache/"
+        )
+        directories["DANSWER_INFERENCE_MODEL_CACHE_DIR"] = os.path.join(
+            target_path, "inference_model_cache/"
+        )
+
+    # Create directories if they don't exist
+    for env_var, directory in directories.items():
+        os.makedirs(directory, exist_ok=True)
+        os.environ[env_var] = directory
+        print(f"Set {env_var} to: {directory}")
+    relari_output_path = os.path.join(target_path, "relari_output/")
+    os.makedirs(relari_output_path, exist_ok=True)
+    return relari_output_path
+
+
+def set_env_variables(
+    remote_server_ip: str,
+    remote_server_port: str,
+    use_cloud_gpu: bool,
+    llm_config: dict,
+) -> None:
+    env_vars: dict = {}
+    env_vars["ENV_SEED_CONFIGURATION"] = json.dumps({"llms": [llm_config]})
+    env_vars["ENABLE_PAID_ENTERPRISE_EDITION_FEATURES"] = "true"
+    if use_cloud_gpu:
+        env_vars["MODEL_SERVER_HOST"] = remote_server_ip
+        env_vars["MODEL_SERVER_PORT"] = remote_server_port
+
+    for env_var_name, env_var in env_vars.items():
+        os.environ[env_var_name] = env_var
+        print(f"Set {env_var_name} to: {env_var}")
+
+
+def start_docker_compose(
+    run_suffix: str, launch_web_ui: bool, use_cloud_gpu: bool
+) -> None:
+    print("Starting Docker Compose...")
+    os.chdir(os.path.expanduser("~/danswer/deployment/docker_compose"))
+    command = f"docker compose -f docker-compose.search-testing.yml -p danswer-stack{run_suffix} up -d"
+    command += " --build"
+    command += " --pull always"
+    command += " --force-recreate"
+    if not launch_web_ui:
+        command += " --scale web_server=0"
+        command += " --scale nginx=0"
+    if use_cloud_gpu:
+        command += " --scale indexing_model_server=0"
+        command += " --scale inference_model_server=0"
+
+    print("Docker Command:\n", command)
+
+    _run_command(command)
+    print("The Docker has been Composed :)")
+
+
+def cleanup_docker(run_suffix: str) -> None:
+    print(
+        f"Deleting Docker containers, volumes, and networks for project suffix: {run_suffix}"
+    )
+
+    stdout, _ = _run_command("docker ps -a --format '{{json .}}'")
+
+    containers = [json.loads(line) for line in stdout.splitlines()]
+
+    project_name = f"danswer-stack{run_suffix}"
+    containers_to_delete = [
+        c for c in containers if c["Names"].startswith(project_name)
+    ]
+
+    if not containers_to_delete:
+        print(f"No containers found for project: {project_name}")
+    else:
+        container_ids = " ".join([c["ID"] for c in containers_to_delete])
+        _run_command(f"docker rm -f {container_ids}")
+
+        print(
+            f"Successfully deleted {len(containers_to_delete)} containers for project: {project_name}"
+        )
+
+    stdout, _ = _run_command("docker volume ls --format '{{.Name}}'")
+
+    volumes = stdout.splitlines()
+
+    volumes_to_delete = [v for v in volumes if v.startswith(project_name)]
+
+    if not volumes_to_delete:
+        print(f"No volumes found for project: {project_name}")
+        return
+
+    # Delete filtered volumes
+    volume_names = " ".join(volumes_to_delete)
+    _run_command(f"docker volume rm {volume_names}")
+
+    print(
+        f"Successfully deleted {len(volumes_to_delete)} volumes for project: {project_name}"
+    )
+    stdout, _ = _run_command("docker network ls --format '{{.Name}}'")
+
+    networks = stdout.splitlines()
+
+    networks_to_delete = [n for n in networks if run_suffix in n]
+
+    if not networks_to_delete:
+        print(f"No networks found containing suffix: {run_suffix}")
+    else:
+        network_names = " ".join(networks_to_delete)
+        _run_command(f"docker network rm {network_names}")
+
+        print(
+            f"Successfully deleted {len(networks_to_delete)} networks containing suffix: {run_suffix}"
+        )
+
+
+@retry(tries=5, delay=5, backoff=2)
+def get_api_server_host_port(suffix: str) -> str:
+    """
+    This pulls all containers with the provided suffix
+    It then grabs the JSON specific container with a name containing "api_server"
+    It then grabs the port info from the JSON and strips out the relevant data
+    """
+    container_name = "api_server"
+
+    stdout, _ = _run_command("docker ps -a --format '{{json .}}'")
+    containers = [json.loads(line) for line in stdout.splitlines()]
+    server_jsons = []
+
+    for container in containers:
+        if container_name in container["Names"] and suffix in container["Names"]:
+            server_jsons.append(container)
+
+    if not server_jsons:
+        raise RuntimeError(
+            f"No container found containing: {container_name} and {suffix}"
+        )
+    elif len(server_jsons) > 1:
+        raise RuntimeError(
+            f"Too many containers matching {container_name} found, please indicate a suffix"
+        )
+    server_json = server_jsons[0]
+
+    # This is in case the api_server has multiple ports
+    client_port = "8080"
+    ports = server_json.get("Ports", "")
+    port_infos = ports.split(",") if ports else []
+    port_dict = {}
+    for port_info in port_infos:
+        port_arr = port_info.split(":")[-1].split("->") if port_info else []
+        if len(port_arr) == 2:
+            port_dict[port_arr[1]] = port_arr[0]
+
+    # Find the host port where client_port is in the key
+    matching_ports = [value for key, value in port_dict.items() if client_port in key]
+
+    if len(matching_ports) > 1:
+        raise RuntimeError(f"Too many ports matching {client_port} found")
+    if not matching_ports:
+        raise RuntimeError(
+            f"No port found containing: {client_port} for container: {container_name} and suffix: {suffix}"
+        )
+    return matching_ports[0]
--- a/backend/tests/regression/answer_quality/file_uploader.py
+++ b/backend/tests/regression/answer_quality/file_uploader.py
@ -0,0 +1,31 @@
+import os
+from types import SimpleNamespace
+
+import yaml
+
+from tests.regression.answer_quality.api_utils import create_cc_pair
+from tests.regression.answer_quality.api_utils import create_connector
+from tests.regression.answer_quality.api_utils import create_credential
+from tests.regression.answer_quality.api_utils import run_cc_once
+from tests.regression.answer_quality.api_utils import upload_file
+
+
+def upload_test_files(zip_file_path: str, run_suffix: str) -> None:
+    print("zip:", zip_file_path)
+    file_paths = upload_file(run_suffix, zip_file_path)
+
+    conn_id = create_connector(run_suffix, file_paths)
+    cred_id = create_credential(run_suffix)
+
+    create_cc_pair(run_suffix, conn_id, cred_id)
+    run_cc_once(run_suffix, conn_id, cred_id)
+
+
+if __name__ == "__main__":
+    current_dir = os.path.dirname(os.path.abspath(__file__))
+    config_path = os.path.join(current_dir, "search_test_config.yaml")
+    with open(config_path, "r") as file:
+        config = SimpleNamespace(**yaml.safe_load(file))
+    file_location = config.zipped_documents_file
+    run_suffix = config.existing_test_suffix
+    upload_test_files(file_location, run_suffix)
--- a/backend/tests/regression/answer_quality/relari.py
+++ b/backend/tests/regression/answer_quality/relari.py
@ -1,138 +1,108 @@
-import argparse
 import json
+import os
+import time
+from types import SimpleNamespace

-from sqlalchemy.orm import Session
+import yaml

-from danswer.configs.constants import MessageType
-from danswer.db.engine import get_sqlalchemy_engine
-from danswer.one_shot_answer.answer_question import get_search_answer
-from danswer.one_shot_answer.models import DirectQARequest
-from danswer.one_shot_answer.models import OneShotQAResponse
-from danswer.one_shot_answer.models import ThreadMessage
-from danswer.search.models import IndexFilters
-from danswer.search.models import OptionalSearchSetting
-from danswer.search.models import RetrievalDetails
+from tests.regression.answer_quality.api_utils import check_if_query_ready
+from tests.regression.answer_quality.api_utils import get_answer_from_query
+from tests.regression.answer_quality.cli_utils import get_current_commit_sha


-def get_answer_for_question(query: str, db_session: Session) -> OneShotQAResponse:
-    filters = IndexFilters(
-        source_type=None,
-        document_set=None,
-        time_cutoff=None,
-        tags=None,
-        access_control_list=None,
-    )
+def _get_relari_outputs(samples: list[dict], run_suffix: str) -> list[dict]:
+    while not check_if_query_ready(run_suffix):
+        time.sleep(5)

-    messages = [ThreadMessage(message=query, sender=None, role=MessageType.USER)]
-
-    new_message_request = DirectQARequest(
-        messages=messages,
-        prompt_id=0,
-        persona_id=0,
-        retrieval_options=RetrievalDetails(
-            run_search=OptionalSearchSetting.ALWAYS,
-            real_time=True,
-            filters=filters,
-            enable_auto_detect_filters=False,
-        ),
-        chain_of_thought=False,
-        return_contexts=True,
-    )
-
-    answer = get_search_answer(
-        query_req=new_message_request,
-        user=None,
-        max_document_tokens=None,
-        max_history_tokens=None,
-        db_session=db_session,
-        answer_generation_timeout=100,
-        enable_reflexion=False,
-        bypass_acl=True,
-    )
-
-    return answer
-
-
-def read_questions(questions_file_path: str) -> list[dict]:
-    samples = []
-    with open(questions_file_path, "r", encoding="utf-8") as file:
-        for line in file:
-            sample = json.loads(line.strip())
-            samples.append(sample)
-    return samples
-
-
-def get_relari_outputs(samples: list[dict]) -> list[dict]:
    relari_outputs = []
-    with Session(get_sqlalchemy_engine(), expire_on_commit=False) as db_session:
-        for sample in samples:
-            answer = get_answer_for_question(
-                query=sample["question"], db_session=db_session
-            )
-            assert answer.contexts
+    for sample in samples:
+        retrieved_context, answer = get_answer_from_query(
+            query=sample["question"],
+            run_suffix=run_suffix,
+        )

-            relari_outputs.append(
-                {
-                    "label": sample["uid"],
-                    "question": sample["question"],
-                    "answer": answer.answer,
-                    "retrieved_context": [
-                        context.content for context in answer.contexts.contexts
-                    ],
-                }
-            )
+        relari_outputs.append(
+            {
+                "label": sample["uid"],
+                "question": sample["question"],
+                "answer": answer,
+                "retrieved_context": retrieved_context,
+            }
+        )

    return relari_outputs


-def write_output_file(relari_outputs: list[dict], output_file: str) -> None:
-    with open(output_file, "w", encoding="utf-8") as file:
+def _write_output_file(
+    relari_outputs: list[dict], output_folder_path: str, run_suffix: str
+) -> None:
+    metadata = {"commit_sha": get_current_commit_sha(), "run_suffix": run_suffix}
+
+    counter = 1
+    output_file_path = os.path.join(output_folder_path, "results.txt")
+    metadata_file_path = os.path.join(output_folder_path, "run_metadata.yaml")
+    while os.path.exists(output_file_path) or os.path.exists(metadata_file_path):
+        output_file_path = os.path.join(output_folder_path, f"results_{counter}.txt")
+        metadata_file_path = os.path.join(
+            output_folder_path, f"run_metadata_{counter}.txt"
+        )
+        counter += 1
+    print("saving question results to:", output_file_path)
+    print("saving metadata to:", metadata_file_path)
+    with open(metadata_file_path, "w", encoding="utf-8") as yaml_file:
+        yaml.dump(metadata, yaml_file)
+    with open(output_file_path, "w", encoding="utf-8") as file:
        for output in relari_outputs:
            file.write(json.dumps(output) + "\n")
+            file.flush()


-def main(questions_file: str, output_file: str, limit: int | None = None) -> None:
-    samples = read_questions(questions_file)
+def _read_questions_jsonl(questions_file_path: str) -> list[dict]:
+    questions = []
+    with open(questions_file_path, "r") as file:
+        for line in file:
+            json_obj = json.loads(line)
+            questions.append(json_obj)
+    return questions
+
+
+def answer_relari_questions(
+    questions_file_path: str,
+    results_folder_path: str,
+    run_suffix: str,
+    limit: int | None = None,
+) -> None:
+    samples = _read_questions_jsonl(questions_file_path)

    if limit is not None:
        samples = samples[:limit]

-    # Use to be in this format but has since changed
-    # response_dict = {
-    #     "question": sample["question"],
-    #     "retrieved_contexts": [
-    #         context.content for context in answer.contexts.contexts
-    #     ],
-    #     "ground_truth_contexts": sample["ground_truth_contexts"],
-    #     "answer": answer.answer,
-    #     "ground_truths": sample["ground_truths"],
-    # }
+    relari_outputs = _get_relari_outputs(samples=samples, run_suffix=run_suffix)

-    relari_outputs = get_relari_outputs(samples=samples)
+    _write_output_file(relari_outputs, results_folder_path, run_suffix)

-    write_output_file(relari_outputs, output_file)
+
+def main() -> None:
+    current_dir = os.path.dirname(os.path.abspath(__file__))
+    config_path = os.path.join(current_dir, "search_test_config.yaml")
+    with open(config_path, "r") as file:
+        config = SimpleNamespace(**yaml.safe_load(file))
+
+    current_output_folder = os.path.expanduser(config.output_folder)
+    if config.existing_test_suffix:
+        current_output_folder = os.path.join(
+            current_output_folder, "test" + config.existing_test_suffix, "relari_output"
+        )
+    else:
+        current_output_folder = os.path.join(current_output_folder, "no_defined_suffix")
+
+    answer_relari_questions(
+        config.questions_file,
+        current_output_folder,
+        config.existing_test_suffix,
+        config.limit,
+    )


 if __name__ == "__main__":
-    parser = argparse.ArgumentParser()
-    parser.add_argument(
-        "--questions_file",
-        type=str,
-        help="Path to the Relari questions file.",
-        default="./tests/regression/answer_quality/combined_golden_dataset.jsonl",
-    )
-    parser.add_argument(
-        "--output_file",
-        type=str,
-        help="Path to the output results file.",
-        default="./tests/regression/answer_quality/relari_results.txt",
-    )
-    parser.add_argument(
-        "--limit",
-        type=int,
-        default=None,
-        help="Limit the number of examples to process.",
-    )
-    args = parser.parse_args()
-
-    main(args.questions_file, args.output_file, args.limit)
+    main()
--- a/backend/tests/regression/answer_quality/search_quality_test.py
+++ b/backend/tests/regression/answer_quality/search_quality_test.py
@ -0,0 +1,58 @@
+import os
+from datetime import datetime
+from types import SimpleNamespace
+
+import yaml
+
+from tests.regression.answer_quality.cli_utils import cleanup_docker
+from tests.regression.answer_quality.cli_utils import manage_data_directories
+from tests.regression.answer_quality.cli_utils import set_env_variables
+from tests.regression.answer_quality.cli_utils import start_docker_compose
+from tests.regression.answer_quality.cli_utils import switch_to_branch
+from tests.regression.answer_quality.file_uploader import upload_test_files
+from tests.regression.answer_quality.relari import answer_relari_questions
+
+
+def load_config(config_filename: str) -> SimpleNamespace:
+    current_dir = os.path.dirname(os.path.abspath(__file__))
+    config_path = os.path.join(current_dir, config_filename)
+    with open(config_path, "r") as file:
+        return SimpleNamespace(**yaml.safe_load(file))
+
+
+def main() -> None:
+    config = load_config("search_test_config.yaml")
+    if config.existing_test_suffix:
+        run_suffix = config.existing_test_suffix
+        print("launching danswer with existing data suffix:", run_suffix)
+    else:
+        run_suffix = datetime.now().strftime("_%Y%m%d_%H%M%S")
+        print("run_suffix:", run_suffix)
+
+    set_env_variables(
+        config.model_server_ip,
+        config.model_server_port,
+        config.use_cloud_gpu,
+        config.llm,
+    )
+    relari_output_folder_path = manage_data_directories(
+        run_suffix, config.output_folder, config.use_cloud_gpu
+    )
+    if config.branch:
+        switch_to_branch(config.branch)
+
+    start_docker_compose(run_suffix, config.launch_web_ui, config.use_cloud_gpu)
+
+    if not config.existing_test_suffix:
+        upload_test_files(config.zipped_documents_file, run_suffix)
+
+        answer_relari_questions(
+            config.questions_file, relari_output_folder_path, run_suffix, config.limit
+        )
+
+    if config.clean_up_docker_containers:
+        cleanup_docker(run_suffix)
+
+
+if __name__ == "__main__":
+    main()
--- a/backend/tests/regression/answer_quality/search_test_config.yaml
+++ b/backend/tests/regression/answer_quality/search_test_config.yaml
@ -0,0 +1,49 @@
+# Directory where test results will be saved
+output_folder: "~/danswer_test_results"
+
+# Path to the zip file containing sample documents
+zipped_documents_file: "~/sampledocs.zip"
+
+# Path to the JSON Lines file containing sample questions (one JSON object per line)
+questions_file: "~/sample_questions.yaml"
+
+# Git branch to use (null means use current branch as is)
+branch: null
+
+# Whether to remove the test's Docker containers, volumes and networks after the test
+clean_up_docker_containers: true
+
+# Whether to launch a web UI for the test
+launch_web_ui: false
+
+# Whether to use a cloud GPU for processing
+use_cloud_gpu: false
+
+# IP address of the model server (placeholder)
+model_server_ip: "PUT_PUBLIC_CLOUD_IP_HERE"
+
+# Port of the model server (placeholder)
+model_server_port: "PUT_PUBLIC_CLOUD_PORT_HERE"
+
+# Suffix of an existing test instance to relaunch, e.g. "_1234_5678" (empty string means run a new test)
+existing_test_suffix: ""
+
+# Max number of questions to ask against the dataset (null means no limit)
+limit: null
+
+# LLM configuration
+llm:
+  # Name of the LLM
+  name: "llm_name"
+  
+  # Provider of the LLM (e.g., OpenAI)
+  provider: "openai"
+  
+  # API key
+  api_key: "PUT_API_KEY_HERE"
+  
+  # Default model name to use
+  default_model_name: "gpt-4o"
+  
+  # List of model names to use for testing
+  model_names: ["gpt-4o"]
--- a/deployment/docker_compose/docker-compose.search-testing.yml
+++ b/deployment/docker_compose/docker-compose.search-testing.yml
@ -0,0 +1,406 @@
+version: '3'
+services:
+  api_server:
+    image: danswer/danswer-backend:latest
+    build:
+      context: ../../backend
+      dockerfile: Dockerfile
+    command: >
+      /bin/sh -c "alembic upgrade head &&
+      echo \"Starting Danswer Api Server\" &&
+      uvicorn danswer.main:app --host 0.0.0.0 --port 8080"
+    depends_on:
+      - relational_db
+      - index
+      # - inference_model_server
+    restart: always
+    ports:
+      - "8080"
+    environment:
+      # Auth Settings
+      - AUTH_TYPE=${AUTH_TYPE:-disabled}
+      - SESSION_EXPIRE_TIME_SECONDS=${SESSION_EXPIRE_TIME_SECONDS:-86400}
+      - ENCRYPTION_KEY_SECRET=${ENCRYPTION_KEY_SECRET:-}
+      - VALID_EMAIL_DOMAINS=${VALID_EMAIL_DOMAINS:-}
+      - GOOGLE_OAUTH_CLIENT_ID=${GOOGLE_OAUTH_CLIENT_ID:-}
+      - GOOGLE_OAUTH_CLIENT_SECRET=${GOOGLE_OAUTH_CLIENT_SECRET:-}
+      - REQUIRE_EMAIL_VERIFICATION=${REQUIRE_EMAIL_VERIFICATION:-}
+      - SMTP_SERVER=${SMTP_SERVER:-}  # For sending verification emails, if unspecified then defaults to 'smtp.gmail.com'
+      - SMTP_PORT=${SMTP_PORT:-587}  # For sending verification emails, if unspecified then defaults to '587'
+      - SMTP_USER=${SMTP_USER:-}
+      - SMTP_PASS=${SMTP_PASS:-}
+      - EMAIL_FROM=${EMAIL_FROM:-}
+      - OAUTH_CLIENT_ID=${OAUTH_CLIENT_ID:-}
+      - OAUTH_CLIENT_SECRET=${OAUTH_CLIENT_SECRET:-}
+      - OPENID_CONFIG_URL=${OPENID_CONFIG_URL:-}
+      # Gen AI Settings
+      - GEN_AI_MODEL_PROVIDER=${GEN_AI_MODEL_PROVIDER:-}
+      - GEN_AI_MODEL_VERSION=${GEN_AI_MODEL_VERSION:-}
+      - FAST_GEN_AI_MODEL_VERSION=${FAST_GEN_AI_MODEL_VERSION:-}
+      - GEN_AI_API_KEY=${GEN_AI_API_KEY:-}
+      - GEN_AI_API_ENDPOINT=${GEN_AI_API_ENDPOINT:-}
+      - GEN_AI_API_VERSION=${GEN_AI_API_VERSION:-}
+      - GEN_AI_LLM_PROVIDER_TYPE=${GEN_AI_LLM_PROVIDER_TYPE:-}
+      - GEN_AI_MAX_TOKENS=${GEN_AI_MAX_TOKENS:-}
+      - QA_TIMEOUT=${QA_TIMEOUT:-}
+      - MAX_CHUNKS_FED_TO_CHAT=${MAX_CHUNKS_FED_TO_CHAT:-}
+      - DISABLE_LLM_FILTER_EXTRACTION=${DISABLE_LLM_FILTER_EXTRACTION:-}
+      - DISABLE_LLM_CHUNK_FILTER=${DISABLE_LLM_CHUNK_FILTER:-}
+      - DISABLE_LLM_CHOOSE_SEARCH=${DISABLE_LLM_CHOOSE_SEARCH:-}
+      - DISABLE_LLM_QUERY_REPHRASE=${DISABLE_LLM_QUERY_REPHRASE:-}
+      - DISABLE_GENERATIVE_AI=${DISABLE_GENERATIVE_AI:-}
+      - DISABLE_LITELLM_STREAMING=${DISABLE_LITELLM_STREAMING:-}
+      - LITELLM_EXTRA_HEADERS=${LITELLM_EXTRA_HEADERS:-}
+      - BING_API_KEY=${BING_API_KEY:-}
+      # if set, allows for the use of the token budget system
+      - TOKEN_BUDGET_GLOBALLY_ENABLED=${TOKEN_BUDGET_GLOBALLY_ENABLED:-}
+      # Enables the use of bedrock models
+      - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID:-}
+      - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY:-}
+      - AWS_REGION_NAME=${AWS_REGION_NAME:-}
+      # Query Options
+      - DOC_TIME_DECAY=${DOC_TIME_DECAY:-}  # Recency Bias for search results, decay at 1 / (1 + DOC_TIME_DECAY * x years)
+      - HYBRID_ALPHA=${HYBRID_ALPHA:-}  # Hybrid Search Alpha (0 for entirely keyword, 1 for entirely vector)
+      - EDIT_KEYWORD_QUERY=${EDIT_KEYWORD_QUERY:-}
+      - MULTILINGUAL_QUERY_EXPANSION=${MULTILINGUAL_QUERY_EXPANSION:-}
+      - LANGUAGE_HINT=${LANGUAGE_HINT:-}
+      - LANGUAGE_CHAT_NAMING_HINT=${LANGUAGE_CHAT_NAMING_HINT:-}
+      - QA_PROMPT_OVERRIDE=${QA_PROMPT_OVERRIDE:-}
+      # Other services
+      - POSTGRES_HOST=relational_db
+      - VESPA_HOST=index
+      - WEB_DOMAIN=${WEB_DOMAIN:-}  # For frontend redirect auth purpose
+      # Don't change the NLP model configs unless you know what you're doing
+      - DOCUMENT_ENCODER_MODEL=${DOCUMENT_ENCODER_MODEL:-}
+      - DOC_EMBEDDING_DIM=${DOC_EMBEDDING_DIM:-}
+      - NORMALIZE_EMBEDDINGS=${NORMALIZE_EMBEDDINGS:-}
+      - ASYM_QUERY_PREFIX=${ASYM_QUERY_PREFIX:-}
+      - ENABLE_RERANKING_REAL_TIME_FLOW=${ENABLE_RERANKING_REAL_TIME_FLOW:-}
+      - ENABLE_RERANKING_ASYNC_FLOW=${ENABLE_RERANKING_ASYNC_FLOW:-}
+      - MODEL_SERVER_HOST=${MODEL_SERVER_HOST:-}
+      - MODEL_SERVER_ALLOWED_HOST=${MODEL_SERVER_HOST:-}
+      - MODEL_SERVER_PORT=${MODEL_SERVER_PORT:-}
+      # Leave this on pretty please? Nothing sensitive is collected!
+      # https://docs.danswer.dev/more/telemetry
+      - DISABLE_TELEMETRY=${DISABLE_TELEMETRY:-}
+      - LOG_LEVEL=${LOG_LEVEL:-info}  # Set to debug to get more fine-grained logs
+      - LOG_ALL_MODEL_INTERACTIONS=${LOG_ALL_MODEL_INTERACTIONS:-}  # Log all of the prompts to the LLM
+      # If set to `true` will enable additional logs about Vespa query performance
+      # (time spent on finding the right docs + time spent fetching summaries from disk)
+      - LOG_VESPA_TIMING_INFORMATION=${LOG_VESPA_TIMING_INFORMATION:-}
+      - LOG_ENDPOINT_LATENCY=${LOG_ENDPOINT_LATENCY:-}
+
+      # Enterprise Edition only
+      - ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=${ENABLE_PAID_ENTERPRISE_EDITION_FEATURES:-false}
+      - API_KEY_HASH_ROUNDS=${API_KEY_HASH_ROUNDS:-}
+      # Seeding configuration
+      - ENV_SEED_CONFIGURATION=${ENV_SEED_CONFIGURATION:-}
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
+    logging:
+      driver: json-file
+      options:
+        max-size: "50m"
+        max-file: "6"
+
+
+  background:
+    image: danswer/danswer-backend:latest
+    build:
+      context: ../../backend
+      dockerfile: Dockerfile
+    command: /usr/bin/supervisord -c /etc/supervisor/conf.d/supervisord.conf
+    depends_on:
+      - relational_db
+      - index
+      # - inference_model_server
+      # - indexing_model_server
+    restart: always
+    environment:
+      - ENCRYPTION_KEY_SECRET=${ENCRYPTION_KEY_SECRET:-}
+      # Gen AI Settings (Needed by DanswerBot)
+      - GEN_AI_MODEL_PROVIDER=${GEN_AI_MODEL_PROVIDER:-}
+      - GEN_AI_MODEL_VERSION=${GEN_AI_MODEL_VERSION:-}
+      - FAST_GEN_AI_MODEL_VERSION=${FAST_GEN_AI_MODEL_VERSION:-}
+      - GEN_AI_API_KEY=${GEN_AI_API_KEY:-}
+      - GEN_AI_API_ENDPOINT=${GEN_AI_API_ENDPOINT:-}
+      - GEN_AI_API_VERSION=${GEN_AI_API_VERSION:-}
+      - GEN_AI_LLM_PROVIDER_TYPE=${GEN_AI_LLM_PROVIDER_TYPE:-}
+      - GEN_AI_MAX_TOKENS=${GEN_AI_MAX_TOKENS:-}
+      - QA_TIMEOUT=${QA_TIMEOUT:-}
+      - MAX_CHUNKS_FED_TO_CHAT=${MAX_CHUNKS_FED_TO_CHAT:-}
+      - DISABLE_LLM_FILTER_EXTRACTION=${DISABLE_LLM_FILTER_EXTRACTION:-}
+      - DISABLE_LLM_CHUNK_FILTER=${DISABLE_LLM_CHUNK_FILTER:-}
+      - DISABLE_LLM_CHOOSE_SEARCH=${DISABLE_LLM_CHOOSE_SEARCH:-}
+      - DISABLE_LLM_QUERY_REPHRASE=${DISABLE_LLM_QUERY_REPHRASE:-}
+      - DISABLE_GENERATIVE_AI=${DISABLE_GENERATIVE_AI:-}
+      - GENERATIVE_MODEL_ACCESS_CHECK_FREQ=${GENERATIVE_MODEL_ACCESS_CHECK_FREQ:-}
+      - DISABLE_LITELLM_STREAMING=${DISABLE_LITELLM_STREAMING:-}
+      - LITELLM_EXTRA_HEADERS=${LITELLM_EXTRA_HEADERS:-}
+      - BING_API_KEY=${BING_API_KEY:-}
+      # Query Options
+      - DOC_TIME_DECAY=${DOC_TIME_DECAY:-}  # Recency Bias for search results, decay at 1 / (1 + DOC_TIME_DECAY * x years)
+      - HYBRID_ALPHA=${HYBRID_ALPHA:-}  # Hybrid Search Alpha (0 for entirely keyword, 1 for entirely vector)
+      - EDIT_KEYWORD_QUERY=${EDIT_KEYWORD_QUERY:-}
+      - MULTILINGUAL_QUERY_EXPANSION=${MULTILINGUAL_QUERY_EXPANSION:-}
+      - LANGUAGE_HINT=${LANGUAGE_HINT:-}
+      - LANGUAGE_CHAT_NAMING_HINT=${LANGUAGE_CHAT_NAMING_HINT:-}
+      - QA_PROMPT_OVERRIDE=${QA_PROMPT_OVERRIDE:-}
+      # Other Services
+      - POSTGRES_HOST=relational_db
+      - POSTGRES_USER=${POSTGRES_USER:-}
+      - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-}
+      - POSTGRES_DB=${POSTGRES_DB:-}
+      - VESPA_HOST=index
+      - WEB_DOMAIN=${WEB_DOMAIN:-}  # For frontend redirect auth purpose for OAuth2 connectors
+      # Don't change the NLP model configs unless you know what you're doing
+      - DOCUMENT_ENCODER_MODEL=${DOCUMENT_ENCODER_MODEL:-}
+      - DOC_EMBEDDING_DIM=${DOC_EMBEDDING_DIM:-}
+      - NORMALIZE_EMBEDDINGS=${NORMALIZE_EMBEDDINGS:-}
+      - ASYM_QUERY_PREFIX=${ASYM_QUERY_PREFIX:-}  # Needed by DanswerBot
+      - ASYM_PASSAGE_PREFIX=${ASYM_PASSAGE_PREFIX:-}
+      - MODEL_SERVER_HOST=${MODEL_SERVER_HOST:-}
+      - MODEL_SERVER_ALLOWED_HOST=${MODEL_SERVER_HOST:-}
+      - MODEL_SERVER_PORT=${MODEL_SERVER_PORT:-}
+      - INDEXING_MODEL_SERVER_HOST=${INDEXING_MODEL_SERVER_HOST:-}
+      # Indexing Configs
+      - NUM_INDEXING_WORKERS=${NUM_INDEXING_WORKERS:-}
+      - ENABLED_CONNECTOR_TYPES=${ENABLED_CONNECTOR_TYPES:-}
+      - DISABLE_INDEX_UPDATE_ON_SWAP=${DISABLE_INDEX_UPDATE_ON_SWAP:-}
+      - DASK_JOB_CLIENT_ENABLED=${DASK_JOB_CLIENT_ENABLED:-}
+      - CONTINUE_ON_CONNECTOR_FAILURE=${CONTINUE_ON_CONNECTOR_FAILURE:-}
+      - EXPERIMENTAL_CHECKPOINTING_ENABLED=${EXPERIMENTAL_CHECKPOINTING_ENABLED:-}
+      - CONFLUENCE_CONNECTOR_LABELS_TO_SKIP=${CONFLUENCE_CONNECTOR_LABELS_TO_SKIP:-}
+      - JIRA_CONNECTOR_LABELS_TO_SKIP=${JIRA_CONNECTOR_LABELS_TO_SKIP:-}
+      - WEB_CONNECTOR_VALIDATE_URLS=${WEB_CONNECTOR_VALIDATE_URLS:-}
+      - JIRA_API_VERSION=${JIRA_API_VERSION:-}
+      - GONG_CONNECTOR_START_TIME=${GONG_CONNECTOR_START_TIME:-}
+      - NOTION_CONNECTOR_ENABLE_RECURSIVE_PAGE_LOOKUP=${NOTION_CONNECTOR_ENABLE_RECURSIVE_PAGE_LOOKUP:-}
+      - GITHUB_CONNECTOR_BASE_URL=${GITHUB_CONNECTOR_BASE_URL:-}
+      # Danswer SlackBot Configs
+      - DANSWER_BOT_SLACK_APP_TOKEN=${DANSWER_BOT_SLACK_APP_TOKEN:-}
+      - DANSWER_BOT_SLACK_BOT_TOKEN=${DANSWER_BOT_SLACK_BOT_TOKEN:-}
+      - DANSWER_BOT_DISABLE_DOCS_ONLY_ANSWER=${DANSWER_BOT_DISABLE_DOCS_ONLY_ANSWER:-}
+      - DANSWER_BOT_FEEDBACK_VISIBILITY=${DANSWER_BOT_FEEDBACK_VISIBILITY:-}
+      - DANSWER_BOT_DISPLAY_ERROR_MSGS=${DANSWER_BOT_DISPLAY_ERROR_MSGS:-}
+      - DANSWER_BOT_RESPOND_EVERY_CHANNEL=${DANSWER_BOT_RESPOND_EVERY_CHANNEL:-}
+      - DANSWER_BOT_DISABLE_COT=${DANSWER_BOT_DISABLE_COT:-}  # Currently unused
+      - NOTIFY_SLACKBOT_NO_ANSWER=${NOTIFY_SLACKBOT_NO_ANSWER:-}
+      - DANSWER_BOT_MAX_QPM=${DANSWER_BOT_MAX_QPM:-}
+      - DANSWER_BOT_MAX_WAIT_TIME=${DANSWER_BOT_MAX_WAIT_TIME:-}
+      # Logging
+      # Leave this on pretty please? Nothing sensitive is collected!
+      # https://docs.danswer.dev/more/telemetry
+      - DISABLE_TELEMETRY=${DISABLE_TELEMETRY:-}
+      - LOG_LEVEL=${LOG_LEVEL:-info}  # Set to debug to get more fine-grained logs
+      - LOG_ALL_MODEL_INTERACTIONS=${LOG_ALL_MODEL_INTERACTIONS:-}  # Log all of the prompts to the LLM
+      - LOG_VESPA_TIMING_INFORMATION=${LOG_VESPA_TIMING_INFORMATION:-}
+
+      # Enterprise Edition stuff
+      - ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=${ENABLE_PAID_ENTERPRISE_EDITION_FEATURES:-false}
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
+    logging:
+      driver: json-file
+      options:
+        max-size: "50m"
+        max-file: "6"
+
+
+  web_server:
+    image: danswer/danswer-web-server:latest
+    build:
+      context: ../../web
+      dockerfile: Dockerfile
+      args:
+        - NEXT_PUBLIC_DISABLE_STREAMING=${NEXT_PUBLIC_DISABLE_STREAMING:-false}
+        - NEXT_PUBLIC_NEW_CHAT_DIRECTS_TO_SAME_PERSONA=${NEXT_PUBLIC_NEW_CHAT_DIRECTS_TO_SAME_PERSONA:-false}
+        - NEXT_PUBLIC_POSITIVE_PREDEFINED_FEEDBACK_OPTIONS=${NEXT_PUBLIC_POSITIVE_PREDEFINED_FEEDBACK_OPTIONS:-}
+        - NEXT_PUBLIC_NEGATIVE_PREDEFINED_FEEDBACK_OPTIONS=${NEXT_PUBLIC_NEGATIVE_PREDEFINED_FEEDBACK_OPTIONS:-}
+        - NEXT_PUBLIC_DISABLE_LOGOUT=${NEXT_PUBLIC_DISABLE_LOGOUT:-}
+
+        # Enterprise Edition only
+        - NEXT_PUBLIC_THEME=${NEXT_PUBLIC_THEME:-}
+        # DO NOT TURN ON unless you have EXPLICIT PERMISSION from Danswer.
+        - NEXT_PUBLIC_DO_NOT_USE_TOGGLE_OFF_DANSWER_POWERED=${NEXT_PUBLIC_DO_NOT_USE_TOGGLE_OFF_DANSWER_POWERED:-false}
+    depends_on:
+      - api_server
+    restart: always
+    environment:
+      - INTERNAL_URL=http://api_server:8080
+      - WEB_DOMAIN=${WEB_DOMAIN:-}
+      - THEME_IS_DARK=${THEME_IS_DARK:-}
+
+      # Enterprise Edition only
+      - ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=${ENABLE_PAID_ENTERPRISE_EDITION_FEATURES:-false}
+
+
+  inference_model_server:
+    image: danswer/danswer-model-server:latest
+    build:
+      context: ../../backend
+      dockerfile: Dockerfile.model_server
+    command: >
+      /bin/sh -c "if [ \"${DISABLE_MODEL_SERVER:-false}\" = \"True\" ]; then
+        echo 'Skipping service...';
+        exit 0;
+      else
+        exec uvicorn model_server.main:app --host 0.0.0.0 --port 9000;
+      fi"
+    restart: on-failure
+    environment:
+      - MIN_THREADS_ML_MODELS=${MIN_THREADS_ML_MODELS:-}
+      # Set to debug to get more fine-grained logs
+      - LOG_LEVEL=${LOG_LEVEL:-info}
+    # volumes:
+      # Optional: only reduces download time during startup. To enable, also declare this volume at top level.
+      # - inference_model_cache_huggingface:/root/.cache/huggingface/
+    logging:
+      driver: json-file
+      options:
+        max-size: "50m"
+        max-file: "6"
+
+
+  indexing_model_server:
+    image: danswer/danswer-model-server:latest
+    build:
+      context: ../../backend
+      dockerfile: Dockerfile.model_server
+    command: >
+      /bin/sh -c "if [ \"${DISABLE_MODEL_SERVER:-false}\" = \"True\" ]; then
+        echo 'Skipping service...';
+        exit 0;
+      else
+        exec uvicorn model_server.main:app --host 0.0.0.0 --port 9000;
+      fi"
+    restart: on-failure
+    environment:
+      - MIN_THREADS_ML_MODELS=${MIN_THREADS_ML_MODELS:-}
+      - INDEXING_ONLY=True
+      # Set to debug to get more fine-grained logs
+      - LOG_LEVEL=${LOG_LEVEL:-info}
+    # volumes:
+      # Optional: only reduces download time during startup. To enable, also declare this volume at top level.
+      # - index_model_cache_huggingface:/root/.cache/huggingface/
+    logging:
+      driver: json-file
+      options:
+        max-size: "50m"
+        max-file: "6"
+
+  relational_db:
+    image: postgres:15.2-alpine
+    restart: always
+    environment:
+      - POSTGRES_USER=${POSTGRES_USER:-postgres}
+      - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-password}
+    ports:
+      - "5432"
+    volumes:
+      - db_volume:/var/lib/postgresql/data
+
+
+  # This container name cannot have an underscore in it due to Vespa expectations of the URL
+  index:
+    image: vespaengine/vespa:8.277.17
+    restart: always
+    ports:
+      - "19071"
+      - "8081"
+    volumes:
+      - vespa_volume:/opt/vespa/var
+    logging:
+      driver: json-file
+      options:
+        max-size: "50m"
+        max-file: "6"
+
+
+  nginx:
+    image: nginx:1.23.4-alpine
+    restart: always
+    # nginx will immediately crash with `nginx: [emerg] host not found in upstream`
+    # if api_server / web_server are not up 
+    depends_on:
+      - api_server
+      - web_server
+    environment:
+      - DOMAIN=localhost
+    ports:
+      - "80:80"
+      - "3000:80"  # allow for localhost:3000 usage, since that is the norm
+    volumes:
+      - ../data/nginx:/etc/nginx/conf.d
+    logging:
+      driver: json-file
+      options:
+        max-size: "50m"
+        max-file: "6"
+    # the specified script waits for the api_server to start up. 
+    # Without this we've seen issues where nginx shows no error logs but 
+    # does not receive any traffic
+    # NOTE: we have to use dos2unix to remove Carriage Return chars from the file
+    # in order to make this work on both Unix-like systems and windows
+    command: > 
+      /bin/sh -c "dos2unix /etc/nginx/conf.d/run-nginx.sh 
+      && /etc/nginx/conf.d/run-nginx.sh app.conf.template.dev"
+        
+
+# volumes:
+#   db_volume:
+#   vespa_volume:
+#   # Created by the container itself
+#   model_cache_huggingface:
+
+volumes:
+  db_volume:
+    driver: local
+    driver_opts:
+      type: none
+      o: bind
+      device: ${DANSWER_POSTGRES_DATA_DIR:-./postgres_data}
+  vespa_volume:
+    driver: local
+    driver_opts:
+      type: none
+      o: bind
+      device: ${DANSWER_VESPA_DATA_DIR:-./vespa_data}
+  model_cache_huggingface:
+  #   driver: local
+  #   driver_opts:
+  #     type: none
+  #     o: bind
+  #     device: ${DANSWER_MODEL_CACHE_DIR:-./model_cache}
+  # index_model_cache_huggingface:
+  #   driver: local
+  #   driver_opts:
+  #     type: none
+  #     o: bind
+  #     device: ${DANSWER_INDEX_MODEL_CACHE_DIR:-./model_cache}
+  # inference_model_cache_huggingface:
+  #   driver: local
+  #   driver_opts:
+  #     type: none
+  #     o: bind
+  #     device: ${DANSWER_INFERENCE_MODEL_CACHE_DIR:-./model_cache}
+
+# volumes:
+#   db_volume:
+#     driver: local
+#     driver_opts:
+#       type: none
+#       o: bind
+#       device: ${DANSWER_POSTGRES_DATA_DIR:-./postgres_data}
+#   vespa_volume:
+#     driver: local
+#     driver_opts:
+#       type: none
+#       o: bind
+#       device: ${DANSWER_VESPA_DATA_DIR:-./vespa_data}
+#   model_cache_huggingface:
+#     driver: local
+#     driver_opts:
+#       type: none
+#       o: bind
+#       device: ${DANSWER_MODEL_CACHE_DIR:-./model_cache}