Improve eval pipeline qol (#1908)

This commit is contained in:
hagen-danswer
2024-07-23 17:16:34 -07:00
committed by GitHub
parent 2470c68506
commit 6ff8e6c0ea
7 changed files with 105 additions and 115 deletions

View File

@ -9,7 +9,7 @@ This Python script automates the process of running search quality tests for a b
- Manages environment variables - Manages environment variables
- Switches to specified Git branch - Switches to specified Git branch
- Uploads test documents - Uploads test documents
- Runs search quality tests using Relari - Runs search quality tests
- Cleans up Docker containers (optional) - Cleans up Docker containers (optional)
## Usage ## Usage
@ -29,9 +29,17 @@ export PYTHONPATH=$PYTHONPATH:$PWD/backend
``` ```
cd backend/tests/regression/answer_quality cd backend/tests/regression/answer_quality
``` ```
7. Run the script: 7. To launch the evaluation environment, run the launch_eval_env.py script (this step can be skipped if you are running the env outside of docker, just leave "environment_name" blank):
``` ```
python run_eval_pipeline.py python launch_eval_env.py
```
8. Run the file_uploader.py script to upload the zip files located at the path "zipped_documents_file"
```
python file_uploader.py
```
9. Run the run_qa.py script to ask questions from the jsonl located at the path "questions_file". This will hit the "query/answer-with-quote" API endpoint.
```
python run_qa.py
``` ```
Note: All data will be saved even after the containers are shut down. There are instructions below for re-launching docker containers using this data. Note: All data will be saved even after the containers are shut down. There are instructions below for re-launching docker containers using this data.
@ -75,12 +83,10 @@ Edit `search_test_config.yaml` to set:
- model_server_port - model_server_port
- This is the port of the remote model server - This is the port of the remote model server
- Only need to set this if use_cloud_gpu is true - Only need to set this if use_cloud_gpu is true
- existing_test_suffix (THIS IS NOT A SUFFIX ANYMORE, TODO UPDATE THE DOCS HERE) - environment_name
- Use this if you would like to relaunch a previous test instance - Use this if you would like to relaunch a previous test instance
- Input the suffix of the test you'd like to re-launch - Input the env_name of the test you'd like to re-launch
- (E.g. to use the data from folder "test-1234-5678" put "-1234-5678") - Leave empty to launch referencing local default network locations
- No new files will automatically be uploaded
- Leave empty to run a new test
- limit - limit
- Max number of questions you'd like to ask against the dataset - Max number of questions you'd like to ask against the dataset
- Set to null for no limit - Set to null for no limit
@ -90,7 +96,7 @@ Edit `search_test_config.yaml` to set:
## Relaunching From Existing Data ## Relaunching From Existing Data
To launch an existing set of containers that has already completed indexing, set the existing_test_suffix variable. This will launch the docker containers mounted on the volumes of the indicated suffix and will not automatically index any documents or run any QA. To launch an existing set of containers that has already completed indexing, set the environment_name variable. This will launch the docker containers mounted on the volumes of the indicated env_name and will not automatically index any documents or run any QA.
Once these containers are launched you can run file_uploader.py or run_qa.py (assuming you have run the steps in the Usage section above). Once these containers are launched you can run file_uploader.py or run_qa.py (assuming you have run the steps in the Usage section above).
- file_uploader.py will upload and index additional zipped files located at the zipped_documents_file path. - file_uploader.py will upload and index additional zipped files located at the zipped_documents_file path.

View File

@ -16,13 +16,16 @@ from tests.regression.answer_quality.cli_utils import get_api_server_host_port
GENERAL_HEADERS = {"Content-Type": "application/json"} GENERAL_HEADERS = {"Content-Type": "application/json"}
def _api_url_builder(run_suffix: str, api_path: str) -> str: def _api_url_builder(env_name: str, api_path: str) -> str:
return f"http://localhost:{get_api_server_host_port(run_suffix)}" + api_path if env_name:
return f"http://localhost:{get_api_server_host_port(env_name)}" + api_path
else:
return "http://localhost:8080" + api_path
@retry(tries=5, delay=5) @retry(tries=5, delay=5)
def get_answer_from_query( def get_answer_from_query(
query: str, only_retrieve_docs: bool, run_suffix: str query: str, only_retrieve_docs: bool, env_name: str
) -> tuple[list[str], str]: ) -> tuple[list[str], str]:
filters = IndexFilters( filters = IndexFilters(
source_type=None, source_type=None,
@ -49,7 +52,7 @@ def get_answer_from_query(
skip_gen_ai_answer_generation=only_retrieve_docs, skip_gen_ai_answer_generation=only_retrieve_docs,
) )
url = _api_url_builder(run_suffix, "/query/answer-with-quote/") url = _api_url_builder(env_name, "/query/answer-with-quote/")
headers = { headers = {
"Content-Type": "application/json", "Content-Type": "application/json",
} }
@ -70,8 +73,8 @@ def get_answer_from_query(
@retry(tries=10, delay=10) @retry(tries=10, delay=10)
def check_indexing_status(run_suffix: str) -> tuple[int, bool]: def check_indexing_status(env_name: str) -> tuple[int, bool]:
url = _api_url_builder(run_suffix, "/manage/admin/connector/indexing-status/") url = _api_url_builder(env_name, "/manage/admin/connector/indexing-status/")
try: try:
indexing_status_dict = requests.get(url, headers=GENERAL_HEADERS).json() indexing_status_dict = requests.get(url, headers=GENERAL_HEADERS).json()
except Exception as e: except Exception as e:
@ -99,8 +102,8 @@ def check_indexing_status(run_suffix: str) -> tuple[int, bool]:
return doc_count, ongoing_index_attempts return doc_count, ongoing_index_attempts
def run_cc_once(run_suffix: str, connector_id: int, credential_id: int) -> None: def run_cc_once(env_name: str, connector_id: int, credential_id: int) -> None:
url = _api_url_builder(run_suffix, "/manage/admin/connector/run-once/") url = _api_url_builder(env_name, "/manage/admin/connector/run-once/")
body = { body = {
"connector_id": connector_id, "connector_id": connector_id,
"credential_ids": [credential_id], "credential_ids": [credential_id],
@ -115,9 +118,9 @@ def run_cc_once(run_suffix: str, connector_id: int, credential_id: int) -> None:
print("Failed text:", response.text) print("Failed text:", response.text)
def create_cc_pair(run_suffix: str, connector_id: int, credential_id: int) -> None: def create_cc_pair(env_name: str, connector_id: int, credential_id: int) -> None:
url = _api_url_builder( url = _api_url_builder(
run_suffix, f"/manage/connector/{connector_id}/credential/{credential_id}" env_name, f"/manage/connector/{connector_id}/credential/{credential_id}"
) )
body = {"name": "zip_folder_contents", "is_public": True} body = {"name": "zip_folder_contents", "is_public": True}
@ -130,8 +133,8 @@ def create_cc_pair(run_suffix: str, connector_id: int, credential_id: int) -> No
print("Failed text:", response.text) print("Failed text:", response.text)
def _get_existing_connector_names(run_suffix: str) -> list[str]: def _get_existing_connector_names(env_name: str) -> list[str]:
url = _api_url_builder(run_suffix, "/manage/connector") url = _api_url_builder(env_name, "/manage/connector")
body = { body = {
"credential_json": {}, "credential_json": {},
@ -145,10 +148,10 @@ def _get_existing_connector_names(run_suffix: str) -> list[str]:
raise RuntimeError(response.__dict__) raise RuntimeError(response.__dict__)
def create_connector(run_suffix: str, file_paths: list[str]) -> int: def create_connector(env_name: str, file_paths: list[str]) -> int:
url = _api_url_builder(run_suffix, "/manage/admin/connector") url = _api_url_builder(env_name, "/manage/admin/connector")
connector_name = base_connector_name = "search_eval_connector" connector_name = base_connector_name = "search_eval_connector"
existing_connector_names = _get_existing_connector_names(run_suffix) existing_connector_names = _get_existing_connector_names(env_name)
count = 1 count = 1
while connector_name in existing_connector_names: while connector_name in existing_connector_names:
@ -175,8 +178,8 @@ def create_connector(run_suffix: str, file_paths: list[str]) -> int:
raise RuntimeError(response.__dict__) raise RuntimeError(response.__dict__)
def create_credential(run_suffix: str) -> int: def create_credential(env_name: str) -> int:
url = _api_url_builder(run_suffix, "/manage/credential") url = _api_url_builder(env_name, "/manage/credential")
body = { body = {
"credential_json": {}, "credential_json": {},
"admin_public": True, "admin_public": True,
@ -190,12 +193,12 @@ def create_credential(run_suffix: str) -> int:
@retry(tries=10, delay=2, backoff=2) @retry(tries=10, delay=2, backoff=2)
def upload_file(run_suffix: str, zip_file_path: str) -> list[str]: def upload_file(env_name: str, zip_file_path: str) -> list[str]:
files = [ files = [
("files", open(zip_file_path, "rb")), ("files", open(zip_file_path, "rb")),
] ]
api_path = _api_url_builder(run_suffix, "/manage/admin/connector/file/upload") api_path = _api_url_builder(env_name, "/manage/admin/connector/file/upload")
try: try:
response = requests.post(api_path, files=files) response = requests.post(api_path, files=files)
response.raise_for_status() # Raises an HTTPError for bad responses response.raise_for_status() # Raises an HTTPError for bad responses

View File

@ -67,20 +67,20 @@ def switch_to_commit(commit_sha: str) -> None:
print("Repository updated successfully.") print("Repository updated successfully.")
def get_docker_container_env_vars(suffix: str) -> dict: def get_docker_container_env_vars(env_name: str) -> dict:
""" """
Retrieves environment variables from "background" and "api_server" Docker containers. Retrieves environment variables from "background" and "api_server" Docker containers.
""" """
print(f"Getting environment variables for containers with suffix: {suffix}") print(f"Getting environment variables for containers with env_name: {env_name}")
combined_env_vars = {} combined_env_vars = {}
for container_type in ["background", "api_server"]: for container_type in ["background", "api_server"]:
container_name = _run_command( container_name = _run_command(
f"docker ps -a --format '{{{{.Names}}}}' | awk '/{container_type}/ && /{suffix}/'" f"docker ps -a --format '{{{{.Names}}}}' | awk '/{container_type}/ && /{env_name}/'"
)[0].strip() )[0].strip()
if not container_name: if not container_name:
raise RuntimeError( raise RuntimeError(
f"No {container_type} container found with suffix: {suffix}" f"No {container_type} container found with env_name: {env_name}"
) )
env_vars_json = _run_command( env_vars_json = _run_command(
@ -95,9 +95,9 @@ def get_docker_container_env_vars(suffix: str) -> dict:
return combined_env_vars return combined_env_vars
def manage_data_directories(suffix: str, base_path: str, use_cloud_gpu: bool) -> None: def manage_data_directories(env_name: str, base_path: str, use_cloud_gpu: bool) -> None:
# Use the user's home directory as the base path # Use the user's home directory as the base path
target_path = os.path.join(os.path.expanduser(base_path), suffix) target_path = os.path.join(os.path.expanduser(base_path), env_name)
directories = { directories = {
"DANSWER_POSTGRES_DATA_DIR": os.path.join(target_path, "postgres/"), "DANSWER_POSTGRES_DATA_DIR": os.path.join(target_path, "postgres/"),
"DANSWER_VESPA_DATA_DIR": os.path.join(target_path, "vespa/"), "DANSWER_VESPA_DATA_DIR": os.path.join(target_path, "vespa/"),
@ -144,12 +144,12 @@ def _is_port_in_use(port: int) -> bool:
def start_docker_compose( def start_docker_compose(
run_suffix: str, launch_web_ui: bool, use_cloud_gpu: bool, only_state: bool = False env_name: str, launch_web_ui: bool, use_cloud_gpu: bool, only_state: bool = False
) -> None: ) -> None:
print("Starting Docker Compose...") print("Starting Docker Compose...")
os.chdir(os.path.dirname(__file__)) os.chdir(os.path.dirname(__file__))
os.chdir("../../../../deployment/docker_compose/") os.chdir("../../../../deployment/docker_compose/")
command = f"docker compose -f docker-compose.search-testing.yml -p danswer-stack-{run_suffix} up -d" command = f"docker compose -f docker-compose.search-testing.yml -p danswer-stack-{env_name} up -d"
command += " --build" command += " --build"
command += " --force-recreate" command += " --force-recreate"
@ -175,17 +175,17 @@ def start_docker_compose(
print("Containers have been launched") print("Containers have been launched")
def cleanup_docker(run_suffix: str) -> None: def cleanup_docker(env_name: str) -> None:
print( print(
f"Deleting Docker containers, volumes, and networks for project suffix: {run_suffix}" f"Deleting Docker containers, volumes, and networks for project env_name: {env_name}"
) )
stdout, _ = _run_command("docker ps -a --format '{{json .}}'") stdout, _ = _run_command("docker ps -a --format '{{json .}}'")
containers = [json.loads(line) for line in stdout.splitlines()] containers = [json.loads(line) for line in stdout.splitlines()]
if not run_suffix: if not env_name:
run_suffix = datetime.now().strftime("-%Y") env_name = datetime.now().strftime("-%Y")
project_name = f"danswer-stack{run_suffix}" project_name = f"danswer-stack{env_name}"
containers_to_delete = [ containers_to_delete = [
c for c in containers if c["Names"].startswith(project_name) c for c in containers if c["Names"].startswith(project_name)
] ]
@ -221,23 +221,23 @@ def cleanup_docker(run_suffix: str) -> None:
networks = stdout.splitlines() networks = stdout.splitlines()
networks_to_delete = [n for n in networks if run_suffix in n] networks_to_delete = [n for n in networks if env_name in n]
if not networks_to_delete: if not networks_to_delete:
print(f"No networks found containing suffix: {run_suffix}") print(f"No networks found containing env_name: {env_name}")
else: else:
network_names = " ".join(networks_to_delete) network_names = " ".join(networks_to_delete)
_run_command(f"docker network rm {network_names}") _run_command(f"docker network rm {network_names}")
print( print(
f"Successfully deleted {len(networks_to_delete)} networks containing suffix: {run_suffix}" f"Successfully deleted {len(networks_to_delete)} networks containing env_name: {env_name}"
) )
@retry(tries=5, delay=5, backoff=2) @retry(tries=5, delay=5, backoff=2)
def get_api_server_host_port(suffix: str) -> str: def get_api_server_host_port(env_name: str) -> str:
""" """
This pulls all containers with the provided suffix This pulls all containers with the provided env_name
It then grabs the JSON specific container with a name containing "api_server" It then grabs the JSON specific container with a name containing "api_server"
It then grabs the port info from the JSON and strips out the relevant data It then grabs the port info from the JSON and strips out the relevant data
""" """
@ -248,16 +248,16 @@ def get_api_server_host_port(suffix: str) -> str:
server_jsons = [] server_jsons = []
for container in containers: for container in containers:
if container_name in container["Names"] and suffix in container["Names"]: if container_name in container["Names"] and env_name in container["Names"]:
server_jsons.append(container) server_jsons.append(container)
if not server_jsons: if not server_jsons:
raise RuntimeError( raise RuntimeError(
f"No container found containing: {container_name} and {suffix}" f"No container found containing: {container_name} and {env_name}"
) )
elif len(server_jsons) > 1: elif len(server_jsons) > 1:
raise RuntimeError( raise RuntimeError(
f"Too many containers matching {container_name} found, please indicate a suffix" f"Too many containers matching {container_name} found, please indicate an env_name"
) )
server_json = server_jsons[0] server_json = server_jsons[0]
@ -278,23 +278,23 @@ def get_api_server_host_port(suffix: str) -> str:
raise RuntimeError(f"Too many ports matching {client_port} found") raise RuntimeError(f"Too many ports matching {client_port} found")
if not matching_ports: if not matching_ports:
raise RuntimeError( raise RuntimeError(
f"No port found containing: {client_port} for container: {container_name} and suffix: {suffix}" f"No port found containing: {client_port} for container: {container_name} and env_name: {env_name}"
) )
return matching_ports[0] return matching_ports[0]
# Added function to restart Vespa container # Added function to restart Vespa container
def restart_vespa_container(suffix: str) -> None: def restart_vespa_container(env_name: str) -> None:
print(f"Restarting Vespa container for suffix: {suffix}") print(f"Restarting Vespa container for env_name: {env_name}")
# Find the Vespa container # Find the Vespa container
stdout, _ = _run_command( stdout, _ = _run_command(
f"docker ps -a --format '{{{{.Names}}}}' | awk '/index-1/ && /{suffix}/'" f"docker ps -a --format '{{{{.Names}}}}' | awk '/index-1/ && /{env_name}/'"
) )
container_name = stdout.strip() container_name = stdout.strip()
if not container_name: if not container_name:
raise RuntimeError(f"No Vespa container found with suffix: {suffix}") raise RuntimeError(f"No Vespa container found with env_name: {env_name}")
# Restart the container # Restart the container
_run_command(f"docker restart {container_name}") _run_command(f"docker restart {container_name}")
@ -307,8 +307,8 @@ def restart_vespa_container(suffix: str) -> None:
if __name__ == "__main__": if __name__ == "__main__":
""" """
Running this just cleans up the docker environment for the container indicated by existing_test_suffix Running this just cleans up the docker environment for the container indicated by environment_name
If no existing_test_suffix is indicated, will just clean up all danswer docker containers/volumes/networks If no environment_name is indicated, will just clean up all danswer docker containers/volumes/networks
Note: vespa/postgres mounts are not deleted Note: vespa/postgres mounts are not deleted
""" """
current_dir = os.path.dirname(os.path.abspath(__file__)) current_dir = os.path.dirname(os.path.abspath(__file__))
@ -318,4 +318,4 @@ if __name__ == "__main__":
if not isinstance(config, dict): if not isinstance(config, dict):
raise TypeError("config must be a dictionary") raise TypeError("config must be a dictionary")
cleanup_docker(config["existing_test_suffix"]) cleanup_docker(config["environment_name"])

View File

@ -13,7 +13,6 @@ from tests.regression.answer_quality.api_utils import create_connector
from tests.regression.answer_quality.api_utils import create_credential from tests.regression.answer_quality.api_utils import create_credential
from tests.regression.answer_quality.api_utils import run_cc_once from tests.regression.answer_quality.api_utils import run_cc_once
from tests.regression.answer_quality.api_utils import upload_file from tests.regression.answer_quality.api_utils import upload_file
from tests.regression.answer_quality.cli_utils import restart_vespa_container
def unzip_and_get_file_paths(zip_file_path: str) -> list[str]: def unzip_and_get_file_paths(zip_file_path: str) -> list[str]:
@ -35,40 +34,37 @@ def create_temp_zip_from_files(file_paths: list[str]) -> str:
return zip_file_path return zip_file_path
def upload_test_files(zip_file_path: str, run_suffix: str) -> None: def upload_test_files(zip_file_path: str, env_name: str) -> None:
print("zip:", zip_file_path) print("zip:", zip_file_path)
file_paths = upload_file(run_suffix, zip_file_path) file_paths = upload_file(env_name, zip_file_path)
conn_id = create_connector(run_suffix, file_paths) conn_id = create_connector(env_name, file_paths)
cred_id = create_credential(run_suffix) cred_id = create_credential(env_name)
create_cc_pair(run_suffix, conn_id, cred_id) create_cc_pair(env_name, conn_id, cred_id)
run_cc_once(run_suffix, conn_id, cred_id) run_cc_once(env_name, conn_id, cred_id)
def manage_file_upload(zip_file_path: str, run_suffix: str) -> None: def manage_file_upload(zip_file_path: str, env_name: str) -> None:
unzipped_file_paths = unzip_and_get_file_paths(zip_file_path) unzipped_file_paths = unzip_and_get_file_paths(zip_file_path)
total_file_count = len(unzipped_file_paths) total_file_count = len(unzipped_file_paths)
while True: while True:
doc_count, ongoing_index_attempts = check_indexing_status(run_suffix) doc_count, ongoing_index_attempts = check_indexing_status(env_name)
if not doc_count: if ongoing_index_attempts:
print("No docs indexed, waiting for indexing to start")
upload_test_files(zip_file_path, run_suffix)
elif ongoing_index_attempts:
print( print(
f"{doc_count} docs indexed but waiting for ongoing indexing jobs to finish..." f"{doc_count} docs indexed but waiting for ongoing indexing jobs to finish..."
) )
elif not doc_count:
print("No docs indexed, waiting for indexing to start")
upload_test_files(zip_file_path, env_name)
elif doc_count < total_file_count: elif doc_count < total_file_count:
print(f"No ongoing indexing attempts but only {doc_count} docs indexed") print(f"No ongoing indexing attempts but only {doc_count} docs indexed")
print("Restarting vespa...")
restart_vespa_container(run_suffix)
print(f"Rerunning with {total_file_count - doc_count} missing docs")
remaining_files = unzipped_file_paths[doc_count:] remaining_files = unzipped_file_paths[doc_count:]
print(f"Grabbed last {len(remaining_files)} docs to try again") print(f"Grabbed last {len(remaining_files)} docs to try again")
temp_zip_file_path = create_temp_zip_from_files(remaining_files) temp_zip_file_path = create_temp_zip_from_files(remaining_files)
upload_test_files(temp_zip_file_path, run_suffix) upload_test_files(temp_zip_file_path, env_name)
os.unlink(temp_zip_file_path) os.unlink(temp_zip_file_path)
else: else:
print(f"Successfully uploaded {doc_count} docs!") print(f"Successfully uploaded {doc_count} docs!")
@ -86,5 +82,5 @@ if __name__ == "__main__":
with open(config_path, "r") as file: with open(config_path, "r") as file:
config = SimpleNamespace(**yaml.safe_load(file)) config = SimpleNamespace(**yaml.safe_load(file))
file_location = config.zipped_documents_file file_location = config.zipped_documents_file
run_suffix = config.existing_test_suffix env_name = config.environment_name
manage_file_upload(file_location, run_suffix) manage_file_upload(file_location, env_name)

View File

@ -1,16 +1,12 @@
import os import os
from datetime import datetime
from types import SimpleNamespace from types import SimpleNamespace
import yaml import yaml
from tests.regression.answer_quality.cli_utils import cleanup_docker
from tests.regression.answer_quality.cli_utils import manage_data_directories from tests.regression.answer_quality.cli_utils import manage_data_directories
from tests.regression.answer_quality.cli_utils import set_env_variables from tests.regression.answer_quality.cli_utils import set_env_variables
from tests.regression.answer_quality.cli_utils import start_docker_compose from tests.regression.answer_quality.cli_utils import start_docker_compose
from tests.regression.answer_quality.cli_utils import switch_to_commit from tests.regression.answer_quality.cli_utils import switch_to_commit
from tests.regression.answer_quality.file_uploader import upload_test_files
from tests.regression.answer_quality.run_qa import run_qa_test_and_save_results
def load_config(config_filename: str) -> SimpleNamespace: def load_config(config_filename: str) -> SimpleNamespace:
@ -22,12 +18,16 @@ def load_config(config_filename: str) -> SimpleNamespace:
def main() -> None: def main() -> None:
config = load_config("search_test_config.yaml") config = load_config("search_test_config.yaml")
if config.existing_test_suffix: if config.environment_name:
run_suffix = config.existing_test_suffix env_name = config.environment_name
print("launching danswer with existing data suffix:", run_suffix) print("launching danswer with environment name:", env_name)
else: else:
run_suffix = datetime.now().strftime("-%Y%m%d-%H%M%S") print("No env name defined. Not launching docker.")
print("run_suffix:", run_suffix) print(
"Please define a name in the config yaml to start a new env "
"or use an existing env"
)
return
set_env_variables( set_env_variables(
config.model_server_ip, config.model_server_ip,
@ -35,22 +35,14 @@ def main() -> None:
config.use_cloud_gpu, config.use_cloud_gpu,
config.llm, config.llm,
) )
manage_data_directories(run_suffix, config.output_folder, config.use_cloud_gpu) manage_data_directories(env_name, config.output_folder, config.use_cloud_gpu)
if config.commit_sha: if config.commit_sha:
switch_to_commit(config.commit_sha) switch_to_commit(config.commit_sha)
start_docker_compose( start_docker_compose(
run_suffix, config.launch_web_ui, config.use_cloud_gpu, config.only_state env_name, config.launch_web_ui, config.use_cloud_gpu, config.only_state
) )
if not config.existing_test_suffix and not config.only_state:
upload_test_files(config.zipped_documents_file, run_suffix)
run_qa_test_and_save_results(run_suffix)
if config.clean_up_docker_containers:
cleanup_docker(run_suffix)
if __name__ == "__main__": if __name__ == "__main__":
main() main()

View File

@ -43,12 +43,12 @@ def _read_questions_jsonl(questions_file_path: str) -> list[dict]:
def _get_test_output_folder(config: dict) -> str: def _get_test_output_folder(config: dict) -> str:
base_output_folder = os.path.expanduser(config["output_folder"]) base_output_folder = os.path.expanduser(config["output_folder"])
if config["run_suffix"]: if config["env_name"]:
base_output_folder = os.path.join( base_output_folder = os.path.join(
base_output_folder, config["run_suffix"], "evaluations_output" base_output_folder, config["env_name"], "evaluations_output"
) )
else: else:
base_output_folder = os.path.join(base_output_folder, "no_defined_suffix") base_output_folder = os.path.join(base_output_folder, "no_defined_env_name")
counter = 1 counter = 1
output_folder_path = os.path.join(base_output_folder, "run_1") output_folder_path = os.path.join(base_output_folder, "run_1")
@ -72,12 +72,12 @@ def _initialize_files(config: dict) -> tuple[str, list[dict]]:
metadata = { metadata = {
"commit_sha": get_current_commit_sha(), "commit_sha": get_current_commit_sha(),
"run_suffix": config["run_suffix"], "env_name": config["env_name"],
"test_config": config, "test_config": config,
"number_of_questions_in_dataset": len(questions), "number_of_questions_in_dataset": len(questions),
} }
env_vars = get_docker_container_env_vars(config["run_suffix"]) env_vars = get_docker_container_env_vars(config["env_name"])
if env_vars["ENV_SEED_CONFIGURATION"]: if env_vars["ENV_SEED_CONFIGURATION"]:
del env_vars["ENV_SEED_CONFIGURATION"] del env_vars["ENV_SEED_CONFIGURATION"]
if env_vars["GPG_KEY"]: if env_vars["GPG_KEY"]:
@ -118,7 +118,7 @@ def _process_question(question_data: dict, config: dict, question_number: int) -
context_data_list, answer = get_answer_from_query( context_data_list, answer = get_answer_from_query(
query=query, query=query,
only_retrieve_docs=config["only_retrieve_docs"], only_retrieve_docs=config["only_retrieve_docs"],
run_suffix=config["run_suffix"], env_name=config["env_name"],
) )
if not context_data_list: if not context_data_list:
@ -173,7 +173,7 @@ def _process_and_write_query_results(config: dict) -> None:
print("saved test results to folder:", test_output_folder) print("saved test results to folder:", test_output_folder)
def run_qa_test_and_save_results(run_suffix: str = "") -> None: def run_qa_test_and_save_results(env_name: str = "") -> None:
current_dir = os.path.dirname(os.path.abspath(__file__)) current_dir = os.path.dirname(os.path.abspath(__file__))
config_path = os.path.join(current_dir, "search_test_config.yaml") config_path = os.path.join(current_dir, "search_test_config.yaml")
with open(config_path, "r") as file: with open(config_path, "r") as file:
@ -182,16 +182,16 @@ def run_qa_test_and_save_results(run_suffix: str = "") -> None:
if not isinstance(config, dict): if not isinstance(config, dict):
raise TypeError("config must be a dictionary") raise TypeError("config must be a dictionary")
if not run_suffix: if not env_name:
run_suffix = config["existing_test_suffix"] env_name = config["environment_name"]
config["run_suffix"] = run_suffix config["env_name"] = env_name
_process_and_write_query_results(config) _process_and_write_query_results(config)
if __name__ == "__main__": if __name__ == "__main__":
""" """
To run a different set of questions, update the questions_file in search_test_config.yaml To run a different set of questions, update the questions_file in search_test_config.yaml
If there is more than one instance of Danswer running, specify the suffix in search_test_config.yaml If there is more than one instance of Danswer running, specify the env_name in search_test_config.yaml
""" """
run_qa_test_and_save_results() run_qa_test_and_save_results()

View File

@ -13,15 +13,9 @@ questions_file: "~/sample_questions.yaml"
# Git commit SHA to use (null means use current code as is) # Git commit SHA to use (null means use current code as is)
commit_sha: null commit_sha: null
# Whether to remove Docker containers after the test
clean_up_docker_containers: true
# Whether to launch a web UI for the test # Whether to launch a web UI for the test
launch_web_ui: false launch_web_ui: false
# Whether to only run Vespa and Postgres
only_state: false
# Only retrieve documents, not LLM response # Only retrieve documents, not LLM response
only_retrieve_docs: false only_retrieve_docs: false
@ -34,9 +28,8 @@ model_server_ip: "PUT_PUBLIC_CLOUD_IP_HERE"
# Port of the model server (placeholder) # Port of the model server (placeholder)
model_server_port: "PUT_PUBLIC_CLOUD_PORT_HERE" model_server_port: "PUT_PUBLIC_CLOUD_PORT_HERE"
# Suffix for existing test results (E.g. -1234-5678) # Name for existing testing env (empty string uses default ports)
# empty string means no suffix environment_name: ""
existing_test_suffix: ""
# Limit on number of tests to run (null means no limit) # Limit on number of tests to run (null means no limit)
limit: null limit: null