Use SHA instead of branch and save more data (#1850)

hagen-danswer 2024-07-17 14:56:24 -07:00 committed by GitHub
parent 1c77395503
commit e93de602c3
6 changed files with 63 additions and 31 deletions

View File

@@ -53,8 +53,10 @@ Edit `search_test_config.yaml` to set:
   - The path to the zip file containing the files you'd like to test against
 - questions_file
   - The path to the yaml containing the questions you'd like to test with
-- branch
-  - Set the branch to null if you want it to just use the code as is
+- commit_sha
+  - Set this to the SHA of the commit you want to run the test against
+  - You must clear all local changes if you want to use this option
+  - Set this to null if you want it to just use the code as is
 - clean_up_docker_containers
   - Set this to true to automatically delete all docker containers, networks and volumes after the test
 - launch_web_ui
@@ -71,7 +73,7 @@ Edit `search_test_config.yaml` to set:
 - existing_test_suffix
   - Use this if you would like to relaunch a previous test instance
   - Input the suffix of the test you'd like to re-launch
-  - (E.g. to use the data from folder "test_1234_5678" put "_1234_5678")
+  - (E.g. to use the data from folder "test-1234-5678" put "-1234-5678")
   - No new files will automatically be uploaded
   - Leave empty to run a new test
 - limit
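The "clear all local changes" caveat exists because `git checkout <sha>` can fail, or drag modified files along, when the working tree is dirty. A minimal sketch of a pre-flight check a caller could run first (hypothetical helper, not part of this commit, which leaves the cleanup to the user):

import subprocess

def working_tree_is_clean() -> bool:
    # `git status --porcelain` prints one line per modified or untracked file,
    # so empty output means a checkout of commit_sha can proceed safely.
    result = subprocess.run(
        ["git", "status", "--porcelain"],
        capture_output=True, text=True, check=True,
    )
    return result.stdout.strip() == ""

if not working_tree_is_clean():
    raise RuntimeError("Commit or stash local changes before setting commit_sha")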

View File

@@ -39,7 +39,7 @@ def _create_new_chat_session(run_suffix: str) -> int:
     raise RuntimeError(response_json)


-@retry(tries=15, delay=10, jitter=1)
+@retry(tries=10, delay=10)
 def get_answer_from_query(query: str, run_suffix: str) -> tuple[list[str], str]:
     filters = IndexFilters(
         source_type=None,
@@ -81,10 +81,16 @@ def get_answer_from_query(query: str, run_suffix: str) -> tuple[list[str], str]:
     return simple_search_docs, answer


 @retry(tries=10, delay=10)
 def check_if_query_ready(run_suffix: str) -> bool:
     url = _api_url_builder(run_suffix, "/manage/admin/connector/indexing-status/")
-    indexing_status_dict = requests.get(url, headers=GENERAL_HEADERS).json()
+    try:
+        indexing_status_dict = requests.get(url, headers=GENERAL_HEADERS).json()
+    except Exception as e:
+        print("Failed to check indexing status, API server is likely starting up:")
+        print(f"\t {str(e)}")
+        print("trying again")
+        raise e

     ongoing_index_attempts = False
     doc_count = 0
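The catch-print-re-raise pattern above is what makes the decorator useful here: the prints leave a trace of each failed poll, and re-raising hands the exception back to the decorator so the call is retried after the delay. A minimal self-contained sketch of the same pattern (this looks like the PyPI `retry` package, though that is inferred from the `tries`/`delay` keywords; `delay` is shortened so the demo runs quickly):

from retry import retry  # assumed: the PyPI `retry` package

attempts = {"n": 0}

@retry(tries=10, delay=1)  # the tests use delay=10; 1s keeps the demo fast
def flaky_status_check() -> dict:
    attempts["n"] += 1
    try:
        if attempts["n"] < 3:  # stand-in for the API server still starting up
            raise ConnectionError("connection refused")
        return {"status": "ready"}
    except Exception as e:
        print(f"Attempt {attempts['n']} failed: {e}, trying again")
        raise e  # re-raising is what triggers the next retry

print(flaky_status_check())  # succeeds on the third attempt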

View File

@@ -60,11 +60,10 @@ def get_current_commit_sha() -> str:
     return sha


-def switch_to_branch(branch: str) -> None:
-    print(f"Switching to branch: {branch}...")
-    _run_command(f"git checkout {branch}")
-    _run_command("git pull")
-    print(f"Successfully switched to branch: {branch}")
+def switch_to_commit(commit_sha: str) -> None:
+    print(f"Switching to commit: {commit_sha}...")
+    _run_command(f"git checkout {commit_sha}")
+    print(f"Successfully switched to commit: {commit_sha}")
     print("Repository updated successfully.")
@@ -77,7 +76,7 @@ def get_docker_container_env_vars(suffix: str) -> dict:
     combined_env_vars = {}
     for container_type in ["background", "api_server"]:
         container_name = _run_command(
-            f"docker ps -a --format '{{{{.Names}}}}' | grep '{container_type}' | grep '{suffix}'"
+            f"docker ps -a --format '{{{{.Names}}}}' | awk '/{container_type}/ && /{suffix}/'"
         )[0].strip()
         if not container_name:
             raise RuntimeError(
@@ -93,7 +92,6 @@ def get_docker_container_env_vars(suffix: str) -> dict:
             key, value = env_var.split("=", 1)
            combined_env_vars[key] = value

-    print(f"Combined env variables: {combined_env_vars}")
     return combined_env_vars
@@ -117,8 +115,8 @@ def manage_data_directories(suffix: str, base_path: str, use_cloud_gpu: bool) ->
         os.makedirs(directory, exist_ok=True)
         os.environ[env_var] = directory
         print(f"Set {env_var} to: {directory}")
-    relari_output_path = os.path.join(target_path, "relari_output/")
-    os.makedirs(relari_output_path, exist_ok=True)
+    results_output_path = os.path.join(target_path, "evaluations_output/")
+    os.makedirs(results_output_path, exist_ok=True)


 def set_env_variables(
@@ -287,7 +285,7 @@ def is_vespa_container_healthy(suffix: str) -> bool:
     # Find the Vespa container
     stdout, _ = _run_command(
-        f"docker ps -a --format '{{{{.Names}}}}' | grep vespa | grep {suffix}"
+        f"docker ps -a --format '{{{{.Names}}}}' | awk '/vespa/ && /{suffix}/'"
     )
     container_name = stdout.strip()
@@ -313,7 +311,7 @@ def restart_vespa_container(suffix: str) -> None:
     # Find the Vespa container
     stdout, _ = _run_command(
-        f"docker ps -a --format '{{{{.Names}}}}' | grep vespa | grep {suffix}"
+        f"docker ps -a --format '{{{{.Names}}}}' | awk '/vespa/ && /{suffix}/'"
     )
     container_name = stdout.strip()
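A note on the grep-to-awk swap (the motivation is inferred, not stated in the commit): `grep` exits with status 1 when nothing matches, so a `docker ps | grep a | grep b` pipeline fails outright while containers are still coming up, whereas `awk '/a/ && /b/'` applies both patterns in a single process and exits 0 either way. A runnable comparison, with a hypothetical container listing standing in for the `docker ps` output:

import subprocess

names = "api_server-test-1234\nbackground-test-1234\n"

# grep exits 1 on no match, which a strict command runner treats as failure.
grep = subprocess.run(
    "grep 'vespa' | grep 'test-1234'",
    input=names, shell=True, text=True, capture_output=True,
)
# awk reports "no matching lines" by printing nothing, still exiting 0.
awk = subprocess.run(
    "awk '/vespa/ && /test-1234/'",
    input=names, shell=True, text=True, capture_output=True,
)
print(grep.returncode, repr(grep.stdout))  # 1 ''
print(awk.returncode, repr(awk.stdout))    # 0 ''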

View File

@@ -8,7 +8,7 @@ from tests.regression.answer_quality.cli_utils import cleanup_docker
 from tests.regression.answer_quality.cli_utils import manage_data_directories
 from tests.regression.answer_quality.cli_utils import set_env_variables
 from tests.regression.answer_quality.cli_utils import start_docker_compose
-from tests.regression.answer_quality.cli_utils import switch_to_branch
+from tests.regression.answer_quality.cli_utils import switch_to_commit
 from tests.regression.answer_quality.file_uploader import upload_test_files
 from tests.regression.answer_quality.run_qa import run_qa_test_and_save_results
@@ -36,8 +36,8 @@ def main() -> None:
         config.llm,
     )
     manage_data_directories(run_suffix, config.output_folder, config.use_cloud_gpu)
-    if config.branch:
-        switch_to_branch(config.branch)
+    if config.commit_sha:
+        switch_to_commit(config.commit_sha)
     start_docker_compose(run_suffix, config.launch_web_ui, config.use_cloud_gpu)

View File

@@ -1,6 +1,7 @@
 import json
 import multiprocessing
 import os
+import shutil
 import time

 import yaml
@@ -22,12 +23,12 @@ def _populate_results_file(output_folder_path: str, all_qa_output: list[dict]) -
             file.flush()


-def _update_metadata_file(test_output_folder: str, count: int) -> None:
+def _update_metadata_file(test_output_folder: str, invalid_answer_count: int) -> None:
     metadata_path = os.path.join(test_output_folder, METADATA_FILENAME)
     with open(metadata_path, "r", encoding="utf-8") as file:
         metadata = yaml.safe_load(file)

-    metadata["number_of_questions_asked"] = count
+    metadata["number_of_failed_questions"] = invalid_answer_count

     with open(metadata_path, "w", encoding="utf-8") as yaml_file:
         yaml.dump(metadata, yaml_file)
@@ -45,7 +46,7 @@ def _get_test_output_folder(config: dict) -> str:
     base_output_folder = os.path.expanduser(config["output_folder"])
     if config["run_suffix"]:
         base_output_folder = os.path.join(
-            base_output_folder, ("test" + config["run_suffix"]), "relari_output"
+            base_output_folder, ("test" + config["run_suffix"]), "evaluations_output"
         )
     else:
         base_output_folder = os.path.join(base_output_folder, "no_defined_suffix")
@@ -69,7 +70,9 @@ def _get_test_output_folder(config: dict) -> str:
 def _initialize_files(config: dict) -> tuple[str, list[dict]]:
     test_output_folder = _get_test_output_folder(config)
-    questions = _read_questions_jsonl(config["questions_file"])
+    questions_file_path = config["questions_file"]
+    questions = _read_questions_jsonl(questions_file_path)

     metadata = {
         "commit_sha": get_current_commit_sha(),
@@ -91,6 +94,23 @@ def _initialize_files(config: dict) -> tuple[str, list[dict]]:
     with open(metadata_path, "w", encoding="utf-8") as yaml_file:
         yaml.dump(metadata, yaml_file)

+    copied_questions_file_path = os.path.join(
+        test_output_folder, os.path.basename(questions_file_path)
+    )
+    shutil.copy2(questions_file_path, copied_questions_file_path)
+
+    zipped_files_path = config["zipped_documents_file"]
+    copied_zipped_documents_path = os.path.join(
+        test_output_folder, os.path.basename(zipped_files_path)
+    )
+    shutil.copy2(zipped_files_path, copied_zipped_documents_path)
+
+    zipped_files_folder = os.path.dirname(zipped_files_path)
+    jsonl_file_path = os.path.join(zipped_files_folder, "target_docs.jsonl")
+    if os.path.exists(jsonl_file_path):
+        copied_jsonl_path = os.path.join(test_output_folder, "target_docs.jsonl")
+        shutil.copy2(jsonl_file_path, copied_jsonl_path)
+
     return test_output_folder, questions
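For context on the new copy calls (this is the "save more data" half of the commit): `shutil.copy2` is `copyfile` plus `copystat`, so the archived inputs keep their original modification times and permission bits. A small standalone illustration, with made-up file names:

import os
import shutil
import tempfile

with tempfile.TemporaryDirectory() as src, tempfile.TemporaryDirectory() as dst:
    question_file = os.path.join(src, "sample_questions.yaml")
    with open(question_file, "w", encoding="utf-8") as f:
        f.write("questions: []\n")

    # Contents and metadata (mtime, mode) both survive the copy.
    copied = shutil.copy2(question_file, os.path.join(dst, "sample_questions.yaml"))
    assert os.path.getmtime(copied) == os.path.getmtime(question_file)
    print("archived", copied)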
@@ -138,18 +158,23 @@ def _process_and_write_query_results(config: dict) -> None:
     _populate_results_file(test_output_folder, results)

-    valid_answer_count = 0
+    invalid_answer_count = 0
     for result in results:
-        if result.get("answer"):
-            valid_answer_count += 1
+        if not result.get("answer"):
+            invalid_answer_count += 1

-    _update_metadata_file(test_output_folder, valid_answer_count)
+    _update_metadata_file(test_output_folder, invalid_answer_count)
+
+    if invalid_answer_count:
+        print(f"Warning: {invalid_answer_count} questions failed!")
+        print("Suggest restarting the vespa container and rerunning")

     time_to_finish = time.time() - start_time
     minutes, seconds = divmod(int(time_to_finish), 60)
     print(
         f"Took {minutes:02d}:{seconds:02d} to ask and answer {len(results)} questions"
     )
     print("saved test results to folder:", test_output_folder)


 def run_qa_test_and_save_results(run_suffix: str = "") -> None:
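The mm:ss formatting in the hunk above comes from `divmod` plus zero-padded f-strings; a tiny worked example with an arbitrary elapsed time:

elapsed = 383.7  # seconds, e.g. time.time() - start_time
minutes, seconds = divmod(int(elapsed), 60)  # 383 // 60 = 6, 383 % 60 = 23
print(f"{minutes:02d}:{seconds:02d}")  # -> 06:23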

View File

@@ -10,8 +10,8 @@ zipped_documents_file: "~/sampledocs.zip"
 # Path to the YAML file containing sample questions
 questions_file: "~/sample_questions.yaml"

-# Git branch to use (null means use current branch as is)
-branch: null
+# Git commit SHA to use (null means use current code as is)
+commit_sha: null

 # Whether to remove Docker containers after the test
 clean_up_docker_containers: true
@@ -28,7 +28,8 @@ model_server_ip: "PUT_PUBLIC_CLOUD_IP_HERE"
 # Port of the model server (placeholder)
 model_server_port: "PUT_PUBLIC_CLOUD_PORT_HERE"

-# Suffix for existing test results (empty string means no suffix)
+# Suffix for existing test results (E.g. -1234-5678)
+# empty string means no suffix
 existing_test_suffix: ""

 # Limit on number of tests to run (null means no limit)
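A minimal sketch of how a harness like this one might consume the renamed key (the loader shown here is hypothetical; only the `commit_sha` key and its null-means-use-as-is semantics come from the config above):

import subprocess
import yaml

with open("search_test_config.yaml", encoding="utf-8") as f:
    config = yaml.safe_load(f)

commit_sha = config.get("commit_sha")  # YAML null loads as Python None
if commit_sha:
    # Mirrors switch_to_commit: pin the working tree to the requested commit.
    subprocess.run(["git", "checkout", commit_sha], check=True)
else:
    print("commit_sha is null; testing the code as it currently is")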