Mirror of https://github.com/danswer-ai/danswer.git
Use SHA instead of branch and save more data (#1850)

commit e93de602c3 (parent 1c77395503)
@@ -53,8 +53,10 @@ Edit `search_test_config.yaml` to set:
   - The path to the zip file containing the files you'd like to test against
 - questions_file
   - The path to the yaml containing the questions you'd like to test with
-- branch
-  - Set the branch to null if you want it to just use the code as is
+- commit_sha
+  - Set this to the SHA of the commit you want to run the test against
+  - You must clear all local changes if you want to use this option
+  - Set this to null if you want it to just use the code as is
 - clean_up_docker_containers
   - Set this to true to automatically delete all docker containers, networks and volumes after the test
 - launch_web_ui
@@ -71,7 +73,7 @@ Edit `search_test_config.yaml` to set:
 - existing_test_suffix
   - Use this if you would like to relaunch a previous test instance
   - Input the suffix of the test you'd like to re-launch
-  - (E.g. to use the data from folder "test_1234_5678" put "_1234_5678")
+  - (E.g. to use the data from folder "test-1234-5678" put "-1234-5678")
   - No new files will automatically be uploaded
   - Leave empty to run a new test
 - limit
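The "clear all local changes" caveat exists because `git checkout <sha>` will either refuse to overwrite modified tracked files or carry the modifications into the detached checkout, so the tested code would no longer match the SHA. A minimal sketch of a guard a runner could apply before honoring commit_sha (a hypothetical helper, not part of this commit):

import subprocess

def assert_clean_worktree() -> None:
    # "git status --porcelain" prints one line per changed file, nothing when clean
    status = subprocess.run(
        ["git", "status", "--porcelain"], capture_output=True, text=True, check=True
    ).stdout
    if status.strip():
        raise RuntimeError("Uncommitted changes present; clear them before setting commit_sha")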
@@ -39,7 +39,7 @@ def _create_new_chat_session(run_suffix: str) -> int:
         raise RuntimeError(response_json)


-@retry(tries=15, delay=10, jitter=1)
+@retry(tries=10, delay=10)
 def get_answer_from_query(query: str, run_suffix: str) -> tuple[list[str], str]:
     filters = IndexFilters(
         source_type=None,
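`@retry` here is presumably the decorator from the PyPI `retry` package: `tries` caps the number of attempts, `delay` is the pause in seconds between them, and `jitter` adds extra seconds to the delay after each failure. The change tightens 15 attempts to 10 and drops the jitter. Roughly, the decorator behaves like this sketch (not the library's actual code):

import time
from functools import wraps

def retry_sketch(tries: int = 10, delay: float = 10):
    def decorator(fn):
        @wraps(fn)
        def wrapper(*args, **kwargs):
            for attempt in range(1, tries + 1):
                try:
                    return fn(*args, **kwargs)
                except Exception:
                    if attempt == tries:
                        raise  # attempts exhausted, surface the last error
                    time.sleep(delay)  # wait before the next attempt
        return wrapper
    return decorator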
@@ -81,10 +81,16 @@ def get_answer_from_query(query: str, run_suffix: str) -> tuple[list[str], str]:
     return simple_search_docs, answer


 @retry(tries=10, delay=10)
 def check_if_query_ready(run_suffix: str) -> bool:
     url = _api_url_builder(run_suffix, "/manage/admin/connector/indexing-status/")

-    indexing_status_dict = requests.get(url, headers=GENERAL_HEADERS).json()
+    try:
+        indexing_status_dict = requests.get(url, headers=GENERAL_HEADERS).json()
+    except Exception as e:
+        print("Failed to check indexing status, API server is likely starting up:")
+        print(f"\t {str(e)}")
+        print("trying again")
+        raise e

     ongoing_index_attempts = False
     doc_count = 0
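The new try/except does not swallow the failure: it prints the error for visibility and re-raises so the `@retry` decorator sleeps and re-invokes `check_if_query_ready`, which covers the window where the API server is still booting. The same log-then-reraise pattern in isolation (the `/health` endpoint is an assumption for illustration, not from this diff):

import requests
from retry import retry

@retry(tries=10, delay=10)
def wait_until_up(base_url: str) -> None:
    try:
        # hypothetical health check; any exception triggers another retry round
        requests.get(f"{base_url}/health", timeout=5).raise_for_status()
    except Exception as e:
        print(f"Server not ready yet: {e}")  # log for the operator...
        raise  # ...then re-raise so @retry schedules another attempt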
@@ -60,11 +60,10 @@ def get_current_commit_sha() -> str:
     return sha


-def switch_to_branch(branch: str) -> None:
-    print(f"Switching to branch: {branch}...")
-    _run_command(f"git checkout {branch}")
-    _run_command("git pull")
-    print(f"Successfully switched to branch: {branch}")
+def switch_to_commit(commit_sha: str) -> None:
+    print(f"Switching to commit: {commit_sha}...")
+    _run_command(f"git checkout {commit_sha}")
+    print(f"Successfully switched to commit: {commit_sha}")
     print("Repository updated successfully.")
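Checking out a bare SHA puts the repository into detached-HEAD state, and the `git pull` from the branch version is gone because a SHA never moves. A caller that wants to restore the original checkout afterwards could do so like this (a sketch; this commit does not add such cleanup):

import subprocess

def _git(*args: str) -> str:
    return subprocess.run(["git", *args], capture_output=True, text=True, check=True).stdout.strip()

original_ref = _git("rev-parse", "--abbrev-ref", "HEAD")  # e.g. "main"
try:
    _git("checkout", "e93de602c3")  # detached HEAD at the pinned commit
    # ... run the regression test here ...
finally:
    _git("checkout", original_ref)  # return to the original branch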
@@ -77,7 +76,7 @@ def get_docker_container_env_vars(suffix: str) -> dict:
     combined_env_vars = {}
     for container_type in ["background", "api_server"]:
         container_name = _run_command(
-            f"docker ps -a --format '{{{{.Names}}}}' | grep '{container_type}' | grep '{suffix}'"
+            f"docker ps -a --format '{{{{.Names}}}}' | awk '/{container_type}/ && /{suffix}/'"
         )[0].strip()
         if not container_name:
             raise RuntimeError(
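Piping through `awk '/a/ && /b/'` keeps only lines matching both patterns with a single extra process instead of two chained `grep`s. The same selection can be done without any shell pipeline at all; a sketch:

import subprocess

def find_container_name(container_type: str, suffix: str) -> str:
    names = subprocess.run(
        ["docker", "ps", "-a", "--format", "{{.Names}}"],
        capture_output=True, text=True, check=True,
    ).stdout.splitlines()
    # same predicate as awk '/{container_type}/ && /{suffix}/',
    # with plain substring tests standing in for the regexes
    return next((n for n in names if container_type in n and suffix in n), "")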
@@ -93,7 +92,6 @@ def get_docker_container_env_vars(suffix: str) -> dict:
             key, value = env_var.split("=", 1)
             combined_env_vars[key] = value

-    print(f"Combined env variables: {combined_env_vars}")
     return combined_env_vars
@@ -117,8 +115,8 @@ def manage_data_directories(suffix: str, base_path: str, use_cloud_gpu: bool) -> None:
         os.makedirs(directory, exist_ok=True)
         os.environ[env_var] = directory
         print(f"Set {env_var} to: {directory}")
-    relari_output_path = os.path.join(target_path, "relari_output/")
-    os.makedirs(relari_output_path, exist_ok=True)
+    results_output_path = os.path.join(target_path, "evaluations_output/")
+    os.makedirs(results_output_path, exist_ok=True)


 def set_env_variables(
@@ -287,7 +285,7 @@ def is_vespa_container_healthy(suffix: str) -> bool:
     # Find the Vespa container
     stdout, _ = _run_command(
-        f"docker ps -a --format '{{{{.Names}}}}' | grep vespa | grep {suffix}"
+        f"docker ps -a --format '{{{{.Names}}}}' | awk '/vespa/ && /{suffix}/'"
     )
     container_name = stdout.strip()
@@ -313,7 +311,7 @@ def restart_vespa_container(suffix: str) -> None:
     # Find the Vespa container
     stdout, _ = _run_command(
-        f"docker ps -a --format '{{{{.Names}}}}' | grep vespa | grep {suffix}"
+        f"docker ps -a --format '{{{{.Names}}}}' | awk '/vespa/ && /{suffix}/'"
     )
     container_name = stdout.strip()
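Note that the awk program must reach awk as a single argument: the single quotes (restored here to match the pattern used in get_docker_container_env_vars) stop the shell from treating `&&` as a command separator. The token split illustrates the difference (a sketch using `shlex`, which follows shell quoting rules):

import shlex

suffix = "-1234-5678"  # illustrative run suffix
program = f"/vespa/ && /{suffix}/"
print(shlex.split(f"awk '{program}'"))
# ['awk', '/vespa/ && /-1234-5678/']  -> one argument, as awk expects
print(shlex.split(f"awk {program}"))
# ['awk', '/vespa/', '&&', '/-1234-5678/'] -> a real shell would cut the pipeline at '&&'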
@ -8,7 +8,7 @@ from tests.regression.answer_quality.cli_utils import cleanup_docker
|
||||
from tests.regression.answer_quality.cli_utils import manage_data_directories
|
||||
from tests.regression.answer_quality.cli_utils import set_env_variables
|
||||
from tests.regression.answer_quality.cli_utils import start_docker_compose
|
||||
from tests.regression.answer_quality.cli_utils import switch_to_branch
|
||||
from tests.regression.answer_quality.cli_utils import switch_to_commit
|
||||
from tests.regression.answer_quality.file_uploader import upload_test_files
|
||||
from tests.regression.answer_quality.run_qa import run_qa_test_and_save_results
|
||||
|
||||
@@ -36,8 +36,8 @@ def main() -> None:
         config.llm,
     )
     manage_data_directories(run_suffix, config.output_folder, config.use_cloud_gpu)
-    if config.branch:
-        switch_to_branch(config.branch)
+    if config.commit_sha:
+        switch_to_commit(config.commit_sha)

     start_docker_compose(run_suffix, config.launch_web_ui, config.use_cloud_gpu)
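`main()` now branches on `config.commit_sha` rather than `config.branch`. The shape of `config` is not shown in this diff; from the attributes used here it is presumably something like the following hypothetical container for the parsed `search_test_config.yaml`:

from dataclasses import dataclass

@dataclass
class TestConfig:  # hypothetical; field names inferred from this diff only
    commit_sha: str | None      # null in YAML -> None: use the code as is
    output_folder: str
    use_cloud_gpu: bool
    launch_web_ui: bool
    existing_test_suffix: str
    llm: dict                   # passed to set_env_variables; exact type unknown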
@@ -1,6 +1,7 @@
 import json
 import multiprocessing
 import os
+import shutil
 import time

 import yaml
@@ -22,12 +23,12 @@ def _populate_results_file(output_folder_path: str, all_qa_output: list[dict]) -> None:
         file.flush()


-def _update_metadata_file(test_output_folder: str, count: int) -> None:
+def _update_metadata_file(test_output_folder: str, invalid_answer_count: int) -> None:
     metadata_path = os.path.join(test_output_folder, METADATA_FILENAME)
     with open(metadata_path, "r", encoding="utf-8") as file:
         metadata = yaml.safe_load(file)

-    metadata["number_of_questions_asked"] = count
+    metadata["number_of_failed_questions"] = invalid_answer_count
     with open(metadata_path, "w", encoding="utf-8") as yaml_file:
         yaml.dump(metadata, yaml_file)
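`_update_metadata_file` is a plain read-modify-write over the YAML metadata: `safe_load` the whole document, overwrite one key, dump it back. The mechanics in isolation (values illustrative):

import yaml

raw = "commit_sha: e93de602c3\nnumber_of_questions_asked: 100\n"
metadata = yaml.safe_load(raw)
metadata["number_of_failed_questions"] = 3  # the key this commit now records
print(yaml.dump(metadata), end="")
# commit_sha: e93de602c3
# number_of_failed_questions: 3
# number_of_questions_asked: 100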
@@ -45,7 +46,7 @@ def _get_test_output_folder(config: dict) -> str:
     base_output_folder = os.path.expanduser(config["output_folder"])
     if config["run_suffix"]:
         base_output_folder = os.path.join(
-            base_output_folder, ("test" + config["run_suffix"]), "relari_output"
+            base_output_folder, ("test" + config["run_suffix"]), "evaluations_output"
         )
     else:
         base_output_folder = os.path.join(base_output_folder, "no_defined_suffix")
@@ -69,7 +70,9 @@ def _get_test_output_folder(config: dict) -> str:
 def _initialize_files(config: dict) -> tuple[str, list[dict]]:
     test_output_folder = _get_test_output_folder(config)

-    questions = _read_questions_jsonl(config["questions_file"])
+    questions_file_path = config["questions_file"]
+
+    questions = _read_questions_jsonl(questions_file_path)

     metadata = {
         "commit_sha": get_current_commit_sha(),
@@ -91,6 +94,23 @@ def _initialize_files(config: dict) -> tuple[str, list[dict]]:
     with open(metadata_path, "w", encoding="utf-8") as yaml_file:
         yaml.dump(metadata, yaml_file)

+    copied_questions_file_path = os.path.join(
+        test_output_folder, os.path.basename(questions_file_path)
+    )
+    shutil.copy2(questions_file_path, copied_questions_file_path)
+
+    zipped_files_path = config["zipped_documents_file"]
+    copied_zipped_documents_path = os.path.join(
+        test_output_folder, os.path.basename(zipped_files_path)
+    )
+    shutil.copy2(zipped_files_path, copied_zipped_documents_path)
+
+    zipped_files_folder = os.path.dirname(zipped_files_path)
+    jsonl_file_path = os.path.join(zipped_files_folder, "target_docs.jsonl")
+    if os.path.exists(jsonl_file_path):
+        copied_jsonl_path = os.path.join(test_output_folder, "target_docs.jsonl")
+        shutil.copy2(jsonl_file_path, copied_jsonl_path)
+
     return test_output_folder, questions
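Copying the questions file, the documents zip, and any sibling `target_docs.jsonl` into the output folder makes each run's inputs part of its saved results. `shutil.copy2` is `copy` plus `copystat`, so file metadata such as the modification time survives the copy:

import os
import shutil
import tempfile

src_dir, dst_dir = tempfile.mkdtemp(), tempfile.mkdtemp()
src = os.path.join(src_dir, "sample_questions.yaml")
with open(src, "w", encoding="utf-8") as f:
    f.write("questions: []\n")

copied = shutil.copy2(src, dst_dir)  # copying into a directory keeps the basename
assert os.path.getmtime(copied) == os.path.getmtime(src)  # timestamp preserved via copystat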
@@ -138,18 +158,23 @@ def _process_and_write_query_results(config: dict) -> None:

     _populate_results_file(test_output_folder, results)

-    valid_answer_count = 0
+    invalid_answer_count = 0
     for result in results:
-        if result.get("answer"):
-            valid_answer_count += 1
+        if not result.get("answer"):
+            invalid_answer_count += 1

-    _update_metadata_file(test_output_folder, valid_answer_count)
+    _update_metadata_file(test_output_folder, invalid_answer_count)
+
+    if invalid_answer_count:
+        print(f"Warning: {invalid_answer_count} questions failed!")
+        print("Suggest restarting the vespa container and rerunning")

     time_to_finish = time.time() - start_time
+    minutes, seconds = divmod(int(time_to_finish), 60)
     print(
+        f"Took {minutes:02d}:{seconds:02d} to ask and answer {len(results)} questions"
     )
     print("saved test results to folder:", test_output_folder)


 def run_qa_test_and_save_results(run_suffix: str = "") -> None:
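The counting flips from valid answers to failed ones, so the metadata entry and the new warning describe the same number, and the elapsed time is now reported as MM:SS via divmod. The formatting step on its own:

time_to_finish = 83.4  # illustrative elapsed seconds
minutes, seconds = divmod(int(time_to_finish), 60)  # divmod(83, 60) -> (1, 23)
print(f"Took {minutes:02d}:{seconds:02d}")  # Took 01:23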
@@ -10,8 +10,8 @@ zipped_documents_file: "~/sampledocs.zip"
 # Path to the YAML file containing sample questions
 questions_file: "~/sample_questions.yaml"

-# Git branch to use (null means use current branch as is)
-branch: null
+# Git commit SHA to use (null means use current code as is)
+commit_sha: null

 # Whether to remove Docker containers after the test
 clean_up_docker_containers: true
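Pinning by SHA rather than branch makes runs reproducible, since a branch is a moving pointer while a SHA names one immutable snapshot. On the YAML side, `null` loads as Python `None`, which is exactly what the `if config.commit_sha:` check in the launcher relies on (sketch using only keys visible in this diff):

import yaml

config = yaml.safe_load(
    "commit_sha: null\n"
    "clean_up_docker_containers: true\n"
    'existing_test_suffix: ""\n'
)
assert config["commit_sha"] is None            # YAML null -> None: skip the checkout
assert config["clean_up_docker_containers"] is True
assert config["existing_test_suffix"] == ""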
@@ -28,7 +28,8 @@ model_server_ip: "PUT_PUBLIC_CLOUD_IP_HERE"
 # Port of the model server (placeholder)
 model_server_port: "PUT_PUBLIC_CLOUD_PORT_HERE"

-# Suffix for existing test results (empty string means no suffix)
+# Suffix for existing test results (E.g. -1234-5678)
+# empty string means no suffix
 existing_test_suffix: ""

 # Limit on number of tests to run (null means no limit)