Mirror of https://github.com/danswer-ai/danswer.git
Improve eval pipeline qol (#1908)
@@ -9,7 +9,7 @@ This Python script automates the process of running search quality tests for a b
 - Manages environment variables
 - Switches to specified Git branch
 - Uploads test documents
-- Runs search quality tests using Relari
+- Runs search quality tests
 - Cleans up Docker containers (optional)
 
 ## Usage
@@ -29,9 +29,17 @@ export PYTHONPATH=$PYTHONPATH:$PWD/backend
 ```
 cd backend/tests/regression/answer_quality
 ```
-7. Run the script:
+7. To launch the evaluation environment, run the launch_eval_env.py script (this step can be skipped if you are running the environment outside of Docker; just leave "environment_name" blank):
 ```
-python run_eval_pipeline.py
+python launch_eval_env.py
+```
+8. Run the file_uploader.py script to upload the zip files located at the path "zipped_documents_file":
+```
+python file_uploader.py
+```
+9. Run the run_qa.py script to ask questions from the jsonl located at the path "questions_file". This will hit the "query/answer-with-quote" API endpoint.
+```
+python run_qa.py
 ```
 
 Note: All data will be saved even after the containers are shut down. There are instructions below to re-launching docker containers using this data.
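Step 9 above drives the "query/answer-with-quote" endpoint through api_utils.get_answer_from_query (its updated signature appears in the api_utils hunks below). A minimal sketch of a single call; the question text is a made-up example, and an empty environment name targets the default local deployment on port 8080:

```python
# Sketch only: the question and env_name values are placeholders.
from tests.regression.answer_quality.api_utils import get_answer_from_query

context_docs, answer = get_answer_from_query(
    query="How do I point the eval at a remote model server?",  # hypothetical question
    only_retrieve_docs=False,  # True skips LLM answer generation
    env_name="",               # empty string -> default local deployment (localhost:8080)
)
print(answer)
```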
@@ -75,12 +83,10 @@ Edit `search_test_config.yaml` to set:
 - model_server_port
     - This is the port of the remote model server
     - Only need to set this if use_cloud_gpu is true
-- existing_test_suffix (THIS IS NOT A SUFFIX ANYMORE, TODO UPDATE THE DOCS HERE)
+- environment_name
     - Use this if you would like to relaunch a previous test instance
-    - Input the suffix of the test you'd like to re-launch
-    - (E.g. to use the data from folder "test-1234-5678" put "-1234-5678")
-    - No new files will automatically be uploaded
-    - Leave empty to run a new test
+    - Input the env_name of the test you'd like to re-launch
+    - Leave empty to launch referencing local default network locations
 - limit
     - Max number of questions you'd like to ask against the dataset
     - Set to null for no limit
@@ -90,7 +96,7 @@ Edit `search_test_config.yaml` to set:
 
 ## Relaunching From Existing Data
 
-To launch an existing set of containers that has already completed indexing, set the existing_test_suffix variable. This will launch the docker containers mounted on the volumes of the indicated suffix and will not automatically index any documents or run any QA.
+To launch an existing set of containers that has already completed indexing, set the environment_name variable. This will launch the docker containers mounted on the volumes of the indicated env_name and will not automatically index any documents or run any QA.
 
 Once these containers are launched you can run file_uploader.py or run_qa.py (assuming you have run the steps in the Usage section above).
 - file_uploader.py will upload and index additional zipped files located at the zipped_documents_file path.
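These options are read straight into a SimpleNamespace by the test scripts (see the launch_eval_env.py and file_uploader.py hunks below). A minimal sketch of the relevant part of search_test_config.yaml, with assumed placeholder values, loaded the same way:

```python
# Sketch only: the YAML values below are illustrative placeholders.
from types import SimpleNamespace

import yaml

config = SimpleNamespace(**yaml.safe_load(
    """
environment_name: ""   # empty -> default local deployment; set a name to relaunch that env
limit: null            # null -> no cap on the number of questions asked
use_cloud_gpu: false
model_server_port: "PUT_PUBLIC_CLOUD_PORT_HERE"
"""
))
print(config.environment_name, config.limit)
```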
@@ -16,13 +16,16 @@ from tests.regression.answer_quality.cli_utils import get_api_server_host_port
 GENERAL_HEADERS = {"Content-Type": "application/json"}
 
 
-def _api_url_builder(run_suffix: str, api_path: str) -> str:
-    return f"http://localhost:{get_api_server_host_port(run_suffix)}" + api_path
+def _api_url_builder(env_name: str, api_path: str) -> str:
+    if env_name:
+        return f"http://localhost:{get_api_server_host_port(env_name)}" + api_path
+    else:
+        return "http://localhost:8080" + api_path
 
 
 @retry(tries=5, delay=5)
 def get_answer_from_query(
-    query: str, only_retrieve_docs: bool, run_suffix: str
+    query: str, only_retrieve_docs: bool, env_name: str
 ) -> tuple[list[str], str]:
     filters = IndexFilters(
         source_type=None,
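A quick sketch of the new fallback in _api_url_builder; "my-eval-env" is a hypothetical environment name, and the second call needs a running container set, so only its expected shape is shown in comments:

```python
# Sketch only: illustrates _api_url_builder's branch on env_name.
from tests.regression.answer_quality.api_utils import _api_url_builder

print(_api_url_builder("", "/manage/connector"))
# -> "http://localhost:8080/manage/connector"  (no env_name: default local ports)

# With an env_name, the host port is looked up from the matching api_server container:
# _api_url_builder("my-eval-env", "/manage/connector")
# -> "http://localhost:<published api_server port for my-eval-env>/manage/connector"
```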
@@ -49,7 +52,7 @@ def get_answer_from_query(
         skip_gen_ai_answer_generation=only_retrieve_docs,
     )
 
-    url = _api_url_builder(run_suffix, "/query/answer-with-quote/")
+    url = _api_url_builder(env_name, "/query/answer-with-quote/")
     headers = {
         "Content-Type": "application/json",
     }
@@ -70,8 +73,8 @@ def get_answer_from_query(
 
 
 @retry(tries=10, delay=10)
-def check_indexing_status(run_suffix: str) -> tuple[int, bool]:
-    url = _api_url_builder(run_suffix, "/manage/admin/connector/indexing-status/")
+def check_indexing_status(env_name: str) -> tuple[int, bool]:
+    url = _api_url_builder(env_name, "/manage/admin/connector/indexing-status/")
     try:
         indexing_status_dict = requests.get(url, headers=GENERAL_HEADERS).json()
     except Exception as e:
@@ -99,8 +102,8 @@ def check_indexing_status(run_suffix: str) -> tuple[int, bool]:
     return doc_count, ongoing_index_attempts
 
 
-def run_cc_once(run_suffix: str, connector_id: int, credential_id: int) -> None:
-    url = _api_url_builder(run_suffix, "/manage/admin/connector/run-once/")
+def run_cc_once(env_name: str, connector_id: int, credential_id: int) -> None:
+    url = _api_url_builder(env_name, "/manage/admin/connector/run-once/")
     body = {
         "connector_id": connector_id,
         "credential_ids": [credential_id],
@@ -115,9 +118,9 @@ def run_cc_once(run_suffix: str, connector_id: int, credential_id: int) -> None:
         print("Failed text:", response.text)
 
 
-def create_cc_pair(run_suffix: str, connector_id: int, credential_id: int) -> None:
+def create_cc_pair(env_name: str, connector_id: int, credential_id: int) -> None:
     url = _api_url_builder(
-        run_suffix, f"/manage/connector/{connector_id}/credential/{credential_id}"
+        env_name, f"/manage/connector/{connector_id}/credential/{credential_id}"
     )
 
     body = {"name": "zip_folder_contents", "is_public": True}
@@ -130,8 +133,8 @@ def create_cc_pair(run_suffix: str, connector_id: int, credential_id: int) -> None:
         print("Failed text:", response.text)
 
 
-def _get_existing_connector_names(run_suffix: str) -> list[str]:
-    url = _api_url_builder(run_suffix, "/manage/connector")
+def _get_existing_connector_names(env_name: str) -> list[str]:
+    url = _api_url_builder(env_name, "/manage/connector")
 
     body = {
         "credential_json": {},
@@ -145,10 +148,10 @@ def _get_existing_connector_names(run_suffix: str) -> list[str]:
         raise RuntimeError(response.__dict__)
 
 
-def create_connector(run_suffix: str, file_paths: list[str]) -> int:
-    url = _api_url_builder(run_suffix, "/manage/admin/connector")
+def create_connector(env_name: str, file_paths: list[str]) -> int:
+    url = _api_url_builder(env_name, "/manage/admin/connector")
     connector_name = base_connector_name = "search_eval_connector"
-    existing_connector_names = _get_existing_connector_names(run_suffix)
+    existing_connector_names = _get_existing_connector_names(env_name)
 
     count = 1
     while connector_name in existing_connector_names:
@@ -175,8 +178,8 @@ def create_connector(run_suffix: str, file_paths: list[str]) -> int:
         raise RuntimeError(response.__dict__)
 
 
-def create_credential(run_suffix: str) -> int:
-    url = _api_url_builder(run_suffix, "/manage/credential")
+def create_credential(env_name: str) -> int:
+    url = _api_url_builder(env_name, "/manage/credential")
     body = {
         "credential_json": {},
         "admin_public": True,
@@ -190,12 +193,12 @@ def create_credential(run_suffix: str) -> int:
 
 
 @retry(tries=10, delay=2, backoff=2)
-def upload_file(run_suffix: str, zip_file_path: str) -> list[str]:
+def upload_file(env_name: str, zip_file_path: str) -> list[str]:
     files = [
         ("files", open(zip_file_path, "rb")),
     ]
 
-    api_path = _api_url_builder(run_suffix, "/manage/admin/connector/file/upload")
+    api_path = _api_url_builder(env_name, "/manage/admin/connector/file/upload")
     try:
         response = requests.post(api_path, files=files)
         response.raise_for_status()  # Raises an HTTPError for bad responses
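These helpers are meant to be called in a fixed order when seeding an environment; the sketch below mirrors upload_test_files in the file_uploader.py hunks further down. The zip path and environment name are placeholders:

```python
# Sketch only: mirrors upload_test_files; the path and env name are made up.
from tests.regression.answer_quality.api_utils import (
    create_cc_pair,
    create_connector,
    create_credential,
    run_cc_once,
    upload_file,
)

env_name = "my-eval-env"                    # hypothetical environment name
zip_path = "/path/to/zipped_documents.zip"  # placeholder path

file_paths = upload_file(env_name, zip_path)      # push the zip to the file upload endpoint
conn_id = create_connector(env_name, file_paths)  # register a file connector over those paths
cred_id = create_credential(env_name)             # create the (empty) credential
create_cc_pair(env_name, conn_id, cred_id)        # link connector and credential
run_cc_once(env_name, conn_id, cred_id)           # kick off a single indexing run
```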
@@ -67,20 +67,20 @@ def switch_to_commit(commit_sha: str) -> None:
     print("Repository updated successfully.")
 
 
-def get_docker_container_env_vars(suffix: str) -> dict:
+def get_docker_container_env_vars(env_name: str) -> dict:
     """
     Retrieves environment variables from "background" and "api_server" Docker containers.
     """
-    print(f"Getting environment variables for containers with suffix: {suffix}")
+    print(f"Getting environment variables for containers with env_name: {env_name}")
 
     combined_env_vars = {}
     for container_type in ["background", "api_server"]:
         container_name = _run_command(
-            f"docker ps -a --format '{{{{.Names}}}}' | awk '/{container_type}/ && /{suffix}/'"
+            f"docker ps -a --format '{{{{.Names}}}}' | awk '/{container_type}/ && /{env_name}/'"
         )[0].strip()
         if not container_name:
             raise RuntimeError(
-                f"No {container_type} container found with suffix: {suffix}"
+                f"No {container_type} container found with env_name: {env_name}"
             )
 
         env_vars_json = _run_command(
@@ -95,9 +95,9 @@ def get_docker_container_env_vars(suffix: str) -> dict:
     return combined_env_vars
 
 
-def manage_data_directories(suffix: str, base_path: str, use_cloud_gpu: bool) -> None:
+def manage_data_directories(env_name: str, base_path: str, use_cloud_gpu: bool) -> None:
     # Use the user's home directory as the base path
-    target_path = os.path.join(os.path.expanduser(base_path), suffix)
+    target_path = os.path.join(os.path.expanduser(base_path), env_name)
     directories = {
        "DANSWER_POSTGRES_DATA_DIR": os.path.join(target_path, "postgres/"),
        "DANSWER_VESPA_DATA_DIR": os.path.join(target_path, "vespa/"),
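For reference, a small sketch of where manage_data_directories places per-environment state; the base path and environment name below are assumed values, not taken from this diff:

```python
# Sketch only: shows the directory layout implied by the os.path.join above.
import os

base_path = "~/danswer_test_results"  # hypothetical output_folder from search_test_config.yaml
env_name = "my-eval-env"              # hypothetical environment name

target_path = os.path.join(os.path.expanduser(base_path), env_name)
print(os.path.join(target_path, "postgres/"))  # value exported as DANSWER_POSTGRES_DATA_DIR
print(os.path.join(target_path, "vespa/"))     # value exported as DANSWER_VESPA_DATA_DIR
```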
@@ -144,12 +144,12 @@ def _is_port_in_use(port: int) -> bool:
 
 
 def start_docker_compose(
-    run_suffix: str, launch_web_ui: bool, use_cloud_gpu: bool, only_state: bool = False
+    env_name: str, launch_web_ui: bool, use_cloud_gpu: bool, only_state: bool = False
 ) -> None:
     print("Starting Docker Compose...")
     os.chdir(os.path.dirname(__file__))
     os.chdir("../../../../deployment/docker_compose/")
-    command = f"docker compose -f docker-compose.search-testing.yml -p danswer-stack-{run_suffix} up -d"
+    command = f"docker compose -f docker-compose.search-testing.yml -p danswer-stack-{env_name} up -d"
     command += " --build"
     command += " --force-recreate"
 
@@ -175,17 +175,17 @@ def start_docker_compose(
     print("Containers have been launched")
 
 
-def cleanup_docker(run_suffix: str) -> None:
+def cleanup_docker(env_name: str) -> None:
     print(
-        f"Deleting Docker containers, volumes, and networks for project suffix: {run_suffix}"
+        f"Deleting Docker containers, volumes, and networks for project env_name: {env_name}"
     )
 
     stdout, _ = _run_command("docker ps -a --format '{{json .}}'")
 
     containers = [json.loads(line) for line in stdout.splitlines()]
-    if not run_suffix:
-        run_suffix = datetime.now().strftime("-%Y")
-    project_name = f"danswer-stack{run_suffix}"
+    if not env_name:
+        env_name = datetime.now().strftime("-%Y")
+    project_name = f"danswer-stack{env_name}"
     containers_to_delete = [
         c for c in containers if c["Names"].startswith(project_name)
     ]
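The compose project name is what ties an environment to its containers; a short sketch of the command start_docker_compose ends up running for a hypothetical environment name:

```python
# Sketch only: reproduces the command string built above for env_name = "my-eval-env".
env_name = "my-eval-env"
command = (
    f"docker compose -f docker-compose.search-testing.yml "
    f"-p danswer-stack-{env_name} up -d --build --force-recreate"
)
print(command)
# docker compose -f docker-compose.search-testing.yml -p danswer-stack-my-eval-env up -d --build --force-recreate
```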
@@ -221,23 +221,23 @@ def cleanup_docker(run_suffix: str) -> None:
 
     networks = stdout.splitlines()
 
-    networks_to_delete = [n for n in networks if run_suffix in n]
+    networks_to_delete = [n for n in networks if env_name in n]
 
     if not networks_to_delete:
-        print(f"No networks found containing suffix: {run_suffix}")
+        print(f"No networks found containing env_name: {env_name}")
     else:
         network_names = " ".join(networks_to_delete)
         _run_command(f"docker network rm {network_names}")
 
         print(
-            f"Successfully deleted {len(networks_to_delete)} networks containing suffix: {run_suffix}"
+            f"Successfully deleted {len(networks_to_delete)} networks containing env_name: {env_name}"
         )
 
 
 @retry(tries=5, delay=5, backoff=2)
-def get_api_server_host_port(suffix: str) -> str:
+def get_api_server_host_port(env_name: str) -> str:
     """
-    This pulls all containers with the provided suffix
+    This pulls all containers with the provided env_name
     It then grabs the JSON specific container with a name containing "api_server"
     It then grabs the port info from the JSON and strips out the relevent data
     """
@@ -248,16 +248,16 @@ def get_api_server_host_port(suffix: str) -> str:
     server_jsons = []
 
     for container in containers:
-        if container_name in container["Names"] and suffix in container["Names"]:
+        if container_name in container["Names"] and env_name in container["Names"]:
             server_jsons.append(container)
 
     if not server_jsons:
         raise RuntimeError(
-            f"No container found containing: {container_name} and {suffix}"
+            f"No container found containing: {container_name} and {env_name}"
         )
     elif len(server_jsons) > 1:
         raise RuntimeError(
-            f"Too many containers matching {container_name} found, please indicate a suffix"
+            f"Too many containers matching {container_name} found, please indicate an env_name"
         )
     server_json = server_jsons[0]
 
@@ -278,23 +278,23 @@ def get_api_server_host_port(suffix: str) -> str:
         raise RuntimeError(f"Too many ports matching {client_port} found")
     if not matching_ports:
         raise RuntimeError(
-            f"No port found containing: {client_port} for container: {container_name} and suffix: {suffix}"
+            f"No port found containing: {client_port} for container: {container_name} and env_name: {env_name}"
         )
     return matching_ports[0]
 
 
 # Added function to restart Vespa container
-def restart_vespa_container(suffix: str) -> None:
-    print(f"Restarting Vespa container for suffix: {suffix}")
+def restart_vespa_container(env_name: str) -> None:
+    print(f"Restarting Vespa container for env_name: {env_name}")
 
     # Find the Vespa container
     stdout, _ = _run_command(
-        f"docker ps -a --format '{{{{.Names}}}}' | awk '/index-1/ && /{suffix}/'"
+        f"docker ps -a --format '{{{{.Names}}}}' | awk '/index-1/ && /{env_name}/'"
     )
     container_name = stdout.strip()
 
     if not container_name:
-        raise RuntimeError(f"No Vespa container found with suffix: {suffix}")
+        raise RuntimeError(f"No Vespa container found with env_name: {env_name}")
 
     # Restart the container
     _run_command(f"docker restart {container_name}")
@@ -307,8 +307,8 @@ def restart_vespa_container(suffix: str) -> None:
 
 if __name__ == "__main__":
     """
-    Running this just cleans up the docker environment for the container indicated by existing_test_suffix
-    If no existing_test_suffix is indicated, will just clean up all danswer docker containers/volumes/networks
+    Running this just cleans up the docker environment for the container indicated by environment_name
+    If no environment_name is indicated, will just clean up all danswer docker containers/volumes/networks
     Note: vespa/postgres mounts are not deleted
     """
     current_dir = os.path.dirname(os.path.abspath(__file__))
@@ -318,4 +318,4 @@ if __name__ == "__main__":
 
     if not isinstance(config, dict):
         raise TypeError("config must be a dictionary")
-    cleanup_docker(config["existing_test_suffix"])
+    cleanup_docker(config["environment_name"])
@@ -13,7 +13,6 @@ from tests.regression.answer_quality.api_utils import create_connector
 from tests.regression.answer_quality.api_utils import create_credential
 from tests.regression.answer_quality.api_utils import run_cc_once
 from tests.regression.answer_quality.api_utils import upload_file
-from tests.regression.answer_quality.cli_utils import restart_vespa_container
 
 
 def unzip_and_get_file_paths(zip_file_path: str) -> list[str]:
@@ -35,40 +34,37 @@ def create_temp_zip_from_files(file_paths: list[str]) -> str:
     return zip_file_path
 
 
-def upload_test_files(zip_file_path: str, run_suffix: str) -> None:
+def upload_test_files(zip_file_path: str, env_name: str) -> None:
     print("zip:", zip_file_path)
-    file_paths = upload_file(run_suffix, zip_file_path)
+    file_paths = upload_file(env_name, zip_file_path)
 
-    conn_id = create_connector(run_suffix, file_paths)
-    cred_id = create_credential(run_suffix)
+    conn_id = create_connector(env_name, file_paths)
+    cred_id = create_credential(env_name)
 
-    create_cc_pair(run_suffix, conn_id, cred_id)
-    run_cc_once(run_suffix, conn_id, cred_id)
+    create_cc_pair(env_name, conn_id, cred_id)
+    run_cc_once(env_name, conn_id, cred_id)
 
 
-def manage_file_upload(zip_file_path: str, run_suffix: str) -> None:
+def manage_file_upload(zip_file_path: str, env_name: str) -> None:
     unzipped_file_paths = unzip_and_get_file_paths(zip_file_path)
     total_file_count = len(unzipped_file_paths)
 
     while True:
-        doc_count, ongoing_index_attempts = check_indexing_status(run_suffix)
+        doc_count, ongoing_index_attempts = check_indexing_status(env_name)
 
-        if not doc_count:
-            print("No docs indexed, waiting for indexing to start")
-            upload_test_files(zip_file_path, run_suffix)
-        elif ongoing_index_attempts:
+        if ongoing_index_attempts:
             print(
                 f"{doc_count} docs indexed but waiting for ongoing indexing jobs to finish..."
             )
+        elif not doc_count:
+            print("No docs indexed, waiting for indexing to start")
+            upload_test_files(zip_file_path, env_name)
         elif doc_count < total_file_count:
             print(f"No ongooing indexing attempts but only {doc_count} docs indexed")
-            print("Restarting vespa...")
-            restart_vespa_container(run_suffix)
-            print(f"Rerunning with {total_file_count - doc_count} missing docs")
             remaining_files = unzipped_file_paths[doc_count:]
             print(f"Grabbed last {len(remaining_files)} docs to try agian")
             temp_zip_file_path = create_temp_zip_from_files(remaining_files)
-            upload_test_files(temp_zip_file_path, run_suffix)
+            upload_test_files(temp_zip_file_path, env_name)
             os.unlink(temp_zip_file_path)
         else:
             print(f"Successfully uploaded {doc_count} docs!")
@@ -86,5 +82,5 @@ if __name__ == "__main__":
     with open(config_path, "r") as file:
         config = SimpleNamespace(**yaml.safe_load(file))
     file_location = config.zipped_documents_file
-    run_suffix = config.existing_test_suffix
-    manage_file_upload(file_location, run_suffix)
+    env_name = config.environment_name
+    manage_file_upload(file_location, env_name)
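Because an empty environment name now falls back to the default local ports (see _api_url_builder above), manage_file_upload can also be pointed at a locally running deployment directly; a small sketch with a placeholder zip path:

```python
# Sketch only: the zip path is a placeholder.
from tests.regression.answer_quality.file_uploader import manage_file_upload

manage_file_upload("/path/to/extra_documents.zip", "")  # "" -> default local deployment
```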
@@ -1,16 +1,12 @@
 import os
-from datetime import datetime
 from types import SimpleNamespace
 
 import yaml
 
-from tests.regression.answer_quality.cli_utils import cleanup_docker
 from tests.regression.answer_quality.cli_utils import manage_data_directories
 from tests.regression.answer_quality.cli_utils import set_env_variables
 from tests.regression.answer_quality.cli_utils import start_docker_compose
 from tests.regression.answer_quality.cli_utils import switch_to_commit
-from tests.regression.answer_quality.file_uploader import upload_test_files
-from tests.regression.answer_quality.run_qa import run_qa_test_and_save_results
 
 
 def load_config(config_filename: str) -> SimpleNamespace:
@@ -22,12 +18,16 @@ def load_config(config_filename: str) -> SimpleNamespace:
 
 def main() -> None:
     config = load_config("search_test_config.yaml")
-    if config.existing_test_suffix:
-        run_suffix = config.existing_test_suffix
-        print("launching danswer with existing data suffix:", run_suffix)
+    if config.environment_name:
+        env_name = config.environment_name
+        print("launching danswer with environment name:", env_name)
     else:
-        run_suffix = datetime.now().strftime("-%Y%m%d-%H%M%S")
-        print("run_suffix:", run_suffix)
+        print("No env name defined. Not launching docker.")
+        print(
+            "Please define a name in the config yaml to start a new env "
+            "or use an existing env"
+        )
+        return
 
     set_env_variables(
         config.model_server_ip,
@@ -35,22 +35,14 @@ def main() -> None:
         config.use_cloud_gpu,
         config.llm,
     )
-    manage_data_directories(run_suffix, config.output_folder, config.use_cloud_gpu)
+    manage_data_directories(env_name, config.output_folder, config.use_cloud_gpu)
     if config.commit_sha:
         switch_to_commit(config.commit_sha)
 
     start_docker_compose(
-        run_suffix, config.launch_web_ui, config.use_cloud_gpu, config.only_state
+        env_name, config.launch_web_ui, config.use_cloud_gpu, config.only_state
    )
 
-    if not config.existing_test_suffix and not config.only_state:
-        upload_test_files(config.zipped_documents_file, run_suffix)
-
-    run_qa_test_and_save_results(run_suffix)
-
-    if config.clean_up_docker_containers:
-        cleanup_docker(run_suffix)
-
 
 if __name__ == "__main__":
     main()
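With uploading and QA removed from main(), the end-to-end flow is now the three separate steps from the README (launch, upload, ask questions). A sketch of driving the last two steps from Python, using placeholder values:

```python
# Sketch only: path and environment name are placeholders.
from tests.regression.answer_quality.file_uploader import manage_file_upload
from tests.regression.answer_quality.run_qa import run_qa_test_and_save_results

# Step 7 (python launch_eval_env.py) only applies when the env runs in docker.
manage_file_upload("/path/to/zipped_documents.zip", "my-eval-env")  # step 8: upload and index
run_qa_test_and_save_results("my-eval-env")                         # step 9: ask the questions_file questions
```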
@@ -43,12 +43,12 @@ def _read_questions_jsonl(questions_file_path: str) -> list[dict]:
 
 def _get_test_output_folder(config: dict) -> str:
     base_output_folder = os.path.expanduser(config["output_folder"])
-    if config["run_suffix"]:
+    if config["env_name"]:
         base_output_folder = os.path.join(
-            base_output_folder, config["run_suffix"], "evaluations_output"
+            base_output_folder, config["env_name"], "evaluations_output"
         )
     else:
-        base_output_folder = os.path.join(base_output_folder, "no_defined_suffix")
+        base_output_folder = os.path.join(base_output_folder, "no_defined_env_name")
 
     counter = 1
     output_folder_path = os.path.join(base_output_folder, "run_1")
@@ -72,12 +72,12 @@ def _initialize_files(config: dict) -> tuple[str, list[dict]]:
 
     metadata = {
         "commit_sha": get_current_commit_sha(),
-        "run_suffix": config["run_suffix"],
+        "env_name": config["env_name"],
         "test_config": config,
         "number_of_questions_in_dataset": len(questions),
     }
 
-    env_vars = get_docker_container_env_vars(config["run_suffix"])
+    env_vars = get_docker_container_env_vars(config["env_name"])
     if env_vars["ENV_SEED_CONFIGURATION"]:
         del env_vars["ENV_SEED_CONFIGURATION"]
     if env_vars["GPG_KEY"]:
@@ -118,7 +118,7 @@ def _process_question(question_data: dict, config: dict, question_number: int) -
     context_data_list, answer = get_answer_from_query(
         query=query,
         only_retrieve_docs=config["only_retrieve_docs"],
-        run_suffix=config["run_suffix"],
+        env_name=config["env_name"],
     )
 
     if not context_data_list:
@@ -173,7 +173,7 @@ def _process_and_write_query_results(config: dict) -> None:
     print("saved test results to folder:", test_output_folder)
 
 
-def run_qa_test_and_save_results(run_suffix: str = "") -> None:
+def run_qa_test_and_save_results(env_name: str = "") -> None:
     current_dir = os.path.dirname(os.path.abspath(__file__))
     config_path = os.path.join(current_dir, "search_test_config.yaml")
     with open(config_path, "r") as file:
@@ -182,16 +182,16 @@ def run_qa_test_and_save_results(run_suffix: str = "") -> None:
     if not isinstance(config, dict):
         raise TypeError("config must be a dictionary")
 
-    if not run_suffix:
-        run_suffix = config["existing_test_suffix"]
+    if not env_name:
+        env_name = config["environment_name"]
 
-    config["run_suffix"] = run_suffix
+    config["env_name"] = env_name
     _process_and_write_query_results(config)
 
 
 if __name__ == "__main__":
     """
     To run a different set of questions, update the questions_file in search_test_config.yaml
-    If there is more than one instance of Danswer running, specify the suffix in search_test_config.yaml
+    If there is more than one instance of Danswer running, specify the env_name in search_test_config.yaml
     """
     run_qa_test_and_save_results()
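Based on _get_test_output_folder above, results land under the configured output folder, keyed by the environment name; a sketch of the resulting layout with assumed values:

```python
# Sketch only: base folder and env name are placeholders; run numbers increment per invocation.
import os

output_folder = os.path.expanduser("~/danswer_test_results")  # hypothetical output_folder
env_name = "my-eval-env"                                      # hypothetical env_name

print(os.path.join(output_folder, env_name, "evaluations_output", "run_1"))
# With no env_name configured, results go under ".../no_defined_env_name/run_1" instead.
```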
@@ -13,15 +13,9 @@ questions_file: "~/sample_questions.yaml"
 # Git commit SHA to use (null means use current code as is)
 commit_sha: null
 
-# Whether to remove Docker containers after the test
-clean_up_docker_containers: true
-
 # Whether to launch a web UI for the test
 launch_web_ui: false
 
-# Whether to only run Vespa and Postgres
-only_state: false
-
 # Only retrieve documents, not LLM response
 only_retrieve_docs: false
 
@@ -34,9 +28,8 @@ model_server_ip: "PUT_PUBLIC_CLOUD_IP_HERE"
 # Port of the model server (placeholder)
 model_server_port: "PUT_PUBLIC_CLOUD_PORT_HERE"
 
-# Suffix for existing test results (E.g. -1234-5678)
-# empty string means no suffix
-existing_test_suffix: ""
+# Name for existing testing env (empty string uses default ports)
+environment_name: ""
 
 # Limit on number of tests to run (null means no limit)
 limit: null
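With clean_up_docker_containers removed from the config, container cleanup is an explicit step; mirroring the cli_utils __main__ block earlier in this diff, a sketch with a hypothetical environment name:

```python
# Sketch only: "my-eval-env" is a placeholder; per the cli_utils docstring above, an
# empty environment_name cleans up all danswer docker containers/volumes/networks.
from tests.regression.answer_quality.cli_utils import cleanup_docker

cleanup_docker("my-eval-env")
```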