mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-09-22 17:16:20 +02:00
Eval Script Incremental Write (#1784)
This commit is contained in:
@@ -15,8 +15,9 @@ This Python script automates the process of running search quality tests for a b
|
||||
## Usage
|
||||
|
||||
1. Ensure you have the required dependencies installed.
|
||||
2. Configure the `search_test_config.yaml` file with your settings.
|
||||
3. Navigate to the answer_quality folder:
|
||||
2. Configure the `search_test_config.yaml` file based on the `search_test_config.yaml.template` file.
|
||||
3. Configure the `.env_eval` file with the correct environment variables.
|
||||
4. Navigate to the answer_quality folder:
|
||||
```
|
||||
cd danswer/backend/tests/regression/answer_quality
|
||||
```
|
||||
@@ -63,6 +64,11 @@ Edit `search_test_config.yaml` to set:
|
||||
- llm
|
||||
Fill this out according to the normal LLM seeding
|
||||
|
||||
|
||||
To restart the evaluation using a particular index, set the suffix and turn off clean_up_docker_containers.
|
||||
This will also skip running the evaluation questions; in this case, the relari.py script can be run manually.
|
||||
|
||||
|
||||
Docker daemon must be running for this to work.
|
||||
|
||||
Each script can be run individually to upload additional docs or run additional tests.
|
@@ -1,18 +1,51 @@
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
from threading import Thread
|
||||
from typing import IO
|
||||
|
||||
from retry import retry
|
||||
|
||||
|
||||
def _run_command(command: str) -> tuple[str, str]:
|
||||
def _run_command(command: str, stream_output: bool = False) -> tuple[str, str]:
|
||||
process = subprocess.Popen(
|
||||
command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE
|
||||
command,
|
||||
shell=True,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE,
|
||||
text=True,
|
||||
bufsize=1,
|
||||
)
|
||||
stdout, stderr = process.communicate()
|
||||
|
||||
stdout_lines: list[str] = []
|
||||
stderr_lines: list[str] = []
|
||||
|
||||
def process_stream(stream: IO[str], lines: list[str]) -> None:
|
||||
for line in stream:
|
||||
lines.append(line)
|
||||
if stream_output:
|
||||
print(
|
||||
line,
|
||||
end="",
|
||||
file=sys.stdout if stream == process.stdout else sys.stderr,
|
||||
)
|
||||
|
||||
stdout_thread = Thread(target=process_stream, args=(process.stdout, stdout_lines))
|
||||
stderr_thread = Thread(target=process_stream, args=(process.stderr, stderr_lines))
|
||||
|
||||
stdout_thread.start()
|
||||
stderr_thread.start()
|
||||
|
||||
stdout_thread.join()
|
||||
stderr_thread.join()
|
||||
|
||||
process.wait()
|
||||
|
||||
if process.returncode != 0:
|
||||
raise RuntimeError(f"Command failed with error: {stderr.decode()}")
|
||||
return stdout.decode(), stderr.decode()
|
||||
raise RuntimeError(f"Command failed with error: {''.join(stderr_lines)}")
|
||||
|
||||
return "".join(stdout_lines), "".join(stderr_lines)
|
||||
|
||||
|
||||
def get_current_commit_sha() -> str:
|
||||
@@ -92,8 +125,8 @@ def start_docker_compose(
|
||||
|
||||
print("Docker Command:\n", command)
|
||||
|
||||
_run_command(command)
|
||||
print("The Docker has been Composed :)")
|
||||
_run_command(command, stream_output=True)
|
||||
print("Containers have been launched")
|
||||
|
||||
|
||||
def cleanup_docker(run_suffix: str) -> None:
|
||||
|
@@ -10,51 +10,40 @@ from tests.regression.answer_quality.api_utils import get_answer_from_query
|
||||
from tests.regression.answer_quality.cli_utils import get_current_commit_sha
|
||||
|
||||
|
||||
def _get_relari_outputs(samples: list[dict], run_suffix: str) -> list[dict]:
|
||||
def _get_and_write_relari_outputs(
|
||||
samples: list[dict], run_suffix: str, output_file_path: str
|
||||
) -> None:
|
||||
while not check_if_query_ready(run_suffix):
|
||||
time.sleep(5)
|
||||
|
||||
relari_outputs = []
|
||||
for sample in samples:
|
||||
retrieved_context, answer = get_answer_from_query(
|
||||
query=sample["question"],
|
||||
run_suffix=run_suffix,
|
||||
)
|
||||
with open(output_file_path, "w", encoding="utf-8") as file:
|
||||
for sample in samples:
|
||||
retrieved_context, answer = get_answer_from_query(
|
||||
query=sample["question"],
|
||||
run_suffix=run_suffix,
|
||||
)
|
||||
|
||||
relari_outputs.append(
|
||||
{
|
||||
if not answer:
|
||||
print("NO ANSWER GIVEN FOR QUESTION:", sample["question"])
|
||||
continue
|
||||
|
||||
output = {
|
||||
"label": sample["uid"],
|
||||
"question": sample["question"],
|
||||
"answer": answer,
|
||||
"retrieved_context": retrieved_context,
|
||||
}
|
||||
)
|
||||
|
||||
return relari_outputs
|
||||
file.write(json.dumps(output) + "\n")
|
||||
file.flush()
|
||||
|
||||
|
||||
def _write_output_file(
|
||||
relari_outputs: list[dict], output_folder_path: str, run_suffix: str
|
||||
) -> None:
|
||||
def _write_metadata_file(run_suffix: str, metadata_file_path: str) -> None:
|
||||
metadata = {"commit_sha": get_current_commit_sha(), "run_suffix": run_suffix}
|
||||
|
||||
counter = 1
|
||||
output_file_path = os.path.join(output_folder_path, "results.txt")
|
||||
metadata_file_path = os.path.join(output_folder_path, "run_metadata.yaml")
|
||||
while os.path.exists(output_file_path) or os.path.exists(metadata_file_path):
|
||||
output_file_path = os.path.join(output_folder_path, f"results_{counter}.txt")
|
||||
metadata_file_path = os.path.join(
|
||||
output_folder_path, f"run_metadata_{counter}.txt"
|
||||
)
|
||||
counter += 1
|
||||
print("saving question results to:", output_file_path)
|
||||
print("saving metadata to:", metadata_file_path)
|
||||
with open(metadata_file_path, "w", encoding="utf-8") as yaml_file:
|
||||
yaml.dump(metadata, yaml_file)
|
||||
with open(output_file_path, "w", encoding="utf-8") as file:
|
||||
for output in relari_outputs:
|
||||
file.write(json.dumps(output) + "\n")
|
||||
file.flush()
|
||||
|
||||
|
||||
def _read_questions_jsonl(questions_file_path: str) -> list[dict]:
|
||||
@@ -72,14 +61,32 @@ def answer_relari_questions(
|
||||
run_suffix: str,
|
||||
limit: int | None = None,
|
||||
) -> None:
|
||||
results_file = "run_results.jsonl"
|
||||
metadata_file = "run_metadata.yaml"
|
||||
samples = _read_questions_jsonl(questions_file_path)
|
||||
|
||||
if limit is not None:
|
||||
samples = samples[:limit]
|
||||
|
||||
relari_outputs = _get_relari_outputs(samples=samples, run_suffix=run_suffix)
|
||||
counter = 1
|
||||
output_file_path = os.path.join(results_folder_path, results_file)
|
||||
metadata_file_path = os.path.join(results_folder_path, metadata_file)
|
||||
while os.path.exists(output_file_path):
|
||||
output_file_path = os.path.join(
|
||||
results_folder_path,
|
||||
results_file.replace("run_results", f"run_results_{counter}"),
|
||||
)
|
||||
metadata_file_path = os.path.join(
|
||||
results_folder_path,
|
||||
metadata_file.replace("run_metadata", f"run_metadata_{counter}"),
|
||||
)
|
||||
counter += 1
|
||||
|
||||
_write_output_file(relari_outputs, results_folder_path, run_suffix)
|
||||
print("saving question results to:", output_file_path)
|
||||
_write_metadata_file(run_suffix, metadata_file_path)
|
||||
_get_and_write_relari_outputs(
|
||||
samples=samples, run_suffix=run_suffix, output_file_path=output_file_path
|
||||
)
|
||||
|
||||
|
||||
def main() -> None:
|
||||
|
@@ -37,7 +37,7 @@ limit: null
|
||||
# LLM configuration
|
||||
llm:
|
||||
# Name of the LLM
|
||||
name: "llm_name"
|
||||
name: "default_test_llm"
|
||||
|
||||
# Provider of the LLM (e.g., OpenAI)
|
||||
provider: "openai"
|
||||
|
@@ -23,6 +23,7 @@ services:
|
||||
- VESPA_HOST=index
|
||||
- MODEL_SERVER_HOST=${MODEL_SERVER_HOST:-inference_model_server}
|
||||
- MODEL_SERVER_PORT=${MODEL_SERVER_PORT:-}
|
||||
- ENV_SEED_CONFIGURATION=${ENV_SEED_CONFIGURATION:-}
|
||||
- ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=True
|
||||
extra_hosts:
|
||||
- "host.docker.internal:host-gateway"
|
||||
@@ -52,6 +53,7 @@ services:
|
||||
- MODEL_SERVER_HOST=${MODEL_SERVER_HOST:-inference_model_server}
|
||||
- MODEL_SERVER_PORT=${MODEL_SERVER_PORT:-}
|
||||
- INDEXING_MODEL_SERVER_HOST=${INDEXING_MODEL_SERVER_HOST:-indexing_model_server}
|
||||
- ENV_SEED_CONFIGURATION=${ENV_SEED_CONFIGURATION:-}
|
||||
- ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=True
|
||||
extra_hosts:
|
||||
- "host.docker.internal:host-gateway"
|
||||
|
Reference in New Issue
Block a user