Eval Script Incremental Write (#1784)
@@ -15,8 +15,9 @@ This Python script automates the process of running search quality tests for a b
 ## Usage

 1. Ensure you have the required dependencies installed.
-2. Configure the `search_test_config.yaml` file with your settings.
-3. Navigate to the answer_quality folder:
+2. Configure the `search_test_config.yaml` file based on the `search_test_config.yaml.template` file.
+3. Configure the `.env_eval` file with the correct environment variables.
+4. Navigate to the answer_quality folder:
 ```
 cd danswer/backend/tests/regression/answer_quality
 ```
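
A minimal sketch of step 2, assuming you are already inside the `answer_quality` folder: copy the template, then edit the copy. The one-liner below is illustrative, not a script from the repository.

```
import shutil

# Start search_test_config.yaml from the checked-in template, then edit it by hand.
shutil.copy("search_test_config.yaml.template", "search_test_config.yaml")
```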
@@ -63,6 +64,11 @@ Edit `search_test_config.yaml` to set:
 - llm
 Fill this out according to the normal LLM seeding

+
+To restart the evaluation using a particular index, set the suffix and turn off clean_up_docker_containers.
+This will also skip running the evaluation questions; in that case, the relari.py script can be run manually.
+
+
 Docker daemon must be running for this to work.

 Each script is able to be individually run to upload additional docs or run additional tests
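
A hedged sketch of the restart flow described above. Only `clean_up_docker_containers` is named in the README; the suffix key and the surrounding logic are assumptions added purely for illustration.

```
# Illustrative only: "existing_test_suffix" is an assumed key name, not from the repo.
config = {
    "existing_test_suffix": "_previous_run",  # reuse the index from an earlier run
    "clean_up_docker_containers": False,      # keep that stack alive between runs
}

if config["existing_test_suffix"] and not config["clean_up_docker_containers"]:
    # The orchestration would skip asking the evaluation questions here;
    # relari.py can then be run manually against the existing containers.
    print("Reusing run:", config["existing_test_suffix"])
```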
@@ -1,18 +1,51 @@
 import json
 import os
 import subprocess
+import sys
+from threading import Thread
+from typing import IO

 from retry import retry


-def _run_command(command: str) -> tuple[str, str]:
+def _run_command(command: str, stream_output: bool = False) -> tuple[str, str]:
     process = subprocess.Popen(
-        command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE
+        command,
+        shell=True,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+        text=True,
+        bufsize=1,
     )
-    stdout, stderr = process.communicate()
+
+    stdout_lines: list[str] = []
+    stderr_lines: list[str] = []
+
+    def process_stream(stream: IO[str], lines: list[str]) -> None:
+        for line in stream:
+            lines.append(line)
+            if stream_output:
+                print(
+                    line,
+                    end="",
+                    file=sys.stdout if stream == process.stdout else sys.stderr,
+                )
+
+    stdout_thread = Thread(target=process_stream, args=(process.stdout, stdout_lines))
+    stderr_thread = Thread(target=process_stream, args=(process.stderr, stderr_lines))
+
+    stdout_thread.start()
+    stderr_thread.start()
+
+    stdout_thread.join()
+    stderr_thread.join()
+
+    process.wait()
+
     if process.returncode != 0:
-        raise RuntimeError(f"Command failed with error: {stderr.decode()}")
-    return stdout.decode(), stderr.decode()
+        raise RuntimeError(f"Command failed with error: {''.join(stderr_lines)}")
+
+    return "".join(stdout_lines), "".join(stderr_lines)


 def get_current_commit_sha() -> str:
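
The rewrite above replaces the blocking `communicate()` call with one reader thread per pipe, so `_run_command` can stream output to the console as it arrives while still returning the captured text. A minimal, self-contained sketch of the same tee-while-capturing pattern (the function name and command below are illustrative, not from the repository):

```
import subprocess
import sys
from threading import Thread
from typing import IO


def run_and_tee(command: str) -> tuple[str, str]:
    # Line-buffered text mode so the pipes yield complete lines as they are produced.
    process = subprocess.Popen(
        command,
        shell=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        bufsize=1,
    )
    out_lines: list[str] = []
    err_lines: list[str] = []

    def pump(stream: IO[str], sink: list[str], dest: IO[str]) -> None:
        for line in stream:                 # iterating a text pipe yields one line at a time
            sink.append(line)               # keep a copy for the caller
            print(line, end="", file=dest)  # and echo it immediately

    threads = [
        Thread(target=pump, args=(process.stdout, out_lines, sys.stdout)),
        Thread(target=pump, args=(process.stderr, err_lines, sys.stderr)),
    ]
    for t in threads:
        t.start()
    for t in threads:
        t.join()
    process.wait()
    return "".join(out_lines), "".join(err_lines)


if __name__ == "__main__":
    out, _ = run_and_tee("echo hello")
    assert out.strip() == "hello"
```

Using one thread per pipe also avoids the classic deadlock where a single reader drains stdout while the child blocks on a full stderr buffer.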
@@ -92,8 +125,8 @@ def start_docker_compose(

     print("Docker Command:\n", command)

-    _run_command(command)
-    print("The Docker has been Composed :)")
+    _run_command(command, stream_output=True)
+    print("Containers have been launched")


 def cleanup_docker(run_suffix: str) -> None:
@@ -10,51 +10,40 @@ from tests.regression.answer_quality.api_utils import get_answer_from_query
 from tests.regression.answer_quality.cli_utils import get_current_commit_sha


-def _get_relari_outputs(samples: list[dict], run_suffix: str) -> list[dict]:
+def _get_and_write_relari_outputs(
+    samples: list[dict], run_suffix: str, output_file_path: str
+) -> None:
     while not check_if_query_ready(run_suffix):
         time.sleep(5)

-    relari_outputs = []
-    for sample in samples:
-        retrieved_context, answer = get_answer_from_query(
-            query=sample["question"],
-            run_suffix=run_suffix,
-        )
-
-        relari_outputs.append(
-            {
-                "label": sample["uid"],
-                "question": sample["question"],
-                "answer": answer,
-                "retrieved_context": retrieved_context,
-            }
-        )
-
-    return relari_outputs
+    with open(output_file_path, "w", encoding="utf-8") as file:
+        for sample in samples:
+            retrieved_context, answer = get_answer_from_query(
+                query=sample["question"],
+                run_suffix=run_suffix,
+            )
+
+            if not answer:
+                print("NO ANSWER GIVEN FOR QUESTION:", sample["question"])
+                continue
+
+            output = {
+                "label": sample["uid"],
+                "question": sample["question"],
+                "answer": answer,
+                "retrieved_context": retrieved_context,
+            }
+
+            file.write(json.dumps(output) + "\n")
+            file.flush()


-def _write_output_file(
-    relari_outputs: list[dict], output_folder_path: str, run_suffix: str
-) -> None:
+def _write_metadata_file(run_suffix: str, metadata_file_path: str) -> None:
     metadata = {"commit_sha": get_current_commit_sha(), "run_suffix": run_suffix}

-    counter = 1
-    output_file_path = os.path.join(output_folder_path, "results.txt")
-    metadata_file_path = os.path.join(output_folder_path, "run_metadata.yaml")
-    while os.path.exists(output_file_path) or os.path.exists(metadata_file_path):
-        output_file_path = os.path.join(output_folder_path, f"results_{counter}.txt")
-        metadata_file_path = os.path.join(
-            output_folder_path, f"run_metadata_{counter}.txt"
-        )
-        counter += 1
-    print("saving question results to:", output_file_path)
     print("saving metadata to:", metadata_file_path)
     with open(metadata_file_path, "w", encoding="utf-8") as yaml_file:
         yaml.dump(metadata, yaml_file)
-    with open(output_file_path, "w", encoding="utf-8") as file:
-        for output in relari_outputs:
-            file.write(json.dumps(output) + "\n")
-            file.flush()


 def _read_questions_jsonl(questions_file_path: str) -> list[dict]:
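
This hunk is the "incremental write" in the commit title: instead of accumulating every answer in memory and writing a results file at the end, `_get_and_write_relari_outputs` serializes each answer as one JSON line and flushes it as soon as it is available, so an interrupted run still leaves the completed answers on disk. A small standalone sketch of the pattern (helper names are illustrative):

```
import json


def write_results_incrementally(records: list[dict], output_file_path: str) -> None:
    with open(output_file_path, "w", encoding="utf-8") as file:
        for record in records:
            file.write(json.dumps(record) + "\n")
            file.flush()  # persist this line before working on the next record


def read_results(output_file_path: str) -> list[dict]:
    # JSONL reads back one record per line, so partially completed runs still load.
    with open(output_file_path, encoding="utf-8") as file:
        return [json.loads(line) for line in file if line.strip()]
```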
@@ -72,14 +61,32 @@ def answer_relari_questions(
     run_suffix: str,
     limit: int | None = None,
 ) -> None:
+    results_file = "run_results.jsonl"
+    metadata_file = "run_metadata.yaml"
     samples = _read_questions_jsonl(questions_file_path)

     if limit is not None:
         samples = samples[:limit]

-    relari_outputs = _get_relari_outputs(samples=samples, run_suffix=run_suffix)
+    counter = 1
+    output_file_path = os.path.join(results_folder_path, results_file)
+    metadata_file_path = os.path.join(results_folder_path, metadata_file)
+    while os.path.exists(output_file_path):
+        output_file_path = os.path.join(
+            results_folder_path,
+            results_file.replace("run_results", f"run_results_{counter}"),
+        )
+        metadata_file_path = os.path.join(
+            results_folder_path,
+            metadata_file.replace("run_metadata", f"run_metadata_{counter}"),
+        )
+        counter += 1

-    _write_output_file(relari_outputs, results_folder_path, run_suffix)
+    print("saving question results to:", output_file_path)
+    _write_metadata_file(run_suffix, metadata_file_path)
+    _get_and_write_relari_outputs(
+        samples=samples, run_suffix=run_suffix, output_file_path=output_file_path
+    )


 def main() -> None:
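
The counter loop above keeps earlier runs from being overwritten: if `run_results.jsonl` already exists in the results folder, the new run writes `run_results_1.jsonl`, then `run_results_2.jsonl`, and so on. A compact sketch of the same naming scheme (the helper name is illustrative):

```
import os


def next_free_results_path(folder: str, base_name: str = "run_results.jsonl") -> str:
    candidate = os.path.join(folder, base_name)
    counter = 1
    while os.path.exists(candidate):
        # run_results.jsonl -> run_results_1.jsonl -> run_results_2.jsonl -> ...
        candidate = os.path.join(
            folder, base_name.replace("run_results", f"run_results_{counter}")
        )
        counter += 1
    return candidate
```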
@@ -37,7 +37,7 @@ limit: null
 # LLM configuration
 llm:
   # Name of the LLM
-  name: "llm_name"
+  name: "default_test_llm"

   # Provider of the LLM (e.g., OpenAI)
   provider: "openai"
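
The test scripts read these values from `search_test_config.yaml`; below is a minimal sketch of consuming the `llm` block with PyYAML. Only the keys visible in this hunk are used; how the repository actually loads the file is an assumption.

```
import yaml

with open("search_test_config.yaml", encoding="utf-8") as f:
    config = yaml.safe_load(f)

llm_name = config["llm"]["name"]          # e.g. "default_test_llm"
llm_provider = config["llm"]["provider"]  # e.g. "openai"
limit = config.get("limit")               # "limit: null" in the template -> None here
print(f"Using {llm_provider}/{llm_name}, question limit: {limit}")
```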
@@ -23,6 +23,7 @@ services:
       - VESPA_HOST=index
       - MODEL_SERVER_HOST=${MODEL_SERVER_HOST:-inference_model_server}
       - MODEL_SERVER_PORT=${MODEL_SERVER_PORT:-}
+      - ENV_SEED_CONFIGURATION=${ENV_SEED_CONFIGURATION:-}
       - ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=True
     extra_hosts:
       - "host.docker.internal:host-gateway"
@@ -52,6 +53,7 @@ services:
       - MODEL_SERVER_HOST=${MODEL_SERVER_HOST:-inference_model_server}
       - MODEL_SERVER_PORT=${MODEL_SERVER_PORT:-}
       - INDEXING_MODEL_SERVER_HOST=${INDEXING_MODEL_SERVER_HOST:-indexing_model_server}
+      - ENV_SEED_CONFIGURATION=${ENV_SEED_CONFIGURATION:-}
       - ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=True
     extra_hosts:
       - "host.docker.internal:host-gateway"