Added search quality testing pipeline (#1774)

This commit is contained in:
hagen-danswer
2024-07-06 11:51:50 -07:00
committed by GitHub
parent de4d8e9a65
commit ac14369716
11 changed files with 1123 additions and 115 deletions

View File

@ -152,7 +152,7 @@ def add_credential_to_connector(
credential_id: int, credential_id: int,
cc_pair_name: str | None, cc_pair_name: str | None,
is_public: bool, is_public: bool,
user: User, user: User | None,
db_session: Session, db_session: Session,
) -> StatusResponse[int]: ) -> StatusResponse[int]:
connector = fetch_connector_by_id(connector_id, db_session) connector = fetch_connector_by_id(connector_id, db_session)

View File

@ -119,6 +119,7 @@ def _does_document_exist(
chunk. This checks for whether the chunk exists already in the index""" chunk. This checks for whether the chunk exists already in the index"""
doc_url = f"{DOCUMENT_ID_ENDPOINT.format(index_name=index_name)}/{doc_chunk_id}" doc_url = f"{DOCUMENT_ID_ENDPOINT.format(index_name=index_name)}/{doc_chunk_id}"
doc_fetch_response = http_client.get(doc_url) doc_fetch_response = http_client.get(doc_url)
if doc_fetch_response.status_code == 404: if doc_fetch_response.status_code == 404:
return False return False

View File

@ -86,7 +86,9 @@ def load_vespa(filename: str) -> None:
new_doc = json.loads(line.strip()) new_doc = json.loads(line.strip())
doc_id = new_doc["update"].split("::")[-1] doc_id = new_doc["update"].split("::")[-1]
response = requests.post( response = requests.post(
DOCUMENT_ID_ENDPOINT + "/" + doc_id, headers=headers, json=new_doc DOCUMENT_ID_ENDPOINT + "/" + doc_id,
headers=headers,
json=new_doc,
) )
response.raise_for_status() response.raise_for_status()

View File

@ -0,0 +1,68 @@
# Search Quality Test Script
This Python script automates the process of running search quality tests for a backend system.
## Features
- Loads configuration from a YAML file
- Sets up Docker environment
- Manages environment variables
- Switches to specified Git branch
- Uploads test documents
- Runs search quality tests using Relari
- Cleans up Docker containers (optional)
## Usage
1. Ensure you have the required dependencies installed.
2. Configure the `search_test_config.yaml` file with your settings.
3. Navigate to the answer_quality folder:
```
cd danswer/backend/tests/regression/answer_quality
```
4. Run the script:
```
python search_quality_test.py
```
## Configuration
Edit `search_test_config.yaml` to set:
- output_folder
This is the folder where the folders for each test will go
These folders will contain the postgres/vespa data as well as the results for each test
- zipped_documents_file
The path to the zip file containing the files you'd like to test against
- questions_file
The path to the yaml containing the questions you'd like to test with
- branch
Set the branch to null if you want it to just use the code as is
- clean_up_docker_containers
Set this to true to automatically delete all docker containers, networks and volumes after the test
- launch_web_ui
Set this to true if you want to use the UI during/after the testing process
- use_cloud_gpu
Set to true or false depending on whether you want to use a remote GPU for the model servers
- model_server_ip
This is the ip of the remote model server
Only need to set this if use_cloud_gpu is true
- model_server_port
This is the port of the remote model server
Only need to set this if use_cloud_gpu is true
- existing_test_suffix
Use this if you would like to relaunch a previous test instance
Input the suffix of the test you'd like to re-launch
(E.g. to use the data from folder "test_1234_5678" put "_1234_5678")
No new files will automatically be uploaded
Leave empty to run a new test
- limit
Max number of questions you'd like to ask against the dataset
Set to null for no limit
- llm
Fill this out according to the normal LLM seeding
The Docker daemon must be running for any of these scripts to work.
Each script can also be run individually, e.g. to upload additional docs or run additional tests against an existing instance.

View File

@ -0,0 +1,220 @@
import requests
from retry import retry
from danswer.configs.constants import DocumentSource
from danswer.configs.constants import MessageType
from danswer.connectors.models import InputType
from danswer.db.enums import IndexingStatus
from danswer.one_shot_answer.models import DirectQARequest
from danswer.one_shot_answer.models import ThreadMessage
from danswer.search.models import IndexFilters
from danswer.search.models import OptionalSearchSetting
from danswer.search.models import RetrievalDetails
from danswer.server.documents.models import ConnectorBase
from tests.regression.answer_quality.cli_utils import (
get_api_server_host_port,
)
def _api_url_builder(run_suffix: str, api_path: str) -> str:
    """Build a full URL for the containerized API server belonging to this run."""
    host_port = get_api_server_host_port(run_suffix)
    return f"http://localhost:{host_port}{api_path}"
@retry(tries=5, delay=2, backoff=2)
def get_answer_from_query(query: str, run_suffix: str) -> tuple[list[str], str]:
    """Ask the one-shot QA endpoint a question and return (retrieved contexts, answer).

    Retries (via the decorator) because the API server may still be starting up.
    """
    qa_request = DirectQARequest(
        messages=[ThreadMessage(message=query, sender=None, role=MessageType.USER)],
        prompt_id=0,
        persona_id=0,
        retrieval_options=RetrievalDetails(
            run_search=OptionalSearchSetting.ALWAYS,
            real_time=True,
            filters=IndexFilters(
                source_type=None,
                document_set=None,
                time_cutoff=None,
                tags=None,
                access_control_list=None,
            ),
            enable_auto_detect_filters=False,
        ),
        chain_of_thought=False,
        return_contexts=True,
    )

    url = _api_url_builder(run_suffix, "/query/answer-with-quote/")
    headers = {
        "Content-Type": "application/json",
    }

    body = qa_request.dict()
    body["user"] = None

    try:
        response_json = requests.post(url, headers=headers, json=body).json()
        retrieved_contexts = response_json.get("contexts", {}).get("contexts", [])
        content_list = [context.get("content", "") for context in retrieved_contexts]
        answer = response_json.get("answer")
    except Exception as e:
        print("Failed to answer the questions, trying again")
        print(f"error: {str(e)}")
        raise e

    print("\nquery: ", query)
    print("answer: ", answer)
    print("content_list: ", content_list)

    return content_list, answer
def check_if_query_ready(run_suffix: str) -> bool:
    """Return True once at least one doc is indexed and no indexing jobs remain."""
    url = _api_url_builder(run_suffix, "/manage/admin/connector/indexing-status/")
    headers = {
        "Content-Type": "application/json",
    }

    indexing_status_dict = requests.get(url, headers=headers).json()

    doc_count = 0
    ongoing_index_attempts = False
    for index_attempt in indexing_status_dict:
        status = index_attempt["last_status"]
        # Any attempt still queued or running means indexing is not finished.
        if status in (IndexingStatus.IN_PROGRESS, IndexingStatus.NOT_STARTED):
            ongoing_index_attempts = True
        doc_count += index_attempt["docs_indexed"]

    if not doc_count:
        print("No docs indexed, waiting for indexing to start")
    elif ongoing_index_attempts:
        print(
            f"{doc_count} docs indexed but waiting for ongoing indexing jobs to finish..."
        )

    return doc_count > 0 and not ongoing_index_attempts
def run_cc_once(run_suffix: str, connector_id: int, credential_id: int) -> None:
    """Trigger a one-off from-the-beginning indexing run for the connector/credential pair."""
    url = _api_url_builder(run_suffix, "/manage/admin/connector/run-once/")

    body = {
        "connector_id": connector_id,
        "credential_ids": [credential_id],
        "from_beginning": True,
    }
    print("body:", body)

    response = requests.post(
        url, headers={"Content-Type": "application/json"}, json=body
    )
    if response.status_code == 200:
        print("Connector created successfully:", response.json())
    else:
        print("Failed status_code:", response.status_code)
        print("Failed text:", response.text)
def create_cc_pair(run_suffix: str, connector_id: int, credential_id: int) -> None:
    """Associate a credential with a connector (creates a connector-credential pair).

    Prints the API response on success, or the status code and body on failure.
    """
    url = _api_url_builder(
        run_suffix, f"/manage/connector/{connector_id}/credential/{credential_id}"
    )
    headers = {
        "Content-Type": "application/json",
    }

    body = {"name": "zip_folder_contents", "is_public": True}
    print("body:", body)
    response = requests.put(url, headers=headers, json=body)
    if response.status_code == 200:
        # Fixed copy-pasted message: this endpoint creates a cc-pair, not a connector.
        print("Connector-credential pair created successfully:", response.json())
    else:
        print("Failed status_code:", response.status_code)
        print("Failed text:", response.text)
def _get_existing_connector_names(run_suffix: str) -> list[str]:
    """Return the names of all connectors currently registered with the API server.

    Raises:
        RuntimeError: if the request does not return HTTP 200.
    """
    url = _api_url_builder(run_suffix, "/manage/connector")
    headers = {
        "Content-Type": "application/json",
    }

    # Fixed: the original sent a copy-pasted credential body ({"credential_json": ...,
    # "admin_public": ...}) with this GET. A list endpoint takes no request body and
    # bodies on GET requests are nonstandard, so it has been dropped.
    response = requests.get(url, headers=headers)
    if response.status_code == 200:
        connectors = response.json()
        return [connector["name"] for connector in connectors]
    else:
        raise RuntimeError(response.__dict__)
def create_connector(run_suffix: str, file_paths: list[str]) -> int:
    """Register a FILE connector over the uploaded documents; return the new connector id.

    Appends a numeric suffix when a connector with the base name already exists.

    Raises:
        RuntimeError: if the API does not return HTTP 200.
    """
    url = _api_url_builder(run_suffix, "/manage/admin/connector")
    headers = {
        "Content-Type": "application/json",
    }

    base_connector_name = "search_eval_connector"
    taken_names = _get_existing_connector_names(run_suffix)

    connector_name = base_connector_name
    suffix_num = 1
    while connector_name in taken_names:
        connector_name = f"{base_connector_name}_{suffix_num}"
        suffix_num += 1

    connector = ConnectorBase(
        name=connector_name,
        source=DocumentSource.FILE,
        input_type=InputType.LOAD_STATE,
        connector_specific_config={"file_locations": file_paths},
        refresh_freq=None,
        prune_freq=None,
        disabled=False,
    )

    body = connector.dict()
    print("body:", body)
    response = requests.post(url, headers=headers, json=body)

    if response.status_code == 200:
        print("Connector created successfully:", response.json())
        return response.json()["id"]
    else:
        raise RuntimeError(response.__dict__)
def create_credential(run_suffix: str) -> int:
    """Create an empty admin-public credential and return its id.

    Raises:
        RuntimeError: if the API does not return HTTP 200.
    """
    url = _api_url_builder(run_suffix, "/manage/credential")

    payload = {
        "credential_json": {},
        "admin_public": True,
    }
    response = requests.post(
        url, headers={"Content-Type": "application/json"}, json=payload
    )
    if response.status_code != 200:
        raise RuntimeError(response.__dict__)

    print("credential created successfully:", response.json())
    return response.json()["id"]
@retry(tries=10, delay=2, backoff=2)
def upload_file(run_suffix: str, zip_file_path: str) -> list[str]:
    """Upload a zip of test documents to the API server; return the server-side file paths.

    Retries (via the decorator) while the API server is still coming up.

    Raises:
        requests.HTTPError: on a non-2xx response (triggers a retry).
    """
    api_path = _api_url_builder(run_suffix, "/manage/admin/connector/file/upload")
    # Use a context manager so the handle is closed even when the request fails
    # and the retry decorator re-invokes this function (the original leaked one
    # open file handle per attempt).
    with open(zip_file_path, "rb") as zip_file:
        files = [
            ("files", zip_file),
        ]
        try:
            response = requests.post(api_path, files=files)
            response.raise_for_status()  # Raises an HTTPError for bad responses
            print("file uploaded successfully:", response.json())
            return response.json()["file_paths"]
        except Exception as e:
            print("File upload failed, waiting for API server to come up and trying again")
            raise e

View File

@ -0,0 +1,203 @@
import json
import os
import subprocess
from retry import retry
def _run_command(command: str) -> tuple[str, str]:
process = subprocess.Popen(
command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE
)
stdout, stderr = process.communicate()
if process.returncode != 0:
raise RuntimeError(f"Command failed with error: {stderr.decode()}")
return stdout.decode(), stderr.decode()
def get_current_commit_sha() -> str:
    """Return the SHA of the currently checked-out git commit."""
    print("Getting current commit SHA...")
    raw_output, _ = _run_command("git rev-parse HEAD")
    commit_sha = raw_output.strip()
    print(f"Current commit SHA: {commit_sha}")
    return commit_sha
def switch_to_branch(branch: str) -> None:
    """Check out the given git branch and pull its latest changes."""
    print(f"Switching to branch: {branch}...")
    for git_command in (f"git checkout {branch}", "git pull"):
        _run_command(git_command)
    print(f"Successfully switched to branch: {branch}")
    print("Repository updated successfully.")
def manage_data_directories(suffix: str, base_path: str, use_cloud_gpu: bool) -> str:
    """Create the per-run data directories under base_path and export them as env vars.

    Returns:
        The path of the relari output folder for this run.
    """
    # Expand "~"-style paths so the run root lands under the user's home dir.
    run_root = os.path.join(os.path.expanduser(base_path), f"test{suffix}")

    env_var_to_dir = {
        "DANSWER_POSTGRES_DATA_DIR": os.path.join(run_root, "postgres/"),
        "DANSWER_VESPA_DATA_DIR": os.path.join(run_root, "vespa/"),
    }
    # Model caches are only needed when the model servers run locally.
    if not use_cloud_gpu:
        env_var_to_dir["DANSWER_INDEX_MODEL_CACHE_DIR"] = os.path.join(
            run_root, "index_model_cache/"
        )
        env_var_to_dir["DANSWER_INFERENCE_MODEL_CACHE_DIR"] = os.path.join(
            run_root, "inference_model_cache/"
        )

    for env_var, directory in env_var_to_dir.items():
        os.makedirs(directory, exist_ok=True)
        os.environ[env_var] = directory
        print(f"Set {env_var} to: {directory}")

    relari_output_path = os.path.join(run_root, "relari_output/")
    os.makedirs(relari_output_path, exist_ok=True)
    return relari_output_path
def set_env_variables(
    remote_server_ip: str,
    remote_server_port: str,
    use_cloud_gpu: bool,
    llm_config: dict,
) -> None:
    """Export the environment variables the docker compose stack expects.

    Seeds the LLM configuration and, when a cloud GPU is used, points the
    stack at the remote model server instead of local containers.
    """
    env_vars: dict = {
        "ENV_SEED_CONFIGURATION": json.dumps({"llms": [llm_config]}),
        "ENABLE_PAID_ENTERPRISE_EDITION_FEATURES": "true",
    }
    if use_cloud_gpu:
        env_vars["MODEL_SERVER_HOST"] = remote_server_ip
        env_vars["MODEL_SERVER_PORT"] = remote_server_port

    for name, value in env_vars.items():
        os.environ[name] = value
        print(f"Set {name} to: {value}")
def start_docker_compose(
    run_suffix: str, launch_web_ui: bool, use_cloud_gpu: bool
) -> None:
    """Build and launch the search-testing docker compose stack for this run."""
    print("Starting Docker Compose...")
    os.chdir(os.path.expanduser("~/danswer/deployment/docker_compose"))

    command_parts = [
        f"docker compose -f docker-compose.search-testing.yml -p danswer-stack{run_suffix} up -d",
        " --build",
        " --pull always",
        " --force-recreate",
    ]
    # Skip the web containers unless the UI was requested.
    if not launch_web_ui:
        command_parts.append(" --scale web_server=0")
        command_parts.append(" --scale nginx=0")
    # Skip local model servers when a remote GPU host serves the models.
    if use_cloud_gpu:
        command_parts.append(" --scale indexing_model_server=0")
        command_parts.append(" --scale inference_model_server=0")

    command = "".join(command_parts)
    print("Docker Command:\n", command)
    _run_command(command)
    print("The Docker has been Composed :)")
def cleanup_docker(run_suffix: str) -> None:
    """Remove the containers, volumes, and networks created for a test run.

    Bug fix: the original returned early when no volumes matched the project
    name, which skipped network cleanup entirely. Each resource kind is now
    handled independently so networks are always processed.
    """
    print(
        f"Deleting Docker containers, volumes, and networks for project suffix: {run_suffix}"
    )
    project_name = f"danswer-stack{run_suffix}"

    # Containers: match by the compose project prefix in the container name.
    stdout, _ = _run_command("docker ps -a --format '{{json .}}'")
    containers = [json.loads(line) for line in stdout.splitlines()]
    containers_to_delete = [
        c for c in containers if c["Names"].startswith(project_name)
    ]
    if not containers_to_delete:
        print(f"No containers found for project: {project_name}")
    else:
        container_ids = " ".join([c["ID"] for c in containers_to_delete])
        _run_command(f"docker rm -f {container_ids}")
        print(
            f"Successfully deleted {len(containers_to_delete)} containers for project: {project_name}"
        )

    # Volumes: no early return here so the network cleanup below still runs.
    stdout, _ = _run_command("docker volume ls --format '{{.Name}}'")
    volumes = stdout.splitlines()
    volumes_to_delete = [v for v in volumes if v.startswith(project_name)]
    if not volumes_to_delete:
        print(f"No volumes found for project: {project_name}")
    else:
        volume_names = " ".join(volumes_to_delete)
        _run_command(f"docker volume rm {volume_names}")
        print(
            f"Successfully deleted {len(volumes_to_delete)} volumes for project: {project_name}"
        )

    # Networks: matched by suffix because compose network names embed it.
    stdout, _ = _run_command("docker network ls --format '{{.Name}}'")
    networks = stdout.splitlines()
    networks_to_delete = [n for n in networks if run_suffix in n]
    if not networks_to_delete:
        print(f"No networks found containing suffix: {run_suffix}")
    else:
        network_names = " ".join(networks_to_delete)
        _run_command(f"docker network rm {network_names}")
        print(
            f"Successfully deleted {len(networks_to_delete)} networks containing suffix: {run_suffix}"
        )
@retry(tries=5, delay=5, backoff=2)
def get_api_server_host_port(suffix: str) -> str:
    """Return the host port mapped to the api_server container for this run.

    Lists all docker containers, picks the single one whose name contains
    both "api_server" and the run suffix, then parses its port mappings to
    find the host port bound to container port 8080. Retries (via the
    decorator) while the container is still starting.

    Raises:
        RuntimeError: if zero or multiple matching containers are found,
            or the 8080 mapping cannot be resolved unambiguously.
    """
    container_name = "api_server"
    stdout, _ = _run_command("docker ps -a --format '{{json .}}'")
    # One JSON object per line, one line per container.
    containers = [json.loads(line) for line in stdout.splitlines()]
    server_jsons = []
    for container in containers:
        if container_name in container["Names"] and suffix in container["Names"]:
            server_jsons.append(container)
    if not server_jsons:
        raise RuntimeError(
            f"No container found containing: {container_name} and {suffix}"
        )
    elif len(server_jsons) > 1:
        raise RuntimeError(
            f"Too many containers matching {container_name} found, please indicate a suffix"
        )
    server_json = server_jsons[0]
    # This is in case the api_server has multiple ports
    client_port = "8080"
    # "Ports" is a comma-separated string like "0.0.0.0:32768->8080/tcp".
    ports = server_json.get("Ports", "")
    port_infos = ports.split(",") if ports else []
    port_dict = {}
    for port_info in port_infos:
        # After split(":")[-1] a mapping looks like "32768->8080/tcp";
        # key becomes the container side ("8080/tcp"), value the host port.
        port_arr = port_info.split(":")[-1].split("->") if port_info else []
        if len(port_arr) == 2:
            port_dict[port_arr[1]] = port_arr[0]
    # Find the host port where client_port is in the key
    matching_ports = [value for key, value in port_dict.items() if client_port in key]
    if len(matching_ports) > 1:
        raise RuntimeError(f"Too many ports matching {client_port} found")
    if not matching_ports:
        raise RuntimeError(
            f"No port found containing: {client_port} for container: {container_name} and suffix: {suffix}"
        )
    return matching_ports[0]

View File

@ -0,0 +1,31 @@
import os
from types import SimpleNamespace
import yaml
from tests.regression.answer_quality.api_utils import create_cc_pair
from tests.regression.answer_quality.api_utils import create_connector
from tests.regression.answer_quality.api_utils import create_credential
from tests.regression.answer_quality.api_utils import run_cc_once
from tests.regression.answer_quality.api_utils import upload_file
def upload_test_files(zip_file_path: str, run_suffix: str) -> None:
    """Upload the zipped docs, then create and run a connector/credential pair over them."""
    print("zip:", zip_file_path)
    uploaded_paths = upload_file(run_suffix, zip_file_path)

    connector_id = create_connector(run_suffix, uploaded_paths)
    credential_id = create_credential(run_suffix)

    create_cc_pair(run_suffix, connector_id, credential_id)
    run_cc_once(run_suffix, connector_id, credential_id)
if __name__ == "__main__":
    # Load the shared test config sitting next to this script and upload
    # the documents it points at, tagged with the configured run suffix.
    config_dir = os.path.dirname(os.path.abspath(__file__))
    with open(os.path.join(config_dir, "search_test_config.yaml"), "r") as config_file:
        config = SimpleNamespace(**yaml.safe_load(config_file))
    upload_test_files(config.zipped_documents_file, config.existing_test_suffix)

View File

@ -1,138 +1,108 @@
import argparse
import json import json
import os
import time
from types import SimpleNamespace
from sqlalchemy.orm import Session import yaml
from danswer.configs.constants import MessageType from tests.regression.answer_quality.api_utils import check_if_query_ready
from danswer.db.engine import get_sqlalchemy_engine from tests.regression.answer_quality.api_utils import get_answer_from_query
from danswer.one_shot_answer.answer_question import get_search_answer from tests.regression.answer_quality.cli_utils import get_current_commit_sha
from danswer.one_shot_answer.models import DirectQARequest
from danswer.one_shot_answer.models import OneShotQAResponse
from danswer.one_shot_answer.models import ThreadMessage
from danswer.search.models import IndexFilters
from danswer.search.models import OptionalSearchSetting
from danswer.search.models import RetrievalDetails
def get_answer_for_question(query: str, db_session: Session) -> OneShotQAResponse: def _get_relari_outputs(samples: list[dict], run_suffix: str) -> list[dict]:
filters = IndexFilters( while not check_if_query_ready(run_suffix):
source_type=None, time.sleep(5)
document_set=None,
time_cutoff=None,
tags=None,
access_control_list=None,
)
messages = [ThreadMessage(message=query, sender=None, role=MessageType.USER)]
new_message_request = DirectQARequest(
messages=messages,
prompt_id=0,
persona_id=0,
retrieval_options=RetrievalDetails(
run_search=OptionalSearchSetting.ALWAYS,
real_time=True,
filters=filters,
enable_auto_detect_filters=False,
),
chain_of_thought=False,
return_contexts=True,
)
answer = get_search_answer(
query_req=new_message_request,
user=None,
max_document_tokens=None,
max_history_tokens=None,
db_session=db_session,
answer_generation_timeout=100,
enable_reflexion=False,
bypass_acl=True,
)
return answer
def read_questions(questions_file_path: str) -> list[dict]:
samples = []
with open(questions_file_path, "r", encoding="utf-8") as file:
for line in file:
sample = json.loads(line.strip())
samples.append(sample)
return samples
def get_relari_outputs(samples: list[dict]) -> list[dict]:
relari_outputs = [] relari_outputs = []
with Session(get_sqlalchemy_engine(), expire_on_commit=False) as db_session: for sample in samples:
for sample in samples: retrieved_context, answer = get_answer_from_query(
answer = get_answer_for_question( query=sample["question"],
query=sample["question"], db_session=db_session run_suffix=run_suffix,
) )
assert answer.contexts
relari_outputs.append( relari_outputs.append(
{ {
"label": sample["uid"], "label": sample["uid"],
"question": sample["question"], "question": sample["question"],
"answer": answer.answer, "answer": answer,
"retrieved_context": [ "retrieved_context": retrieved_context,
context.content for context in answer.contexts.contexts }
], )
}
)
return relari_outputs return relari_outputs
def write_output_file(relari_outputs: list[dict], output_file: str) -> None: def _write_output_file(
with open(output_file, "w", encoding="utf-8") as file: relari_outputs: list[dict], output_folder_path: str, run_suffix: str
) -> None:
metadata = {"commit_sha": get_current_commit_sha(), "run_suffix": run_suffix}
counter = 1
output_file_path = os.path.join(output_folder_path, "results.txt")
metadata_file_path = os.path.join(output_folder_path, "run_metadata.yaml")
while os.path.exists(output_file_path) or os.path.exists(metadata_file_path):
output_file_path = os.path.join(output_folder_path, f"results_{counter}.txt")
metadata_file_path = os.path.join(
output_folder_path, f"run_metadata_{counter}.txt"
)
counter += 1
print("saving question results to:", output_file_path)
print("saving metadata to:", metadata_file_path)
with open(metadata_file_path, "w", encoding="utf-8") as yaml_file:
yaml.dump(metadata, yaml_file)
with open(output_file_path, "w", encoding="utf-8") as file:
for output in relari_outputs: for output in relari_outputs:
file.write(json.dumps(output) + "\n") file.write(json.dumps(output) + "\n")
file.flush()
def main(questions_file: str, output_file: str, limit: int | None = None) -> None: def _read_questions_jsonl(questions_file_path: str) -> list[dict]:
samples = read_questions(questions_file) questions = []
with open(questions_file_path, "r") as file:
for line in file:
json_obj = json.loads(line)
questions.append(json_obj)
return questions
def answer_relari_questions(
questions_file_path: str,
results_folder_path: str,
run_suffix: str,
limit: int | None = None,
) -> None:
samples = _read_questions_jsonl(questions_file_path)
if limit is not None: if limit is not None:
samples = samples[:limit] samples = samples[:limit]
# Use to be in this format but has since changed relari_outputs = _get_relari_outputs(samples=samples, run_suffix=run_suffix)
# response_dict = {
# "question": sample["question"],
# "retrieved_contexts": [
# context.content for context in answer.contexts.contexts
# ],
# "ground_truth_contexts": sample["ground_truth_contexts"],
# "answer": answer.answer,
# "ground_truths": sample["ground_truths"],
# }
relari_outputs = get_relari_outputs(samples=samples) _write_output_file(relari_outputs, results_folder_path, run_suffix)
write_output_file(relari_outputs, output_file)
def main() -> None:
current_dir = os.path.dirname(os.path.abspath(__file__))
config_path = os.path.join(current_dir, "search_test_config.yaml")
with open(config_path, "r") as file:
config = SimpleNamespace(**yaml.safe_load(file))
current_output_folder = os.path.expanduser(config.output_folder)
if config.existing_test_suffix:
current_output_folder = os.path.join(
current_output_folder, "test" + config.existing_test_suffix, "relari_output"
)
else:
current_output_folder = os.path.join(current_output_folder, "no_defined_suffix")
answer_relari_questions(
config.questions_file,
current_output_folder,
config.existing_test_suffix,
config.limit,
)
if __name__ == "__main__": if __name__ == "__main__":
parser = argparse.ArgumentParser() main()
parser.add_argument(
"--questions_file",
type=str,
help="Path to the Relari questions file.",
default="./tests/regression/answer_quality/combined_golden_dataset.jsonl",
)
parser.add_argument(
"--output_file",
type=str,
help="Path to the output results file.",
default="./tests/regression/answer_quality/relari_results.txt",
)
parser.add_argument(
"--limit",
type=int,
default=None,
help="Limit the number of examples to process.",
)
args = parser.parse_args()
main(args.questions_file, args.output_file, args.limit)

View File

@ -0,0 +1,58 @@
import os
from datetime import datetime
from types import SimpleNamespace
import yaml
from tests.regression.answer_quality.cli_utils import cleanup_docker
from tests.regression.answer_quality.cli_utils import manage_data_directories
from tests.regression.answer_quality.cli_utils import set_env_variables
from tests.regression.answer_quality.cli_utils import start_docker_compose
from tests.regression.answer_quality.cli_utils import switch_to_branch
from tests.regression.answer_quality.file_uploader import upload_test_files
from tests.regression.answer_quality.relari import answer_relari_questions
def load_config(config_filename: str) -> SimpleNamespace:
    """Read a YAML config file located next to this script into a SimpleNamespace."""
    script_dir = os.path.dirname(os.path.abspath(__file__))
    with open(os.path.join(script_dir, config_filename), "r") as config_file:
        return SimpleNamespace(**yaml.safe_load(config_file))
def main() -> None:
    """End-to-end search-quality test driver.

    Order matters: env vars must be exported before compose starts, data
    directories must exist before containers mount them, and documents are
    uploaded before questions are asked.
    """
    config = load_config("search_test_config.yaml")
    # Reuse a previous run's data when a suffix is supplied; otherwise
    # stamp a fresh timestamp suffix for this run.
    if config.existing_test_suffix:
        run_suffix = config.existing_test_suffix
        print("launching danswer with existing data suffix:", run_suffix)
    else:
        run_suffix = datetime.now().strftime("_%Y%m%d_%H%M%S")
        print("run_suffix:", run_suffix)
    set_env_variables(
        config.model_server_ip,
        config.model_server_port,
        config.use_cloud_gpu,
        config.llm,
    )
    relari_output_folder_path = manage_data_directories(
        run_suffix, config.output_folder, config.use_cloud_gpu
    )
    # branch may be null in the config, meaning "test the code as checked out".
    if config.branch:
        switch_to_branch(config.branch)
    start_docker_compose(run_suffix, config.launch_web_ui, config.use_cloud_gpu)
    # Only fresh runs need documents uploaded; existing runs already have them.
    if not config.existing_test_suffix:
        upload_test_files(config.zipped_documents_file, run_suffix)
    answer_relari_questions(
        config.questions_file, relari_output_folder_path, run_suffix, config.limit
    )
    if config.clean_up_docker_containers:
        cleanup_docker(run_suffix)
if __name__ == "__main__":
    main()

View File

@ -0,0 +1,49 @@
# Directory where test results will be saved
output_folder: "~/danswer_test_results"
# Path to the zip file containing sample documents
zipped_documents_file: "~/sampledocs.zip"
# Path to the YAML file containing sample questions
questions_file: "~/sample_questions.yaml"
# Git branch to use (null means use current branch as is)
branch: null
# Whether to remove Docker containers after the test
clean_up_docker_containers: true
# Whether to launch a web UI for the test
launch_web_ui: false
# Whether to use a cloud GPU for processing
use_cloud_gpu: false
# IP address of the model server (placeholder)
model_server_ip: "PUT_PUBLIC_CLOUD_IP_HERE"
# Port of the model server (placeholder)
model_server_port: "PUT_PUBLIC_CLOUD_PORT_HERE"
# Suffix for existing test results (empty string means no suffix)
existing_test_suffix: ""
# Limit on number of tests to run (null means no limit)
limit: null
# LLM configuration
llm:
# Name of the LLM
name: "llm_name"
# Provider of the LLM (e.g., OpenAI)
provider: "openai"
# API key
api_key: "PUT_API_KEY_HERE"
# Default model name to use
default_model_name: "gpt-4o"
# List of model names to use for testing
model_names: ["gpt-4o"]

View File

@ -0,0 +1,406 @@
version: '3'
services:
api_server:
image: danswer/danswer-backend:latest
build:
context: ../../backend
dockerfile: Dockerfile
command: >
/bin/sh -c "alembic upgrade head &&
echo \"Starting Danswer Api Server\" &&
uvicorn danswer.main:app --host 0.0.0.0 --port 8080"
depends_on:
- relational_db
- index
# - inference_model_server
restart: always
ports:
- "8080"
environment:
# Auth Settings
- AUTH_TYPE=${AUTH_TYPE:-disabled}
- SESSION_EXPIRE_TIME_SECONDS=${SESSION_EXPIRE_TIME_SECONDS:-86400}
- ENCRYPTION_KEY_SECRET=${ENCRYPTION_KEY_SECRET:-}
- VALID_EMAIL_DOMAINS=${VALID_EMAIL_DOMAINS:-}
- GOOGLE_OAUTH_CLIENT_ID=${GOOGLE_OAUTH_CLIENT_ID:-}
- GOOGLE_OAUTH_CLIENT_SECRET=${GOOGLE_OAUTH_CLIENT_SECRET:-}
- REQUIRE_EMAIL_VERIFICATION=${REQUIRE_EMAIL_VERIFICATION:-}
- SMTP_SERVER=${SMTP_SERVER:-} # For sending verification emails, if unspecified then defaults to 'smtp.gmail.com'
- SMTP_PORT=${SMTP_PORT:-587} # For sending verification emails, if unspecified then defaults to '587'
- SMTP_USER=${SMTP_USER:-}
- SMTP_PASS=${SMTP_PASS:-}
- EMAIL_FROM=${EMAIL_FROM:-}
- OAUTH_CLIENT_ID=${OAUTH_CLIENT_ID:-}
- OAUTH_CLIENT_SECRET=${OAUTH_CLIENT_SECRET:-}
- OPENID_CONFIG_URL=${OPENID_CONFIG_URL:-}
# Gen AI Settings
- GEN_AI_MODEL_PROVIDER=${GEN_AI_MODEL_PROVIDER:-}
- GEN_AI_MODEL_VERSION=${GEN_AI_MODEL_VERSION:-}
- FAST_GEN_AI_MODEL_VERSION=${FAST_GEN_AI_MODEL_VERSION:-}
- GEN_AI_API_KEY=${GEN_AI_API_KEY:-}
- GEN_AI_API_ENDPOINT=${GEN_AI_API_ENDPOINT:-}
- GEN_AI_API_VERSION=${GEN_AI_API_VERSION:-}
- GEN_AI_LLM_PROVIDER_TYPE=${GEN_AI_LLM_PROVIDER_TYPE:-}
- GEN_AI_MAX_TOKENS=${GEN_AI_MAX_TOKENS:-}
- QA_TIMEOUT=${QA_TIMEOUT:-}
- MAX_CHUNKS_FED_TO_CHAT=${MAX_CHUNKS_FED_TO_CHAT:-}
- DISABLE_LLM_FILTER_EXTRACTION=${DISABLE_LLM_FILTER_EXTRACTION:-}
- DISABLE_LLM_CHUNK_FILTER=${DISABLE_LLM_CHUNK_FILTER:-}
- DISABLE_LLM_CHOOSE_SEARCH=${DISABLE_LLM_CHOOSE_SEARCH:-}
- DISABLE_LLM_QUERY_REPHRASE=${DISABLE_LLM_QUERY_REPHRASE:-}
- DISABLE_GENERATIVE_AI=${DISABLE_GENERATIVE_AI:-}
- DISABLE_LITELLM_STREAMING=${DISABLE_LITELLM_STREAMING:-}
- LITELLM_EXTRA_HEADERS=${LITELLM_EXTRA_HEADERS:-}
- BING_API_KEY=${BING_API_KEY:-}
# if set, allows for the use of the token budget system
- TOKEN_BUDGET_GLOBALLY_ENABLED=${TOKEN_BUDGET_GLOBALLY_ENABLED:-}
# Enables the use of bedrock models
- AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID:-}
- AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY:-}
- AWS_REGION_NAME=${AWS_REGION_NAME:-}
# Query Options
- DOC_TIME_DECAY=${DOC_TIME_DECAY:-} # Recency Bias for search results, decay at 1 / (1 + DOC_TIME_DECAY * x years)
- HYBRID_ALPHA=${HYBRID_ALPHA:-} # Hybrid Search Alpha (0 for entirely keyword, 1 for entirely vector)
- EDIT_KEYWORD_QUERY=${EDIT_KEYWORD_QUERY:-}
- MULTILINGUAL_QUERY_EXPANSION=${MULTILINGUAL_QUERY_EXPANSION:-}
- LANGUAGE_HINT=${LANGUAGE_HINT:-}
- LANGUAGE_CHAT_NAMING_HINT=${LANGUAGE_CHAT_NAMING_HINT:-}
- QA_PROMPT_OVERRIDE=${QA_PROMPT_OVERRIDE:-}
# Other services
- POSTGRES_HOST=relational_db
- VESPA_HOST=index
- WEB_DOMAIN=${WEB_DOMAIN:-} # For frontend redirect auth purpose
# Don't change the NLP model configs unless you know what you're doing
- DOCUMENT_ENCODER_MODEL=${DOCUMENT_ENCODER_MODEL:-}
- DOC_EMBEDDING_DIM=${DOC_EMBEDDING_DIM:-}
- NORMALIZE_EMBEDDINGS=${NORMALIZE_EMBEDDINGS:-}
- ASYM_QUERY_PREFIX=${ASYM_QUERY_PREFIX:-}
- ENABLE_RERANKING_REAL_TIME_FLOW=${ENABLE_RERANKING_REAL_TIME_FLOW:-}
- ENABLE_RERANKING_ASYNC_FLOW=${ENABLE_RERANKING_ASYNC_FLOW:-}
- MODEL_SERVER_HOST=${MODEL_SERVER_HOST:-}
- MODEL_SERVER_ALLOWED_HOST=${MODEL_SERVER_HOST:-}
- MODEL_SERVER_PORT=${MODEL_SERVER_PORT:-}
# Leave this on pretty please? Nothing sensitive is collected!
# https://docs.danswer.dev/more/telemetry
- DISABLE_TELEMETRY=${DISABLE_TELEMETRY:-}
- LOG_LEVEL=${LOG_LEVEL:-info} # Set to debug to get more fine-grained logs
- LOG_ALL_MODEL_INTERACTIONS=${LOG_ALL_MODEL_INTERACTIONS:-} # Log all of the prompts to the LLM
# If set to `true` will enable additional logs about Vespa query performance
# (time spent on finding the right docs + time spent fetching summaries from disk)
- LOG_VESPA_TIMING_INFORMATION=${LOG_VESPA_TIMING_INFORMATION:-}
- LOG_ENDPOINT_LATENCY=${LOG_ENDPOINT_LATENCY:-}
# Enterprise Edition only
- ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=${ENABLE_PAID_ENTERPRISE_EDITION_FEATURES:-false}
- API_KEY_HASH_ROUNDS=${API_KEY_HASH_ROUNDS:-}
# Seeding configuration
- ENV_SEED_CONFIGURATION=${ENV_SEED_CONFIGURATION:-}
extra_hosts:
- "host.docker.internal:host-gateway"
logging:
driver: json-file
options:
max-size: "50m"
max-file: "6"
background:
image: danswer/danswer-backend:latest
build:
context: ../../backend
dockerfile: Dockerfile
command: /usr/bin/supervisord -c /etc/supervisor/conf.d/supervisord.conf
depends_on:
- relational_db
- index
# - inference_model_server
# - indexing_model_server
restart: always
environment:
- ENCRYPTION_KEY_SECRET=${ENCRYPTION_KEY_SECRET:-}
# Gen AI Settings (Needed by DanswerBot)
- GEN_AI_MODEL_PROVIDER=${GEN_AI_MODEL_PROVIDER:-}
- GEN_AI_MODEL_VERSION=${GEN_AI_MODEL_VERSION:-}
- FAST_GEN_AI_MODEL_VERSION=${FAST_GEN_AI_MODEL_VERSION:-}
- GEN_AI_API_KEY=${GEN_AI_API_KEY:-}
- GEN_AI_API_ENDPOINT=${GEN_AI_API_ENDPOINT:-}
- GEN_AI_API_VERSION=${GEN_AI_API_VERSION:-}
- GEN_AI_LLM_PROVIDER_TYPE=${GEN_AI_LLM_PROVIDER_TYPE:-}
- GEN_AI_MAX_TOKENS=${GEN_AI_MAX_TOKENS:-}
- QA_TIMEOUT=${QA_TIMEOUT:-}
- MAX_CHUNKS_FED_TO_CHAT=${MAX_CHUNKS_FED_TO_CHAT:-}
- DISABLE_LLM_FILTER_EXTRACTION=${DISABLE_LLM_FILTER_EXTRACTION:-}
- DISABLE_LLM_CHUNK_FILTER=${DISABLE_LLM_CHUNK_FILTER:-}
- DISABLE_LLM_CHOOSE_SEARCH=${DISABLE_LLM_CHOOSE_SEARCH:-}
- DISABLE_LLM_QUERY_REPHRASE=${DISABLE_LLM_QUERY_REPHRASE:-}
- DISABLE_GENERATIVE_AI=${DISABLE_GENERATIVE_AI:-}
- GENERATIVE_MODEL_ACCESS_CHECK_FREQ=${GENERATIVE_MODEL_ACCESS_CHECK_FREQ:-}
- DISABLE_LITELLM_STREAMING=${DISABLE_LITELLM_STREAMING:-}
- LITELLM_EXTRA_HEADERS=${LITELLM_EXTRA_HEADERS:-}
- BING_API_KEY=${BING_API_KEY:-}
# Query Options
- DOC_TIME_DECAY=${DOC_TIME_DECAY:-} # Recency Bias for search results, decay at 1 / (1 + DOC_TIME_DECAY * x years)
- HYBRID_ALPHA=${HYBRID_ALPHA:-} # Hybrid Search Alpha (0 for entirely keyword, 1 for entirely vector)
- EDIT_KEYWORD_QUERY=${EDIT_KEYWORD_QUERY:-}
- MULTILINGUAL_QUERY_EXPANSION=${MULTILINGUAL_QUERY_EXPANSION:-}
- LANGUAGE_HINT=${LANGUAGE_HINT:-}
- LANGUAGE_CHAT_NAMING_HINT=${LANGUAGE_CHAT_NAMING_HINT:-}
- QA_PROMPT_OVERRIDE=${QA_PROMPT_OVERRIDE:-}
# Other Services
- POSTGRES_HOST=relational_db
- POSTGRES_USER=${POSTGRES_USER:-}
- POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-}
- POSTGRES_DB=${POSTGRES_DB:-}
- VESPA_HOST=index
- WEB_DOMAIN=${WEB_DOMAIN:-} # For frontend redirect auth purpose for OAuth2 connectors
# Don't change the NLP model configs unless you know what you're doing
- DOCUMENT_ENCODER_MODEL=${DOCUMENT_ENCODER_MODEL:-}
- DOC_EMBEDDING_DIM=${DOC_EMBEDDING_DIM:-}
- NORMALIZE_EMBEDDINGS=${NORMALIZE_EMBEDDINGS:-}
- ASYM_QUERY_PREFIX=${ASYM_QUERY_PREFIX:-} # Needed by DanswerBot
- ASYM_PASSAGE_PREFIX=${ASYM_PASSAGE_PREFIX:-}
- MODEL_SERVER_HOST=${MODEL_SERVER_HOST:-}
- MODEL_SERVER_ALLOWED_HOST=${MODEL_SERVER_HOST:-}
- MODEL_SERVER_PORT=${MODEL_SERVER_PORT:-}
- INDEXING_MODEL_SERVER_HOST=${INDEXING_MODEL_SERVER_HOST:-}
# Indexing Configs
- NUM_INDEXING_WORKERS=${NUM_INDEXING_WORKERS:-}
- ENABLED_CONNECTOR_TYPES=${ENABLED_CONNECTOR_TYPES:-}
- DISABLE_INDEX_UPDATE_ON_SWAP=${DISABLE_INDEX_UPDATE_ON_SWAP:-}
- DASK_JOB_CLIENT_ENABLED=${DASK_JOB_CLIENT_ENABLED:-}
- CONTINUE_ON_CONNECTOR_FAILURE=${CONTINUE_ON_CONNECTOR_FAILURE:-}
- EXPERIMENTAL_CHECKPOINTING_ENABLED=${EXPERIMENTAL_CHECKPOINTING_ENABLED:-}
- CONFLUENCE_CONNECTOR_LABELS_TO_SKIP=${CONFLUENCE_CONNECTOR_LABELS_TO_SKIP:-}
- JIRA_CONNECTOR_LABELS_TO_SKIP=${JIRA_CONNECTOR_LABELS_TO_SKIP:-}
- WEB_CONNECTOR_VALIDATE_URLS=${WEB_CONNECTOR_VALIDATE_URLS:-}
- JIRA_API_VERSION=${JIRA_API_VERSION:-}
- GONG_CONNECTOR_START_TIME=${GONG_CONNECTOR_START_TIME:-}
- NOTION_CONNECTOR_ENABLE_RECURSIVE_PAGE_LOOKUP=${NOTION_CONNECTOR_ENABLE_RECURSIVE_PAGE_LOOKUP:-}
- GITHUB_CONNECTOR_BASE_URL=${GITHUB_CONNECTOR_BASE_URL:-}
# Danswer SlackBot Configs
- DANSWER_BOT_SLACK_APP_TOKEN=${DANSWER_BOT_SLACK_APP_TOKEN:-}
- DANSWER_BOT_SLACK_BOT_TOKEN=${DANSWER_BOT_SLACK_BOT_TOKEN:-}
- DANSWER_BOT_DISABLE_DOCS_ONLY_ANSWER=${DANSWER_BOT_DISABLE_DOCS_ONLY_ANSWER:-}
- DANSWER_BOT_FEEDBACK_VISIBILITY=${DANSWER_BOT_FEEDBACK_VISIBILITY:-}
- DANSWER_BOT_DISPLAY_ERROR_MSGS=${DANSWER_BOT_DISPLAY_ERROR_MSGS:-}
- DANSWER_BOT_RESPOND_EVERY_CHANNEL=${DANSWER_BOT_RESPOND_EVERY_CHANNEL:-}
- DANSWER_BOT_DISABLE_COT=${DANSWER_BOT_DISABLE_COT:-} # Currently unused
- NOTIFY_SLACKBOT_NO_ANSWER=${NOTIFY_SLACKBOT_NO_ANSWER:-}
- DANSWER_BOT_MAX_QPM=${DANSWER_BOT_MAX_QPM:-}
- DANSWER_BOT_MAX_WAIT_TIME=${DANSWER_BOT_MAX_WAIT_TIME:-}
# Logging
# Leave this on pretty please? Nothing sensitive is collected!
# https://docs.danswer.dev/more/telemetry
- DISABLE_TELEMETRY=${DISABLE_TELEMETRY:-}
- LOG_LEVEL=${LOG_LEVEL:-info} # Set to debug to get more fine-grained logs
- LOG_ALL_MODEL_INTERACTIONS=${LOG_ALL_MODEL_INTERACTIONS:-} # Log all of the prompts to the LLM
- LOG_VESPA_TIMING_INFORMATION=${LOG_VESPA_TIMING_INFORMATION:-}
# Enterprise Edition stuff
- ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=${ENABLE_PAID_ENTERPRISE_EDITION_FEATURES:-false}
extra_hosts:
- "host.docker.internal:host-gateway"
logging:
driver: json-file
options:
max-size: "50m"
max-file: "6"
web_server:
image: danswer/danswer-web-server:latest
build:
context: ../../web
dockerfile: Dockerfile
args:
- NEXT_PUBLIC_DISABLE_STREAMING=${NEXT_PUBLIC_DISABLE_STREAMING:-false}
- NEXT_PUBLIC_NEW_CHAT_DIRECTS_TO_SAME_PERSONA=${NEXT_PUBLIC_NEW_CHAT_DIRECTS_TO_SAME_PERSONA:-false}
- NEXT_PUBLIC_POSITIVE_PREDEFINED_FEEDBACK_OPTIONS=${NEXT_PUBLIC_POSITIVE_PREDEFINED_FEEDBACK_OPTIONS:-}
- NEXT_PUBLIC_NEGATIVE_PREDEFINED_FEEDBACK_OPTIONS=${NEXT_PUBLIC_NEGATIVE_PREDEFINED_FEEDBACK_OPTIONS:-}
- NEXT_PUBLIC_DISABLE_LOGOUT=${NEXT_PUBLIC_DISABLE_LOGOUT:-}
# Enterprise Edition only
- NEXT_PUBLIC_THEME=${NEXT_PUBLIC_THEME:-}
# DO NOT TURN ON unless you have EXPLICIT PERMISSION from Danswer.
- NEXT_PUBLIC_DO_NOT_USE_TOGGLE_OFF_DANSWER_POWERED=${NEXT_PUBLIC_DO_NOT_USE_TOGGLE_OFF_DANSWER_POWERED:-false}
depends_on:
- api_server
restart: always
environment:
- INTERNAL_URL=http://api_server:8080
- WEB_DOMAIN=${WEB_DOMAIN:-}
- THEME_IS_DARK=${THEME_IS_DARK:-}
# Enterprise Edition only
- ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=${ENABLE_PAID_ENTERPRISE_EDITION_FEATURES:-false}
inference_model_server:
image: danswer/danswer-model-server:latest
build:
context: ../../backend
dockerfile: Dockerfile.model_server
command: >
/bin/sh -c "if [ \"${DISABLE_MODEL_SERVER:-false}\" = \"True\" ]; then
echo 'Skipping service...';
exit 0;
else
exec uvicorn model_server.main:app --host 0.0.0.0 --port 9000;
fi"
restart: on-failure
environment:
- MIN_THREADS_ML_MODELS=${MIN_THREADS_ML_MODELS:-}
# Set to debug to get more fine-grained logs
- LOG_LEVEL=${LOG_LEVEL:-info}
    volumes:
      # Not necessary, this is just to reduce download time during startup.
      # NOTE(review): the `volumes:` key was commented out while this list item was
      # left active, stranding it under `environment:`. Restored the key and pointed
      # it at `model_cache_huggingface`, the only cache volume declared in the
      # top-level `volumes:` section (the inference_/index_-specific declarations
      # are commented out there).
      - model_cache_huggingface:/root/.cache/huggingface/
logging:
driver: json-file
options:
max-size: "50m"
max-file: "6"
indexing_model_server:
image: danswer/danswer-model-server:latest
build:
context: ../../backend
dockerfile: Dockerfile.model_server
command: >
/bin/sh -c "if [ \"${DISABLE_MODEL_SERVER:-false}\" = \"True\" ]; then
echo 'Skipping service...';
exit 0;
else
exec uvicorn model_server.main:app --host 0.0.0.0 --port 9000;
fi"
restart: on-failure
environment:
- MIN_THREADS_ML_MODELS=${MIN_THREADS_ML_MODELS:-}
- INDEXING_ONLY=True
# Set to debug to get more fine-grained logs
- LOG_LEVEL=${LOG_LEVEL:-info}
    volumes:
      # Not necessary, this is just to reduce download time during startup.
      # NOTE(review): the `volumes:` key was commented out while this list item was
      # left active, stranding it under `environment:`. Restored the key and pointed
      # it at `model_cache_huggingface`, the only cache volume declared in the
      # top-level `volumes:` section (sharing the HuggingFace cache between the
      # inference and indexing model servers is safe).
      - model_cache_huggingface:/root/.cache/huggingface/
logging:
driver: json-file
options:
max-size: "50m"
max-file: "6"
relational_db:
image: postgres:15.2-alpine
restart: always
environment:
- POSTGRES_USER=${POSTGRES_USER:-postgres}
- POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-password}
ports:
- "5432"
volumes:
- db_volume:/var/lib/postgresql/data
# This container name cannot have an underscore in it due to Vespa expectations of the URL
index:
image: vespaengine/vespa:8.277.17
restart: always
ports:
- "19071"
- "8081"
volumes:
- vespa_volume:/opt/vespa/var
logging:
driver: json-file
options:
max-size: "50m"
max-file: "6"
nginx:
image: nginx:1.23.4-alpine
restart: always
# nginx will immediately crash with `nginx: [emerg] host not found in upstream`
# if api_server / web_server are not up
depends_on:
- api_server
- web_server
environment:
- DOMAIN=localhost
ports:
- "80:80"
- "3000:80" # allow for localhost:3000 usage, since that is the norm
volumes:
- ../data/nginx:/etc/nginx/conf.d
logging:
driver: json-file
options:
max-size: "50m"
max-file: "6"
    # The specified script waits for the api_server to start up.
    # Without this we've seen issues where nginx shows no error logs but
    # does not receive any traffic.
# NOTE: we have to use dos2unix to remove Carriage Return chars from the file
# in order to make this work on both Unix-like systems and windows
command: >
/bin/sh -c "dos2unix /etc/nginx/conf.d/run-nginx.sh
&& /etc/nginx/conf.d/run-nginx.sh app.conf.template.dev"
# volumes:
# db_volume:
# vespa_volume:
# # Created by the container itself
# model_cache_huggingface:
volumes:
db_volume:
driver: local
driver_opts:
type: none
o: bind
device: ${DANSWER_POSTGRES_DATA_DIR:-./postgres_data}
vespa_volume:
driver: local
driver_opts:
type: none
o: bind
device: ${DANSWER_VESPA_DATA_DIR:-./vespa_data}
model_cache_huggingface:
# driver: local
# driver_opts:
# type: none
# o: bind
# device: ${DANSWER_MODEL_CACHE_DIR:-./model_cache}
# index_model_cache_huggingface:
# driver: local
# driver_opts:
# type: none
# o: bind
# device: ${DANSWER_INDEX_MODEL_CACHE_DIR:-./model_cache}
# inference_model_cache_huggingface:
# driver: local
# driver_opts:
# type: none
# o: bind
# device: ${DANSWER_INFERENCE_MODEL_CACHE_DIR:-./model_cache}
# volumes:
# db_volume:
# driver: local
# driver_opts:
# type: none
# o: bind
# device: ${DANSWER_POSTGRES_DATA_DIR:-./postgres_data}
# vespa_volume:
# driver: local
# driver_opts:
# type: none
# o: bind
# device: ${DANSWER_VESPA_DATA_DIR:-./vespa_data}
# model_cache_huggingface:
# driver: local
# driver_opts:
# type: none
# o: bind
# device: ${DANSWER_MODEL_CACHE_DIR:-./model_cache}