some qol improvements for search pipeline (#1809)

This commit is contained in:
hagen-danswer
2024-07-11 17:42:11 -07:00
committed by GitHub
parent 3e645a510e
commit 991ee79e47
4 changed files with 21 additions and 19 deletions

1
.gitignore vendored
View File

@@ -6,3 +6,4 @@
 /deployment/data/nginx/app.conf
 .vscode/launch.json
 *.sw?
+/backend/tests/regression/answer_quality/search_test_config.yaml

View File

@@ -55,21 +55,14 @@ def get_answer_from_query(query: str, run_suffix: str) -> tuple[list[str], str]:
     body["user"] = None
     try:
         response_json = requests.post(url, headers=headers, json=body).json()
-        content_list = [
-            context.get("content", "")
-            for context in response_json.get("contexts", {}).get("contexts", [])
-        ]
-        answer = response_json.get("answer")
+        context_data_list = response_json.get("contexts", {}).get("contexts", [])
+        answer = response_json.get("answer", "")
     except Exception as e:
         print("Failed to answer the questions, trying again")
         print(f"error: {str(e)}")
         raise e
-    print("\nquery: ", query)
-    print("answer: ", answer)
-    print("content_list: ", content_list)
-    return content_list, answer
+    return context_data_list, answer


 def check_if_query_ready(run_suffix: str) -> bool:

View File

@@ -112,7 +112,8 @@ def start_docker_compose(
     run_suffix: str, launch_web_ui: bool, use_cloud_gpu: bool
 ) -> None:
     print("Starting Docker Compose...")
-    os.chdir("../deployment/docker_compose")
+    os.chdir(os.path.dirname(__file__))
+    os.chdir("../../../../deployment/docker_compose/")
     command = f"docker compose -f docker-compose.search-testing.yml -p danswer-stack{run_suffix} up -d"
     command += " --build"
     command += " --force-recreate"

View File

@@ -16,26 +16,33 @@ def _get_and_write_relari_outputs(
     while not check_if_query_ready(run_suffix):
         time.sleep(5)

+    count = 0
     with open(output_file_path, "w", encoding="utf-8") as file:
         for sample in samples:
-            retrieved_context, answer = get_answer_from_query(
-                query=sample["question"],
+            print(f"On question number {count}")
+            query = sample["question"]
+            print(f"query: {query}")
+            context_data_list, answer = get_answer_from_query(
+                query=query,
                 run_suffix=run_suffix,
             )
-            if not answer:
-                print("NO ANSWER GIVEN FOR QUESTION:", sample["question"])
-                continue
+            print(f"answer: {answer[:50]}...")
+            if not context_data_list:
+                print("No context found")
+            else:
+                print(f"{len(context_data_list)} context docs found")
+            print("\n")
             output = {
-                "label": sample["uid"],
-                "question": sample["question"],
+                "question_data": sample,
                 "answer": answer,
-                "retrieved_context": retrieved_context,
+                "context_data_list": context_data_list,
             }
             file.write(json.dumps(output) + "\n")
             file.flush()
+            count += 1


 def _write_metadata_file(run_suffix: str, metadata_file_path: str) -> None: