diff --git a/backend/ee/danswer/server/query_and_chat/chat_backend.py b/backend/ee/danswer/server/query_and_chat/chat_backend.py
index 1d2402600..5f393ffdd 100644
--- a/backend/ee/danswer/server/query_and_chat/chat_backend.py
+++ b/backend/ee/danswer/server/query_and_chat/chat_backend.py
@@ -33,6 +33,7 @@ def translate_doc_response_to_simple_doc(
 ) -> list[SimpleDoc]:
     return [
         SimpleDoc(
+            id=doc.document_id,
             semantic_identifier=doc.semantic_identifier,
             link=doc.link,
             blurb=doc.blurb,
diff --git a/backend/ee/danswer/server/query_and_chat/models.py b/backend/ee/danswer/server/query_and_chat/models.py
index c34a1aea2..b0116f804 100644
--- a/backend/ee/danswer/server/query_and_chat/models.py
+++ b/backend/ee/danswer/server/query_and_chat/models.py
@@ -44,6 +44,7 @@ class BasicCreateChatMessageRequest(ChunkContext):
 
 
 class SimpleDoc(BaseModel):
+    id: str
     semantic_identifier: str
     link: str | None
     blurb: str
diff --git a/backend/tests/regression/answer_quality/api_utils.py b/backend/tests/regression/answer_quality/api_utils.py
index 8d02d2ca9..56529e9fe 100644
--- a/backend/tests/regression/answer_quality/api_utils.py
+++ b/backend/tests/regression/answer_quality/api_utils.py
@@ -2,23 +2,43 @@ import requests
 from retry import retry
 
 from danswer.configs.constants import DocumentSource
-from danswer.configs.constants import MessageType
 from danswer.connectors.models import InputType
 from danswer.db.enums import IndexingStatus
-from danswer.one_shot_answer.models import DirectQARequest
-from danswer.one_shot_answer.models import ThreadMessage
 from danswer.search.models import IndexFilters
 from danswer.search.models import OptionalSearchSetting
 from danswer.search.models import RetrievalDetails
 from danswer.server.documents.models import ConnectorBase
+from danswer.server.query_and_chat.models import ChatSessionCreationRequest
+from ee.danswer.server.query_and_chat.models import BasicCreateChatMessageRequest
 from tests.regression.answer_quality.cli_utils import get_api_server_host_port
-from tests.regression.answer_quality.cli_utils import restart_vespa_container
+
+GENERAL_HEADERS = {"Content-Type": "application/json"}
 
 
 def _api_url_builder(run_suffix: str, api_path: str) -> str:
     return f"http://localhost:{get_api_server_host_port(run_suffix)}" + api_path
 
 
+def _create_new_chat_session(run_suffix: str) -> int:
+    create_chat_request = ChatSessionCreationRequest(
+        persona_id=0,
+        description=None,
+    )
+    body = create_chat_request.dict()
+
+    create_chat_url = _api_url_builder(run_suffix, "/chat/create-chat-session/")
+
+    response_json = requests.post(
+        create_chat_url, headers=GENERAL_HEADERS, json=body
+    ).json()
+    chat_session_id = response_json.get("chat_session_id")
+
+    if isinstance(chat_session_id, int):
+        return chat_session_id
+    else:
+        raise RuntimeError(response_json)
+
+
 @retry(tries=15, delay=10, jitter=1)
 def get_answer_from_query(query: str, run_suffix: str) -> tuple[list[str], str]:
     filters = IndexFilters(
@@ -28,51 +48,43 @@ def get_answer_from_query(query: str, run_suffix: str) -> tuple[list[str], str]:
         tags=None,
         access_control_list=None,
     )
-
-    messages = [ThreadMessage(message=query, sender=None, role=MessageType.USER)]
-
-    new_message_request = DirectQARequest(
-        messages=messages,
-        prompt_id=0,
-        persona_id=0,
-        retrieval_options=RetrievalDetails(
-            run_search=OptionalSearchSetting.ALWAYS,
-            real_time=True,
-            filters=filters,
-            enable_auto_detect_filters=False,
-        ),
-        chain_of_thought=False,
-        return_contexts=True,
+    retrieval_options = RetrievalDetails(
+        run_search=OptionalSearchSetting.ALWAYS,
+        real_time=True,
+        filters=filters,
+        enable_auto_detect_filters=False,
     )
 
-    url = _api_url_builder(run_suffix, "/query/answer-with-quote/")
-    headers = {
-        "Content-Type": "application/json",
-    }
+    chat_session_id = _create_new_chat_session(run_suffix)
+
+    url = _api_url_builder(run_suffix, "/chat/send-message-simple-api/")
+
+    new_message_request = BasicCreateChatMessageRequest(
+        chat_session_id=chat_session_id,
+        message=query,
+        retrieval_options=retrieval_options,
+        query_override=query,
+    )
 
     body = new_message_request.dict()
     body["user"] = None
 
     try:
-        response_json = requests.post(url, headers=headers, json=body).json()
-        context_data_list = response_json.get("contexts", {}).get("contexts", [])
+        response_json = requests.post(url, headers=GENERAL_HEADERS, json=body).json()
+        simple_search_docs = response_json.get("simple_search_docs", [])
         answer = response_json.get("answer", "")
     except Exception as e:
         print("Failed to answer the questions:")
         print(f"\t {str(e)}")
-        print("Restarting vespa container and trying agian")
-        restart_vespa_container(run_suffix)
+        print("trying again")
         raise e
 
-    return context_data_list, answer
+    return simple_search_docs, answer
 
 
 def check_if_query_ready(run_suffix: str) -> bool:
     url = _api_url_builder(run_suffix, "/manage/admin/connector/indexing-status/")
-    headers = {
-        "Content-Type": "application/json",
-    }
-    indexing_status_dict = requests.get(url, headers=headers).json()
+    indexing_status_dict = requests.get(url, headers=GENERAL_HEADERS).json()
 
     ongoing_index_attempts = False
     doc_count = 0
@@ -94,17 +106,13 @@ def check_if_query_ready(run_suffix: str) -> bool:
 
 def run_cc_once(run_suffix: str, connector_id: int, credential_id: int) -> None:
     url = _api_url_builder(run_suffix, "/manage/admin/connector/run-once/")
-    headers = {
-        "Content-Type": "application/json",
-    }
-
     body = {
         "connector_id": connector_id,
         "credential_ids": [credential_id],
         "from_beginning": True,
    }
     print("body:", body)
-    response = requests.post(url, headers=headers, json=body)
+    response = requests.post(url, headers=GENERAL_HEADERS, json=body)
     if response.status_code == 200:
         print("Connector created successfully:", response.json())
     else:
@@ -116,13 +124,10 @@ def create_cc_pair(run_suffix: str, connector_id: int, credential_id: int) -> No
     url = _api_url_builder(
         run_suffix, f"/manage/connector/{connector_id}/credential/{credential_id}"
     )
-    headers = {
-        "Content-Type": "application/json",
-    }
 
     body = {"name": "zip_folder_contents", "is_public": True}
     print("body:", body)
-    response = requests.put(url, headers=headers, json=body)
+    response = requests.put(url, headers=GENERAL_HEADERS, json=body)
     if response.status_code == 200:
         print("Connector created successfully:", response.json())
     else:
@@ -132,14 +137,12 @@ def create_cc_pair(run_suffix: str, connector_id: int, credential_id: int) -> No
 
 def _get_existing_connector_names(run_suffix: str) -> list[str]:
     url = _api_url_builder(run_suffix, "/manage/connector")
-    headers = {
-        "Content-Type": "application/json",
-    }
+
     body = {
         "credential_json": {},
         "admin_public": True,
     }
-    response = requests.get(url, headers=headers, json=body)
+    response = requests.get(url, headers=GENERAL_HEADERS, json=body)
     if response.status_code == 200:
         connectors = response.json()
         return [connector["name"] for connector in connectors]
@@ -149,9 +152,6 @@ def create_connector(run_suffix: str, file_paths: list[str]) -> int:
 
     url = _api_url_builder(run_suffix, "/manage/admin/connector")
-    headers = {
-        "Content-Type": "application/json",
-    }
 
     connector_name = base_connector_name = "search_eval_connector"
     existing_connector_names = _get_existing_connector_names(run_suffix)
 
@@ -172,7 +172,7 @@ def create_connector(run_suffix: str, file_paths: list[str]) -> int:
 
     body = connector.dict()
     print("body:", body)
-    response = requests.post(url, headers=headers, json=body)
+    response = requests.post(url, headers=GENERAL_HEADERS, json=body)
     if response.status_code == 200:
         print("Connector created successfully:", response.json())
         return response.json()["id"]
@@ -182,14 +182,11 @@ def create_connector(run_suffix: str, file_paths: list[str]) -> int:
 
 def create_credential(run_suffix: str) -> int:
     url = _api_url_builder(run_suffix, "/manage/credential")
-    headers = {
-        "Content-Type": "application/json",
-    }
     body = {
         "credential_json": {},
         "admin_public": True,
     }
-    response = requests.post(url, headers=headers, json=body)
+    response = requests.post(url, headers=GENERAL_HEADERS, json=body)
     if response.status_code == 200:
         print("credential created successfully:", response.json())
         return response.json()["id"]
diff --git a/backend/tests/regression/answer_quality/run_qa.py b/backend/tests/regression/answer_quality/run_qa.py
index c8ec09466..39d4d0b95 100644
--- a/backend/tests/regression/answer_quality/run_qa.py
+++ b/backend/tests/regression/answer_quality/run_qa.py
@@ -1,4 +1,5 @@
 import json
+import multiprocessing
 import os
 import time
 
@@ -13,11 +14,12 @@ RESULTS_FILENAME = "results.jsonl"
 METADATA_FILENAME = "metadata.yaml"
 
 
-def _update_results_file(output_folder_path: str, qa_output: dict) -> None:
+def _populate_results_file(output_folder_path: str, all_qa_output: list[dict]) -> None:
     output_file_path = os.path.join(output_folder_path, RESULTS_FILENAME)
-    with open(output_file_path, "w", encoding="utf-8") as file:
-        file.write(json.dumps(qa_output) + "\n")
-        file.flush()
+    with open(output_file_path, "a", encoding="utf-8") as file:
+        for qa_output in all_qa_output:
+            file.write(json.dumps(qa_output) + "\n")
+        file.flush()
 
 
 def _update_metadata_file(test_output_folder: str, count: int) -> None:
@@ -81,8 +83,8 @@ def _initialize_files(config: dict) -> tuple[str, list[dict]]:
         del env_vars["ENV_SEED_CONFIGURATION"]
     if env_vars["GPG_KEY"]:
         del env_vars["GPG_KEY"]
-    if metadata["config"]["llm"]["api_key"]:
-        del metadata["config"]["llm"]["api_key"]
+    if metadata["test_config"]["llm"]["api_key"]:
+        del metadata["test_config"]["llm"]["api_key"]
     metadata.update(env_vars)
     metadata_path = os.path.join(test_output_folder, METADATA_FILENAME)
     print("saving metadata to:", metadata_path)
@@ -92,7 +94,34 @@
     return test_output_folder, questions
 
 
+def _process_question(question_data: dict, config: dict, question_number: int) -> dict:
+    print(f"On question number {question_number}")
+
+    query = question_data["question"]
+    print(f"query: {query}")
+    context_data_list, answer = get_answer_from_query(
+        query=query,
+        run_suffix=config["run_suffix"],
+    )
+
+    if not context_data_list:
+        print("No answer or context found")
+    else:
+        print(f"answer: {answer[:50]}...")
+        print(f"{len(context_data_list)} context docs found")
+    print("\n")
+
+    output = {
+        "question_data": question_data,
+        "answer": answer,
+        "context_data_list": context_data_list,
+    }
+
+    return output
+
+
 def _process_and_write_query_results(config: dict) -> None:
+    start_time = time.time()
     test_output_folder, questions = _initialize_files(config)
     print("saving test results to folder:", test_output_folder)
 
@@ -101,33 +130,26 @@
 
     if config["limit"] is not None:
         questions = questions[: config["limit"]]
 
-    count = 1
-    for question_data in questions:
-        print(f"On question number {count}")
-        query = question_data["question"]
-        print(f"query: {query}")
-        context_data_list, answer = get_answer_from_query(
-            query=query,
-            run_suffix=config["run_suffix"],
+    with multiprocessing.Pool(processes=multiprocessing.cpu_count() * 2) as pool:
+        results = pool.starmap(
+            _process_question, [(q, config, i + 1) for i, q in enumerate(questions)]
        )
 
-        if not context_data_list:
-            print("No answer or context found")
-        else:
-            print(f"answer: {answer[:50]}...")
-            print(f"{len(context_data_list)} context docs found")
-        print("\n")
+    _populate_results_file(test_output_folder, results)
 
-        output = {
-            "question_data": question_data,
-            "answer": answer,
-            "context_data_list": context_data_list,
-        }
+    valid_answer_count = 0
+    for result in results:
+        if result.get("answer"):
+            valid_answer_count += 1
 
-        _update_results_file(test_output_folder, output)
-        _update_metadata_file(test_output_folder, count)
-        count += 1
+    _update_metadata_file(test_output_folder, valid_answer_count)
+
+    time_to_finish = time.time() - start_time
+    minutes, seconds = divmod(int(time_to_finish), 60)
+    print(
+        f"Took {minutes:02d}:{seconds:02d} to ask and answer {len(results)} questions"
+    )
 
 
 def run_qa_test_and_save_results(run_suffix: str = "") -> None: