From a0b46c60c6d80a55c550d63f5608297c6f5de339 Mon Sep 17 00:00:00 2001 From: hagen-danswer Date: Mon, 22 Jul 2024 20:55:18 -0700 Subject: [PATCH] Switched eval api target back to oneshotqa (#1902) --- .../tests/regression/answer_quality/README.md | 3 + .../regression/answer_quality/api_utils.py | 86 +++++++------------ .../tests/regression/answer_quality/run_qa.py | 1 + .../search_test_config.yaml.template | 3 + 4 files changed, 38 insertions(+), 55 deletions(-) diff --git a/backend/tests/regression/answer_quality/README.md b/backend/tests/regression/answer_quality/README.md index 497b193a3c..3d4236bdff 100644 --- a/backend/tests/regression/answer_quality/README.md +++ b/backend/tests/regression/answer_quality/README.md @@ -63,6 +63,9 @@ Edit `search_test_config.yaml` to set: - Set this to true if you want to use the UI during/after the testing process - only_state - Whether to only run Vespa and Postgres +- only_retrieve_docs + - Set true to only retrieve documents, not LLM response + - This is to save on API costs - use_cloud_gpu - Set to true or false depending on if you want to use the remote gpu - Only need to set this if use_cloud_gpu is true diff --git a/backend/tests/regression/answer_quality/api_utils.py b/backend/tests/regression/answer_quality/api_utils.py index 440c39f014..7709b79526 100644 --- a/backend/tests/regression/answer_quality/api_utils.py +++ b/backend/tests/regression/answer_quality/api_utils.py @@ -2,14 +2,15 @@ import requests from retry import retry from danswer.configs.constants import DocumentSource +from danswer.configs.constants import MessageType from danswer.connectors.models import InputType from danswer.db.enums import IndexingStatus +from danswer.one_shot_answer.models import DirectQARequest +from danswer.one_shot_answer.models import ThreadMessage from danswer.search.models import IndexFilters from danswer.search.models import OptionalSearchSetting from danswer.search.models import RetrievalDetails from danswer.server.documents.models import ConnectorBase -from danswer.server.query_and_chat.models import ChatSessionCreationRequest -from ee.danswer.server.query_and_chat.models import BasicCreateChatMessageRequest from tests.regression.answer_quality.cli_utils import get_api_server_host_port GENERAL_HEADERS = {"Content-Type": "application/json"} @@ -19,38 +20,10 @@ def _api_url_builder(run_suffix: str, api_path: str) -> str: return f"http://localhost:{get_api_server_host_port(run_suffix)}" + api_path -def _create_new_chat_session(run_suffix: str) -> int: - create_chat_request = ChatSessionCreationRequest( - persona_id=0, - description=None, - ) - body = create_chat_request.dict() - - create_chat_url = _api_url_builder(run_suffix, "/chat/create-chat-session/") - - response_json = requests.post( - create_chat_url, headers=GENERAL_HEADERS, json=body - ).json() - chat_session_id = response_json.get("chat_session_id") - - if isinstance(chat_session_id, int): - return chat_session_id - else: - raise RuntimeError(response_json) - - -def _delete_chat_session(chat_session_id: int, run_suffix: str) -> None: - delete_chat_url = _api_url_builder( - run_suffix, f"/chat/delete-chat-session/{chat_session_id}" - ) - - response = requests.delete(delete_chat_url, headers=GENERAL_HEADERS) - if response.status_code != 200: - raise RuntimeError(response.__dict__) - - @retry(tries=5, delay=5) -def get_answer_from_query(query: str, run_suffix: str) -> tuple[list[str], str]: +def get_answer_from_query( + query: str, only_retrieve_docs: bool, run_suffix: str +) -> tuple[list[str], str]: filters = IndexFilters( source_type=None, document_set=None, @@ -58,39 +31,42 @@ def get_answer_from_query(query: str, run_suffix: str) -> tuple[list[str], str]: tags=None, access_control_list=None, ) - retrieval_options = RetrievalDetails( - run_search=OptionalSearchSetting.ALWAYS, - real_time=True, - filters=filters, - enable_auto_detect_filters=False, + + messages = [ThreadMessage(message=query, sender=None, role=MessageType.USER)] + + new_message_request = DirectQARequest( + messages=messages, + prompt_id=0, + persona_id=0, + retrieval_options=RetrievalDetails( + run_search=OptionalSearchSetting.ALWAYS, + real_time=True, + filters=filters, + enable_auto_detect_filters=False, + ), + chain_of_thought=False, + return_contexts=True, + skip_gen_ai_answer_generation=only_retrieve_docs, ) - chat_session_id = _create_new_chat_session(run_suffix) - - url = _api_url_builder(run_suffix, "/chat/send-message-simple-api/") - - new_message_request = BasicCreateChatMessageRequest( - chat_session_id=chat_session_id, - message=query, - retrieval_options=retrieval_options, - query_override=query, - ) + url = _api_url_builder(run_suffix, "/query/answer-with-quote/") + headers = { + "Content-Type": "application/json", + } body = new_message_request.dict() body["user"] = None try: - response_json = requests.post(url, headers=GENERAL_HEADERS, json=body).json() - simple_search_docs = response_json.get("simple_search_docs", []) - answer = response_json.get("answer", "") + response_json = requests.post(url, headers=headers, json=body).json() + context_data_list = response_json.get("contexts", {}).get("contexts", []) + answer = response_json.get("answer", "") or "" except Exception as e: print("Failed to answer the questions:") print(f"\t {str(e)}") - print("trying again") + print("Try restarting vespa container and trying agian") raise e - _delete_chat_session(chat_session_id, run_suffix) - - return simple_search_docs, answer + return context_data_list, answer @retry(tries=10, delay=10) diff --git a/backend/tests/regression/answer_quality/run_qa.py b/backend/tests/regression/answer_quality/run_qa.py index 5c358e9164..96aa2c27e4 100644 --- a/backend/tests/regression/answer_quality/run_qa.py +++ b/backend/tests/regression/answer_quality/run_qa.py @@ -117,6 +117,7 @@ def _process_question(question_data: dict, config: dict, question_number: int) - print(f"query: {query}") context_data_list, answer = get_answer_from_query( query=query, + only_retrieve_docs=config["only_retrieve_docs"], run_suffix=config["run_suffix"], ) diff --git a/backend/tests/regression/answer_quality/search_test_config.yaml.template b/backend/tests/regression/answer_quality/search_test_config.yaml.template index c12cea90c8..a451c22f79 100644 --- a/backend/tests/regression/answer_quality/search_test_config.yaml.template +++ b/backend/tests/regression/answer_quality/search_test_config.yaml.template @@ -22,6 +22,9 @@ launch_web_ui: false # Whether to only run Vespa and Postgres only_state: false +# Only retrieve documents, not LLM response +only_retrieve_docs: false + # Whether to use a cloud GPU for processing use_cloud_gpu: false