From a0b46c60c6d80a55c550d63f5608297c6f5de339 Mon Sep 17 00:00:00 2001
From: hagen-danswer <hagen@danswer.ai>
Date: Mon, 22 Jul 2024 20:55:18 -0700
Subject: [PATCH] Switched eval api target back to oneshotqa (#1902)

---
 .../tests/regression/answer_quality/README.md |  3 +
 .../regression/answer_quality/api_utils.py    | 86 +++++++------------
 .../tests/regression/answer_quality/run_qa.py |  1 +
 .../search_test_config.yaml.template          |  3 +
 4 files changed, 38 insertions(+), 55 deletions(-)

diff --git a/backend/tests/regression/answer_quality/README.md b/backend/tests/regression/answer_quality/README.md
index 497b193a3c..3d4236bdff 100644
--- a/backend/tests/regression/answer_quality/README.md
+++ b/backend/tests/regression/answer_quality/README.md
@@ -63,6 +63,9 @@ Edit `search_test_config.yaml` to set:
     - Set this to true if you want to use the UI during/after the testing process
 - only_state
     - Whether to only run Vespa and Postgres
+- only_retrieve_docs
+    - Set this to true to only retrieve documents and skip generating the LLM response
+    - This saves on LLM API costs when only retrieval quality is being evaluated
 - use_cloud_gpu
     - Set to true or false depending on if you want to use the remote gpu
     - Only need to set this if use_cloud_gpu is true
diff --git a/backend/tests/regression/answer_quality/api_utils.py b/backend/tests/regression/answer_quality/api_utils.py
index 440c39f014..7709b79526 100644
--- a/backend/tests/regression/answer_quality/api_utils.py
+++ b/backend/tests/regression/answer_quality/api_utils.py
@@ -2,14 +2,15 @@ import requests
 from retry import retry
 
 from danswer.configs.constants import DocumentSource
+from danswer.configs.constants import MessageType
 from danswer.connectors.models import InputType
 from danswer.db.enums import IndexingStatus
+from danswer.one_shot_answer.models import DirectQARequest
+from danswer.one_shot_answer.models import ThreadMessage
 from danswer.search.models import IndexFilters
 from danswer.search.models import OptionalSearchSetting
 from danswer.search.models import RetrievalDetails
 from danswer.server.documents.models import ConnectorBase
-from danswer.server.query_and_chat.models import ChatSessionCreationRequest
-from ee.danswer.server.query_and_chat.models import BasicCreateChatMessageRequest
 from tests.regression.answer_quality.cli_utils import get_api_server_host_port
 
 GENERAL_HEADERS = {"Content-Type": "application/json"}
@@ -19,38 +20,10 @@ def _api_url_builder(run_suffix: str, api_path: str) -> str:
     return f"http://localhost:{get_api_server_host_port(run_suffix)}" + api_path
 
 
-def _create_new_chat_session(run_suffix: str) -> int:
-    create_chat_request = ChatSessionCreationRequest(
-        persona_id=0,
-        description=None,
-    )
-    body = create_chat_request.dict()
-
-    create_chat_url = _api_url_builder(run_suffix, "/chat/create-chat-session/")
-
-    response_json = requests.post(
-        create_chat_url, headers=GENERAL_HEADERS, json=body
-    ).json()
-    chat_session_id = response_json.get("chat_session_id")
-
-    if isinstance(chat_session_id, int):
-        return chat_session_id
-    else:
-        raise RuntimeError(response_json)
-
-
-def _delete_chat_session(chat_session_id: int, run_suffix: str) -> None:
-    delete_chat_url = _api_url_builder(
-        run_suffix, f"/chat/delete-chat-session/{chat_session_id}"
-    )
-
-    response = requests.delete(delete_chat_url, headers=GENERAL_HEADERS)
-    if response.status_code != 200:
-        raise RuntimeError(response.__dict__)
-
-
 @retry(tries=5, delay=5)
-def get_answer_from_query(query: str, run_suffix: str) -> tuple[list[str], str]:
+def get_answer_from_query(
+    query: str, only_retrieve_docs: bool, run_suffix: str
+) -> tuple[list[str], str]:
     filters = IndexFilters(
         source_type=None,
         document_set=None,
@@ -58,39 +31,42 @@ def get_answer_from_query(query: str, run_suffix: str) -> tuple[list[str], str]:
         tags=None,
         access_control_list=None,
     )
-    retrieval_options = RetrievalDetails(
-        run_search=OptionalSearchSetting.ALWAYS,
-        real_time=True,
-        filters=filters,
-        enable_auto_detect_filters=False,
+
+    messages = [ThreadMessage(message=query, sender=None, role=MessageType.USER)]
+
+    new_message_request = DirectQARequest(
+        messages=messages,
+        prompt_id=0,
+        persona_id=0,
+        retrieval_options=RetrievalDetails(
+            run_search=OptionalSearchSetting.ALWAYS,
+            real_time=True,
+            filters=filters,
+            enable_auto_detect_filters=False,
+        ),
+        chain_of_thought=False,
+        return_contexts=True,
+        skip_gen_ai_answer_generation=only_retrieve_docs,
     )
 
-    chat_session_id = _create_new_chat_session(run_suffix)
-
-    url = _api_url_builder(run_suffix, "/chat/send-message-simple-api/")
-
-    new_message_request = BasicCreateChatMessageRequest(
-        chat_session_id=chat_session_id,
-        message=query,
-        retrieval_options=retrieval_options,
-        query_override=query,
-    )
+    url = _api_url_builder(run_suffix, "/query/answer-with-quote/")
+    headers = {
+        "Content-Type": "application/json",
+    }
 
     body = new_message_request.dict()
     body["user"] = None
     try:
-        response_json = requests.post(url, headers=GENERAL_HEADERS, json=body).json()
-        simple_search_docs = response_json.get("simple_search_docs", [])
-        answer = response_json.get("answer", "")
+        response_json = requests.post(url, headers=headers, json=body).json()
+        context_data_list = response_json.get("contexts", {}).get("contexts", [])
+        answer = response_json.get("answer", "") or ""
     except Exception as e:
         print("Failed to answer the questions:")
         print(f"\t {str(e)}")
-        print("trying again")
+        print("Try restarting vespa container and trying agian")
         raise e
 
-    _delete_chat_session(chat_session_id, run_suffix)
-
-    return simple_search_docs, answer
+    return context_data_list, answer
 
 
 @retry(tries=10, delay=10)
diff --git a/backend/tests/regression/answer_quality/run_qa.py b/backend/tests/regression/answer_quality/run_qa.py
index 5c358e9164..96aa2c27e4 100644
--- a/backend/tests/regression/answer_quality/run_qa.py
+++ b/backend/tests/regression/answer_quality/run_qa.py
@@ -117,6 +117,7 @@ def _process_question(question_data: dict, config: dict, question_number: int) -
     print(f"query: {query}")
     context_data_list, answer = get_answer_from_query(
         query=query,
+        only_retrieve_docs=config["only_retrieve_docs"],
         run_suffix=config["run_suffix"],
     )
 
diff --git a/backend/tests/regression/answer_quality/search_test_config.yaml.template b/backend/tests/regression/answer_quality/search_test_config.yaml.template
index c12cea90c8..a451c22f79 100644
--- a/backend/tests/regression/answer_quality/search_test_config.yaml.template
+++ b/backend/tests/regression/answer_quality/search_test_config.yaml.template
@@ -22,6 +22,9 @@ launch_web_ui: false
 # Whether to only run Vespa and Postgres
 only_state: false
 
+# Whether to only retrieve documents and skip generating the LLM response
+only_retrieve_docs: false
+
 # Whether to use a cloud GPU for processing
 use_cloud_gpu: false