Switched eval api target back to oneshotqa (#1902)

hagen-danswer committed 2024-07-22 20:55:18 -07:00 (committed by GitHub)
parent 4029233df0
commit a0b46c60c6
4 changed files with 38 additions and 55 deletions


@@ -63,6 +63,9 @@ Edit `search_test_config.yaml` to set:
- Set this to true if you want to use the UI during/after the testing process
- only_state
- Whether to only run Vespa and Postgres
- only_retrieve_docs
- Set this to true to only retrieve documents and skip LLM answer generation
- This saves on LLM API costs (see the sketch after this list)
- use_cloud_gpu
- Set to true or false depending on whether you want to use the remote GPU
- Only need to set this if use_cloud_gpu is true
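For reference, a minimal sketch of how these settings might be read, assuming `search_test_config.yaml` is a flat key/value YAML file parsed with `yaml.safe_load`; the `load_config` helper and default path are illustrative and not part of this commit:

```python
import yaml  # PyYAML


def load_config(path: str = "search_test_config.yaml") -> dict:
    # Parse the flat YAML config used by the answer-quality regression tests.
    with open(path, "r") as f:
        return yaml.safe_load(f)


config = load_config()
# New flag introduced by this commit: when true, skip LLM answer generation
# and only return the retrieved documents (saves on API costs).
only_retrieve_docs: bool = config.get("only_retrieve_docs", False)
use_cloud_gpu: bool = config.get("use_cloud_gpu", False)
```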


@@ -2,14 +2,15 @@ import requests
from retry import retry
from danswer.configs.constants import DocumentSource
from danswer.configs.constants import MessageType
from danswer.connectors.models import InputType
from danswer.db.enums import IndexingStatus
from danswer.one_shot_answer.models import DirectQARequest
from danswer.one_shot_answer.models import ThreadMessage
from danswer.search.models import IndexFilters
from danswer.search.models import OptionalSearchSetting
from danswer.search.models import RetrievalDetails
from danswer.server.documents.models import ConnectorBase
from danswer.server.query_and_chat.models import ChatSessionCreationRequest
from ee.danswer.server.query_and_chat.models import BasicCreateChatMessageRequest
from tests.regression.answer_quality.cli_utils import get_api_server_host_port
GENERAL_HEADERS = {"Content-Type": "application/json"}
@@ -19,38 +20,10 @@ def _api_url_builder(run_suffix: str, api_path: str) -> str:
return f"http://localhost:{get_api_server_host_port(run_suffix)}" + api_path
def _create_new_chat_session(run_suffix: str) -> int:
create_chat_request = ChatSessionCreationRequest(
persona_id=0,
description=None,
)
body = create_chat_request.dict()
create_chat_url = _api_url_builder(run_suffix, "/chat/create-chat-session/")
response_json = requests.post(
create_chat_url, headers=GENERAL_HEADERS, json=body
).json()
chat_session_id = response_json.get("chat_session_id")
if isinstance(chat_session_id, int):
return chat_session_id
else:
raise RuntimeError(response_json)
def _delete_chat_session(chat_session_id: int, run_suffix: str) -> None:
delete_chat_url = _api_url_builder(
run_suffix, f"/chat/delete-chat-session/{chat_session_id}"
)
response = requests.delete(delete_chat_url, headers=GENERAL_HEADERS)
if response.status_code != 200:
raise RuntimeError(response.__dict__)
@retry(tries=5, delay=5)
def get_answer_from_query(query: str, run_suffix: str) -> tuple[list[str], str]:
def get_answer_from_query(
query: str, only_retrieve_docs: bool, run_suffix: str
) -> tuple[list[str], str]:
filters = IndexFilters(
source_type=None,
document_set=None,
@@ -58,39 +31,42 @@ def get_answer_from_query(query: str, run_suffix: str) -> tuple[list[str], str]:
tags=None,
access_control_list=None,
)
messages = [ThreadMessage(message=query, sender=None, role=MessageType.USER)]
new_message_request = DirectQARequest(
messages=messages,
prompt_id=0,
persona_id=0,
retrieval_options=RetrievalDetails(
run_search=OptionalSearchSetting.ALWAYS,
real_time=True,
filters=filters,
enable_auto_detect_filters=False,
),
chain_of_thought=False,
return_contexts=True,
skip_gen_ai_answer_generation=only_retrieve_docs,
)
chat_session_id = _create_new_chat_session(run_suffix)
url = _api_url_builder(run_suffix, "/chat/send-message-simple-api/")
new_message_request = BasicCreateChatMessageRequest(
chat_session_id=chat_session_id,
message=query,
retrieval_options=retrieval_options,
query_override=query,
)
url = _api_url_builder(run_suffix, "/query/answer-with-quote/")
headers = {
"Content-Type": "application/json",
}
body = new_message_request.dict()
body["user"] = None
try:
response_json = requests.post(url, headers=GENERAL_HEADERS, json=body).json()
simple_search_docs = response_json.get("simple_search_docs", [])
answer = response_json.get("answer", "")
response_json = requests.post(url, headers=headers, json=body).json()
context_data_list = response_json.get("contexts", {}).get("contexts", [])
answer = response_json.get("answer", "") or ""
except Exception as e:
print("Failed to answer the questions:")
print(f"\t {str(e)}")
print("trying again")
print("Try restarting vespa container and trying agian")
raise e
_delete_chat_session(chat_session_id, run_suffix)
return simple_search_docs, answer
return context_data_list, answer
@retry(tries=10, delay=10)
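As a standalone illustration of the new request flow, here is a hedged sketch that posts the same payload shape as `DirectQARequest` to the one-shot endpoint using plain dicts, so it runs without importing the danswer models. The field names, the `/query/answer-with-quote/` path, the `user: None` key, and the response parsing come from the diff above; the base URL/port, the persona and prompt ids, and the lowercase enum strings (`"user"`, `"always"`) are assumptions.

```python
import requests

API_URL = "http://localhost:8080"  # assumed address of a running API server
GENERAL_HEADERS = {"Content-Type": "application/json"}


def one_shot_answer(query: str, only_retrieve_docs: bool) -> tuple[list[dict], str]:
    # Mirrors the DirectQARequest body built in get_answer_from_query above.
    body = {
        "messages": [{"message": query, "sender": None, "role": "user"}],
        "prompt_id": 0,
        "persona_id": 0,
        "retrieval_options": {
            "run_search": "always",
            "real_time": True,
            "filters": {
                "source_type": None,
                "document_set": None,
                "tags": None,
                "access_control_list": None,
            },
            "enable_auto_detect_filters": False,
        },
        "chain_of_thought": False,
        "return_contexts": True,
        # When true, the backend skips LLM generation and only retrieves docs.
        "skip_gen_ai_answer_generation": only_retrieve_docs,
        "user": None,
    }
    response_json = requests.post(
        API_URL + "/query/answer-with-quote/", headers=GENERAL_HEADERS, json=body
    ).json()
    context_data_list = response_json.get("contexts", {}).get("contexts", [])
    answer = response_json.get("answer", "") or ""
    return context_data_list, answer
```

With `only_retrieve_docs` set to true, `answer` is expected to come back empty and only the retrieved contexts are meaningful, which is what lets the eval run avoid LLM API spend.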


@@ -117,6 +117,7 @@ def _process_question(question_data: dict, config: dict, question_number: int) -
print(f"query: {query}")
context_data_list, answer = get_answer_from_query(
query=query,
only_retrieve_docs=config["only_retrieve_docs"],
run_suffix=config["run_suffix"],
)
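To show how the new flag flows from the config file into the call above, here is a small hedged driver-loop sketch. The `only_retrieve_docs` and `run_suffix` config keys are the ones used in `_process_question`; the question data format, the example values, and the module path for the import are assumptions.

```python
# Module path assumed to match the test package layout used for cli_utils above.
from tests.regression.answer_quality.api_utils import get_answer_from_query

questions = [{"question": "How do I configure SSO?"}]  # illustrative data
config = {"only_retrieve_docs": True, "run_suffix": ""}  # illustrative values

for question_data in questions:
    query = question_data["question"]
    context_data_list, answer = get_answer_from_query(
        query=query,
        only_retrieve_docs=config["only_retrieve_docs"],
        run_suffix=config["run_suffix"],
    )
    if config["only_retrieve_docs"]:
        # No LLM call was made, so only the retrieved documents are meaningful.
        print(f"{query}: retrieved {len(context_data_list)} contexts")
    else:
        print(f"{query}: {answer}")
```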


@@ -22,6 +22,9 @@ launch_web_ui: false
# Whether to only run Vespa and Postgres
only_state: false
# Only retrieve documents, not LLM response
only_retrieve_docs: false
# Whether to use a cloud GPU for processing
use_cloud_gpu: false