Switched eval api target back to oneshotqa (#1902)

hagen-danswer authored 2024-07-22 20:55:18 -07:00; committed by GitHub
parent 4029233df0
commit a0b46c60c6
4 changed files with 38 additions and 55 deletions

View File

@@ -63,6 +63,9 @@ Edit `search_test_config.yaml` to set:
   - Set this to true if you want to use the UI during/after the testing process
 - only_state
   - Whether to only run Vespa and Postgres
+- only_retrieve_docs
+  - Set true to only retrieve documents, not LLM response
+  - This is to save on API costs
 - use_cloud_gpu
   - Set to true or false depending on if you want to use the remote gpu
   - Only need to set this if use_cloud_gpu is true

View File

@@ -2,14 +2,15 @@ import requests
 from retry import retry
 from danswer.configs.constants import DocumentSource
+from danswer.configs.constants import MessageType
 from danswer.connectors.models import InputType
 from danswer.db.enums import IndexingStatus
+from danswer.one_shot_answer.models import DirectQARequest
+from danswer.one_shot_answer.models import ThreadMessage
 from danswer.search.models import IndexFilters
 from danswer.search.models import OptionalSearchSetting
 from danswer.search.models import RetrievalDetails
 from danswer.server.documents.models import ConnectorBase
-from danswer.server.query_and_chat.models import ChatSessionCreationRequest
-from ee.danswer.server.query_and_chat.models import BasicCreateChatMessageRequest
 from tests.regression.answer_quality.cli_utils import get_api_server_host_port


 GENERAL_HEADERS = {"Content-Type": "application/json"}
@@ -19,38 +20,10 @@ def _api_url_builder(run_suffix: str, api_path: str) -> str:
     return f"http://localhost:{get_api_server_host_port(run_suffix)}" + api_path


-def _create_new_chat_session(run_suffix: str) -> int:
-    create_chat_request = ChatSessionCreationRequest(
-        persona_id=0,
-        description=None,
-    )
-    body = create_chat_request.dict()
-    create_chat_url = _api_url_builder(run_suffix, "/chat/create-chat-session/")
-
-    response_json = requests.post(
-        create_chat_url, headers=GENERAL_HEADERS, json=body
-    ).json()
-    chat_session_id = response_json.get("chat_session_id")
-
-    if isinstance(chat_session_id, int):
-        return chat_session_id
-    else:
-        raise RuntimeError(response_json)
-
-
-def _delete_chat_session(chat_session_id: int, run_suffix: str) -> None:
-    delete_chat_url = _api_url_builder(
-        run_suffix, f"/chat/delete-chat-session/{chat_session_id}"
-    )
-
-    response = requests.delete(delete_chat_url, headers=GENERAL_HEADERS)
-    if response.status_code != 200:
-        raise RuntimeError(response.__dict__)
-
-
 @retry(tries=5, delay=5)
-def get_answer_from_query(query: str, run_suffix: str) -> tuple[list[str], str]:
+def get_answer_from_query(
+    query: str, only_retrieve_docs: bool, run_suffix: str
+) -> tuple[list[str], str]:
     filters = IndexFilters(
         source_type=None,
         document_set=None,
@@ -58,39 +31,42 @@ def get_answer_from_query(query: str, run_suffix: str) -> tuple[list[str], str]:
         tags=None,
         access_control_list=None,
     )
-    retrieval_options = RetrievalDetails(
-        run_search=OptionalSearchSetting.ALWAYS,
-        real_time=True,
-        filters=filters,
-        enable_auto_detect_filters=False,
+
+    messages = [ThreadMessage(message=query, sender=None, role=MessageType.USER)]
+
+    new_message_request = DirectQARequest(
+        messages=messages,
+        prompt_id=0,
+        persona_id=0,
+        retrieval_options=RetrievalDetails(
+            run_search=OptionalSearchSetting.ALWAYS,
+            real_time=True,
+            filters=filters,
+            enable_auto_detect_filters=False,
+        ),
+        chain_of_thought=False,
+        return_contexts=True,
+        skip_gen_ai_answer_generation=only_retrieve_docs,
     )

-    chat_session_id = _create_new_chat_session(run_suffix)
-
-    url = _api_url_builder(run_suffix, "/chat/send-message-simple-api/")
-
-    new_message_request = BasicCreateChatMessageRequest(
-        chat_session_id=chat_session_id,
-        message=query,
-        retrieval_options=retrieval_options,
-        query_override=query,
-    )
+    url = _api_url_builder(run_suffix, "/query/answer-with-quote/")
+    headers = {
+        "Content-Type": "application/json",
+    }

     body = new_message_request.dict()
     body["user"] = None
     try:
-        response_json = requests.post(url, headers=GENERAL_HEADERS, json=body).json()
-        simple_search_docs = response_json.get("simple_search_docs", [])
-        answer = response_json.get("answer", "")
+        response_json = requests.post(url, headers=headers, json=body).json()
+        context_data_list = response_json.get("contexts", {}).get("contexts", [])
+        answer = response_json.get("answer", "") or ""
     except Exception as e:
         print("Failed to answer the questions:")
         print(f"\t {str(e)}")
-        print("trying again")
+        print("Try restarting vespa container and trying agian")
         raise e

-    _delete_chat_session(chat_session_id, run_suffix)
-
-    return simple_search_docs, answer
+    return context_data_list, answer


 @retry(tries=10, delay=10)
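
For reference, the request the rewritten helper now sends can be reproduced with plain requests and a hand-built JSON body. This is a minimal sketch, not the harness code: the endpoint path, field names, and response shape come from the diff above, while the host/port, the example query, and the serialized enum values ("user", "always") are assumptions.

import requests

# Hypothetical API server address; the real harness resolves the port per run_suffix.
API_URL = "http://localhost:8080/query/answer-with-quote/"

body = {
    # ThreadMessage fields from the diff; "user" as the role string is an assumption
    "messages": [{"message": "What is Danswer?", "sender": None, "role": "user"}],
    "prompt_id": 0,
    "persona_id": 0,
    "retrieval_options": {
        "run_search": "always",  # assumed serialization of OptionalSearchSetting.ALWAYS
        "real_time": True,
        "filters": {
            "source_type": None,
            "document_set": None,
            "time_cutoff": None,
            "tags": None,
            "access_control_list": None,
        },
        "enable_auto_detect_filters": False,
    },
    "chain_of_thought": False,
    "return_contexts": True,
    "skip_gen_ai_answer_generation": True,  # this is what only_retrieve_docs toggles
    "user": None,
}

response_json = requests.post(
    API_URL, headers={"Content-Type": "application/json"}, json=body
).json()
context_docs = response_json.get("contexts", {}).get("contexts", [])
answer = response_json.get("answer", "") or ""
print(f"{len(context_docs)} docs retrieved; answer: {answer!r}")

Setting skip_gen_ai_answer_generation to True is the cost-saving path: documents are still retrieved and returned under "contexts", but no LLM generation is requested, so the answer field is expected to come back empty.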

View File

@@ -117,6 +117,7 @@ def _process_question(question_data: dict, config: dict, question_number: int) -
     print(f"query: {query}")

     context_data_list, answer = get_answer_from_query(
         query=query,
+        only_retrieve_docs=config["only_retrieve_docs"],
         run_suffix=config["run_suffix"],
     )

View File

@@ -22,6 +22,9 @@ launch_web_ui: false
 # Whether to only run Vespa and Postgres
 only_state: false

+# Only retrieve documents, not LLM response
+only_retrieve_docs: false
+
 # Whether to use a cloud GPU for processing
 use_cloud_gpu: false
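
To see how the new flag travels end to end, here is a hedged sketch of the harness side. The only_retrieve_docs key and the get_answer_from_query signature come from this commit; loading the YAML with PyYAML, the module path used in the import, and the run_suffix handling are assumptions about the surrounding test code.

import yaml  # assumes PyYAML is installed

# Assumed module path; the diff shows the file contents, not its location.
from tests.regression.answer_quality.api_utils import get_answer_from_query

with open("search_test_config.yaml") as f:
    config = yaml.safe_load(f)

context_data_list, answer = get_answer_from_query(
    query="What is Danswer?",
    only_retrieve_docs=config["only_retrieve_docs"],
    run_suffix=config.get("run_suffix", ""),  # assumed to be injected by the harness
)

if config["only_retrieve_docs"]:
    # Generation was skipped, so only the retrieved documents are meaningful.
    print(f"retrieved {len(context_data_list)} docs")
else:
    print(answer)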