diff --git a/backend/scripts/benchmark_search.py b/backend/scripts/benchmark_search.py index 88d94a830..e3d80505e 100644 --- a/backend/scripts/benchmark_search.py +++ b/backend/scripts/benchmark_search.py @@ -126,7 +126,7 @@ def generate_random_sentence(): return sentence -def _measure_hybrid_search_latency(filters: dict = {}): +def _measure_hybrid_search_latency(filters: dict | None = None): search_type = os.environ.get("VESPA_RANKING_PROFILE", "hybrid_search") start = time.monotonic() @@ -135,7 +135,7 @@ def _measure_hybrid_search_latency(filters: dict = {}): json={ "query": generate_random_sentence(), "collection": DOCUMENT_INDEX_NAME, - "filters": filters, + "filters": filters or {}, "enable_auto_detect_filters": False, "search_type": SearchType.HYBRID.value if search_type == "hybrid_search" diff --git a/backend/scripts/benchmark_search_isolated.py b/backend/scripts/benchmark_search_isolated.py index 21d9d1366..a97ba799e 100644 --- a/backend/scripts/benchmark_search_isolated.py +++ b/backend/scripts/benchmark_search_isolated.py @@ -1,12 +1,14 @@ import os import random import time +from collections.abc import Mapping import nltk +import requests from danswer.configs.app_configs import DOC_TIME_DECAY from danswer.configs.app_configs import DOCUMENT_INDEX_NAME -from danswer.document_index.vespa.index import _query_vespa +from danswer.document_index.vespa.index import SEARCH_ENDPOINT from danswer.search.search_runner import embed_query # Download the wordlist @@ -127,6 +129,25 @@ def generate_random_sentence(): return sentence +def _query_vespa(query_params: Mapping[str, str | int]) -> list: + response = requests.get( + SEARCH_ENDPOINT, + params=dict( + **query_params, + **{ + "presentation.timing": True, + }, + ), + ) + response.raise_for_status() + + response_json = response.json() + print("timing info", response_json.get("timing")) + hits = response_json["root"].get("children", []) + + return hits + + def _measure_vespa_latency(filters: dict = {}): # yql = ( # VespaIndex.yql_base @@ -135,8 +156,8 @@ def _measure_vespa_latency(filters: dict = {}): # ) yql = ( f"select " - f"documentid, " - f"content " + f"documentid " + # f"content " f"from {DOCUMENT_INDEX_NAME} where " + '({grammar: "weakAnd"}userInput(@query))' ) query = generate_random_sentence()