danswer/backend/scripts/query_time_check/test_query_times.py
rkuo-danswer 4fe99d05fd
add timings for syncing (#3798)
* add timings for syncing

* add more logging

* more debugging

* refactor multipass/db check out of VespaIndex

* circular imports?

* more debugging

* add logs

* various improvements

* additional logs to narrow down issue

* use global httpx pool for the main vespa flows in celery. Use in more places eventually.

* cleanup debug logging, etc

* remove debug logging

* this should use the secondary index

* mypy

* missed some logging

* review fixes

* refactor get_default_document_index to use search settings

* more missed logging

* fix circular refs

---------

Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
Co-authored-by: pablodanswer <pablo@danswer.ai>
2025-01-29 23:24:44 +00:00


"""
RUN THIS AFTER SEED_DUMMY_DOCS.PY
"""
import random
import time

from onyx.configs.constants import DocumentSource
from onyx.configs.model_configs import DOC_EMBEDDING_DIM
from onyx.context.search.models import IndexFilters
from onyx.db.engine import get_session_context_manager
from onyx.db.search_settings import get_current_search_settings
from onyx.document_index.document_index_utils import get_multipass_config
from onyx.document_index.vespa.index import VespaIndex
from scripts.query_time_check.seed_dummy_docs import TOTAL_ACL_ENTRIES_PER_CATEGORY
from scripts.query_time_check.seed_dummy_docs import TOTAL_DOC_SETS
from shared_configs.model_server_models import Embedding

# make sure these are smaller than TOTAL_ACL_ENTRIES_PER_CATEGORY and TOTAL_DOC_SETS, respectively
NUMBER_OF_ACL_ENTRIES_PER_QUERY = 6
NUMBER_OF_DOC_SETS_PER_QUERY = 2
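
# Minimal guard sketch enforcing the comment above: random.sample raises
# ValueError if a sample is larger than its population, so fail fast here
# instead of partway through a run.
assert NUMBER_OF_ACL_ENTRIES_PER_QUERY <= TOTAL_ACL_ENTRIES_PER_CATEGORY
assert NUMBER_OF_DOC_SETS_PER_QUERY <= TOTAL_DOC_SETS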


def get_slowest_99th_percentile(results: list[float]) -> float:
    # sort ascending and index at int(0.99 * n): with 1000 queries this is the
    # 990th entry, i.e. the cutoff into the slowest ~1% of queries
    return sorted(results)[int(0.99 * len(results))]


# Generate random filters
def _random_filters() -> IndexFilters:
    """
    Generate random filters for the query containing:
    - 1 random user email
    - NUMBER_OF_ACL_ENTRIES_PER_QUERY groups
    - NUMBER_OF_ACL_ENTRIES_PER_QUERY external groups
    - NUMBER_OF_DOC_SETS_PER_QUERY document sets

    A sketch of one generated filter is shown after this function.
    """
    access_control_list = [
        f"user_email:user_{random.randint(0, TOTAL_ACL_ENTRIES_PER_CATEGORY - 1)}@example.com",
    ]

    # sampled values are the group indices themselves
    acl_indices = random.sample(
        range(TOTAL_ACL_ENTRIES_PER_CATEGORY), NUMBER_OF_ACL_ENTRIES_PER_QUERY
    )
    for i in acl_indices:
        access_control_list.append(f"group:group_{i}")
        access_control_list.append(f"external_group:external_group_{i}")

    doc_sets = []
    doc_set_indices = random.sample(
        range(TOTAL_DOC_SETS), NUMBER_OF_DOC_SETS_PER_QUERY
    )
    for i in doc_set_indices:
        doc_sets.append(f"document_set:Document Set {i}")

    return IndexFilters(
        source_type=[DocumentSource.GOOGLE_DRIVE],
        document_set=doc_sets,
        tags=[],
        access_control_list=access_control_list,
    )
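
# For illustration only, one draw from _random_filters() produces values shaped
# like the following (indices are random; the numbers below are made up):
#   access_control_list = [
#       "user_email:user_17@example.com",
#       "group:group_3", "external_group:external_group_3",
#       ... 5 more group/external_group pairs ...
#   ]
#   document_set = ["document_set:Document Set 0", "document_set:Document Set 4"]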


def test_hybrid_retrieval_times(
    number_of_queries: int,
) -> None:
    # the session is only needed long enough to read the current search settings
    with get_session_context_manager() as db_session:
        search_settings = get_current_search_settings(db_session)
        multipass_config = get_multipass_config(search_settings)
        index_name = search_settings.index_name

    vespa_index = VespaIndex(
        index_name=index_name,
        secondary_index_name=None,
        large_chunks_enabled=multipass_config.enable_large_chunks,
        secondary_large_chunks_enabled=None,
    )

    # Generate random queries
    queries = [f"Random Query {i}" for i in range(number_of_queries)]

    # Generate random embeddings; only the dimensionality matters for
    # measuring retrieval latency
    embeddings = [
        Embedding([random.random() for _ in range(DOC_EMBEDDING_DIM)])
        for _ in range(number_of_queries)
    ]

    total_time = 0.0
    results = []
    for i in range(number_of_queries):
        start_time = time.time()

        vespa_index.hybrid_retrieval(
            query=queries[i],
            query_embedding=embeddings[i],
            final_keywords=None,
            filters=_random_filters(),
            hybrid_alpha=0.5,
            time_decay_multiplier=1.0,
            num_to_retrieve=50,
            offset=0,
            title_content_ratio=0.5,
        )

        end_time = time.time()
        query_time = end_time - start_time
        total_time += query_time
        results.append(query_time)

        print(f"Query {i + 1}: {query_time:.4f} seconds")

    avg_time = total_time / number_of_queries
    fast_time = min(results)
    slow_time = max(results)
    ninety_ninth_percentile = get_slowest_99th_percentile(results)

    # Write results to a file
    _OUTPUT_PATH = "query_times_results_large_more.txt"
    with open(_OUTPUT_PATH, "w") as f:
        f.write(f"Average query time: {avg_time:.4f} seconds\n")
        f.write(f"Fastest query: {fast_time:.4f} seconds\n")
        f.write(f"Slowest query: {slow_time:.4f} seconds\n")
        f.write(f"99th percentile: {ninety_ninth_percentile:.4f} seconds\n")
    print(f"Results written to {_OUTPUT_PATH}")

    print(f"\nAverage query time: {avg_time:.4f} seconds")
    print(f"Fastest query: {fast_time:.4f} seconds")
    print(f"Slowest query: {slow_time:.4f} seconds")
    print(f"99th percentile: {ninety_ninth_percentile:.4f} seconds")


if __name__ == "__main__":
    test_hybrid_retrieval_times(number_of_queries=1000)
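
# Usage sketch (paths assume the backend/ repo layout above; run with the usual
# app environment so the onyx imports and DB connection resolve):
#   python scripts/query_time_check/seed_dummy_docs.py
#   python scripts/query_time_check/test_query_times.py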