final examples and logging

2025-07-09 14:11:33 +02:00 · 2025-03-16 13:06:19 -07:00
parent ab11bf6552
commit 625936306f
2 changed files with 35 additions and 19 deletions
--- a/backend/onyx/chat/process_message.py
+++ b/backend/onyx/chat/process_message.py
@ -358,24 +358,37 @@ def stream_chat_message_objects(
    llm: LLM

    test_questions = [
-        "weather in Munich",
-        "weather in New York",
-        # "what is the overlap between finance and economics",
-        # "effects taking vitamin c pills vs eating veggies health outcomes",
-        # "professions people good math",
-        # "biomedical engineers design cutting-edge medical equipment important skill set",
-        # "How do biomedical engineers design cutting-edge medical equipment? And what is the most important skill set?",
-        # "average power output US nuclear power plant",
-        # "typical power range small modular reactors",
-        # "SMRs power industry",
-        # "best use case Onyx AI company",
-        # "techniques calculate square root",
-        # "daily vitamin C requirement adult women",
-        # "boil ocean",
-        # "best soccer player ever"
+        "big bang vs steady state theory",
+        "astronomy",
+        "trace energy momentum tensor conformal field theory",
+        "evidence Big Bang",
+        "Neil Armstrong play tennis moon",
+        "current temperature Hawaii New York Munich",
+        "win quadradoodle",
+        "best practices coding Java",
+        "classes related software engineering",
+        "current temperature Munich",
+        "what is the most important concept in biology",
+        "subfields of finance",
+        "what is the overlap between finance and economics",
+        "effects taking vitamin c pills vs eating veggies health outcomes",
+        "professions people good math",
+        "biomedical engineers design cutting-edge medical equipment important skill set",
+        "How do biomedical engineers design cutting-edge medical equipment? And what is the most important skill set?",
+        "average power output US nuclear power plant",
+        "typical power range small modular reactors",
+        "SMRs power industry",
+        "best use case Onyx AI company",
+        "techniques calculate square root",
+        "daily vitamin C requirement adult women",
+        "boil ocean",
+        "best soccer player ever",
    ]

-    for test_question in test_questions:
+    for test_question_num, test_question in enumerate(test_questions):
+        logger.info(
+            f"------- Running test question {test_question_num + 1} of {len(test_questions)}"
+        )
        try:
            user_id = user.id if user is not None else None

--- a/backend/onyx/document_index/vespa/chunk_retrieval.py
+++ b/backend/onyx/document_index/vespa/chunk_retrieval.py
@ -333,7 +333,8 @@ def query_vespa(

    search_time = 0.0

-    for query_alpha in [0.4, 0.7, 1.0]:
+    alphas: list[float] = [0.4, 0.7, 1.0]
+    for query_alpha in alphas:
        date_time_start = datetime.now()

        # Create a mutable copy of the query_params
@ -408,14 +409,15 @@ def query_vespa(
        date_time_end = datetime.now()
        search_time += (date_time_end - date_time_start).microseconds / 1000000

+    avg_search_time = search_time / len(alphas)
    ranking_stats.append(
        (
            "Timing",
            query_alpha,
            cast(str, query_params["query"]).strip(),
            "",
-            "",
-            search_time,
+            "Avg:",
+            avg_search_time,
        )
    )

@ -426,6 +428,7 @@ def query_vespa(

    inference_chunks = [_vespa_hit_to_inference_chunk(hit) for hit in filtered_hits]
    # Good Debugging Spot
+    logger.info(f"Search done for all alphs - avg timing: {avg_search_time}")
    return inference_chunks