More latency logging + add limit/offset support

Authored by Weves on 2024-01-21 12:04:35 -08:00; committed by Chris Weaver
parent 777521a437
commit 2c38033ef5
5 changed files with 25 additions and 5 deletions

View File

@@ -96,6 +96,7 @@ class KeywordCapable(abc.ABC):
         filters: IndexFilters,
         time_decay_multiplier: float,
         num_to_retrieve: int,
+        offset: int = 0,
     ) -> list[InferenceChunk]:
         raise NotImplementedError
@@ -108,6 +109,7 @@ class VectorCapable(abc.ABC):
         filters: IndexFilters,
         time_decay_multiplier: float,
         num_to_retrieve: int,
+        offset: int = 0,
     ) -> list[InferenceChunk]:
         raise NotImplementedError
@@ -120,6 +122,7 @@ class HybridCapable(abc.ABC):
         filters: IndexFilters,
         time_decay_multiplier: float,
         num_to_retrieve: int,
+        offset: int = 0,
         hybrid_alpha: float | None = None,
     ) -> list[InferenceChunk]:
         raise NotImplementedError
@@ -132,6 +135,7 @@ class AdminCapable(abc.ABC):
         query: str,
         filters: IndexFilters,
         num_to_retrieve: int,
+        offset: int = 0,
     ) -> list[InferenceChunk]:
         raise NotImplementedError
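
Because `offset` defaults to 0 in every retrieval interface above, existing implementations and call sites are untouched, while new callers can page through results. A minimal caller-side sketch, assuming a `document_index` implementing the hybrid interface plus pre-built `query` and `filters` from the calling context (the method name `hybrid_retrieval` is an assumption; only its parameters appear in this hunk):

PAGE_SIZE = 50  # hypothetical page size

def fetch_page(page: int) -> list[InferenceChunk]:
    # page 0 passes offset=0 and reproduces the pre-commit behavior exactly
    return document_index.hybrid_retrieval(
        query=query,
        filters=filters,
        time_decay_multiplier=1.0,
        num_to_retrieve=PAGE_SIZE,
        offset=page * PAGE_SIZE,
    )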

View File

@@ -748,6 +748,7 @@ class VespaIndex(DocumentIndex):
         filters: IndexFilters,
         time_decay_multiplier: float,
         num_to_retrieve: int = NUM_RETURNED_HITS,
+        offset: int = 0,
         edit_keyword_query: bool = EDIT_KEYWORD_QUERY,
     ) -> list[InferenceChunk]:
         # IMPORTANT: THIS FUNCTION IS NOT UP TO DATE, DOES NOT WORK CORRECTLY
@@ -769,7 +770,7 @@ class VespaIndex(DocumentIndex):
             "query": final_query,
             "input.query(decay_factor)": str(DOC_TIME_DECAY * time_decay_multiplier),
             "hits": num_to_retrieve,
-            "offset": 0,
+            "offset": offset,
             "ranking.profile": "keyword_search",
             "timeout": _VESPA_TIMEOUT,
         }
@@ -782,6 +783,7 @@ class VespaIndex(DocumentIndex):
         filters: IndexFilters,
         time_decay_multiplier: float,
         num_to_retrieve: int = NUM_RETURNED_HITS,
+        offset: int = 0,
         distance_cutoff: float | None = SEARCH_DISTANCE_CUTOFF,
         edit_keyword_query: bool = EDIT_KEYWORD_QUERY,
     ) -> list[InferenceChunk]:
@@ -811,7 +813,7 @@ class VespaIndex(DocumentIndex):
             "input.query(query_embedding)": str(query_embedding),
             "input.query(decay_factor)": str(DOC_TIME_DECAY * time_decay_multiplier),
             "hits": num_to_retrieve,
-            "offset": 0,
+            "offset": offset,
             "ranking.profile": "semantic_search",
             "timeout": _VESPA_TIMEOUT,
         }
@@ -824,6 +826,7 @@ class VespaIndex(DocumentIndex):
         filters: IndexFilters,
         time_decay_multiplier: float,
         num_to_retrieve: int,
+        offset: int = 0,
         hybrid_alpha: float | None = HYBRID_ALPHA,
         title_content_ratio: float | None = TITLE_CONTENT_RATIO,
         distance_cutoff: float | None = SEARCH_DISTANCE_CUTOFF,
@@ -861,7 +864,7 @@ class VespaIndex(DocumentIndex):
             if title_content_ratio is not None
             else TITLE_CONTENT_RATIO,
             "hits": num_to_retrieve,
-            "offset": 0,
+            "offset": offset,
             "ranking.profile": "hybrid_search",
             "timeout": _VESPA_TIMEOUT,
         }
@@ -873,6 +876,7 @@ class VespaIndex(DocumentIndex):
         query: str,
         filters: IndexFilters,
         num_to_retrieve: int = NUM_RETURNED_HITS,
+        offset: int = 0,
     ) -> list[InferenceChunk]:
         vespa_where_clauses = _build_vespa_filters(filters, include_hidden=True)
         yql = (
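
The Vespa side is mechanical: each retrieval method forwards its new `offset` argument into the request body where 0 was previously hardcoded, and Vespa applies the skip server-side against the ranked hit list. A sketch of the resulting parameters for the second page of 50 hybrid hits, reusing the request shape above (`yql`, `final_query`, and `_VESPA_TIMEOUT` come from the surrounding code):

page, page_size = 1, 50
params = {
    "yql": yql,
    "query": final_query,
    "hits": page_size,           # page size
    "offset": page * page_size,  # was always 0 before this commit
    "ranking.profile": "hybrid_search",
    "timeout": _VESPA_TIMEOUT,
}

Note that Vespa bounds paging depth through query-profile limits (`maxHits`/`maxOffset`), so very deep offsets may require raising those settings.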

View File

@@ -76,6 +76,7 @@ class SearchQuery(BaseModel):
     filters: IndexFilters
     recency_bias_multiplier: float
     num_hits: int = NUM_RETURNED_HITS
+    offset: int = 0
     search_type: SearchType = SearchType.HYBRID
     skip_rerank: bool = not ENABLE_RERANKING_REAL_TIME_FLOW
     # Only used if not skip_rerank
@@ -100,8 +101,9 @@ class RetrievalDetails(BaseModel):
     # the query, if None, then use Persona settings
     filters: BaseFilters | None = None
     enable_auto_detect_filters: bool | None = None
-    # TODO Pagination/Offset options
-    # offset: int | None = None
+    # if None, no offset / limit
+    offset: int | None = None
+    limit: int | None = None
 class SearchDoc(BaseModel):
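
On the API side pagination is opt-in: both new `RetrievalDetails` fields default to None, which keeps the previous behavior. A small usage sketch (the import path is assumed):

from danswer.search.models import RetrievalDetails  # import path assumed

default_request = RetrievalDetails()                 # no offset, default limit
second_page = RetrievalDetails(offset=50, limit=50)  # skip 50, return the next 50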

View File

@@ -3,6 +3,7 @@ from sqlalchemy.orm import Session
 from danswer.configs.chat_configs import DISABLE_LLM_CHUNK_FILTER
 from danswer.configs.chat_configs import DISABLE_LLM_FILTER_EXTRACTION
 from danswer.configs.chat_configs import FAVOR_RECENT_DECAY_MULTIPLIER
+from danswer.configs.chat_configs import NUM_RETURNED_HITS
 from danswer.configs.model_configs import ENABLE_RERANKING_ASYNC_FLOW
 from danswer.configs.model_configs import ENABLE_RERANKING_REAL_TIME_FLOW
 from danswer.db.models import Persona
@@ -21,11 +22,13 @@ from danswer.secondary_llm_flows.time_filter import extract_time_filter
 from danswer.utils.logger import setup_logger
 from danswer.utils.threadpool_concurrency import FunctionCall
 from danswer.utils.threadpool_concurrency import run_functions_in_parallel
+from danswer.utils.timing import log_function_time
 logger = setup_logger()
+@log_function_time(print_only=True)
 def retrieval_preprocessing(
     query: str,
     retrieval_details: RetrievalDetails,
@@ -163,6 +166,10 @@ def retrieval_preprocessing(
             search_type=persona.search_type,
             filters=final_filters,
             recency_bias_multiplier=recency_bias_multiplier,
+            num_hits=retrieval_details.limit
+            if retrieval_details.limit is not None
+            else NUM_RETURNED_HITS,
+            offset=retrieval_details.offset or 0,
             skip_rerank=skip_reranking,
             skip_llm_chunk_filter=not llm_chunk_filter,
         ),
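
The other half of the commit is latency logging: `retrieval_preprocessing` here, and `embed_query` and `filter_chunks` below, gain a `@log_function_time(print_only=True)` decorator. Its implementation lives in `danswer.utils.timing` and is not part of this diff; a hedged sketch of what such a decorator typically looks like:

import functools
import logging
import time
from collections.abc import Callable
from typing import Any

logger = logging.getLogger(__name__)

def log_function_time(print_only: bool = False) -> Callable[[Callable], Callable]:
    # Sketch only; the real danswer implementation may differ.
    def decorator(func: Callable) -> Callable:
        @functools.wraps(func)
        def wrapped(*args: Any, **kwargs: Any) -> Any:
            start = time.monotonic()
            try:
                return func(*args, **kwargs)
            finally:
                elapsed = time.monotonic() - start
                # print_only=True presumably means "log the latency,
                # don't also record it as a stored metric"
                logger.info("%s took %.2f seconds", func.__name__, elapsed)
        return wrapped
    return decorator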

View File

@@ -76,6 +76,7 @@ def query_processing(
     return query
+@log_function_time(print_only=True)
 def embed_query(
     query: str,
     prefix: str = ASYM_QUERY_PREFIX,
@@ -163,6 +164,7 @@ def doc_index_retrieval(
         filters=query.filters,
         time_decay_multiplier=query.recency_bias_multiplier,
         num_to_retrieve=query.num_hits,
+        offset=query.offset,
         hybrid_alpha=hybrid_alpha,
     )
@@ -434,6 +436,7 @@ def rerank_chunks(
     return ranked_chunks
+@log_function_time(print_only=True)
 def filter_chunks(
     query: SearchQuery,
     chunks_to_filter: list[InferenceChunk],
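
Taken together, the pagination change amounts to a windowed slice over the ranked results, computed server-side by Vespa via `hits`/`offset` rather than in Python. A self-contained illustration of that contract (the default of 50 for NUM_RETURNED_HITS is assumed):

NUM_RETURNED_HITS = 50  # assumed default, for illustration only

def apply_pagination(
    ranked_ids: list[str], offset: int | None, limit: int | None
) -> list[str]:
    start = offset or 0  # None -> no offset
    size = limit if limit is not None else NUM_RETURNED_HITS
    return ranked_ids[start : start + size]

docs = [f"doc-{i}" for i in range(120)]
assert apply_pagination(docs, offset=None, limit=None) == docs[:50]  # old behavior
assert apply_pagination(docs, offset=50, limit=25) == docs[50:75]    # third page of 25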