More latency logging + add limit/offset support

Authored by Weves on 2024-01-21 12:04:35 -08:00; committed by Chris Weaver
parent 777521a437
commit 2c38033ef5
5 changed files with 25 additions and 5 deletions

View File

@@ -96,6 +96,7 @@ class KeywordCapable(abc.ABC):
         filters: IndexFilters,
         time_decay_multiplier: float,
         num_to_retrieve: int,
+        offset: int = 0,
     ) -> list[InferenceChunk]:
         raise NotImplementedError
@@ -108,6 +109,7 @@ class VectorCapable(abc.ABC):
         filters: IndexFilters,
         time_decay_multiplier: float,
         num_to_retrieve: int,
+        offset: int = 0,
     ) -> list[InferenceChunk]:
         raise NotImplementedError
@@ -120,6 +122,7 @@ class HybridCapable(abc.ABC):
         filters: IndexFilters,
         time_decay_multiplier: float,
         num_to_retrieve: int,
+        offset: int = 0,
         hybrid_alpha: float | None = None,
     ) -> list[InferenceChunk]:
         raise NotImplementedError
@@ -132,6 +135,7 @@ class AdminCapable(abc.ABC):
         query: str,
         filters: IndexFilters,
         num_to_retrieve: int,
+        offset: int = 0,
     ) -> list[InferenceChunk]:
         raise NotImplementedError
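
Because `offset` defaults to 0 in every retrieval interface above, existing implementations and call sites are untouched, while new callers can page through results. A minimal caller-side sketch, assuming a `document_index` implementing the hybrid interface plus pre-built `query` and `filters` from the calling context (the method name `hybrid_retrieval` is an assumption; only its parameters appear in this hunk):

PAGE_SIZE = 50  # hypothetical page size

def fetch_page(page: int) -> list[InferenceChunk]:
    # page 0 passes offset=0 and reproduces the pre-commit behavior exactly
    return document_index.hybrid_retrieval(
        query=query,
        filters=filters,
        time_decay_multiplier=1.0,
        num_to_retrieve=PAGE_SIZE,
        offset=page * PAGE_SIZE,
    )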

View File

@@ -748,6 +748,7 @@ class VespaIndex(DocumentIndex):
         filters: IndexFilters,
         time_decay_multiplier: float,
         num_to_retrieve: int = NUM_RETURNED_HITS,
+        offset: int = 0,
         edit_keyword_query: bool = EDIT_KEYWORD_QUERY,
     ) -> list[InferenceChunk]:
         # IMPORTANT: THIS FUNCTION IS NOT UP TO DATE, DOES NOT WORK CORRECTLY
@@ -769,7 +770,7 @@ class VespaIndex(DocumentIndex):
             "query": final_query,
             "input.query(decay_factor)": str(DOC_TIME_DECAY * time_decay_multiplier),
             "hits": num_to_retrieve,
-            "offset": 0,
+            "offset": offset,
             "ranking.profile": "keyword_search",
             "timeout": _VESPA_TIMEOUT,
         }
@@ -782,6 +783,7 @@ class VespaIndex(DocumentIndex):
         filters: IndexFilters,
         time_decay_multiplier: float,
         num_to_retrieve: int = NUM_RETURNED_HITS,
+        offset: int = 0,
         distance_cutoff: float | None = SEARCH_DISTANCE_CUTOFF,
         edit_keyword_query: bool = EDIT_KEYWORD_QUERY,
     ) -> list[InferenceChunk]:
@@ -811,7 +813,7 @@ class VespaIndex(DocumentIndex):
             "input.query(query_embedding)": str(query_embedding),
             "input.query(decay_factor)": str(DOC_TIME_DECAY * time_decay_multiplier),
             "hits": num_to_retrieve,
-            "offset": 0,
+            "offset": offset,
             "ranking.profile": "semantic_search",
             "timeout": _VESPA_TIMEOUT,
         }
@@ -824,6 +826,7 @@ class VespaIndex(DocumentIndex):
         filters: IndexFilters,
         time_decay_multiplier: float,
         num_to_retrieve: int,
+        offset: int = 0,
         hybrid_alpha: float | None = HYBRID_ALPHA,
         title_content_ratio: float | None = TITLE_CONTENT_RATIO,
         distance_cutoff: float | None = SEARCH_DISTANCE_CUTOFF,
@@ -861,7 +864,7 @@ class VespaIndex(DocumentIndex):
             if title_content_ratio is not None
             else TITLE_CONTENT_RATIO,
             "hits": num_to_retrieve,
-            "offset": 0,
+            "offset": offset,
             "ranking.profile": "hybrid_search",
             "timeout": _VESPA_TIMEOUT,
         }
@@ -873,6 +876,7 @@ class VespaIndex(DocumentIndex):
         query: str,
         filters: IndexFilters,
         num_to_retrieve: int = NUM_RETURNED_HITS,
+        offset: int = 0,
     ) -> list[InferenceChunk]:
         vespa_where_clauses = _build_vespa_filters(filters, include_hidden=True)
         yql = (
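
The Vespa side is mechanical: each retrieval method forwards its new `offset` argument into the request body where 0 was previously hardcoded, and Vespa applies the skip server-side against the ranked hit list. A sketch of the resulting parameters for the second page of 50 hybrid hits, reusing the request shape above (`yql`, `final_query`, and `_VESPA_TIMEOUT` come from the surrounding code):

page, page_size = 1, 50
params = {
    "yql": yql,
    "query": final_query,
    "hits": page_size,           # page size
    "offset": page * page_size,  # was always 0 before this commit
    "ranking.profile": "hybrid_search",
    "timeout": _VESPA_TIMEOUT,
}

Note that Vespa bounds paging depth through query-profile limits (`maxHits`/`maxOffset`), so very deep offsets may require raising those settings.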

View File

@@ -76,6 +76,7 @@ class SearchQuery(BaseModel):
     filters: IndexFilters
     recency_bias_multiplier: float
     num_hits: int = NUM_RETURNED_HITS
+    offset: int = 0
     search_type: SearchType = SearchType.HYBRID
     skip_rerank: bool = not ENABLE_RERANKING_REAL_TIME_FLOW
     # Only used if not skip_rerank
@@ -100,8 +101,9 @@ class RetrievalDetails(BaseModel):
     # the query, if None, then use Persona settings
     filters: BaseFilters | None = None
     enable_auto_detect_filters: bool | None = None
-    # TODO Pagination/Offset options
-    # offset: int | None = None
+    # if None, no offset / limit
+    offset: int | None = None
+    limit: int | None = None
 class SearchDoc(BaseModel):
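
On the API side pagination is opt-in: both new `RetrievalDetails` fields default to None, which keeps the previous behavior. A small usage sketch (the import path is assumed):

from danswer.search.models import RetrievalDetails  # import path assumed

default_request = RetrievalDetails()                 # no offset, default limit
second_page = RetrievalDetails(offset=50, limit=50)  # skip 50, return the next 50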

View File

@@ -3,6 +3,7 @@ from sqlalchemy.orm import Session
 from danswer.configs.chat_configs import DISABLE_LLM_CHUNK_FILTER
 from danswer.configs.chat_configs import DISABLE_LLM_FILTER_EXTRACTION
 from danswer.configs.chat_configs import FAVOR_RECENT_DECAY_MULTIPLIER
+from danswer.configs.chat_configs import NUM_RETURNED_HITS
 from danswer.configs.model_configs import ENABLE_RERANKING_ASYNC_FLOW
 from danswer.configs.model_configs import ENABLE_RERANKING_REAL_TIME_FLOW
 from danswer.db.models import Persona
@@ -21,11 +22,13 @@ from danswer.secondary_llm_flows.time_filter import extract_time_filter
 from danswer.utils.logger import setup_logger
 from danswer.utils.threadpool_concurrency import FunctionCall
 from danswer.utils.threadpool_concurrency import run_functions_in_parallel
+from danswer.utils.timing import log_function_time
 logger = setup_logger()
+@log_function_time(print_only=True)
 def retrieval_preprocessing(
     query: str,
     retrieval_details: RetrievalDetails,
@@ -163,6 +166,10 @@ def retrieval_preprocessing(
             search_type=persona.search_type,
             filters=final_filters,
             recency_bias_multiplier=recency_bias_multiplier,
+            num_hits=retrieval_details.limit
+            if retrieval_details.limit is not None
+            else NUM_RETURNED_HITS,
+            offset=retrieval_details.offset or 0,
             skip_rerank=skip_reranking,
             skip_llm_chunk_filter=not llm_chunk_filter,
         ),
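
The other half of the commit is latency logging: `retrieval_preprocessing` here, and `embed_query` and `filter_chunks` below, gain a `@log_function_time(print_only=True)` decorator. Its implementation lives in `danswer.utils.timing` and is not part of this diff; a hedged sketch of what such a decorator typically looks like:

import functools
import logging
import time
from collections.abc import Callable
from typing import Any

logger = logging.getLogger(__name__)

def log_function_time(print_only: bool = False) -> Callable[[Callable], Callable]:
    # Sketch only; the real danswer implementation may differ.
    def decorator(func: Callable) -> Callable:
        @functools.wraps(func)
        def wrapped(*args: Any, **kwargs: Any) -> Any:
            start = time.monotonic()
            try:
                return func(*args, **kwargs)
            finally:
                elapsed = time.monotonic() - start
                # print_only=True presumably means "log the latency,
                # don't also record it as a stored metric"
                logger.info("%s took %.2f seconds", func.__name__, elapsed)
        return wrapped
    return decorator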

View File

@@ -76,6 +76,7 @@ def query_processing(
     return query
+@log_function_time(print_only=True)
 def embed_query(
     query: str,
     prefix: str = ASYM_QUERY_PREFIX,
@@ -163,6 +164,7 @@ def doc_index_retrieval(
         filters=query.filters,
         time_decay_multiplier=query.recency_bias_multiplier,
         num_to_retrieve=query.num_hits,
+        offset=query.offset,
         hybrid_alpha=hybrid_alpha,
     )
@@ -434,6 +436,7 @@ def rerank_chunks(
     return ranked_chunks
+@log_function_time(print_only=True)
 def filter_chunks(
     query: SearchQuery,
     chunks_to_filter: list[InferenceChunk],
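
Taken together, the pagination change amounts to a windowed slice over the ranked results, computed server-side by Vespa via `hits`/`offset` rather than in Python. A self-contained illustration of that contract (the default of 50 for NUM_RETURNED_HITS is assumed):

NUM_RETURNED_HITS = 50  # assumed default, for illustration only

def apply_pagination(
    ranked_ids: list[str], offset: int | None, limit: int | None
) -> list[str]:
    start = offset or 0  # None -> no offset
    size = limit if limit is not None else NUM_RETURNED_HITS
    return ranked_ids[start : start + size]

docs = [f"doc-{i}" for i in range(120)]
assert apply_pagination(docs, offset=None, limit=None) == docs[:50]  # old behavior
assert apply_pagination(docs, offset=50, limit=25) == docs[50:75]    # third page of 25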