More latency logging + add limit/offset support

Weves, 2024-01-21 12:04:35 -08:00, committed by Chris Weaver
parent 777521a437
commit 2c38033ef5
5 changed files with 25 additions and 5 deletions

View File

@@ -96,6 +96,7 @@ class KeywordCapable(abc.ABC):
         filters: IndexFilters,
         time_decay_multiplier: float,
         num_to_retrieve: int,
+        offset: int = 0,
     ) -> list[InferenceChunk]:
         raise NotImplementedError

@@ -108,6 +109,7 @@ class VectorCapable(abc.ABC):
         filters: IndexFilters,
         time_decay_multiplier: float,
         num_to_retrieve: int,
+        offset: int = 0,
     ) -> list[InferenceChunk]:
         raise NotImplementedError

@@ -120,6 +122,7 @@ class HybridCapable(abc.ABC):
         filters: IndexFilters,
         time_decay_multiplier: float,
         num_to_retrieve: int,
+        offset: int = 0,
         hybrid_alpha: float | None = None,
     ) -> list[InferenceChunk]:
         raise NotImplementedError

@@ -132,6 +135,7 @@ class AdminCapable(abc.ABC):
         query: str,
         filters: IndexFilters,
         num_to_retrieve: int,
+        offset: int = 0,
     ) -> list[InferenceChunk]:
         raise NotImplementedError
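
The new offset parameter gives every retrieval interface a uniform way to page through results. As a rough illustration (not part of this commit), a caller could combine num_to_retrieve with offset to walk an index page by page; the fetch_all_pages helper below and its index/filters arguments are hypothetical, and the method name hybrid_retrieval is assumed from the HybridCapable interface:

def fetch_all_pages(index, query, filters, page_size=50, max_pages=10):
    # Illustrative paging loop over any implementation of the interfaces above.
    results = []
    for page in range(max_pages):
        chunks = index.hybrid_retrieval(
            query=query,
            filters=filters,
            time_decay_multiplier=1.0,
            num_to_retrieve=page_size,
            offset=page * page_size,  # offset is the parameter added here
        )
        results.extend(chunks)
        if len(chunks) < page_size:
            break  # ran out of results; stop paging
    return results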

View File

@@ -748,6 +748,7 @@ class VespaIndex(DocumentIndex):
         filters: IndexFilters,
         time_decay_multiplier: float,
         num_to_retrieve: int = NUM_RETURNED_HITS,
+        offset: int = 0,
         edit_keyword_query: bool = EDIT_KEYWORD_QUERY,
     ) -> list[InferenceChunk]:
         # IMPORTANT: THIS FUNCTION IS NOT UP TO DATE, DOES NOT WORK CORRECTLY

@@ -769,7 +770,7 @@ class VespaIndex(DocumentIndex):
             "query": final_query,
             "input.query(decay_factor)": str(DOC_TIME_DECAY * time_decay_multiplier),
             "hits": num_to_retrieve,
-            "offset": 0,
+            "offset": offset,
             "ranking.profile": "keyword_search",
             "timeout": _VESPA_TIMEOUT,
         }

@@ -782,6 +783,7 @@ class VespaIndex(DocumentIndex):
         filters: IndexFilters,
         time_decay_multiplier: float,
         num_to_retrieve: int = NUM_RETURNED_HITS,
+        offset: int = 0,
         distance_cutoff: float | None = SEARCH_DISTANCE_CUTOFF,
         edit_keyword_query: bool = EDIT_KEYWORD_QUERY,
     ) -> list[InferenceChunk]:

@@ -811,7 +813,7 @@ class VespaIndex(DocumentIndex):
             "input.query(query_embedding)": str(query_embedding),
             "input.query(decay_factor)": str(DOC_TIME_DECAY * time_decay_multiplier),
             "hits": num_to_retrieve,
-            "offset": 0,
+            "offset": offset,
             "ranking.profile": "semantic_search",
             "timeout": _VESPA_TIMEOUT,
         }

@@ -824,6 +826,7 @@ class VespaIndex(DocumentIndex):
         filters: IndexFilters,
         time_decay_multiplier: float,
         num_to_retrieve: int,
+        offset: int = 0,
         hybrid_alpha: float | None = HYBRID_ALPHA,
         title_content_ratio: float | None = TITLE_CONTENT_RATIO,
         distance_cutoff: float | None = SEARCH_DISTANCE_CUTOFF,

@@ -861,7 +864,7 @@ class VespaIndex(DocumentIndex):
             if title_content_ratio is not None
             else TITLE_CONTENT_RATIO,
             "hits": num_to_retrieve,
-            "offset": 0,
+            "offset": offset,
             "ranking.profile": "hybrid_search",
             "timeout": _VESPA_TIMEOUT,
         }

@@ -873,6 +876,7 @@ class VespaIndex(DocumentIndex):
         query: str,
         filters: IndexFilters,
         num_to_retrieve: int = NUM_RETURNED_HITS,
+        offset: int = 0,
     ) -> list[InferenceChunk]:
         vespa_where_clauses = _build_vespa_filters(filters, include_hidden=True)
         yql = (
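
In VespaIndex, the new offset is forwarded straight into Vespa's native "offset" query parameter next to "hits", so pagination happens inside Vespa rather than by slicing results client-side. A sketch of the resulting request parameters (values are illustrative; "yql" and the timeout are stand-ins, not taken from the diff):

params = {
    "yql": "...",                        # where-clause built from the filters
    "query": "acme quarterly report",    # example user query
    "hits": 50,                          # num_to_retrieve: the page size
    "offset": 100,                       # new: skip the first 100 ranked hits
    "ranking.profile": "hybrid_search",
    "timeout": "3s",                     # stand-in for _VESPA_TIMEOUT
}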

View File

@@ -76,6 +76,7 @@ class SearchQuery(BaseModel):
     filters: IndexFilters
     recency_bias_multiplier: float
     num_hits: int = NUM_RETURNED_HITS
+    offset: int = 0
     search_type: SearchType = SearchType.HYBRID
     skip_rerank: bool = not ENABLE_RERANKING_REAL_TIME_FLOW
     # Only used if not skip_rerank

@@ -100,8 +101,9 @@ class RetrievalDetails(BaseModel):
     # the query, if None, then use Persona settings
     filters: BaseFilters | None = None
     enable_auto_detect_filters: bool | None = None
-    # TODO Pagination/Offset options
-    # offset: int | None = None
+    # if None, no offset / limit
+    offset: int | None = None
+    limit: int | None = None


 class SearchDoc(BaseModel):
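
With offset and limit promoted from a TODO comment to real fields on RetrievalDetails, an API caller can ask for a specific page of results. A hedged sketch of the relevant request fragment (only the fields touched by this commit are shown; the surrounding payload shape is an assumption):

retrieval_options = {
    "filters": None,                  # None -> fall back to Persona settings
    "enable_auto_detect_filters": None,
    "offset": 25,                     # skip the first 25 ranked hits
    "limit": 25,                      # cap this page at 25 hits
}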

View File

@@ -3,6 +3,7 @@ from sqlalchemy.orm import Session
 from danswer.configs.chat_configs import DISABLE_LLM_CHUNK_FILTER
 from danswer.configs.chat_configs import DISABLE_LLM_FILTER_EXTRACTION
 from danswer.configs.chat_configs import FAVOR_RECENT_DECAY_MULTIPLIER
+from danswer.configs.chat_configs import NUM_RETURNED_HITS
 from danswer.configs.model_configs import ENABLE_RERANKING_ASYNC_FLOW
 from danswer.configs.model_configs import ENABLE_RERANKING_REAL_TIME_FLOW
 from danswer.db.models import Persona

@@ -21,11 +22,13 @@ from danswer.secondary_llm_flows.time_filter import extract_time_filter
 from danswer.utils.logger import setup_logger
 from danswer.utils.threadpool_concurrency import FunctionCall
 from danswer.utils.threadpool_concurrency import run_functions_in_parallel
+from danswer.utils.timing import log_function_time


 logger = setup_logger()


+@log_function_time(print_only=True)
 def retrieval_preprocessing(
     query: str,
     retrieval_details: RetrievalDetails,

@@ -163,6 +166,10 @@ def retrieval_preprocessing(
         search_type=persona.search_type,
         filters=final_filters,
         recency_bias_multiplier=recency_bias_multiplier,
+        num_hits=retrieval_details.limit
+        if retrieval_details.limit is not None
+        else NUM_RETURNED_HITS,
+        offset=retrieval_details.offset or 0,
         skip_rerank=skip_reranking,
         skip_llm_chunk_filter=not llm_chunk_filter,
     ),
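
retrieval_preprocessing is where the optional API-level limit/offset are translated into the concrete num_hits/offset on SearchQuery: limit falls back to NUM_RETURNED_HITS and offset falls back to 0. The same mapping, isolated as a standalone sketch (the helper name and the hard-coded constant are illustrative; the real default lives in danswer.configs.chat_configs):

NUM_RETURNED_HITS = 50  # stand-in value for the real config constant

def resolve_pagination(limit: int | None, offset: int | None) -> tuple[int, int]:
    # Mirrors the kwargs passed to SearchQuery in the hunk above.
    num_hits = limit if limit is not None else NUM_RETURNED_HITS
    return num_hits, offset or 0

assert resolve_pagination(None, None) == (50, 0)  # defaults preserved
assert resolve_pagination(10, 20) == (10, 20)     # explicit paging honored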

View File

@@ -76,6 +76,7 @@ def query_processing(
     return query


+@log_function_time(print_only=True)
 def embed_query(
     query: str,
     prefix: str = ASYM_QUERY_PREFIX,

@@ -163,6 +164,7 @@ def doc_index_retrieval(
         filters=query.filters,
         time_decay_multiplier=query.recency_bias_multiplier,
         num_to_retrieve=query.num_hits,
+        offset=query.offset,
         hybrid_alpha=hybrid_alpha,
     )

@@ -434,6 +436,7 @@ def rerank_chunks(
     return ranked_chunks


+@log_function_time(print_only=True)
 def filter_chunks(
     query: SearchQuery,
     chunks_to_filter: list[InferenceChunk],
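
The "more latency logging" half of the commit is the @log_function_time(print_only=True) decorator now applied to retrieval_preprocessing, embed_query, and filter_chunks. The sketch below is not the actual danswer.utils.timing implementation, only an assumption of what a decorator with this signature typically does: time the wrapped call and log the elapsed seconds, with print_only controlling whether the timing is merely emitted or also recorded elsewhere.

import functools
import time


def log_function_time(print_only: bool = False):
    # Hypothetical stand-in for danswer.utils.timing.log_function_time.
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            start = time.monotonic()
            try:
                return func(*args, **kwargs)
            finally:
                elapsed = time.monotonic() - start
                # With print_only=True the real version presumably skips any
                # persistent metric and just reports the timing; here we print.
                print(f"{func.__name__} took {elapsed:.3f} seconds")
        return wrapper
    return decorator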