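"""Metric computation for the search quality regression test.

Scores search results against ground-truth documents and reranked
("soft truth") results.
"""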
from typing import Optional

from pydantic import BaseModel
from sqlalchemy.orm import Session

from onyx.context.search.models import InferenceChunk
from onyx.db.models import Document
from onyx.utils.logger import setup_logger
from tests.regression.search_quality.util_retrieve import group_by_documents

logger = setup_logger(__name__)


class Metrics(BaseModel):
    # computed if ground truth is provided
    ground_truth_ratio_topk: Optional[float] = None
    ground_truth_avg_rank_delta: Optional[float] = None

    # computed if reranked results are provided
    soft_truth_ratio_topk: Optional[float] = None
    soft_truth_avg_rank_delta: Optional[float] = None


metric_names = list(Metrics.model_fields.keys())
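# i.e. ["ground_truth_ratio_topk", "ground_truth_avg_rank_delta",
#       "soft_truth_ratio_topk", "soft_truth_avg_rank_delta"]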


def get_corresponding_document(
    doc_link: str, db_session: Session
) -> Optional[Document]:
    """Get the corresponding document from the database."""
    doc_filter = db_session.query(Document).filter(Document.link == doc_link)
    count = doc_filter.count()
    if count == 0:
        logger.warning(f"Could not find document with link {doc_link}, ignoring")
        return None
    if count > 1:
        logger.warning(f"Found multiple documents with link {doc_link}, using first")
    return doc_filter.first()
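# example usage (hypothetical link; assumes an open SQLAlchemy session):
#   doc = get_corresponding_document("https://example.com/docs/page", db_session)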


def evaluate_one_query(
    search_chunks: list[InferenceChunk],
    rerank_chunks: list[InferenceChunk],
    true_documents: list[Document],
    topk: int,
) -> Metrics:
    """Compute metrics for the search results relative to the ground truth and reranked results."""
    metrics_dict: dict[str, float] = {}

    # map each document id to its rank in the search results (and within the top-k)
    search_documents = group_by_documents(search_chunks)
    search_ranks = {docid: rank for rank, docid in enumerate(search_documents)}
    search_ranks_topk = {
        docid: rank for rank, docid in enumerate(search_documents[:topk])
    }
    true_ranks = {doc.id: rank for rank, doc in enumerate(true_documents)}

    if true_documents:
        metrics_dict["ground_truth_ratio_topk"] = _compute_ratio(
            search_ranks_topk, true_ranks
        )
        metrics_dict["ground_truth_avg_rank_delta"] = _compute_avg_rank_delta(
            search_ranks, true_ranks
        )

    if rerank_chunks:
        # build soft truth out of ground truth + reranked results, up to topk
        soft_ranks = dict(true_ranks)  # copy so the ground-truth ranks aren't mutated
        for docid in group_by_documents(rerank_chunks):
            if len(soft_ranks) >= topk:
                break
            if docid not in soft_ranks:
                soft_ranks[docid] = len(soft_ranks)

        metrics_dict["soft_truth_ratio_topk"] = _compute_ratio(
            search_ranks_topk, soft_ranks
        )
        metrics_dict["soft_truth_avg_rank_delta"] = _compute_avg_rank_delta(
            search_ranks, soft_ranks
        )

    return Metrics(**metrics_dict)
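# example usage (hypothetical inputs; chunks and ground truth come from the surrounding test harness):
#   metrics = evaluate_one_query(search_chunks, rerank_chunks, true_documents, topk=10)
#   logger.info(metrics.model_dump())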


def _compute_ratio(search_ranks: dict[str, int], true_ranks: dict[str, int]) -> float:
    """Return the fraction of true documents that appear in the search results."""
    return len(set(search_ranks) & set(true_ranks)) / len(true_ranks)
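# e.g. search_ranks={"a": 0, "b": 1}, true_ranks={"b": 0, "c": 1} -> 1 hit / 2 = 0.5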


def _compute_avg_rank_delta(
    search_ranks: dict[str, int], true_ranks: dict[str, int]
) -> float:
    """Return the average absolute rank difference between the search and true rankings."""
    # documents missing from the search results are ranked just past the end
    out = len(search_ranks)
    return sum(
        abs(search_ranks.get(docid, out) - rank) for docid, rank in true_ranks.items()
    ) / len(true_ranks)
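# e.g. search_ranks={"a": 0, "b": 1}, true_ranks={"b": 0, "c": 1}:
#   out = 2 -> |1 - 0| + |2 - 1| = 2 -> 2 / 2 = 1.0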