Files
Rei Meguro 9dbe12cea8 Feat: Search Eval Testing Overhaul (provide ground truth, categorize query, etc.) (#4739)
* fix: autoflake & import order

* docs: readme

* fix: mypy

* feat: eval

* docs: readme

* fix: oops forgot to remove comment

* fix: typo

* fix: rename var

* updated default config

* fix: config issue

* oops

* fix: black

* fix: eval and config

* feat: non tool calling query mod
2025-05-21 19:25:10 +00:00

76 lines
2.8 KiB
Python

from datetime import datetime
from pathlib import Path
import yaml
from pydantic import BaseModel
from onyx.agents.agent_search.shared_graph_utils.models import QueryExpansionType
from onyx.configs.chat_configs import DOC_TIME_DECAY
from onyx.configs.chat_configs import HYBRID_ALPHA
from onyx.configs.chat_configs import HYBRID_ALPHA_KEYWORD
from onyx.configs.chat_configs import NUM_RETURNED_HITS
from onyx.configs.chat_configs import TITLE_CONTENT_RATIO
from onyx.utils.logger import setup_logger
logger = setup_logger(__name__)
class SearchEvalConfig(BaseModel):
hybrid_alpha: float
hybrid_alpha_keyword: float
doc_time_decay: float
num_returned_hits: int
rank_profile: QueryExpansionType
offset: int
title_content_ratio: float
user_email: str | None
skip_rerank: bool
eval_topk: int
export_folder: str
def load_config() -> SearchEvalConfig:
"""Loads the search evaluation configs from the config file."""
# open the config file
current_dir = Path(__file__).parent
config_path = current_dir / "search_eval_config.yaml"
if not config_path.exists():
raise FileNotFoundError(f"Search eval config file not found at {config_path}")
with config_path.open("r") as file:
config_raw = yaml.safe_load(file)
# create the export folder
export_folder = config_raw.get("EXPORT_FOLDER", "eval-%Y-%m-%d-%H-%M-%S")
export_folder = datetime.now().strftime(export_folder)
export_path = Path(export_folder)
export_path.mkdir(parents=True, exist_ok=True)
logger.info(f"Created export folder: {export_path}")
# create the config
config = SearchEvalConfig(
hybrid_alpha=config_raw.get("HYBRID_ALPHA", HYBRID_ALPHA),
hybrid_alpha_keyword=config_raw.get(
"HYBRID_ALPHA_KEYWORD", HYBRID_ALPHA_KEYWORD
),
doc_time_decay=config_raw.get("DOC_TIME_DECAY", DOC_TIME_DECAY),
num_returned_hits=config_raw.get("NUM_RETURNED_HITS", NUM_RETURNED_HITS),
rank_profile=config_raw.get("RANK_PROFILE", QueryExpansionType.SEMANTIC),
offset=config_raw.get("OFFSET", 0),
title_content_ratio=config_raw.get("TITLE_CONTENT_RATIO", TITLE_CONTENT_RATIO),
user_email=config_raw.get("USER_EMAIL"),
skip_rerank=config_raw.get("SKIP_RERANK", False),
eval_topk=config_raw.get("EVAL_TOPK", 5),
export_folder=export_folder,
)
logger.info(f"Using search parameters: {config}")
# export the config
config_file = export_path / "search_eval_config.yaml"
with config_file.open("w") as file:
config_dict = config.model_dump(mode="python")
config_dict["rank_profile"] = config.rank_profile.value
yaml.dump(config_dict, file, sort_keys=False)
logger.info(f"Exported config to {config_file}")
return config