danswer/backend/tests/unit/model_server/test_custom_models.py
joachim-danswer 463340b8a1
Reduce ranking scores for short chunks without actual information (#4098)
* remove title for slack

* initial working code

* simplification

* improvements

* name change to information_content_model

* avoid boost_score > 1.0

* nit

* EL comments and improvements

Improvements:
  - proper import of information content model from cache or HF
  - warm up for information content model

Other:
  - EL PR review comments

* nit

* requirements version update

* fixed Dockerfile

* new home for model_server configs

* default off

* small updates

* YS comments - pt 1

* renaming to chunk_boost & chunk table def

* saving and deleting chunk stats in new table

* saving and updating chunk stats

* improved dict score update

* create columns for individual boost factors

* RK comments

* Update migration

* manual import reordering
2025-03-13 17:35:45 +00:00


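"""Unit tests for run_content_classification_inference in model_server.custom_models.

Covers the information-content classification added in PR #4098, which reduces
ranking scores for short chunks without actual information.
"""
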
from typing import Any
from unittest.mock import Mock
from unittest.mock import patch

import numpy as np
import numpy.typing as npt
import pytest

from model_server.custom_models import run_content_classification_inference
from shared_configs.configs import (
    INDEXING_INFORMATION_CONTENT_CLASSIFICATION_CUTOFF_LENGTH,
)
from shared_configs.configs import INDEXING_INFORMATION_CONTENT_CLASSIFICATION_MAX
from shared_configs.configs import INDEXING_INFORMATION_CONTENT_CLASSIFICATION_MIN
from shared_configs.model_server_models import ContentClassificationPrediction


@pytest.fixture
def mock_content_model() -> Mock:
    model = Mock()

    # Create actual numpy arrays for the mock returns
    predict_output = np.array(
        [1, 0] * 50, dtype=np.int64
    )  # Pre-allocate enough elements
    proba_output = np.array(
        [[0.3, 0.7], [0.7, 0.3]] * 50, dtype=np.float64
    )  # Pre-allocate enough elements
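    # Assumption: downstream code consumes proba rows in the scikit-learn
    # convention [P(label=0), P(label=1)]; the alternating rows above yield
    # alternating predictions across consecutive inputs.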

    # Create a mock tensor that has a numpy method and supports indexing
    class MockTensor:
        def __init__(self, value: npt.NDArray[Any]) -> None:
            self.value = value

        def numpy(self) -> npt.NDArray[Any]:
            return self.value

        def __getitem__(self, idx: Any) -> Any:
            result = self.value[idx]
            # Wrap scalar values back in MockTensor
            if isinstance(result, (np.float64, np.int64)):
                return MockTensor(np.array([result]))
            return MockTensor(result)
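
    # Note: MockTensor implements only the tensor surface the inference code
    # appears to rely on (.numpy() and indexing); anything beyond that would
    # raise, keeping the mock honest about what the production path touches.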

    # Mock the direct call to return a MockTensor for each input
    def model_call(inputs: list[str]) -> list[MockTensor]:
        batch_size = len(inputs)
        return [MockTensor(predict_output[i : i + 1]) for i in range(batch_size)]

    model.side_effect = model_call

    # Mock predict_proba to return MockTensor-wrapped numpy array
    def predict_proba_call(x: list[str]) -> MockTensor:
        batch_size = len(x)
        return MockTensor(proba_output[:batch_size])

    model.predict_proba.side_effect = predict_proba_call

    return model


@patch("model_server.custom_models.get_local_information_content_model")
def test_run_content_classification_inference(
    mock_get_model: Mock,
    mock_content_model: Mock,
) -> None:
    """
    Test the content classification inference function.
    Verifies that the function correctly processes text inputs and returns appropriate predictions.
    """
    # Setup
    mock_get_model.return_value = mock_content_model

    test_inputs = [
        "Imagine a short text with content",
        "Imagine a short text without content",
        "x "
        * (
            INDEXING_INFORMATION_CONTENT_CLASSIFICATION_CUTOFF_LENGTH + 1
        ),  # Long input that exceeds the maximal length for which the model should be applied
        "",  # Empty input
    ]

    # Execute
    results = run_content_classification_inference(test_inputs)

    # Assert
    assert len(results) == len(test_inputs)
    assert all(isinstance(r, ContentClassificationPrediction) for r in results)

    # Check each prediction has expected attributes and ranges
    for result_num, result in enumerate(results):
        assert hasattr(result, "predicted_label")
        assert hasattr(result, "content_boost_factor")
        assert isinstance(result.predicted_label, int)
        assert isinstance(result.content_boost_factor, float)
        assert (
            INDEXING_INFORMATION_CONTENT_CLASSIFICATION_MIN
            <= result.content_boost_factor
            <= INDEXING_INFORMATION_CONTENT_CLASSIFICATION_MAX
        )
        if result_num == 2:
            assert (
                result.content_boost_factor
                == INDEXING_INFORMATION_CONTENT_CLASSIFICATION_MAX
            )
            assert result.predicted_label == 1
        elif result_num == 3:
            assert (
                result.content_boost_factor
                == INDEXING_INFORMATION_CONTENT_CLASSIFICATION_MIN
            )
            assert result.predicted_label == 0
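
    # Inputs 2 and 3 exercise the shortcut paths: over-length text receives the
    # MAX boost with label 1 and empty text the MIN boost with label 0, without
    # the classifier being consulted (the call-count check below confirms this
    # for the long input).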

    # Verify model handling of long inputs
    mock_content_model.predict_proba.reset_mock()
    long_input = ["x " * 1000]  # Definitely exceeds MAX_LENGTH
    results = run_content_classification_inference(long_input)
    assert len(results) == 1
    assert (
        mock_content_model.predict_proba.call_count == 0
    )  # Should skip model call for too-long input


@patch("model_server.custom_models.get_local_information_content_model")
def test_batch_processing(
    mock_get_model: Mock,
    mock_content_model: Mock,
) -> None:
    """
    Test that the function correctly handles batch processing of inputs.
    """
    # Setup
    mock_get_model.return_value = mock_content_model

    # Create test input larger than batch size
    test_inputs = [f"Test input {i}" for i in range(40)]  # > BATCH_SIZE (32)

    # Execute
    results = run_content_classification_inference(test_inputs)

    # Assert
    assert len(results) == 40
    # Verify batching occurred (should have called predict_proba twice)
    assert mock_content_model.predict_proba.call_count == 2
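    # Batch arithmetic: assuming BATCH_SIZE is 32 and inputs are chunked
    # sequentially, 40 inputs split into ceil(40 / 32) == 2 batches (32 + 8),
    # hence exactly two predict_proba calls.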