diff --git a/backend/model_server/custom_models.py b/backend/model_server/custom_models.py index cd20ae4de..c56a607b1 100644 --- a/backend/model_server/custom_models.py +++ b/backend/model_server/custom_models.py @@ -13,14 +13,14 @@ from model_server.constants import MODEL_WARM_UP_STRING from model_server.onyx_torch_model import ConnectorClassifier from model_server.onyx_torch_model import HybridClassifier from model_server.utils import simple_log_function_time -from onyx.configs.model_configs import INDEXING_INFORMATION_CONTENT_CLASSIFICATION_MAX -from onyx.configs.model_configs import INDEXING_INFORMATION_CONTENT_CLASSIFICATION_MIN -from onyx.configs.model_configs import ( - INDEXING_INFORMATION_CONTENT_CLASSIFICATION_TEMPERATURE, -) from onyx.utils.logger import setup_logger from shared_configs.configs import CONNECTOR_CLASSIFIER_MODEL_REPO from shared_configs.configs import CONNECTOR_CLASSIFIER_MODEL_TAG +from shared_configs.configs import INDEXING_INFORMATION_CONTENT_CLASSIFICATION_MAX +from shared_configs.configs import INDEXING_INFORMATION_CONTENT_CLASSIFICATION_MIN +from shared_configs.configs import ( + INDEXING_INFORMATION_CONTENT_CLASSIFICATION_TEMPERATURE, +) from shared_configs.configs import INDEXING_ONLY from shared_configs.configs import INFORMATION_CONTENT_MODEL_TAG from shared_configs.configs import INFORMATION_CONTENT_MODEL_VERSION diff --git a/backend/onyx/configs/model_configs.py b/backend/onyx/configs/model_configs.py index b2e8bb786..34dd42656 100644 --- a/backend/onyx/configs/model_configs.py +++ b/backend/onyx/configs/model_configs.py @@ -144,18 +144,3 @@ USE_INFORMATION_CONTENT_CLASSIFICATION = ( INDEXING_INFORMATION_CONTENT_CLASSIFICATION_CUTOFF_LENGTH = float( os.environ.get("INDEXING_INFORMATION_CONTENT_CLASSIFICATION_CUTOFF_LENGTH") or 10 ) - -# Minimum (most severe) downgrade factor for short chunks below the cutoff if no content -INDEXING_INFORMATION_CONTENT_CLASSIFICATION_MIN = float( - os.environ.get("INDEXING_INFORMATION_CONTENT_CLASSIFICATION_MIN") or 0.8 -) - -# Maximum (least severe) downgrade factor for chunks above the cutoff -INDEXING_INFORMATION_CONTENT_CLASSIFICATION_MAX = float( - os.environ.get("INDEXING_INFORMATION_CONTENT_CLASSIFICATION_MAX") or 1.0 -) - -# Temperature for the information content classification model -INDEXING_INFORMATION_CONTENT_CLASSIFICATION_TEMPERATURE = float( - os.environ.get("INDEXING_INFORMATION_CONTENT_CLASSIFICATION_TEMPERATURE") or 4.0 -) diff --git a/backend/shared_configs/configs.py b/backend/shared_configs/configs.py index de43dc3a3..f4ecd79ec 100644 --- a/backend/shared_configs/configs.py +++ b/backend/shared_configs/configs.py @@ -279,3 +279,15 @@ SUPPORTED_EMBEDDING_MODELS = [ index_name="danswer_chunk_intfloat_multilingual_e5_small", ), ] +# Maximum (least severe) downgrade factor for chunks above the cutoff +INDEXING_INFORMATION_CONTENT_CLASSIFICATION_MAX = float( + os.environ.get("INDEXING_INFORMATION_CONTENT_CLASSIFICATION_MAX") or 1.0 +) +# Minimum (most severe) downgrade factor for short chunks below the cutoff if no content +INDEXING_INFORMATION_CONTENT_CLASSIFICATION_MIN = float( + os.environ.get("INDEXING_INFORMATION_CONTENT_CLASSIFICATION_MIN") or 0.8 +) +# Temperature for the information content classification model +INDEXING_INFORMATION_CONTENT_CLASSIFICATION_TEMPERATURE = float( + os.environ.get("INDEXING_INFORMATION_CONTENT_CLASSIFICATION_TEMPERATURE") or 4.0 +)