Enable default quantization (#4815)

* Adjust migration

* update default in form

* Add cloud indices for bfloat16

* Update backend/shared_configs/configs.py

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

* Update vespa schema gen script

* Move embedding configs

* Remove unused imports

* remove import from shared configs

* Remove unused model

---------

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
This commit is contained in:
Chris Weaver
2025-06-05 14:02:08 -07:00
committed by GitHub
parent 85eeb21b77
commit dc542fd7fa
8 changed files with 229 additions and 223 deletions

View File

@ -10,12 +10,19 @@ from alembic import op
import sqlalchemy as sa
from sqlalchemy import table, column, String, Integer, Boolean
from onyx.db.search_settings import (
get_new_default_embedding_model,
get_old_default_embedding_model,
user_has_overridden_embedding_model,
)
from onyx.configs.model_configs import ASYM_PASSAGE_PREFIX
from onyx.configs.model_configs import ASYM_QUERY_PREFIX
from onyx.configs.model_configs import DOC_EMBEDDING_DIM
from onyx.configs.model_configs import DOCUMENT_ENCODER_MODEL
from onyx.configs.model_configs import NORMALIZE_EMBEDDINGS
from onyx.configs.model_configs import OLD_DEFAULT_DOCUMENT_ENCODER_MODEL
from onyx.configs.model_configs import OLD_DEFAULT_MODEL_DOC_EMBEDDING_DIM
from onyx.configs.model_configs import OLD_DEFAULT_MODEL_NORMALIZE_EMBEDDINGS
from onyx.db.enums import EmbeddingPrecision
from onyx.db.models import IndexModelStatus
from onyx.db.search_settings import user_has_overridden_embedding_model
from onyx.indexing.models import IndexingSetting
from onyx.natural_language_processing.search_nlp_models import clean_model_name
# revision identifiers, used by Alembic.
revision = "dbaa756c2ccf"
@ -24,6 +31,47 @@ branch_labels: None = None
depends_on: None = None
def _get_old_default_embedding_model() -> IndexingSetting:
    """Build the IndexingSetting for the embedding model that was in use
    before this migration.

    If the deployment overrode the embedding model via env variables, the
    overridden values are used; otherwise the historical defaults apply.
    """
    overridden = user_has_overridden_embedding_model()

    if overridden:
        model_name = DOCUMENT_ENCODER_MODEL
        model_dim = DOC_EMBEDDING_DIM
        normalize = NORMALIZE_EMBEDDINGS
        query_prefix = ASYM_QUERY_PREFIX
        passage_prefix = ASYM_PASSAGE_PREFIX
    else:
        model_name = OLD_DEFAULT_DOCUMENT_ENCODER_MODEL
        model_dim = OLD_DEFAULT_MODEL_DOC_EMBEDDING_DIM
        normalize = OLD_DEFAULT_MODEL_NORMALIZE_EMBEDDINGS
        query_prefix = ""
        passage_prefix = ""

    return IndexingSetting(
        model_name=model_name,
        model_dim=model_dim,
        # Pre-existing indices were stored at full float precision.
        embedding_precision=EmbeddingPrecision.FLOAT,
        normalize=normalize,
        query_prefix=query_prefix,
        passage_prefix=passage_prefix,
        index_name="danswer_chunk",
        multipass_indexing=False,
        enable_contextual_rag=False,
        api_url=None,
    )
def _get_new_default_embedding_model() -> IndexingSetting:
    """Build the IndexingSetting for the new default embedding model that
    non-overridden deployments are auto-upgraded to."""
    new_index_name = f"danswer_chunk_{clean_model_name(DOCUMENT_ENCODER_MODEL)}"
    return IndexingSetting(
        model_name=DOCUMENT_ENCODER_MODEL,
        model_dim=DOC_EMBEDDING_DIM,
        # New default indices are created with bfloat16 precision.
        embedding_precision=EmbeddingPrecision.BFLOAT16,
        normalize=NORMALIZE_EMBEDDINGS,
        query_prefix=ASYM_QUERY_PREFIX,
        passage_prefix=ASYM_PASSAGE_PREFIX,
        index_name=new_index_name,
        multipass_indexing=False,
        enable_contextual_rag=False,
        api_url=None,
    )
def upgrade() -> None:
op.create_table(
"embedding_model",
@ -61,7 +109,7 @@ def upgrade() -> None:
# the user selected via env variables before this change. This is needed since
# all index_attempts must be associated with an embedding model, so without this
# we will run into violations of non-null constraints
old_embedding_model = get_old_default_embedding_model()
old_embedding_model = _get_old_default_embedding_model()
op.bulk_insert(
EmbeddingModel,
[
@ -79,7 +127,7 @@ def upgrade() -> None:
# if the user has not overridden the default embedding model via env variables,
# insert the new default model into the database to auto-upgrade them
if not user_has_overridden_embedding_model():
new_embedding_model = get_new_default_embedding_model()
new_embedding_model = _get_new_default_embedding_model()
op.bulk_insert(
EmbeddingModel,
[

View File

@ -0,0 +1,156 @@
from pydantic import BaseModel
from onyx.db.enums import EmbeddingPrecision
class _BaseEmbeddingModel(BaseModel):
    """Private model for defining base embedding model configurations.

    Precision variants (FLOAT / BFLOAT16) are generated from these entries
    rather than listed by hand.
    """

    # Provider-qualified model name, e.g. "openai/text-embedding-3-small".
    name: str
    # Dimensionality of the embedding vectors this model produces.
    dim: int
    # Base name of the search index holding this model's embeddings.
    index_name: str
class SupportedEmbeddingModel(BaseModel):
    """A supported embedding model together with the precision its index
    is stored at."""

    # Provider-qualified model name, e.g. "cohere/embed-english-v3.0".
    name: str
    # Dimensionality of the embedding vectors this model produces.
    dim: int
    # Full index name (precision suffix included where applicable).
    index_name: str
    # Storage precision of the embedding index for this entry.
    embedding_precision: EmbeddingPrecision
# Base embedding model configurations (without precision)
# NOTE: most models appear twice — once under a provider-prefixed index name
# (e.g. "danswer_chunk_cohere_embed_english_v3_0") and once under a legacy
# unprefixed name (e.g. "danswer_chunk_embed_english_v3_0") — so that older
# deployments keep resolving the index they were created with.
# NOTE(review): the previously hard-coded list mapped "google/text-embedding-005"
# to "danswer_chunk_google_text_embedding_004" / "danswer_chunk_text_embedding_004";
# those index names are not carried over here — confirm no live deployments
# still reference them.
_BASE_EMBEDDING_MODELS = [
    # Cloud-based models
    _BaseEmbeddingModel(
        name="cohere/embed-english-v3.0",
        dim=1024,
        index_name="danswer_chunk_cohere_embed_english_v3_0",
    ),
    _BaseEmbeddingModel(
        name="cohere/embed-english-v3.0",
        dim=1024,
        index_name="danswer_chunk_embed_english_v3_0",
    ),
    _BaseEmbeddingModel(
        name="cohere/embed-english-light-v3.0",
        dim=384,
        index_name="danswer_chunk_cohere_embed_english_light_v3_0",
    ),
    _BaseEmbeddingModel(
        name="cohere/embed-english-light-v3.0",
        dim=384,
        index_name="danswer_chunk_embed_english_light_v3_0",
    ),
    _BaseEmbeddingModel(
        name="openai/text-embedding-3-large",
        dim=3072,
        index_name="danswer_chunk_openai_text_embedding_3_large",
    ),
    _BaseEmbeddingModel(
        name="openai/text-embedding-3-large",
        dim=3072,
        index_name="danswer_chunk_text_embedding_3_large",
    ),
    _BaseEmbeddingModel(
        name="openai/text-embedding-3-small",
        dim=1536,
        index_name="danswer_chunk_openai_text_embedding_3_small",
    ),
    _BaseEmbeddingModel(
        name="openai/text-embedding-3-small",
        dim=1536,
        index_name="danswer_chunk_text_embedding_3_small",
    ),
    _BaseEmbeddingModel(
        name="google/text-embedding-005",
        dim=768,
        index_name="danswer_chunk_google_text_embedding_005",
    ),
    _BaseEmbeddingModel(
        name="google/textembedding-gecko@003",
        dim=768,
        index_name="danswer_chunk_google_textembedding_gecko_003",
    ),
    _BaseEmbeddingModel(
        name="google/textembedding-gecko@003",
        dim=768,
        index_name="danswer_chunk_textembedding_gecko_003",
    ),
    _BaseEmbeddingModel(
        name="voyage/voyage-large-2-instruct",
        dim=1024,
        index_name="danswer_chunk_voyage_large_2_instruct",
    ),
    _BaseEmbeddingModel(
        name="voyage/voyage-large-2-instruct",
        dim=1024,
        index_name="danswer_chunk_large_2_instruct",
    ),
    _BaseEmbeddingModel(
        name="voyage/voyage-light-2-instruct",
        dim=384,
        index_name="danswer_chunk_voyage_light_2_instruct",
    ),
    _BaseEmbeddingModel(
        name="voyage/voyage-light-2-instruct",
        dim=384,
        index_name="danswer_chunk_light_2_instruct",
    ),
    # Self-hosted models
    _BaseEmbeddingModel(
        name="nomic-ai/nomic-embed-text-v1",
        dim=768,
        index_name="danswer_chunk_nomic_ai_nomic_embed_text_v1",
    ),
    _BaseEmbeddingModel(
        name="nomic-ai/nomic-embed-text-v1",
        dim=768,
        index_name="danswer_chunk_nomic_embed_text_v1",
    ),
    _BaseEmbeddingModel(
        name="intfloat/e5-base-v2",
        dim=768,
        index_name="danswer_chunk_intfloat_e5_base_v2",
    ),
    _BaseEmbeddingModel(
        name="intfloat/e5-small-v2",
        dim=384,
        index_name="danswer_chunk_intfloat_e5_small_v2",
    ),
    _BaseEmbeddingModel(
        name="intfloat/multilingual-e5-base",
        dim=768,
        index_name="danswer_chunk_intfloat_multilingual_e5_base",
    ),
    _BaseEmbeddingModel(
        name="intfloat/multilingual-e5-small",
        dim=384,
        index_name="danswer_chunk_intfloat_multilingual_e5_small",
    ),
]
def _as_supported(
    base: _BaseEmbeddingModel,
    precision: EmbeddingPrecision,
    index_suffix: str,
) -> SupportedEmbeddingModel:
    """Attach a precision (and index-name suffix) to a base model entry."""
    return SupportedEmbeddingModel(
        name=base.name,
        dim=base.dim,
        index_name=base.index_name + index_suffix,
        embedding_precision=precision,
    )


# Automatically generate both FLOAT and BFLOAT16 versions of all models
SUPPORTED_EMBEDDING_MODELS = (
    # BFLOAT16 precision versions (the current default)
    [
        _as_supported(base, EmbeddingPrecision.BFLOAT16, "_bfloat16")
        for base in _BASE_EMBEDDING_MODELS
    ]
    # FLOAT precision versions
    # NOTE: need to keep this one for backwards compatibility. We now default to
    # BFLOAT16.
    + [
        _as_supported(base, EmbeddingPrecision.FLOAT, "")
        for base in _BASE_EMBEDDING_MODELS
    ]
)

View File

@ -3,25 +3,15 @@ from sqlalchemy import delete
from sqlalchemy import select
from sqlalchemy.orm import Session
from onyx.configs.model_configs import ASYM_PASSAGE_PREFIX
from onyx.configs.model_configs import ASYM_QUERY_PREFIX
from onyx.configs.model_configs import DEFAULT_DOCUMENT_ENCODER_MODEL
from onyx.configs.model_configs import DOC_EMBEDDING_DIM
from onyx.configs.model_configs import DOCUMENT_ENCODER_MODEL
from onyx.configs.model_configs import NORMALIZE_EMBEDDINGS
from onyx.configs.model_configs import OLD_DEFAULT_DOCUMENT_ENCODER_MODEL
from onyx.configs.model_configs import OLD_DEFAULT_MODEL_DOC_EMBEDDING_DIM
from onyx.configs.model_configs import OLD_DEFAULT_MODEL_NORMALIZE_EMBEDDINGS
from onyx.context.search.models import SavedSearchSettings
from onyx.db.engine import get_session_with_current_tenant
from onyx.db.enums import EmbeddingPrecision
from onyx.db.llm import fetch_embedding_provider
from onyx.db.models import CloudEmbeddingProvider
from onyx.db.models import IndexAttempt
from onyx.db.models import IndexModelStatus
from onyx.db.models import SearchSettings
from onyx.indexing.models import IndexingSetting
from onyx.natural_language_processing.search_nlp_models import clean_model_name
from onyx.natural_language_processing.search_nlp_models import warm_up_cross_encoder
from onyx.server.manage.embedding.models import (
CloudEmbeddingProvider as ServerCloudEmbeddingProvider,
@ -264,79 +254,3 @@ def update_search_settings_status(
def user_has_overridden_embedding_model() -> bool:
    """Return True if the deployment overrides the default embedding model
    (i.e. DOCUMENT_ENCODER_MODEL differs from the shipped default)."""
    return DOCUMENT_ENCODER_MODEL != DEFAULT_DOCUMENT_ENCODER_MODEL
def get_old_default_search_settings() -> SearchSettings:
is_overridden = user_has_overridden_embedding_model()
return SearchSettings(
model_name=(
DOCUMENT_ENCODER_MODEL
if is_overridden
else OLD_DEFAULT_DOCUMENT_ENCODER_MODEL
),
model_dim=(
DOC_EMBEDDING_DIM if is_overridden else OLD_DEFAULT_MODEL_DOC_EMBEDDING_DIM
),
normalize=(
NORMALIZE_EMBEDDINGS
if is_overridden
else OLD_DEFAULT_MODEL_NORMALIZE_EMBEDDINGS
),
query_prefix=(ASYM_QUERY_PREFIX if is_overridden else ""),
passage_prefix=(ASYM_PASSAGE_PREFIX if is_overridden else ""),
status=IndexModelStatus.PRESENT,
index_name="danswer_chunk",
)
def get_new_default_search_settings(is_present: bool) -> SearchSettings:
return SearchSettings(
model_name=DOCUMENT_ENCODER_MODEL,
model_dim=DOC_EMBEDDING_DIM,
normalize=NORMALIZE_EMBEDDINGS,
query_prefix=ASYM_QUERY_PREFIX,
passage_prefix=ASYM_PASSAGE_PREFIX,
status=IndexModelStatus.PRESENT if is_present else IndexModelStatus.FUTURE,
index_name=f"danswer_chunk_{clean_model_name(DOCUMENT_ENCODER_MODEL)}",
)
def get_old_default_embedding_model() -> IndexingSetting:
is_overridden = user_has_overridden_embedding_model()
return IndexingSetting(
model_name=(
DOCUMENT_ENCODER_MODEL
if is_overridden
else OLD_DEFAULT_DOCUMENT_ENCODER_MODEL
),
model_dim=(
DOC_EMBEDDING_DIM if is_overridden else OLD_DEFAULT_MODEL_DOC_EMBEDDING_DIM
),
embedding_precision=(EmbeddingPrecision.FLOAT),
normalize=(
NORMALIZE_EMBEDDINGS
if is_overridden
else OLD_DEFAULT_MODEL_NORMALIZE_EMBEDDINGS
),
query_prefix=(ASYM_QUERY_PREFIX if is_overridden else ""),
passage_prefix=(ASYM_PASSAGE_PREFIX if is_overridden else ""),
index_name="danswer_chunk",
multipass_indexing=False,
enable_contextual_rag=False,
api_url=None,
)
def get_new_default_embedding_model() -> IndexingSetting:
return IndexingSetting(
model_name=DOCUMENT_ENCODER_MODEL,
model_dim=DOC_EMBEDDING_DIM,
embedding_precision=(EmbeddingPrecision.FLOAT),
normalize=NORMALIZE_EMBEDDINGS,
query_prefix=ASYM_QUERY_PREFIX,
passage_prefix=ASYM_PASSAGE_PREFIX,
index_name=f"danswer_chunk_{clean_model_name(DOCUMENT_ENCODER_MODEL)}",
multipass_indexing=False,
enable_contextual_rag=False,
api_url=None,
)

View File

@ -7,6 +7,8 @@ from onyx.configs.app_configs import MANAGED_VESPA
from onyx.configs.app_configs import VESPA_NUM_ATTEMPTS_ON_STARTUP
from onyx.configs.constants import KV_REINDEX_KEY
from onyx.configs.constants import KV_SEARCH_SETTINGS
from onyx.configs.embedding_configs import SUPPORTED_EMBEDDING_MODELS
from onyx.configs.embedding_configs import SupportedEmbeddingModel
from onyx.configs.model_configs import FAST_GEN_AI_MODEL_VERSION
from onyx.configs.model_configs import GEN_AI_API_KEY
from onyx.configs.model_configs import GEN_AI_MODEL_VERSION
@ -59,8 +61,6 @@ from shared_configs.configs import ALT_INDEX_SUFFIX
from shared_configs.configs import MODEL_SERVER_HOST
from shared_configs.configs import MODEL_SERVER_PORT
from shared_configs.configs import MULTI_TENANT
from shared_configs.configs import SUPPORTED_EMBEDDING_MODELS
from shared_configs.model_server_models import SupportedEmbeddingModel
logger = setup_logger()

View File

@ -4,21 +4,26 @@ import argparse
import jinja2
from onyx.configs.embedding_configs import SUPPORTED_EMBEDDING_MODELS
from onyx.db.enums import EmbeddingPrecision
from onyx.utils.logger import setup_logger
from shared_configs.configs import SUPPORTED_EMBEDDING_MODELS
logger = setup_logger()
def write_schema(index_name: str, dim: int, template: jinja2.Template) -> None:
def write_schema(
index_name: str,
dim: int,
embedding_precision: EmbeddingPrecision,
template: jinja2.Template,
) -> None:
index_filename = index_name + ".sd"
schema = template.render(
multi_tenant=True,
schema_name=index_name,
dim=dim,
embedding_precision=EmbeddingPrecision.FLOAT.value,
embedding_precision=embedding_precision.value,
)
with open(index_filename, "w", encoding="utf-8") as f:
@ -41,8 +46,13 @@ def main() -> None:
num_indexes = 0
for model in SUPPORTED_EMBEDDING_MODELS:
write_schema(model.index_name, model.dim, template)
write_schema(model.index_name + "__danswer_alt_index", model.dim, template)
write_schema(model.index_name, model.dim, model.embedding_precision, template)
write_schema(
model.index_name + "__danswer_alt_index",
model.dim,
model.embedding_precision,
template,
)
num_indexes += 2
logger.info(f"Wrote {num_indexes} indexes.")

View File

@ -3,8 +3,6 @@ from typing import Any
from typing import List
from urllib.parse import urlparse
from shared_configs.model_server_models import SupportedEmbeddingModel
# Used for logging
SLACK_CHANNEL_ID = "channel_id"
@ -170,120 +168,6 @@ IGNORED_SYNCING_TENANT_LIST = (
else None
)
SUPPORTED_EMBEDDING_MODELS = [
# Cloud-based models
SupportedEmbeddingModel(
name="cohere/embed-english-v3.0",
dim=1024,
index_name="danswer_chunk_cohere_embed_english_v3_0",
),
SupportedEmbeddingModel(
name="cohere/embed-english-v3.0",
dim=1024,
index_name="danswer_chunk_embed_english_v3_0",
),
SupportedEmbeddingModel(
name="cohere/embed-english-light-v3.0",
dim=384,
index_name="danswer_chunk_cohere_embed_english_light_v3_0",
),
SupportedEmbeddingModel(
name="cohere/embed-english-light-v3.0",
dim=384,
index_name="danswer_chunk_embed_english_light_v3_0",
),
SupportedEmbeddingModel(
name="openai/text-embedding-3-large",
dim=3072,
index_name="danswer_chunk_openai_text_embedding_3_large",
),
SupportedEmbeddingModel(
name="openai/text-embedding-3-large",
dim=3072,
index_name="danswer_chunk_text_embedding_3_large",
),
SupportedEmbeddingModel(
name="openai/text-embedding-3-small",
dim=1536,
index_name="danswer_chunk_openai_text_embedding_3_small",
),
SupportedEmbeddingModel(
name="openai/text-embedding-3-small",
dim=1536,
index_name="danswer_chunk_text_embedding_3_small",
),
SupportedEmbeddingModel(
name="google/text-embedding-005",
dim=768,
index_name="danswer_chunk_google_text_embedding_004",
),
SupportedEmbeddingModel(
name="google/text-embedding-005",
dim=768,
index_name="danswer_chunk_text_embedding_004",
),
SupportedEmbeddingModel(
name="google/textembedding-gecko@003",
dim=768,
index_name="danswer_chunk_google_textembedding_gecko_003",
),
SupportedEmbeddingModel(
name="google/textembedding-gecko@003",
dim=768,
index_name="danswer_chunk_textembedding_gecko_003",
),
SupportedEmbeddingModel(
name="voyage/voyage-large-2-instruct",
dim=1024,
index_name="danswer_chunk_voyage_large_2_instruct",
),
SupportedEmbeddingModel(
name="voyage/voyage-large-2-instruct",
dim=1024,
index_name="danswer_chunk_large_2_instruct",
),
SupportedEmbeddingModel(
name="voyage/voyage-light-2-instruct",
dim=384,
index_name="danswer_chunk_voyage_light_2_instruct",
),
SupportedEmbeddingModel(
name="voyage/voyage-light-2-instruct",
dim=384,
index_name="danswer_chunk_light_2_instruct",
),
# Self-hosted models
SupportedEmbeddingModel(
name="nomic-ai/nomic-embed-text-v1",
dim=768,
index_name="danswer_chunk_nomic_ai_nomic_embed_text_v1",
),
SupportedEmbeddingModel(
name="nomic-ai/nomic-embed-text-v1",
dim=768,
index_name="danswer_chunk_nomic_embed_text_v1",
),
SupportedEmbeddingModel(
name="intfloat/e5-base-v2",
dim=768,
index_name="danswer_chunk_intfloat_e5_base_v2",
),
SupportedEmbeddingModel(
name="intfloat/e5-small-v2",
dim=384,
index_name="danswer_chunk_intfloat_e5_small_v2",
),
SupportedEmbeddingModel(
name="intfloat/multilingual-e5-base",
dim=768,
index_name="danswer_chunk_intfloat_multilingual_e5_base",
),
SupportedEmbeddingModel(
name="intfloat/multilingual-e5-small",
dim=384,
index_name="danswer_chunk_intfloat_multilingual_e5_small",
),
]
# Maximum (least severe) downgrade factor for chunks above the cutoff
INDEXING_INFORMATION_CONTENT_CLASSIFICATION_MAX = float(
os.environ.get("INDEXING_INFORMATION_CONTENT_CLASSIFICATION_MAX") or 1.0

View File

@ -78,12 +78,6 @@ class InformationContentClassificationRequests(BaseModel):
queries: list[str]
class SupportedEmbeddingModel(BaseModel):
name: str
dim: int
index_name: str
class ContentClassificationPrediction(BaseModel):
predicted_label: int
content_boost_factor: float

View File

@ -71,7 +71,7 @@ export default function EmbeddingForm() {
disable_rerank_for_streaming: false,
api_url: null,
num_rerank: 0,
embedding_precision: EmbeddingPrecision.FLOAT,
embedding_precision: EmbeddingPrecision.BFLOAT16,
reduced_dimension: null,
});