mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-07-12 06:05:43 +02:00
Enable default quantization (#4815)
* Adjust migration * update default in form * Add cloud indices for bfloat16 * Update backend/shared_configs/configs.py Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com> * Update vespa schema gen script * Move embedding configs * Remove unused imports * remove import from shared configs * Remove unused model --------- Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
This commit is contained in:
@ -10,12 +10,19 @@ from alembic import op
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy import table, column, String, Integer, Boolean
|
||||
|
||||
from onyx.db.search_settings import (
|
||||
get_new_default_embedding_model,
|
||||
get_old_default_embedding_model,
|
||||
user_has_overridden_embedding_model,
|
||||
)
|
||||
from onyx.configs.model_configs import ASYM_PASSAGE_PREFIX
|
||||
from onyx.configs.model_configs import ASYM_QUERY_PREFIX
|
||||
from onyx.configs.model_configs import DOC_EMBEDDING_DIM
|
||||
from onyx.configs.model_configs import DOCUMENT_ENCODER_MODEL
|
||||
from onyx.configs.model_configs import NORMALIZE_EMBEDDINGS
|
||||
from onyx.configs.model_configs import OLD_DEFAULT_DOCUMENT_ENCODER_MODEL
|
||||
from onyx.configs.model_configs import OLD_DEFAULT_MODEL_DOC_EMBEDDING_DIM
|
||||
from onyx.configs.model_configs import OLD_DEFAULT_MODEL_NORMALIZE_EMBEDDINGS
|
||||
from onyx.db.enums import EmbeddingPrecision
|
||||
from onyx.db.models import IndexModelStatus
|
||||
from onyx.db.search_settings import user_has_overridden_embedding_model
|
||||
from onyx.indexing.models import IndexingSetting
|
||||
from onyx.natural_language_processing.search_nlp_models import clean_model_name
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "dbaa756c2ccf"
|
||||
@ -24,6 +31,47 @@ branch_labels: None = None
|
||||
depends_on: None = None
|
||||
|
||||
|
||||
def _get_old_default_embedding_model() -> IndexingSetting:
    """Describe the "old" default embedding model for this migration.

    If the deployment overrides the document encoder (as reported by
    ``user_has_overridden_embedding_model``), the currently configured model
    name, dimension, normalization flag and asymmetric prefixes are used.
    Otherwise the ``OLD_DEFAULT_*`` values are used with empty prefixes.

    Returns:
        An ``IndexingSetting`` that always targets the ``danswer_chunk`` index
        with FLOAT precision, multipass indexing and contextual RAG disabled,
        and no API URL.
    """
    # Resolve every override-dependent field in a single branch instead of
    # repeating the same condition per field.
    if user_has_overridden_embedding_model():
        model_name = DOCUMENT_ENCODER_MODEL
        model_dim = DOC_EMBEDDING_DIM
        normalize = NORMALIZE_EMBEDDINGS
        query_prefix = ASYM_QUERY_PREFIX
        passage_prefix = ASYM_PASSAGE_PREFIX
    else:
        model_name = OLD_DEFAULT_DOCUMENT_ENCODER_MODEL
        model_dim = OLD_DEFAULT_MODEL_DOC_EMBEDDING_DIM
        normalize = OLD_DEFAULT_MODEL_NORMALIZE_EMBEDDINGS
        query_prefix = ""
        passage_prefix = ""

    return IndexingSetting(
        model_name=model_name,
        model_dim=model_dim,
        embedding_precision=EmbeddingPrecision.FLOAT,
        normalize=normalize,
        query_prefix=query_prefix,
        passage_prefix=passage_prefix,
        index_name="danswer_chunk",
        multipass_indexing=False,
        enable_contextual_rag=False,
        api_url=None,
    )
|
||||
|
||||
|
||||
def _get_new_default_embedding_model() -> IndexingSetting:
    """Describe the "new" default embedding model for this migration.

    All model fields come straight from the current configuration constants.
    The index name is ``danswer_chunk_`` followed by the cleaned encoder model
    name, and the precision is BFLOAT16.

    Returns:
        An ``IndexingSetting`` with multipass indexing and contextual RAG
        disabled and no API URL.
    """
    # The index is named after the configured encoder, normalized by
    # clean_model_name.
    encoder_suffix = clean_model_name(DOCUMENT_ENCODER_MODEL)
    target_index = f"danswer_chunk_{encoder_suffix}"

    return IndexingSetting(
        model_name=DOCUMENT_ENCODER_MODEL,
        model_dim=DOC_EMBEDDING_DIM,
        embedding_precision=EmbeddingPrecision.BFLOAT16,
        normalize=NORMALIZE_EMBEDDINGS,
        query_prefix=ASYM_QUERY_PREFIX,
        passage_prefix=ASYM_PASSAGE_PREFIX,
        index_name=target_index,
        multipass_indexing=False,
        enable_contextual_rag=False,
        api_url=None,
    )
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
op.create_table(
|
||||
"embedding_model",
|
||||
@ -61,7 +109,7 @@ def upgrade() -> None:
|
||||
# the user selected via env variables before this change. This is needed since
|
||||
# all index_attempts must be associated with an embedding model, so without this
|
||||
# we will run into violations of non-null constraints
|
||||
old_embedding_model = get_old_default_embedding_model()
|
||||
old_embedding_model = _get_old_default_embedding_model()
|
||||
op.bulk_insert(
|
||||
EmbeddingModel,
|
||||
[
|
||||
@ -79,7 +127,7 @@ def upgrade() -> None:
|
||||
# if the user has not overridden the default embedding model via env variables,
|
||||
# insert the new default model into the database to auto-upgrade them
|
||||
if not user_has_overridden_embedding_model():
|
||||
new_embedding_model = get_new_default_embedding_model()
|
||||
new_embedding_model = _get_new_default_embedding_model()
|
||||
op.bulk_insert(
|
||||
EmbeddingModel,
|
||||
[
|
||||
|
156
backend/onyx/configs/embedding_configs.py
Normal file
156
backend/onyx/configs/embedding_configs.py
Normal file
@ -0,0 +1,156 @@
|
||||
from pydantic import BaseModel
|
||||
|
||||
from onyx.db.enums import EmbeddingPrecision
|
||||
|
||||
|
||||
# Precision-agnostic description of one embedding model / index pairing.
# Entries of this type are expanded into per-precision SupportedEmbeddingModel
# records further down in this module.
class _BaseEmbeddingModel(BaseModel):
    """Private model for defining base embedding model configurations."""

    # Provider-qualified model identifier, e.g. "cohere/embed-english-v3.0".
    name: str

    # Embedding dimension for the model.
    dim: int

    # Index name for this model, without any precision suffix.
    index_name: str
|
||||
|
||||
|
||||
# One supported (model, index, precision) combination.
class SupportedEmbeddingModel(BaseModel):
    # Provider-qualified model identifier, e.g. "openai/text-embedding-3-small".
    name: str

    # Embedding dimension for the model.
    dim: int

    # Index name for this model at this precision.
    index_name: str

    # Precision of the embeddings stored under ``index_name``.
    embedding_precision: EmbeddingPrecision
|
||||
|
||||
|
||||
# Base embedding model configurations (without precision).
# Each row below is (model name, embedding dimension, index name); rows are
# turned into _BaseEmbeddingModel instances in the order listed.
_BASE_EMBEDDING_MODELS = [
    _BaseEmbeddingModel(name=model_name, dim=model_dim, index_name=model_index)
    for model_name, model_dim, model_index in (
        # Cloud-based models
        (
            "cohere/embed-english-v3.0",
            1024,
            "danswer_chunk_cohere_embed_english_v3_0",
        ),
        (
            "cohere/embed-english-v3.0",
            1024,
            "danswer_chunk_embed_english_v3_0",
        ),
        (
            "cohere/embed-english-light-v3.0",
            384,
            "danswer_chunk_cohere_embed_english_light_v3_0",
        ),
        (
            "cohere/embed-english-light-v3.0",
            384,
            "danswer_chunk_embed_english_light_v3_0",
        ),
        (
            "openai/text-embedding-3-large",
            3072,
            "danswer_chunk_openai_text_embedding_3_large",
        ),
        (
            "openai/text-embedding-3-large",
            3072,
            "danswer_chunk_text_embedding_3_large",
        ),
        (
            "openai/text-embedding-3-small",
            1536,
            "danswer_chunk_openai_text_embedding_3_small",
        ),
        (
            "openai/text-embedding-3-small",
            1536,
            "danswer_chunk_text_embedding_3_small",
        ),
        (
            "google/text-embedding-005",
            768,
            "danswer_chunk_google_text_embedding_005",
        ),
        (
            "google/textembedding-gecko@003",
            768,
            "danswer_chunk_google_textembedding_gecko_003",
        ),
        (
            "google/textembedding-gecko@003",
            768,
            "danswer_chunk_textembedding_gecko_003",
        ),
        (
            "voyage/voyage-large-2-instruct",
            1024,
            "danswer_chunk_voyage_large_2_instruct",
        ),
        (
            "voyage/voyage-large-2-instruct",
            1024,
            "danswer_chunk_large_2_instruct",
        ),
        (
            "voyage/voyage-light-2-instruct",
            384,
            "danswer_chunk_voyage_light_2_instruct",
        ),
        (
            "voyage/voyage-light-2-instruct",
            384,
            "danswer_chunk_light_2_instruct",
        ),
        # Self-hosted models
        (
            "nomic-ai/nomic-embed-text-v1",
            768,
            "danswer_chunk_nomic_ai_nomic_embed_text_v1",
        ),
        (
            "nomic-ai/nomic-embed-text-v1",
            768,
            "danswer_chunk_nomic_embed_text_v1",
        ),
        (
            "intfloat/e5-base-v2",
            768,
            "danswer_chunk_intfloat_e5_base_v2",
        ),
        (
            "intfloat/e5-small-v2",
            384,
            "danswer_chunk_intfloat_e5_small_v2",
        ),
        (
            "intfloat/multilingual-e5-base",
            768,
            "danswer_chunk_intfloat_multilingual_e5_base",
        ),
        (
            "intfloat/multilingual-e5-small",
            384,
            "danswer_chunk_intfloat_multilingual_e5_small",
        ),
    )
]
|
||||
|
||||
# Automatically generate both BFLOAT16 and FLOAT versions of all base models.
# The outer loop runs over precisions, so every BFLOAT16 entry precedes every
# FLOAT entry. BFLOAT16 entries get a "_bfloat16" index-name suffix.
# NOTE: the FLOAT entries keep the bare index name and must stay for backwards
# compatibility. We now default to BFLOAT16.
SUPPORTED_EMBEDDING_MODELS = [
    SupportedEmbeddingModel(
        name=model.name,
        dim=model.dim,
        index_name=model.index_name + index_suffix,
        embedding_precision=precision,
    )
    for precision, index_suffix in (
        (EmbeddingPrecision.BFLOAT16, "_bfloat16"),
        (EmbeddingPrecision.FLOAT, ""),
    )
    for model in _BASE_EMBEDDING_MODELS
]
|
@ -3,25 +3,15 @@ from sqlalchemy import delete
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from onyx.configs.model_configs import ASYM_PASSAGE_PREFIX
|
||||
from onyx.configs.model_configs import ASYM_QUERY_PREFIX
|
||||
from onyx.configs.model_configs import DEFAULT_DOCUMENT_ENCODER_MODEL
|
||||
from onyx.configs.model_configs import DOC_EMBEDDING_DIM
|
||||
from onyx.configs.model_configs import DOCUMENT_ENCODER_MODEL
|
||||
from onyx.configs.model_configs import NORMALIZE_EMBEDDINGS
|
||||
from onyx.configs.model_configs import OLD_DEFAULT_DOCUMENT_ENCODER_MODEL
|
||||
from onyx.configs.model_configs import OLD_DEFAULT_MODEL_DOC_EMBEDDING_DIM
|
||||
from onyx.configs.model_configs import OLD_DEFAULT_MODEL_NORMALIZE_EMBEDDINGS
|
||||
from onyx.context.search.models import SavedSearchSettings
|
||||
from onyx.db.engine import get_session_with_current_tenant
|
||||
from onyx.db.enums import EmbeddingPrecision
|
||||
from onyx.db.llm import fetch_embedding_provider
|
||||
from onyx.db.models import CloudEmbeddingProvider
|
||||
from onyx.db.models import IndexAttempt
|
||||
from onyx.db.models import IndexModelStatus
|
||||
from onyx.db.models import SearchSettings
|
||||
from onyx.indexing.models import IndexingSetting
|
||||
from onyx.natural_language_processing.search_nlp_models import clean_model_name
|
||||
from onyx.natural_language_processing.search_nlp_models import warm_up_cross_encoder
|
||||
from onyx.server.manage.embedding.models import (
|
||||
CloudEmbeddingProvider as ServerCloudEmbeddingProvider,
|
||||
@ -264,79 +254,3 @@ def update_search_settings_status(
|
||||
|
||||
def user_has_overridden_embedding_model() -> bool:
    """Return True when the configured document encoder is not the default one."""
    uses_default_encoder = DOCUMENT_ENCODER_MODEL == DEFAULT_DOCUMENT_ENCODER_MODEL
    return not uses_default_encoder
|
||||
|
||||
|
||||
def get_old_default_search_settings() -> SearchSettings:
|
||||
is_overridden = user_has_overridden_embedding_model()
|
||||
return SearchSettings(
|
||||
model_name=(
|
||||
DOCUMENT_ENCODER_MODEL
|
||||
if is_overridden
|
||||
else OLD_DEFAULT_DOCUMENT_ENCODER_MODEL
|
||||
),
|
||||
model_dim=(
|
||||
DOC_EMBEDDING_DIM if is_overridden else OLD_DEFAULT_MODEL_DOC_EMBEDDING_DIM
|
||||
),
|
||||
normalize=(
|
||||
NORMALIZE_EMBEDDINGS
|
||||
if is_overridden
|
||||
else OLD_DEFAULT_MODEL_NORMALIZE_EMBEDDINGS
|
||||
),
|
||||
query_prefix=(ASYM_QUERY_PREFIX if is_overridden else ""),
|
||||
passage_prefix=(ASYM_PASSAGE_PREFIX if is_overridden else ""),
|
||||
status=IndexModelStatus.PRESENT,
|
||||
index_name="danswer_chunk",
|
||||
)
|
||||
|
||||
|
||||
def get_new_default_search_settings(is_present: bool) -> SearchSettings:
|
||||
return SearchSettings(
|
||||
model_name=DOCUMENT_ENCODER_MODEL,
|
||||
model_dim=DOC_EMBEDDING_DIM,
|
||||
normalize=NORMALIZE_EMBEDDINGS,
|
||||
query_prefix=ASYM_QUERY_PREFIX,
|
||||
passage_prefix=ASYM_PASSAGE_PREFIX,
|
||||
status=IndexModelStatus.PRESENT if is_present else IndexModelStatus.FUTURE,
|
||||
index_name=f"danswer_chunk_{clean_model_name(DOCUMENT_ENCODER_MODEL)}",
|
||||
)
|
||||
|
||||
|
||||
def get_old_default_embedding_model() -> IndexingSetting:
|
||||
is_overridden = user_has_overridden_embedding_model()
|
||||
return IndexingSetting(
|
||||
model_name=(
|
||||
DOCUMENT_ENCODER_MODEL
|
||||
if is_overridden
|
||||
else OLD_DEFAULT_DOCUMENT_ENCODER_MODEL
|
||||
),
|
||||
model_dim=(
|
||||
DOC_EMBEDDING_DIM if is_overridden else OLD_DEFAULT_MODEL_DOC_EMBEDDING_DIM
|
||||
),
|
||||
embedding_precision=(EmbeddingPrecision.FLOAT),
|
||||
normalize=(
|
||||
NORMALIZE_EMBEDDINGS
|
||||
if is_overridden
|
||||
else OLD_DEFAULT_MODEL_NORMALIZE_EMBEDDINGS
|
||||
),
|
||||
query_prefix=(ASYM_QUERY_PREFIX if is_overridden else ""),
|
||||
passage_prefix=(ASYM_PASSAGE_PREFIX if is_overridden else ""),
|
||||
index_name="danswer_chunk",
|
||||
multipass_indexing=False,
|
||||
enable_contextual_rag=False,
|
||||
api_url=None,
|
||||
)
|
||||
|
||||
|
||||
def get_new_default_embedding_model() -> IndexingSetting:
|
||||
return IndexingSetting(
|
||||
model_name=DOCUMENT_ENCODER_MODEL,
|
||||
model_dim=DOC_EMBEDDING_DIM,
|
||||
embedding_precision=(EmbeddingPrecision.FLOAT),
|
||||
normalize=NORMALIZE_EMBEDDINGS,
|
||||
query_prefix=ASYM_QUERY_PREFIX,
|
||||
passage_prefix=ASYM_PASSAGE_PREFIX,
|
||||
index_name=f"danswer_chunk_{clean_model_name(DOCUMENT_ENCODER_MODEL)}",
|
||||
multipass_indexing=False,
|
||||
enable_contextual_rag=False,
|
||||
api_url=None,
|
||||
)
|
||||
|
@ -7,6 +7,8 @@ from onyx.configs.app_configs import MANAGED_VESPA
|
||||
from onyx.configs.app_configs import VESPA_NUM_ATTEMPTS_ON_STARTUP
|
||||
from onyx.configs.constants import KV_REINDEX_KEY
|
||||
from onyx.configs.constants import KV_SEARCH_SETTINGS
|
||||
from onyx.configs.embedding_configs import SUPPORTED_EMBEDDING_MODELS
|
||||
from onyx.configs.embedding_configs import SupportedEmbeddingModel
|
||||
from onyx.configs.model_configs import FAST_GEN_AI_MODEL_VERSION
|
||||
from onyx.configs.model_configs import GEN_AI_API_KEY
|
||||
from onyx.configs.model_configs import GEN_AI_MODEL_VERSION
|
||||
@ -59,8 +61,6 @@ from shared_configs.configs import ALT_INDEX_SUFFIX
|
||||
from shared_configs.configs import MODEL_SERVER_HOST
|
||||
from shared_configs.configs import MODEL_SERVER_PORT
|
||||
from shared_configs.configs import MULTI_TENANT
|
||||
from shared_configs.configs import SUPPORTED_EMBEDDING_MODELS
|
||||
from shared_configs.model_server_models import SupportedEmbeddingModel
|
||||
|
||||
|
||||
logger = setup_logger()
|
||||
|
@ -4,21 +4,26 @@ import argparse
|
||||
|
||||
import jinja2
|
||||
|
||||
from onyx.configs.embedding_configs import SUPPORTED_EMBEDDING_MODELS
|
||||
from onyx.db.enums import EmbeddingPrecision
|
||||
from onyx.utils.logger import setup_logger
|
||||
from shared_configs.configs import SUPPORTED_EMBEDDING_MODELS
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
def write_schema(index_name: str, dim: int, template: jinja2.Template) -> None:
|
||||
def write_schema(
|
||||
index_name: str,
|
||||
dim: int,
|
||||
embedding_precision: EmbeddingPrecision,
|
||||
template: jinja2.Template,
|
||||
) -> None:
|
||||
index_filename = index_name + ".sd"
|
||||
|
||||
schema = template.render(
|
||||
multi_tenant=True,
|
||||
schema_name=index_name,
|
||||
dim=dim,
|
||||
embedding_precision=EmbeddingPrecision.FLOAT.value,
|
||||
embedding_precision=embedding_precision.value,
|
||||
)
|
||||
|
||||
with open(index_filename, "w", encoding="utf-8") as f:
|
||||
@ -41,8 +46,13 @@ def main() -> None:
|
||||
|
||||
num_indexes = 0
|
||||
for model in SUPPORTED_EMBEDDING_MODELS:
|
||||
write_schema(model.index_name, model.dim, template)
|
||||
write_schema(model.index_name + "__danswer_alt_index", model.dim, template)
|
||||
write_schema(model.index_name, model.dim, model.embedding_precision, template)
|
||||
write_schema(
|
||||
model.index_name + "__danswer_alt_index",
|
||||
model.dim,
|
||||
model.embedding_precision,
|
||||
template,
|
||||
)
|
||||
num_indexes += 2
|
||||
|
||||
logger.info(f"Wrote {num_indexes} indexes.")
|
||||
|
@ -3,8 +3,6 @@ from typing import Any
|
||||
from typing import List
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from shared_configs.model_server_models import SupportedEmbeddingModel
|
||||
|
||||
# Used for logging
|
||||
SLACK_CHANNEL_ID = "channel_id"
|
||||
|
||||
@ -170,120 +168,6 @@ IGNORED_SYNCING_TENANT_LIST = (
|
||||
else None
|
||||
)
|
||||
|
||||
SUPPORTED_EMBEDDING_MODELS = [
|
||||
# Cloud-based models
|
||||
SupportedEmbeddingModel(
|
||||
name="cohere/embed-english-v3.0",
|
||||
dim=1024,
|
||||
index_name="danswer_chunk_cohere_embed_english_v3_0",
|
||||
),
|
||||
SupportedEmbeddingModel(
|
||||
name="cohere/embed-english-v3.0",
|
||||
dim=1024,
|
||||
index_name="danswer_chunk_embed_english_v3_0",
|
||||
),
|
||||
SupportedEmbeddingModel(
|
||||
name="cohere/embed-english-light-v3.0",
|
||||
dim=384,
|
||||
index_name="danswer_chunk_cohere_embed_english_light_v3_0",
|
||||
),
|
||||
SupportedEmbeddingModel(
|
||||
name="cohere/embed-english-light-v3.0",
|
||||
dim=384,
|
||||
index_name="danswer_chunk_embed_english_light_v3_0",
|
||||
),
|
||||
SupportedEmbeddingModel(
|
||||
name="openai/text-embedding-3-large",
|
||||
dim=3072,
|
||||
index_name="danswer_chunk_openai_text_embedding_3_large",
|
||||
),
|
||||
SupportedEmbeddingModel(
|
||||
name="openai/text-embedding-3-large",
|
||||
dim=3072,
|
||||
index_name="danswer_chunk_text_embedding_3_large",
|
||||
),
|
||||
SupportedEmbeddingModel(
|
||||
name="openai/text-embedding-3-small",
|
||||
dim=1536,
|
||||
index_name="danswer_chunk_openai_text_embedding_3_small",
|
||||
),
|
||||
SupportedEmbeddingModel(
|
||||
name="openai/text-embedding-3-small",
|
||||
dim=1536,
|
||||
index_name="danswer_chunk_text_embedding_3_small",
|
||||
),
|
||||
SupportedEmbeddingModel(
|
||||
name="google/text-embedding-005",
|
||||
dim=768,
|
||||
index_name="danswer_chunk_google_text_embedding_004",
|
||||
),
|
||||
SupportedEmbeddingModel(
|
||||
name="google/text-embedding-005",
|
||||
dim=768,
|
||||
index_name="danswer_chunk_text_embedding_004",
|
||||
),
|
||||
SupportedEmbeddingModel(
|
||||
name="google/textembedding-gecko@003",
|
||||
dim=768,
|
||||
index_name="danswer_chunk_google_textembedding_gecko_003",
|
||||
),
|
||||
SupportedEmbeddingModel(
|
||||
name="google/textembedding-gecko@003",
|
||||
dim=768,
|
||||
index_name="danswer_chunk_textembedding_gecko_003",
|
||||
),
|
||||
SupportedEmbeddingModel(
|
||||
name="voyage/voyage-large-2-instruct",
|
||||
dim=1024,
|
||||
index_name="danswer_chunk_voyage_large_2_instruct",
|
||||
),
|
||||
SupportedEmbeddingModel(
|
||||
name="voyage/voyage-large-2-instruct",
|
||||
dim=1024,
|
||||
index_name="danswer_chunk_large_2_instruct",
|
||||
),
|
||||
SupportedEmbeddingModel(
|
||||
name="voyage/voyage-light-2-instruct",
|
||||
dim=384,
|
||||
index_name="danswer_chunk_voyage_light_2_instruct",
|
||||
),
|
||||
SupportedEmbeddingModel(
|
||||
name="voyage/voyage-light-2-instruct",
|
||||
dim=384,
|
||||
index_name="danswer_chunk_light_2_instruct",
|
||||
),
|
||||
# Self-hosted models
|
||||
SupportedEmbeddingModel(
|
||||
name="nomic-ai/nomic-embed-text-v1",
|
||||
dim=768,
|
||||
index_name="danswer_chunk_nomic_ai_nomic_embed_text_v1",
|
||||
),
|
||||
SupportedEmbeddingModel(
|
||||
name="nomic-ai/nomic-embed-text-v1",
|
||||
dim=768,
|
||||
index_name="danswer_chunk_nomic_embed_text_v1",
|
||||
),
|
||||
SupportedEmbeddingModel(
|
||||
name="intfloat/e5-base-v2",
|
||||
dim=768,
|
||||
index_name="danswer_chunk_intfloat_e5_base_v2",
|
||||
),
|
||||
SupportedEmbeddingModel(
|
||||
name="intfloat/e5-small-v2",
|
||||
dim=384,
|
||||
index_name="danswer_chunk_intfloat_e5_small_v2",
|
||||
),
|
||||
SupportedEmbeddingModel(
|
||||
name="intfloat/multilingual-e5-base",
|
||||
dim=768,
|
||||
index_name="danswer_chunk_intfloat_multilingual_e5_base",
|
||||
),
|
||||
SupportedEmbeddingModel(
|
||||
name="intfloat/multilingual-e5-small",
|
||||
dim=384,
|
||||
index_name="danswer_chunk_intfloat_multilingual_e5_small",
|
||||
),
|
||||
]
|
||||
# Maximum (least severe) downgrade factor for chunks above the cutoff
|
||||
INDEXING_INFORMATION_CONTENT_CLASSIFICATION_MAX = float(
|
||||
os.environ.get("INDEXING_INFORMATION_CONTENT_CLASSIFICATION_MAX") or 1.0
|
||||
|
@ -78,12 +78,6 @@ class InformationContentClassificationRequests(BaseModel):
|
||||
queries: list[str]
|
||||
|
||||
|
||||
class SupportedEmbeddingModel(BaseModel):
|
||||
name: str
|
||||
dim: int
|
||||
index_name: str
|
||||
|
||||
|
||||
class ContentClassificationPrediction(BaseModel):
|
||||
predicted_label: int
|
||||
content_boost_factor: float
|
||||
|
@ -71,7 +71,7 @@ export default function EmbeddingForm() {
|
||||
disable_rerank_for_streaming: false,
|
||||
api_url: null,
|
||||
num_rerank: 0,
|
||||
embedding_precision: EmbeddingPrecision.FLOAT,
|
||||
embedding_precision: EmbeddingPrecision.BFLOAT16,
|
||||
reduced_dimension: null,
|
||||
});
|
||||
|
||||
|
Reference in New Issue
Block a user