mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-03-27 10:13:05 +01:00
* Update text embedding model to version 005 and enhance embedding retrieval process * re * Fix formatting issues * Add support for Bedrock reranking provider and AWS credentials handling * fix: improve AWS key format validation and error messages * Fix vertex embedding model crash * feat: add environment template for local development setup * Add display name for Claude 3.7 Sonnet model * Add display names for Gemini 2.0 models and update Claude 3.7 Sonnet entry * Fix ruff errors by ensuring lines are within 130 characters * revert to currently default onyx browser settings * add / fix boto requirements --------- Co-authored-by: ferdinand loesch <f.loesch@sportradar.com> Co-authored-by: Ferdinand Loesch <ferdinandloesch@me.com> Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
37 lines
1.1 KiB
Python
37 lines
1.1 KiB
Python
from shared_configs.enums import EmbeddingProvider
|
|
from shared_configs.enums import EmbedTextType
|
|
|
|
|
|
# The token "hi " repeated 512 times. Judging by the name, this is the
# text sent through a model to warm it up -- confirm against callers.
MODEL_WARM_UP_STRING = "hi " * 512

# Default model identifier for each provider named in the constant.
DEFAULT_OPENAI_MODEL = "text-embedding-3-small"
DEFAULT_COHERE_MODEL = "embed-english-light-v3.0"
DEFAULT_VOYAGE_MODEL = "voyage-large-2-instruct"
DEFAULT_VERTEX_MODEL = "text-embedding-005"
|
|
|
|
|
|
class EmbeddingModelTextType:
    """Translate an ``EmbedTextType`` into a provider-specific string.

    ``PROVIDER_TEXT_TYPE_MAP`` is keyed first by ``EmbeddingProvider`` and
    then by ``EmbedTextType``; each leaf is the string used for that
    provider / text-type pair. Only COHERE, VOYAGE and GOOGLE have entries.
    """

    PROVIDER_TEXT_TYPE_MAP = {
        EmbeddingProvider.COHERE: {
            EmbedTextType.QUERY: "search_query",
            EmbedTextType.PASSAGE: "search_document",
        },
        EmbeddingProvider.VOYAGE: {
            EmbedTextType.QUERY: "query",
            EmbedTextType.PASSAGE: "document",
        },
        EmbeddingProvider.GOOGLE: {
            EmbedTextType.QUERY: "RETRIEVAL_QUERY",
            EmbedTextType.PASSAGE: "RETRIEVAL_DOCUMENT",
        },
    }

    @staticmethod
    def get_type(provider: EmbeddingProvider, text_type: EmbedTextType) -> str:
        """Return the string mapped to ``text_type`` for ``provider``.

        Raises:
            KeyError: if ``provider`` has no entry in
                ``PROVIDER_TEXT_TYPE_MAP``, or ``text_type`` has no entry
                under that provider.
        """
        # Two-step lookup: select the provider's table, then the text type.
        provider_types = EmbeddingModelTextType.PROVIDER_TEXT_TYPE_MAP[provider]
        return provider_types[text_type]
|
|
|
|
|
|
class GPUStatus:
    """String constants naming the possible GPU status values.

    The attributes are plain ``str`` values, so they compare equal to the
    corresponding string literals.
    """

    CUDA = "cuda"
    MAC_MPS = "mps"
    NONE = "none"
|