This commit is contained in:
Yuhong Sun 2024-07-05 08:25:42 -07:00
parent fa049f4f98
commit 4fb1bb084d
6 changed files with 39 additions and 8 deletions

View File

@@ -68,7 +68,7 @@ RUN apt-get update && \
rm -f /usr/local/lib/python3.11/site-packages/tornado/test/test.key rm -f /usr/local/lib/python3.11/site-packages/tornado/test/test.key
# Pre-downloading models for setups with limited egress # Pre-downloading models for setups with limited egress
RUN python -c "from transformers import AutoTokenizer; AutoTokenizer.from_pretrained('intfloat/e5-base-v2')" RUN python -c "from transformers import AutoTokenizer; AutoTokenizer.from_pretrained('nomic-ai/nomic-embed-text-v1.5')"
# Pre-downloading NLTK for setups with limited egress # Pre-downloading NLTK for setups with limited egress
RUN python -c "import nltk; \ RUN python -c "import nltk; \

View File

@@ -18,6 +18,8 @@ RUN apt-get remove -y --allow-remove-essential perl-base && \
apt-get autoremove -y apt-get autoremove -y
# Pre-downloading models for setups with limited egress # Pre-downloading models for setups with limited egress
ENV HF_HOME=/hf_model_cache
WORKDIR /hf_model_cache
RUN python -c "from transformers import AutoModel, AutoTokenizer, TFDistilBertForSequenceClassification; \ RUN python -c "from transformers import AutoModel, AutoTokenizer, TFDistilBertForSequenceClassification; \
from huggingface_hub import snapshot_download; \ from huggingface_hub import snapshot_download; \
AutoTokenizer.from_pretrained('danswer/intent-model'); \ AutoTokenizer.from_pretrained('danswer/intent-model'); \
@@ -27,6 +29,10 @@ snapshot_download('danswer/intent-model'); \
snapshot_download('intfloat/e5-base-v2'); \ snapshot_download('intfloat/e5-base-v2'); \
snapshot_download('mixedbread-ai/mxbai-rerank-xsmall-v1')" snapshot_download('mixedbread-ai/mxbai-rerank-xsmall-v1')"
RUN python -c "from sentence_transformers import SentenceTransformer; \
model = SentenceTransformer('nomic-ai/nomic-embed-text-v1.5', trust_remote_code=True); \
model.save('nomic-ai/nomic-embed-text-v1.5')"
WORKDIR /app WORKDIR /app
# Utils used by model server # Utils used by model server

View File

@@ -12,10 +12,9 @@ import os
# The useable models configured as below must be SentenceTransformer compatible # The useable models configured as below must be SentenceTransformer compatible
# NOTE: DO NOT CHANGE SET THESE UNLESS YOU KNOW WHAT YOU ARE DOING # NOTE: DO NOT CHANGE SET THESE UNLESS YOU KNOW WHAT YOU ARE DOING
# IDEALLY, YOU SHOULD CHANGE EMBEDDING MODELS VIA THE UI # IDEALLY, YOU SHOULD CHANGE EMBEDDING MODELS VIA THE UI
DEFAULT_DOCUMENT_ENCODER_MODEL = "intfloat/e5-base-v2" DEFAULT_DOCUMENT_ENCODER_MODEL = "nomic-ai/nomic-embed-text-v1"
DOCUMENT_ENCODER_MODEL = ( DOCUMENT_ENCODER_MODEL = DEFAULT_DOCUMENT_ENCODER_MODEL
os.environ.get("DOCUMENT_ENCODER_MODEL") or DEFAULT_DOCUMENT_ENCODER_MODEL
)
# If the below is changed, Vespa deployment must also be changed # If the below is changed, Vespa deployment must also be changed
DOC_EMBEDDING_DIM = int(os.environ.get("DOC_EMBEDDING_DIM") or 768) DOC_EMBEDDING_DIM = int(os.environ.get("DOC_EMBEDDING_DIM") or 768)
# Model should be chosen with 512 context size, ideally don't change this # Model should be chosen with 512 context size, ideally don't change this

View File

@@ -82,10 +82,10 @@ def embed_text(
max_context_length: int, max_context_length: int,
normalize_embeddings: bool, normalize_embeddings: bool,
) -> list[list[float]]: ) -> list[list[float]]:
model = get_embedding_model( model = SentenceTransformer(
model_name=model_name, max_context_length=max_context_length "nomic-ai/nomic-embed-text-v1.5", trust_remote_code=True
) )
embeddings = model.encode(texts, normalize_embeddings=normalize_embeddings) embeddings = model.encode(texts)
if not isinstance(embeddings, list): if not isinstance(embeddings, list):
embeddings = embeddings.tolist() embeddings = embeddings.tolist()

View File

@@ -1,3 +1,4 @@
einops==0.8.0
fastapi==0.109.2 fastapi==0.109.2
h5py==3.9.0 h5py==3.9.0
pydantic==1.10.13 pydantic==1.10.13

View File

@@ -101,6 +101,8 @@ services:
options: options:
max-size: "50m" max-size: "50m"
max-file: "6" max-file: "6"
networks:
- internal
background: background:
@@ -203,6 +205,8 @@ services:
options: options:
max-size: "50m" max-size: "50m"
max-file: "6" max-file: "6"
networks:
- internal
web_server: web_server:
@@ -231,6 +235,8 @@ services:
# Enterprise Edition only # Enterprise Edition only
- ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=${ENABLE_PAID_ENTERPRISE_EDITION_FEATURES:-false} - ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=${ENABLE_PAID_ENTERPRISE_EDITION_FEATURES:-false}
networks:
- internal
inference_model_server: inference_model_server:
@@ -258,6 +264,8 @@ services:
options: options:
max-size: "50m" max-size: "50m"
max-file: "6" max-file: "6"
networks:
- internal
indexing_model_server: indexing_model_server:
@@ -286,6 +294,9 @@ services:
options: options:
max-size: "50m" max-size: "50m"
max-file: "6" max-file: "6"
networks:
- internal
relational_db: relational_db:
image: postgres:15.2-alpine image: postgres:15.2-alpine
@@ -297,6 +308,8 @@ services:
- "5432:5432" - "5432:5432"
volumes: volumes:
- db_volume:/var/lib/postgresql/data - db_volume:/var/lib/postgresql/data
networks:
- internal
# This container name cannot have an underscore in it due to Vespa expectations of the URL # This container name cannot have an underscore in it due to Vespa expectations of the URL
@@ -313,6 +326,8 @@ services:
options: options:
max-size: "50m" max-size: "50m"
max-file: "6" max-file: "6"
networks:
- internal
nginx: nginx:
@@ -343,6 +358,9 @@ services:
command: > command: >
/bin/sh -c "dos2unix /etc/nginx/conf.d/run-nginx.sh /bin/sh -c "dos2unix /etc/nginx/conf.d/run-nginx.sh
&& /etc/nginx/conf.d/run-nginx.sh app.conf.template.dev" && /etc/nginx/conf.d/run-nginx.sh app.conf.template.dev"
networks:
- internal
- local
volumes: volumes:
@@ -350,3 +368,10 @@ volumes:
vespa_volume: vespa_volume:
# Created by the container itself # Created by the container itself
model_cache_huggingface: model_cache_huggingface:
networks:
internal:
internal: true
local:
internal: false