From 4fb1bb084d9ed84f83a0233dc0f773d5f5e7a7ab Mon Sep 17 00:00:00 2001 From: Yuhong Sun Date: Fri, 5 Jul 2024 08:25:42 -0700 Subject: [PATCH] k --- backend/Dockerfile | 2 +- backend/Dockerfile.model_server | 6 +++++ backend/danswer/configs/model_configs.py | 7 +++--- backend/model_server/encoders.py | 6 ++--- backend/requirements/model_server.txt | 1 + .../docker_compose/docker-compose.dev.yml | 25 +++++++++++++++++++ 6 files changed, 39 insertions(+), 8 deletions(-) diff --git a/backend/Dockerfile b/backend/Dockerfile index 7f9daad94..8fe82eaa8 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -68,7 +68,7 @@ RUN apt-get update && \ rm -f /usr/local/lib/python3.11/site-packages/tornado/test/test.key # Pre-downloading models for setups with limited egress -RUN python -c "from transformers import AutoTokenizer; AutoTokenizer.from_pretrained('intfloat/e5-base-v2')" +RUN python -c "from transformers import AutoTokenizer; AutoTokenizer.from_pretrained('nomic-ai/nomic-embed-text-v1.5')" # Pre-downloading NLTK for setups with limited egress RUN python -c "import nltk; \ diff --git a/backend/Dockerfile.model_server b/backend/Dockerfile.model_server index 89f24e2ac..58d24f3b4 100644 --- a/backend/Dockerfile.model_server +++ b/backend/Dockerfile.model_server @@ -18,6 +18,8 @@ RUN apt-get remove -y --allow-remove-essential perl-base && \ apt-get autoremove -y # Pre-downloading models for setups with limited egress +ENV HF_HOME=/hf_model_cache +WORKDIR /hf_model_cache RUN python -c "from transformers import AutoModel, AutoTokenizer, TFDistilBertForSequenceClassification; \ from huggingface_hub import snapshot_download; \ AutoTokenizer.from_pretrained('danswer/intent-model'); \ @@ -27,6 +29,10 @@ snapshot_download('danswer/intent-model'); \ snapshot_download('intfloat/e5-base-v2'); \ snapshot_download('mixedbread-ai/mxbai-rerank-xsmall-v1')" +RUN python -c "from sentence_transformers import SentenceTransformer; \ +model = 
SentenceTransformer('nomic-ai/nomic-embed-text-v1.5', trust_remote_code=True); \ +model.save('nomic-ai/nomic-embed-text-v1.5')" + WORKDIR /app # Utils used by model server diff --git a/backend/danswer/configs/model_configs.py b/backend/danswer/configs/model_configs.py index f5be89779..78d803cbf 100644 --- a/backend/danswer/configs/model_configs.py +++ b/backend/danswer/configs/model_configs.py @@ -12,10 +12,9 @@ import os # The useable models configured as below must be SentenceTransformer compatible # NOTE: DO NOT CHANGE SET THESE UNLESS YOU KNOW WHAT YOU ARE DOING # IDEALLY, YOU SHOULD CHANGE EMBEDDING MODELS VIA THE UI -DEFAULT_DOCUMENT_ENCODER_MODEL = "intfloat/e5-base-v2" -DOCUMENT_ENCODER_MODEL = ( - os.environ.get("DOCUMENT_ENCODER_MODEL") or DEFAULT_DOCUMENT_ENCODER_MODEL -) +DEFAULT_DOCUMENT_ENCODER_MODEL = "nomic-ai/nomic-embed-text-v1.5" +DOCUMENT_ENCODER_MODEL = DEFAULT_DOCUMENT_ENCODER_MODEL + # If the below is changed, Vespa deployment must also be changed DOC_EMBEDDING_DIM = int(os.environ.get("DOC_EMBEDDING_DIM") or 768) # Model should be chosen with 512 context size, ideally don't change this diff --git a/backend/model_server/encoders.py b/backend/model_server/encoders.py index 705386a8c..50680a292 100644 --- a/backend/model_server/encoders.py +++ b/backend/model_server/encoders.py @@ -82,10 +82,10 @@ def embed_text( max_context_length: int, normalize_embeddings: bool, ) -> list[list[float]]: - model = get_embedding_model( - model_name=model_name, max_context_length=max_context_length + model = SentenceTransformer( + "nomic-ai/nomic-embed-text-v1.5", trust_remote_code=True ) - embeddings = model.encode(texts, normalize_embeddings=normalize_embeddings) + embeddings = model.encode(texts) if not isinstance(embeddings, list): embeddings = embeddings.tolist() diff --git a/backend/requirements/model_server.txt b/backend/requirements/model_server.txt index 4ef8ffa5b..512d05352 100644 --- a/backend/requirements/model_server.txt +++ 
b/backend/requirements/model_server.txt @@ -1,3 +1,4 @@ +einops==0.8.0 fastapi==0.109.2 h5py==3.9.0 pydantic==1.10.13 diff --git a/deployment/docker_compose/docker-compose.dev.yml b/deployment/docker_compose/docker-compose.dev.yml index ea58ded3f..805b28636 100644 --- a/deployment/docker_compose/docker-compose.dev.yml +++ b/deployment/docker_compose/docker-compose.dev.yml @@ -101,6 +101,8 @@ services: options: max-size: "50m" max-file: "6" + networks: + - internal background: @@ -203,6 +205,8 @@ services: options: max-size: "50m" max-file: "6" + networks: + - internal web_server: @@ -231,6 +235,8 @@ services: # Enterprise Edition only - ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=${ENABLE_PAID_ENTERPRISE_EDITION_FEATURES:-false} + networks: + - internal inference_model_server: @@ -258,6 +264,8 @@ services: options: max-size: "50m" max-file: "6" + networks: + - internal indexing_model_server: @@ -286,6 +294,9 @@ services: options: max-size: "50m" max-file: "6" + networks: + - internal + relational_db: image: postgres:15.2-alpine @@ -297,6 +308,8 @@ services: - "5432:5432" volumes: - db_volume:/var/lib/postgresql/data + networks: + - internal # This container name cannot have an underscore in it due to Vespa expectations of the URL @@ -313,6 +326,8 @@ services: options: max-size: "50m" max-file: "6" + networks: + - internal nginx: @@ -343,6 +358,9 @@ services: command: > /bin/sh -c "dos2unix /etc/nginx/conf.d/run-nginx.sh && /etc/nginx/conf.d/run-nginx.sh app.conf.template.dev" + networks: + - internal + - local volumes: @@ -350,3 +368,10 @@ volumes: vespa_volume: # Created by the container itself model_cache_huggingface: + + +networks: + internal: + internal: true + local: + internal: false