Model Server (#695)

Provides the ability to pull out the NLP models into a separate model server which can then be hosted on a GPU instance if desired.
This commit is contained in:
Yuhong Sun
2023-11-06 16:36:09 -08:00
committed by GitHub
parent fe938b6fc6
commit 7433dddac3
20 changed files with 614 additions and 85 deletions

View File

@@ -42,6 +42,8 @@ services:
- SKIP_RERANKING=${SKIP_RERANKING:-}
- QA_PROMPT_OVERRIDE=${QA_PROMPT_OVERRIDE:-}
- EDIT_KEYWORD_QUERY=${EDIT_KEYWORD_QUERY:-}
- MODEL_SERVER_HOST=${MODEL_SERVER_HOST:-}
- MODEL_SERVER_PORT=${MODEL_SERVER_PORT:-}
# Set to debug to get more fine-grained logs
- LOG_LEVEL=${LOG_LEVEL:-info}
volumes:
@@ -94,6 +96,8 @@ services:
- QA_PROMPT_OVERRIDE=${QA_PROMPT_OVERRIDE:-}
- EDIT_KEYWORD_QUERY=${EDIT_KEYWORD_QUERY:-}
- MIN_THREADS_ML_MODELS=${MIN_THREADS_ML_MODELS:-}
- MODEL_SERVER_HOST=${MODEL_SERVER_HOST:-}
- MODEL_SERVER_PORT=${MODEL_SERVER_PORT:-}
# Set to debug to get more fine-grained logs
- LOG_LEVEL=${LOG_LEVEL:-info}
volumes:
@@ -157,6 +161,25 @@ services:
/bin/sh -c "sleep 10 &&
envsubst '$$\{DOMAIN\}' < /etc/nginx/conf.d/app.conf.template.dev > /etc/nginx/conf.d/app.conf &&
while :; do sleep 6h & wait $${!}; nginx -s reload; done & nginx -g \"daemon off;\""
# Run with --profile model-server to bring up the danswer-model-server container
model_server:
image: danswer/danswer-model-server:latest
build:
context: ../../backend
dockerfile: Dockerfile.model_server
profiles:
- "model-server"
command: uvicorn model_server.main:app --host 0.0.0.0 --port 9000
restart: always
environment:
- DOCUMENT_ENCODER_MODEL=${DOCUMENT_ENCODER_MODEL:-}
- NORMALIZE_EMBEDDINGS=${NORMALIZE_EMBEDDINGS:-}
- MIN_THREADS_ML_MODELS=${MIN_THREADS_ML_MODELS:-}
# Set to debug to get more fine-grained logs
- LOG_LEVEL=${LOG_LEVEL:-info}
volumes:
- model_cache_torch:/root/.cache/torch/
- model_cache_huggingface:/root/.cache/huggingface/
volumes:
local_dynamic_storage:
file_connector_tmp_storage: # used to store files uploaded by the user temporarily while we are indexing them

View File

@@ -101,6 +101,25 @@ services:
while :; do sleep 6h & wait $${!}; nginx -s reload; done & nginx -g \"daemon off;\""
env_file:
- .env.nginx
# Run with --profile model-server to bring up the danswer-model-server container
model_server:
image: danswer/danswer-model-server:latest
build:
context: ../../backend
dockerfile: Dockerfile.model_server
profiles:
- "model-server"
command: uvicorn model_server.main:app --host 0.0.0.0 --port 9000
restart: always
environment:
- DOCUMENT_ENCODER_MODEL=${DOCUMENT_ENCODER_MODEL:-}
- NORMALIZE_EMBEDDINGS=${NORMALIZE_EMBEDDINGS:-}
- MIN_THREADS_ML_MODELS=${MIN_THREADS_ML_MODELS:-}
# Set to debug to get more fine-grained logs
- LOG_LEVEL=${LOG_LEVEL:-info}
volumes:
- model_cache_torch:/root/.cache/torch/
- model_cache_huggingface:/root/.cache/huggingface/
volumes:
local_dynamic_storage:
file_connector_tmp_storage: # used to store files uploaded by the user temporarily while we are indexing them

View File

@@ -110,6 +110,25 @@ services:
- ../data/certbot/conf:/etc/letsencrypt
- ../data/certbot/www:/var/www/certbot
entrypoint: "/bin/sh -c 'trap exit TERM; while :; do certbot renew; sleep 12h & wait $${!}; done;'"
# Run with --profile model-server to bring up the danswer-model-server container
model_server:
image: danswer/danswer-model-server:latest
build:
context: ../../backend
dockerfile: Dockerfile.model_server
profiles:
- "model-server"
command: uvicorn model_server.main:app --host 0.0.0.0 --port 9000
restart: always
environment:
- DOCUMENT_ENCODER_MODEL=${DOCUMENT_ENCODER_MODEL:-}
- NORMALIZE_EMBEDDINGS=${NORMALIZE_EMBEDDINGS:-}
- MIN_THREADS_ML_MODELS=${MIN_THREADS_ML_MODELS:-}
# Set to debug to get more fine-grained logs
- LOG_LEVEL=${LOG_LEVEL:-info}
volumes:
- model_cache_torch:/root/.cache/torch/
- model_cache_huggingface:/root/.cache/huggingface/
volumes:
local_dynamic_storage:
file_connector_tmp_storage: # used to store files uploaded by the user temporarily while we are indexing them