danswer/deployment/docker_compose/docker-compose.model-server-test.yml

services:
  indexing_model_server:
    image: onyxdotapp/onyx-model-server:${IMAGE_TAG:-latest}
    build:
      context: ../../backend
      dockerfile: Dockerfile.model_server
    command: >
      /bin/sh -c "if [ \"${DISABLE_MODEL_SERVER:-false}\" = \"True\" ]; then
        echo 'Skipping service...';
        exit 0;
      else
        exec uvicorn model_server.main:app --host 0.0.0.0 --port 9000;
      fi"
    restart: on-failure
    environment:
      - INDEX_BATCH_SIZE=${INDEX_BATCH_SIZE:-}
      - MIN_THREADS_ML_MODELS=${MIN_THREADS_ML_MODELS:-}
      - INDEXING_ONLY=True
      # Set to debug to get more fine-grained logs
      - LOG_LEVEL=${LOG_LEVEL:-info}
      - CLIENT_EMBEDDING_TIMEOUT=${CLIENT_EMBEDDING_TIMEOUT:-}

      # Analytics Configs
      - SENTRY_DSN=${SENTRY_DSN:-}
    volumes:
      # Not necessary, this is just to reduce download time during startup
      - indexing_huggingface_model_cache:/root/.cache/huggingface/
    logging:
      driver: json-file
      options:
        max-size: "50m"
        max-file: "6"
    ports:
      - "9000:9000"  # <-- Add this line to expose the port to the host

volumes:
  indexing_huggingface_model_cache: