danswer/backend/Dockerfile.model_server

FROM python:3.11.7-slim-bookworm

LABEL com.danswer.maintainer="founders@onyx.app"
LABEL com.danswer.description="This image is for the Onyx model server which runs all of the \
AI models for Onyx. This container and all the code is MIT Licensed and free for all to use. \
You can find it at https://hub.docker.com/r/onyx/onyx-model-server. For more details, \
visit https://github.com/onyx-dot-app/onyx."

# Default ONYX_VERSION, typically overriden during builds by GitHub Actions.
ARG ONYX_VERSION=0.0.0-dev
ENV ONYX_VERSION=${ONYX_VERSION} \
    DANSWER_RUNNING_IN_DOCKER="true"


RUN echo "ONYX_VERSION: ${ONYX_VERSION}"

COPY ./requirements/model_server.txt /tmp/requirements.txt
RUN pip install --no-cache-dir --upgrade \
        --retries 5 \
        --timeout 30 \
        -r /tmp/requirements.txt

RUN apt-get remove -y --allow-remove-essential perl-base && \
    apt-get autoremove -y

# Pre-downloading models for setups with limited egress
# Download tokenizers, distilbert for the Onyx model
# Download model weights
# Run Nomic to pull in the custom architecture and have it cached locally
RUN python -c "from transformers import AutoTokenizer; \
AutoTokenizer.from_pretrained('distilbert-base-uncased'); \
AutoTokenizer.from_pretrained('mixedbread-ai/mxbai-rerank-xsmall-v1'); \
from huggingface_hub import snapshot_download; \
snapshot_download(repo_id='onyx-dot-app/hybrid-intent-token-classifier'); \
snapshot_download(repo_id='onyx-dot-app/information-content-model'); \
snapshot_download('nomic-ai/nomic-embed-text-v1'); \
snapshot_download('mixedbread-ai/mxbai-rerank-xsmall-v1'); \
from sentence_transformers import SentenceTransformer; \
SentenceTransformer(model_name_or_path='nomic-ai/nomic-embed-text-v1', trust_remote_code=True);"

# In case the user has volumes mounted to /root/.cache/huggingface that they've downloaded while
# running Onyx, don't overwrite it with the built in cache folder
RUN mv /root/.cache/huggingface /root/.cache/temp_huggingface

WORKDIR /app

# Utils used by model server
COPY ./onyx/utils/logger.py /app/onyx/utils/logger.py

# Place to fetch version information
COPY ./onyx/__init__.py /app/onyx/__init__.py

# Shared between Onyx Backend and Model Server
COPY ./shared_configs /app/shared_configs

# Model Server main code
COPY ./model_server /app/model_server

ENV PYTHONPATH=/app

CMD ["uvicorn", "model_server.main:app", "--host", "0.0.0.0", "--port", "9000"]