services:
  api_server:
    image: onyxdotapp/onyx-backend:${IMAGE_TAG:-latest}
    build:
      context: ../../backend
      dockerfile: Dockerfile
    command: >
      /bin/sh -c "alembic upgrade head &&
      echo \"Starting Onyx Api Server\" &&
      uvicorn onyx.main:app --host 0.0.0.0 --port 8080"
    depends_on:
      - relational_db
      - index
      - cache
      - inference_model_server
    restart: always
    ports:
      - "8080:8080"
    environment:
      # Auth Settings
      - AUTH_TYPE=${AUTH_TYPE:-disabled}
      - SESSION_EXPIRE_TIME_SECONDS=${SESSION_EXPIRE_TIME_SECONDS:-}
      - ENCRYPTION_KEY_SECRET=${ENCRYPTION_KEY_SECRET:-}
      - VALID_EMAIL_DOMAINS=${VALID_EMAIL_DOMAINS:-}
      - GOOGLE_OAUTH_CLIENT_ID=${GOOGLE_OAUTH_CLIENT_ID:-}
      - GOOGLE_OAUTH_CLIENT_SECRET=${GOOGLE_OAUTH_CLIENT_SECRET:-}
      - REQUIRE_EMAIL_VERIFICATION=${REQUIRE_EMAIL_VERIFICATION:-}
      - SMTP_SERVER=${SMTP_SERVER:-} # For sending verification emails; if unspecified, defaults to 'smtp.gmail.com'
      - SMTP_PORT=${SMTP_PORT:-587} # For sending verification emails; if unspecified, defaults to '587'
      - SMTP_USER=${SMTP_USER:-}
      - SMTP_PASS=${SMTP_PASS:-}
      - EMAIL_FROM=${EMAIL_FROM:-}
      - TRACK_EXTERNAL_IDP_EXPIRY=${TRACK_EXTERNAL_IDP_EXPIRY:-}
      - CORS_ALLOWED_ORIGIN=${CORS_ALLOWED_ORIGIN:-}
      # Gen AI Settings
      - GEN_AI_MAX_TOKENS=${GEN_AI_MAX_TOKENS:-}
      - QA_TIMEOUT=${QA_TIMEOUT:-}
      - MAX_CHUNKS_FED_TO_CHAT=${MAX_CHUNKS_FED_TO_CHAT:-}
      - DISABLE_LLM_DOC_RELEVANCE=${DISABLE_LLM_DOC_RELEVANCE:-}
      - DISABLE_LLM_CHOOSE_SEARCH=${DISABLE_LLM_CHOOSE_SEARCH:-}
      - DISABLE_LLM_QUERY_REPHRASE=${DISABLE_LLM_QUERY_REPHRASE:-}
      - DISABLE_GENERATIVE_AI=${DISABLE_GENERATIVE_AI:-}
      - DISABLE_LITELLM_STREAMING=${DISABLE_LITELLM_STREAMING:-}
      - LITELLM_EXTRA_HEADERS=${LITELLM_EXTRA_HEADERS:-}
      # if set, allows for the use of the token budget system
      - TOKEN_BUDGET_GLOBALLY_ENABLED=${TOKEN_BUDGET_GLOBALLY_ENABLED:-}
      # Query Options
      - DOC_TIME_DECAY=${DOC_TIME_DECAY:-} # Recency bias for search results; scores decay at 1 / (1 + DOC_TIME_DECAY * x years)
      - HYBRID_ALPHA=${HYBRID_ALPHA:-} # Hybrid search alpha (0 for entirely keyword, 1 for entirely vector)
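      # Worked example (illustrative values, not defaults): with DOC_TIME_DECAY=0.5,
      # a document last updated 2 years ago is weighted by 1 / (1 + 0.5 * 2) = 0.5,
      # i.e. half the score of a fresh document. Likewise, HYBRID_ALPHA=0.5 would
      # blend keyword and vector scores equally.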
      - EDIT_KEYWORD_QUERY=${EDIT_KEYWORD_QUERY:-}
      - MULTILINGUAL_QUERY_EXPANSION=${MULTILINGUAL_QUERY_EXPANSION:-}
      - LANGUAGE_HINT=${LANGUAGE_HINT:-}
      - LANGUAGE_CHAT_NAMING_HINT=${LANGUAGE_CHAT_NAMING_HINT:-}
      - QA_PROMPT_OVERRIDE=${QA_PROMPT_OVERRIDE:-}
      # Other services
      - POSTGRES_HOST=relational_db
      - VESPA_HOST=index
      - REDIS_HOST=cache
      - WEB_DOMAIN=${WEB_DOMAIN:-} # For frontend auth redirect purposes
      # Don't change the NLP model configs unless you know what you're doing
      - EMBEDDING_BATCH_SIZE=${EMBEDDING_BATCH_SIZE:-}
      - DOCUMENT_ENCODER_MODEL=${DOCUMENT_ENCODER_MODEL:-}
      - DOC_EMBEDDING_DIM=${DOC_EMBEDDING_DIM:-}
      - NORMALIZE_EMBEDDINGS=${NORMALIZE_EMBEDDINGS:-}
      - ASYM_QUERY_PREFIX=${ASYM_QUERY_PREFIX:-}
      - DISABLE_RERANK_FOR_STREAMING=${DISABLE_RERANK_FOR_STREAMING:-}
      - MODEL_SERVER_HOST=${MODEL_SERVER_HOST:-inference_model_server}
      - MODEL_SERVER_PORT=${MODEL_SERVER_PORT:-}
      - VESPA_REQUEST_TIMEOUT=${VESPA_REQUEST_TIMEOUT:-}
      # We do not recommend changing this value
      - SYSTEM_RECURSION_LIMIT=${SYSTEM_RECURSION_LIMIT:-}
      # Leave this on pretty please? Nothing sensitive is collected!
      # https://docs.onyx.app/more/telemetry
      - DISABLE_TELEMETRY=${DISABLE_TELEMETRY:-}
      - LOG_LEVEL=${LOG_LEVEL:-info} # Set to debug to get more fine-grained logs
      - LOG_ALL_MODEL_INTERACTIONS=${LOG_ALL_MODEL_INTERACTIONS:-} # LiteLLM verbose logging
      # Log all of Onyx's prompts and interactions with the LLM
      - LOG_DANSWER_MODEL_INTERACTIONS=${LOG_DANSWER_MODEL_INTERACTIONS:-}
      # If set to `true`, enables additional logs about Vespa query performance
      # (time spent finding the right docs + time spent fetching summaries from disk)
      - LOG_VESPA_TIMING_INFORMATION=${LOG_VESPA_TIMING_INFORMATION:-}
      - CELERY_BROKER_POOL_LIMIT=${CELERY_BROKER_POOL_LIMIT:-}
      - LITELLM_CUSTOM_ERROR_MESSAGE_MAPPINGS=${LITELLM_CUSTOM_ERROR_MESSAGE_MAPPINGS:-}
      # Enables the use of Bedrock models or IAM auth
      - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID:-}
      - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY:-}
      - AWS_REGION_NAME=${AWS_REGION_NAME:-}
      - USE_IAM_AUTH=${USE_IAM_AUTH}
      # Chat Configs
      - HARD_DELETE_CHATS=${HARD_DELETE_CHATS:-}
      # Enterprise Edition only
      - API_KEY_HASH_ROUNDS=${API_KEY_HASH_ROUNDS:-}
      - ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=${ENABLE_PAID_ENTERPRISE_EDITION_FEATURES:-false}
      - ONYX_QUERY_HISTORY_TYPE=${ONYX_QUERY_HISTORY_TYPE:-}
    # Uncomment the volume mount below if USE_IAM_AUTH is true and you are using IAM auth for Postgres
    # volumes:
    #   - ./bundle.pem:/app/bundle.pem:ro
    extra_hosts:
      - "host.docker.internal:host-gateway"
    logging:
      driver: json-file
      options:
        max-size: "50m"
        max-file: "6"
    volumes:
      # optional, only for debugging purposes
      - api_server_logs:/var/log

  background:
    image: onyxdotapp/onyx-backend:${IMAGE_TAG:-latest}
    build:
      context: ../../backend
      dockerfile: Dockerfile
    command: /usr/bin/supervisord
    depends_on:
      - relational_db
      - index
      - cache
      - inference_model_server
      - indexing_model_server
    restart: always
    environment:
      - ENCRYPTION_KEY_SECRET=${ENCRYPTION_KEY_SECRET:-}
      # Gen AI Settings (needed by OnyxBot)
      - GEN_AI_MAX_TOKENS=${GEN_AI_MAX_TOKENS:-}
      - QA_TIMEOUT=${QA_TIMEOUT:-}
      - MAX_CHUNKS_FED_TO_CHAT=${MAX_CHUNKS_FED_TO_CHAT:-}
      - DISABLE_LLM_DOC_RELEVANCE=${DISABLE_LLM_DOC_RELEVANCE:-}
      - DISABLE_LLM_CHOOSE_SEARCH=${DISABLE_LLM_CHOOSE_SEARCH:-}
      - DISABLE_LLM_QUERY_REPHRASE=${DISABLE_LLM_QUERY_REPHRASE:-}
      - DISABLE_GENERATIVE_AI=${DISABLE_GENERATIVE_AI:-}
      - GENERATIVE_MODEL_ACCESS_CHECK_FREQ=${GENERATIVE_MODEL_ACCESS_CHECK_FREQ:-}
      - DISABLE_LITELLM_STREAMING=${DISABLE_LITELLM_STREAMING:-}
      - LITELLM_EXTRA_HEADERS=${LITELLM_EXTRA_HEADERS:-}
      # Query Options
      - DOC_TIME_DECAY=${DOC_TIME_DECAY:-} # Recency bias for search results; scores decay at 1 / (1 + DOC_TIME_DECAY * x years)
      - HYBRID_ALPHA=${HYBRID_ALPHA:-} # Hybrid search alpha (0 for entirely keyword, 1 for entirely vector)
      - EDIT_KEYWORD_QUERY=${EDIT_KEYWORD_QUERY:-}
      - MULTILINGUAL_QUERY_EXPANSION=${MULTILINGUAL_QUERY_EXPANSION:-}
      - LANGUAGE_HINT=${LANGUAGE_HINT:-}
      - LANGUAGE_CHAT_NAMING_HINT=${LANGUAGE_CHAT_NAMING_HINT:-}
      - QA_PROMPT_OVERRIDE=${QA_PROMPT_OVERRIDE:-}
      # Other Services
      - POSTGRES_HOST=relational_db
      - POSTGRES_USER=${POSTGRES_USER:-}
      - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-}
      - POSTGRES_DB=${POSTGRES_DB:-}
      - VESPA_HOST=index
      - REDIS_HOST=cache
      - WEB_DOMAIN=${WEB_DOMAIN:-} # For frontend auth redirect purposes for OAuth2 connectors
      # Don't change the NLP model configs unless you know what you're doing
      - DOCUMENT_ENCODER_MODEL=${DOCUMENT_ENCODER_MODEL:-}
      - DOC_EMBEDDING_DIM=${DOC_EMBEDDING_DIM:-}
      - NORMALIZE_EMBEDDINGS=${NORMALIZE_EMBEDDINGS:-}
      - ASYM_QUERY_PREFIX=${ASYM_QUERY_PREFIX:-} # Needed by OnyxBot
      - ASYM_PASSAGE_PREFIX=${ASYM_PASSAGE_PREFIX:-}
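      # Illustrative .env override (assumed model and values, not upstream defaults;
      # the NLP vars above must stay mutually consistent with whichever embedding
      # model you actually deploy):
      #   DOCUMENT_ENCODER_MODEL=intfloat/e5-base-v2
      #   DOC_EMBEDDING_DIM=768
      #   NORMALIZE_EMBEDDINGS=True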
      - MODEL_SERVER_HOST=${MODEL_SERVER_HOST:-inference_model_server}
      - MODEL_SERVER_PORT=${MODEL_SERVER_PORT:-}
      - INDEXING_MODEL_SERVER_HOST=${INDEXING_MODEL_SERVER_HOST:-indexing_model_server}
      # Indexing Configs
      - VESPA_SEARCHER_THREADS=${VESPA_SEARCHER_THREADS:-}
      - ENABLED_CONNECTOR_TYPES=${ENABLED_CONNECTOR_TYPES:-}
      - DISABLE_INDEX_UPDATE_ON_SWAP=${DISABLE_INDEX_UPDATE_ON_SWAP:-}
      - DASK_JOB_CLIENT_ENABLED=${DASK_JOB_CLIENT_ENABLED:-}
      - CONTINUE_ON_CONNECTOR_FAILURE=${CONTINUE_ON_CONNECTOR_FAILURE:-}
      - EXPERIMENTAL_CHECKPOINTING_ENABLED=${EXPERIMENTAL_CHECKPOINTING_ENABLED:-}
      - CONFLUENCE_CONNECTOR_LABELS_TO_SKIP=${CONFLUENCE_CONNECTOR_LABELS_TO_SKIP:-}
      - JIRA_CONNECTOR_LABELS_TO_SKIP=${JIRA_CONNECTOR_LABELS_TO_SKIP:-}
      - WEB_CONNECTOR_VALIDATE_URLS=${WEB_CONNECTOR_VALIDATE_URLS:-}
      - JIRA_API_VERSION=${JIRA_API_VERSION:-}
      - GONG_CONNECTOR_START_TIME=${GONG_CONNECTOR_START_TIME:-}
      - NOTION_CONNECTOR_ENABLE_RECURSIVE_PAGE_LOOKUP=${NOTION_CONNECTOR_ENABLE_RECURSIVE_PAGE_LOOKUP:-}
      - GITHUB_CONNECTOR_BASE_URL=${GITHUB_CONNECTOR_BASE_URL:-}
      # Onyx SlackBot Configs
      - DANSWER_BOT_DISABLE_DOCS_ONLY_ANSWER=${DANSWER_BOT_DISABLE_DOCS_ONLY_ANSWER:-}
      - DANSWER_BOT_FEEDBACK_VISIBILITY=${DANSWER_BOT_FEEDBACK_VISIBILITY:-}
      - DANSWER_BOT_DISPLAY_ERROR_MSGS=${DANSWER_BOT_DISPLAY_ERROR_MSGS:-}
      - DANSWER_BOT_RESPOND_EVERY_CHANNEL=${DANSWER_BOT_RESPOND_EVERY_CHANNEL:-}
      - DANSWER_BOT_DISABLE_COT=${DANSWER_BOT_DISABLE_COT:-}
      # Currently unused
      - NOTIFY_SLACKBOT_NO_ANSWER=${NOTIFY_SLACKBOT_NO_ANSWER:-}
      - DANSWER_BOT_MAX_QPM=${DANSWER_BOT_MAX_QPM:-}
      - DANSWER_BOT_MAX_WAIT_TIME=${DANSWER_BOT_MAX_WAIT_TIME:-}
      # Logging
      # Leave this on pretty please? Nothing sensitive is collected!
      # https://docs.onyx.app/more/telemetry
      - DISABLE_TELEMETRY=${DISABLE_TELEMETRY:-}
      - LOG_LEVEL=${LOG_LEVEL:-info} # Set to debug to get more fine-grained logs
      - LOG_ALL_MODEL_INTERACTIONS=${LOG_ALL_MODEL_INTERACTIONS:-} # LiteLLM verbose logging
      # Log all of Onyx's prompts and interactions with the LLM
      - LOG_DANSWER_MODEL_INTERACTIONS=${LOG_DANSWER_MODEL_INTERACTIONS:-}
      - LOG_VESPA_TIMING_INFORMATION=${LOG_VESPA_TIMING_INFORMATION:-}
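      # Example debugging combination (illustrative .env values): LOG_LEVEL=debug,
      # LOG_ALL_MODEL_INTERACTIONS=true, and LOG_VESPA_TIMING_INFORMATION=true
      # together surface verbose LiteLLM traffic and per-query Vespa timings in
      # the background logs (persisted via the background_logs volume below).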
      # Celery Configs (defaults are set in the supervisord.conf file;
      # prefer setting them there to keep one source of defaults)
      - CELERY_WORKER_INDEXING_CONCURRENCY=${CELERY_WORKER_INDEXING_CONCURRENCY:-}
      - CELERY_WORKER_LIGHT_CONCURRENCY=${CELERY_WORKER_LIGHT_CONCURRENCY:-}
      - CELERY_WORKER_LIGHT_PREFETCH_MULTIPLIER=${CELERY_WORKER_LIGHT_PREFETCH_MULTIPLIER:-}
      # Enterprise Edition only
      - API_KEY_HASH_ROUNDS=${API_KEY_HASH_ROUNDS:-}
      - ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=${ENABLE_PAID_ENTERPRISE_EDITION_FEATURES:-false}
      - USE_IAM_AUTH=${USE_IAM_AUTH}
      - AWS_REGION_NAME=${AWS_REGION_NAME-}
      - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID-}
      - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY-}
    # Uncomment the volume mount below if USE_IAM_AUTH is true and you are using IAM auth for Postgres
    # volumes:
    #   - ./bundle.pem:/app/bundle.pem:ro
    extra_hosts:
      - "host.docker.internal:host-gateway"
    # optional, only for debugging purposes
    volumes:
      - background_logs:/var/log
    logging:
      driver: json-file
      options:
        max-size: "50m"
        max-file: "6"

  web_server:
    image: onyxdotapp/onyx-web-server:${IMAGE_TAG:-latest}
    build:
      context: ../../web
      dockerfile: Dockerfile
      args:
        - NEXT_PUBLIC_DISABLE_STREAMING=${NEXT_PUBLIC_DISABLE_STREAMING:-false}
        - NEXT_PUBLIC_NEW_CHAT_DIRECTS_TO_SAME_PERSONA=${NEXT_PUBLIC_NEW_CHAT_DIRECTS_TO_SAME_PERSONA:-false}
        - NEXT_PUBLIC_POSITIVE_PREDEFINED_FEEDBACK_OPTIONS=${NEXT_PUBLIC_POSITIVE_PREDEFINED_FEEDBACK_OPTIONS:-}
        - NEXT_PUBLIC_NEGATIVE_PREDEFINED_FEEDBACK_OPTIONS=${NEXT_PUBLIC_NEGATIVE_PREDEFINED_FEEDBACK_OPTIONS:-}
        - NEXT_PUBLIC_DISABLE_LOGOUT=${NEXT_PUBLIC_DISABLE_LOGOUT:-}
        - NEXT_PUBLIC_DEFAULT_SIDEBAR_OPEN=${NEXT_PUBLIC_DEFAULT_SIDEBAR_OPEN:-}
        - NEXT_PUBLIC_THEME=${NEXT_PUBLIC_THEME:-}
    depends_on:
      - api_server
    restart: always
    environment:
      - INTERNAL_URL=http://api_server:8080
      - WEB_DOMAIN=${WEB_DOMAIN:-}
      - THEME_IS_DARK=${THEME_IS_DARK:-}
      # Enterprise Edition only
      - ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=${ENABLE_PAID_ENTERPRISE_EDITION_FEATURES:-false}

  inference_model_server:
    image: onyxdotapp/onyx-model-server:${IMAGE_TAG:-latest}
    # for GPU support, please read the installation guidelines in the README.md;
    # the bare minimum to get this working is to install nvidia-container-toolkit
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    build:
      context: ../../backend
      dockerfile: Dockerfile.model_server
    command: >
      /bin/sh -c "if [ \"${DISABLE_MODEL_SERVER:-false}\" = \"True\" ]; then
        echo 'Skipping service...';
        exit 0;
      else
        exec uvicorn model_server.main:app --host 0.0.0.0 --port 9000;
      fi"
    restart: on-failure
    environment:
      - INDEX_BATCH_SIZE=${INDEX_BATCH_SIZE:-}
      - MIN_THREADS_ML_MODELS=${MIN_THREADS_ML_MODELS:-}
      # Set to debug to get more fine-grained logs
      - LOG_LEVEL=${LOG_LEVEL:-info}
      - CLIENT_EMBEDDING_TIMEOUT=${CLIENT_EMBEDDING_TIMEOUT:-}
    volumes:
      # Not necessary, this is just to reduce download time during startup
      - model_cache_huggingface:/root/.cache/huggingface/
      # optional, only for debugging purposes
      - inference_model_server_logs:/var/log
    logging:
      driver: json-file
      options:
        max-size: "50m"
        max-file: "6"
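  # Note on the model servers (illustrative guidance, not part of the upstream file):
  # the command above compares ${DISABLE_MODEL_SERVER:-false} against the literal
  # string "True", so setting DISABLE_MODEL_SERVER=True in your .env turns both
  # model server containers into no-ops. On hosts without nvidia-container-toolkit,
  # the deploy.resources.reservations.devices block may prevent startup; removing it
  # (or overriding it in a compose override file) falls back to CPU inference.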
  indexing_model_server:
    image: onyxdotapp/onyx-model-server:${IMAGE_TAG:-latest}
    build:
      context: ../../backend
      dockerfile: Dockerfile.model_server
    # for GPU support, please read the installation guidelines in the README.md;
    # the bare minimum to get this working is to install nvidia-container-toolkit
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    command: >
      /bin/sh -c "if [ \"${DISABLE_MODEL_SERVER:-false}\" = \"True\" ]; then
        echo 'Skipping service...';
        exit 0;
      else
        exec uvicorn model_server.main:app --host 0.0.0.0 --port 9000;
      fi"
    restart: on-failure
    environment:
      - MIN_THREADS_ML_MODELS=${MIN_THREADS_ML_MODELS:-}
      - INDEXING_ONLY=True
      # Set to debug to get more fine-grained logs
      - LOG_LEVEL=${LOG_LEVEL:-info}
      - VESPA_SEARCHER_THREADS=${VESPA_SEARCHER_THREADS:-1}
    volumes:
      # Not necessary, this is just to reduce download time during startup
      - indexing_huggingface_model_cache:/root/.cache/huggingface/
      # optional, only for debugging purposes
      - indexing_model_server_logs:/var/log
    logging:
      driver: json-file
      options:
        max-size: "50m"
        max-file: "6"

  relational_db:
    image: postgres:15.2-alpine
    command: -c 'max_connections=250'
    restart: always
    environment:
      - POSTGRES_USER=${POSTGRES_USER:-postgres}
      - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-password}
    ports:
      - "5432:5432"
    volumes:
      - db_volume:/var/lib/postgresql/data

  # This service name cannot contain an underscore due to Vespa's expectations for the URL
  index:
    image: vespaengine/vespa:8.277.17
    restart: always
    ports:
      - "19071:19071"
      - "8081:8081"
    volumes:
      - vespa_volume:/opt/vespa/var
    logging:
      driver: json-file
      options:
        max-size: "50m"
        max-file: "6"

  nginx:
    image: nginx:1.23.4-alpine
    restart: always
    # nginx will immediately crash with `nginx: [emerg] host not found in upstream`
    # if api_server / web_server are not up
    depends_on:
      - api_server
      - web_server
    environment:
      - DOMAIN=localhost
    ports:
      - "80:80"
      - "3000:80" # allow for localhost:3000 usage, since that is the norm
    volumes:
      - ../data/nginx:/etc/nginx/conf.d
    logging:
      driver: json-file
      options:
        max-size: "50m"
        max-file: "6"
    # The specified script waits for the api_server to start up.
    # Without this we've seen issues where nginx shows no error logs but
    # does not receive any traffic.
    # NOTE: we have to use dos2unix to remove carriage return chars from the file
    # in order to make this work on both Unix-like systems and Windows
    command: >
      /bin/sh -c "dos2unix /etc/nginx/conf.d/run-nginx.sh
      && /etc/nginx/conf.d/run-nginx.sh app.conf.template.dev"

  cache:
    image: redis:7.4-alpine
    restart: always
    ports:
      - "6379:6379"
    # Docker silently mounts /data even without an explicit volume mount, which
    # enables persistence. Explicitly setting save and appendonly forces ephemeral behavior.
    command: redis-server --save "" --appendonly no

volumes:
  db_volume:
  vespa_volume:
    # Created by the container itself
  model_cache_huggingface:
  indexing_huggingface_model_cache:
  # for logs that we don't want to lose on container restarts
  api_server_logs:
  background_logs:
  inference_model_server_logs:
  indexing_model_server_logs:
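# A minimal example .env for this compose file (illustrative values; any of the
# ${VAR:-} variables above can be set the same way). Note that USE_IAM_AUTH has no
# default here, so set it explicitly to silence the "variable is not set" warning:
#
#   IMAGE_TAG=latest
#   AUTH_TYPE=disabled
#   WEB_DOMAIN=http://localhost:3000
#   POSTGRES_USER=postgres
#   POSTGRES_PASSWORD=password
#   USE_IAM_AUTH=false
#
# Then, assuming this file is saved as docker-compose.dev.yml next to the .env,
# bring the stack up with:
#   docker compose -f docker-compose.dev.yml -p onyx-stack up -d --build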