From 3755e575a57b9294bbfbd3f06e84a24f09743d5a Mon Sep 17 00:00:00 2001 From: rkuo-danswer Date: Fri, 4 Oct 2024 09:00:48 -0700 Subject: [PATCH] harden connections to redis (#2677) * set broker_connection_retry_on_startup to silence deprecation warning (we're OK with retrying on startup) * env var for CELERY_BROKER_POOL_LIMIT * add redis retry on timeout and health check interval * set socket_keepalive = True * remove shadow declaration of REDIS_HEALTH_CHECK_INTERVAL, add socket_keepalive_options where possible * fix mypy complaint * pass through vars in docker compose * remove extra '=' * wrap in a try --- .../danswer/background/celery/celeryconfig.py | 21 +++++++++++++++++++ backend/danswer/configs/app_configs.py | 16 ++++++++++++++ backend/danswer/configs/constants.py | 12 +++++++++++ backend/danswer/redis/redis_pool.py | 11 ++++++++-- .../docker_compose/docker-compose.dev.yml | 1 + .../docker_compose/docker-compose.gpu-dev.yml | 3 ++- 6 files changed, 61 insertions(+), 3 deletions(-) diff --git a/backend/danswer/background/celery/celeryconfig.py b/backend/danswer/background/celery/celeryconfig.py index d0314adf8..1b1aa092d 100644 --- a/backend/danswer/background/celery/celeryconfig.py +++ b/backend/danswer/background/celery/celeryconfig.py @@ -1,7 +1,9 @@ # docs: https://docs.celeryq.dev/en/stable/userguide/configuration.html +from danswer.configs.app_configs import CELERY_BROKER_POOL_LIMIT from danswer.configs.app_configs import CELERY_RESULT_EXPIRES from danswer.configs.app_configs import REDIS_DB_NUMBER_CELERY from danswer.configs.app_configs import REDIS_DB_NUMBER_CELERY_RESULT_BACKEND +from danswer.configs.app_configs import REDIS_HEALTH_CHECK_INTERVAL from danswer.configs.app_configs import REDIS_HOST from danswer.configs.app_configs import REDIS_PASSWORD from danswer.configs.app_configs import REDIS_PORT @@ -9,6 +11,7 @@ from danswer.configs.app_configs import REDIS_SSL from danswer.configs.app_configs import REDIS_SSL_CA_CERTS from danswer.configs.app_configs import REDIS_SSL_CERT_REQS from danswer.configs.constants import DanswerCeleryPriority +from danswer.configs.constants import REDIS_SOCKET_KEEPALIVE_OPTIONS CELERY_SEPARATOR = ":" @@ -36,12 +39,30 @@ result_backend = f"{REDIS_SCHEME}://{CELERY_PASSWORD_PART}{REDIS_HOST}:{REDIS_PO # can stall other tasks. worker_prefetch_multiplier = 4 +broker_connection_retry_on_startup = True +broker_pool_limit = CELERY_BROKER_POOL_LIMIT + +# redis broker settings +# https://docs.celeryq.dev/projects/kombu/en/stable/reference/kombu.transport.redis.html broker_transport_options = { "priority_steps": list(range(len(DanswerCeleryPriority))), "sep": CELERY_SEPARATOR, "queue_order_strategy": "priority", + "retry_on_timeout": True, + "health_check_interval": REDIS_HEALTH_CHECK_INTERVAL, + "socket_keepalive": True, + "socket_keepalive_options": REDIS_SOCKET_KEEPALIVE_OPTIONS, } +# redis backend settings +# https://docs.celeryq.dev/en/stable/userguide/configuration.html#redis-backend-settings + +# there doesn't appear to be a way to set socket_keepalive_options on the redis result backend +redis_socket_keepalive = True +redis_retry_on_timeout = True +redis_backend_health_check_interval = REDIS_HEALTH_CHECK_INTERVAL + + task_default_priority = DanswerCeleryPriority.MEDIUM task_acks_late = True diff --git a/backend/danswer/configs/app_configs.py b/backend/danswer/configs/app_configs.py index 460e15bd1..6096561b5 100644 --- a/backend/danswer/configs/app_configs.py +++ b/backend/danswer/configs/app_configs.py @@ -164,6 +164,12 @@ REDIS_DB_NUMBER_CELERY_RESULT_BACKEND = int( ) REDIS_DB_NUMBER_CELERY = int(os.environ.get("REDIS_DB_NUMBER_CELERY", 15)) # broker +# will propagate to both our redis client as well as celery's redis client +REDIS_HEALTH_CHECK_INTERVAL = int(os.environ.get("REDIS_HEALTH_CHECK_INTERVAL", 60)) + +# our redis client only, not celery's +REDIS_POOL_MAX_CONNECTIONS = int(os.environ.get("REDIS_POOL_MAX_CONNECTIONS", 128)) + # https://docs.celeryq.dev/en/stable/userguide/configuration.html#redis-backend-settings # should be one of "required", "optional", or "none" REDIS_SSL_CERT_REQS = os.getenv("REDIS_SSL_CERT_REQS", "none") @@ -171,6 +177,16 @@ REDIS_SSL_CA_CERTS = os.getenv("REDIS_SSL_CA_CERTS", None) CELERY_RESULT_EXPIRES = int(os.environ.get("CELERY_RESULT_EXPIRES", 86400)) # seconds +# https://docs.celeryq.dev/en/stable/userguide/configuration.html#broker-pool-limit +# Setting to None may help when there is a proxy in the way closing idle connections +CELERY_BROKER_POOL_LIMIT_DEFAULT = 10 +try: + CELERY_BROKER_POOL_LIMIT = int( + os.environ.get("CELERY_BROKER_POOL_LIMIT", CELERY_BROKER_POOL_LIMIT_DEFAULT) + ) +except ValueError: + CELERY_BROKER_POOL_LIMIT = CELERY_BROKER_POOL_LIMIT_DEFAULT + ##### # Connector Configs ##### diff --git a/backend/danswer/configs/constants.py b/backend/danswer/configs/constants.py index a2b0f752f..d8470d171 100644 --- a/backend/danswer/configs/constants.py +++ b/backend/danswer/configs/constants.py @@ -1,3 +1,5 @@ +import platform +import socket from enum import auto from enum import Enum @@ -204,3 +206,13 @@ class DanswerCeleryPriority(int, Enum): MEDIUM = auto() LOW = auto() LOWEST = auto() + + +REDIS_SOCKET_KEEPALIVE_OPTIONS = {} +REDIS_SOCKET_KEEPALIVE_OPTIONS[socket.TCP_KEEPINTVL] = 15 +REDIS_SOCKET_KEEPALIVE_OPTIONS[socket.TCP_KEEPCNT] = 3 + +if platform.system() == "Darwin": + REDIS_SOCKET_KEEPALIVE_OPTIONS[socket.TCP_KEEPALIVE] = 60 # type: ignore +else: + REDIS_SOCKET_KEEPALIVE_OPTIONS[socket.TCP_KEEPIDLE] = 60 # type: ignore diff --git a/backend/danswer/redis/redis_pool.py b/backend/danswer/redis/redis_pool.py index 233a51a84..fd08b9157 100644 --- a/backend/danswer/redis/redis_pool.py +++ b/backend/danswer/redis/redis_pool.py @@ -5,14 +5,15 @@ import redis from redis.client import Redis from danswer.configs.app_configs import REDIS_DB_NUMBER +from danswer.configs.app_configs import REDIS_HEALTH_CHECK_INTERVAL from danswer.configs.app_configs import REDIS_HOST from danswer.configs.app_configs import REDIS_PASSWORD +from danswer.configs.app_configs import REDIS_POOL_MAX_CONNECTIONS from danswer.configs.app_configs import REDIS_PORT from danswer.configs.app_configs import REDIS_SSL from danswer.configs.app_configs import REDIS_SSL_CA_CERTS from danswer.configs.app_configs import REDIS_SSL_CERT_REQS - -REDIS_POOL_MAX_CONNECTIONS = 128 +from danswer.configs.constants import REDIS_SOCKET_KEEPALIVE_OPTIONS class RedisPool: @@ -59,6 +60,9 @@ class RedisPool: password=password, max_connections=max_connections, timeout=None, + health_check_interval=REDIS_HEALTH_CHECK_INTERVAL, + socket_keepalive=True, + socket_keepalive_options=REDIS_SOCKET_KEEPALIVE_OPTIONS, connection_class=redis.SSLConnection, ssl_ca_certs=ssl_ca_certs, ssl_cert_reqs=ssl_cert_reqs, @@ -71,6 +75,9 @@ class RedisPool: password=password, max_connections=max_connections, timeout=None, + health_check_interval=REDIS_HEALTH_CHECK_INTERVAL, + socket_keepalive=True, + socket_keepalive_options=REDIS_SOCKET_KEEPALIVE_OPTIONS, ) diff --git a/deployment/docker_compose/docker-compose.dev.yml b/deployment/docker_compose/docker-compose.dev.yml index 639db923a..86d988e7d 100644 --- a/deployment/docker_compose/docker-compose.dev.yml +++ b/deployment/docker_compose/docker-compose.dev.yml @@ -87,6 +87,7 @@ services: - LOG_ENDPOINT_LATENCY=${LOG_ENDPOINT_LATENCY:-} - LOG_POSTGRES_LATENCY=${LOG_POSTGRES_LATENCY:-} - LOG_POSTGRES_CONN_COUNTS=${LOG_POSTGRES_CONN_COUNTS:-} + - CELERY_BROKER_POOL_LIMIT=${CELERY_BROKER_POOL_LIMIT:-} # Chat Configs - HARD_DELETE_CHATS=${HARD_DELETE_CHATS:-} diff --git a/deployment/docker_compose/docker-compose.gpu-dev.yml b/deployment/docker_compose/docker-compose.gpu-dev.yml index 9a1d62c03..ebce01ead 100644 --- a/deployment/docker_compose/docker-compose.gpu-dev.yml +++ b/deployment/docker_compose/docker-compose.gpu-dev.yml @@ -80,7 +80,8 @@ services: # If set to `true` will enable additional logs about Vespa query performance # (time spent on finding the right docs + time spent fetching summaries from disk) - LOG_VESPA_TIMING_INFORMATION=${LOG_VESPA_TIMING_INFORMATION:-} - + - CELERY_BROKER_POOL_LIMIT=${CELERY_BROKER_POOL_LIMIT:-} + # Chat Configs - HARD_DELETE_CHATS=${HARD_DELETE_CHATS:-}