From 119aefba88d9935716b2916287b467379de09fab Mon Sep 17 00:00:00 2001
From: Yuhong Sun
Date: Sun, 18 Aug 2024 19:18:28 -0700
Subject: [PATCH] Add log files to containers (#2164)

---
 ...92fa265c_add_index_attempt_errors_table.py |  4 +-
 backend/danswer/background/update.py          |  1 +
 backend/danswer/main.py                       |  4 +-
 backend/danswer/utils/logger.py               | 47 +++++++++++--------
 backend/shared_configs/configs.py             |  9 +++-
 backend/supervisord.conf                      | 26 +++--------
 6 files changed, 46 insertions(+), 45 deletions(-)

diff --git a/backend/alembic/versions/c5b692fa265c_add_index_attempt_errors_table.py b/backend/alembic/versions/c5b692fa265c_add_index_attempt_errors_table.py
index c49fea219..e4808042a 100644
--- a/backend/alembic/versions/c5b692fa265c_add_index_attempt_errors_table.py
+++ b/backend/alembic/versions/c5b692fa265c_add_index_attempt_errors_table.py
@@ -12,8 +12,8 @@ from sqlalchemy.dialects import postgresql
 # revision identifiers, used by Alembic.
 revision = "c5b692fa265c"
 down_revision = "4a951134c801"
-branch_labels = None
-depends_on = None
+branch_labels: None = None
+depends_on: None = None
 
 
 def upgrade() -> None:
diff --git a/backend/danswer/background/update.py b/backend/danswer/background/update.py
index 1949f7905..e05cfbca6 100755
--- a/backend/danswer/background/update.py
+++ b/backend/danswer/background/update.py
@@ -46,6 +46,7 @@ from shared_configs.configs import INDEXING_MODEL_SERVER_HOST
 from shared_configs.configs import LOG_LEVEL
 from shared_configs.configs import MODEL_SERVER_PORT
 
+
 logger = setup_logger()
 
 # If the indexing dies, it's most likely due to resource constraints,
diff --git a/backend/danswer/main.py b/backend/danswer/main.py
index 76789e474..5e67bfd8d 100644
--- a/backend/danswer/main.py
+++ b/backend/danswer/main.py
@@ -254,10 +254,10 @@ async def lifespan(app: FastAPI) -> AsyncGenerator:
         verify_auth()
 
     if OAUTH_CLIENT_ID and OAUTH_CLIENT_SECRET:
-        logger.info("Both OAuth Client ID and Secret are configured.")
+        logger.notice("Both OAuth Client ID and Secret are configured.")
 
     if DISABLE_GENERATIVE_AI:
-        logger.info("Generative AI Q&A disabled")
+        logger.notice("Generative AI Q&A disabled")
 
     # fill up Postgres connection pools
     await warm_up_connections()
diff --git a/backend/danswer/utils/logger.py b/backend/danswer/utils/logger.py
index 8938b822d..19fb14ae7 100644
--- a/backend/danswer/utils/logger.py
+++ b/backend/danswer/utils/logger.py
@@ -1,8 +1,11 @@
 import logging
+import logging.handlers  # RotatingFileHandler lives in the handlers submodule
 import os
 from collections.abc import MutableMapping
 from typing import Any
 
+from shared_configs.configs import DEV_LOGGING_ENABLED
+from shared_configs.configs import LOG_FILE_NAME
 from shared_configs.configs import LOG_LEVEL
 
 
@@ -39,16 +42,12 @@ def get_log_level_from_str(log_level_str: str = LOG_LEVEL) -> int:
     return log_level_dict.get(log_level_str.upper(), logging.getLevelName("NOTICE"))
 
 
-class _IndexAttemptLoggingAdapter(logging.LoggerAdapter):
-    """This is used to globally add the index attempt id to all log messages
-    during indexing by workers. This is done so that the logs can be filtered
-    by index attempt ID to get a better idea of what happened during a specific
-    indexing attempt. If the index attempt ID is not set, then this adapter
-    is a no-op."""
-
+class DanswerLoggingAdapter(logging.LoggerAdapter):
     def process(
         self, msg: str, kwargs: MutableMapping[str, Any]
     ) -> tuple[str, MutableMapping[str, Any]]:
+        # If this is an indexing job, add the attempt ID to the log message
+        # This helps filter the logs for this specific indexing attempt
         attempt_id = IndexAttemptSingleton.get_index_attempt_id()
         if attempt_id is None:
             return msg, kwargs
@@ -56,7 +55,8 @@ class _IndexAttemptLoggingAdapter(logging.LoggerAdapter):
         return f"[Attempt ID: {attempt_id}] {msg}", kwargs
 
     def notice(self, msg: str, *args: Any, **kwargs: Any) -> None:
-        self.log(logging.getLevelName("NOTICE"), msg, *args, **kwargs)
+        # Stacklevel is set to 2 to point to the actual caller of notice instead of here
+        self.log(logging.getLevelName("NOTICE"), msg, *args, **kwargs, stacklevel=2)
 
 
 class ColoredFormatter(logging.Formatter):
@@ -96,13 +96,12 @@ def get_standard_formatter() -> ColoredFormatter:
 def setup_logger(
     name: str = __name__,
     log_level: int = get_log_level_from_str(),
-    logfile_name: str | None = None,
-) -> _IndexAttemptLoggingAdapter:
+) -> DanswerLoggingAdapter:
     logger = logging.getLogger(name)
 
     # If the logger already has handlers, assume it was already configured and return it.
     if logger.handlers:
-        return _IndexAttemptLoggingAdapter(logger)
+        return DanswerLoggingAdapter(logger)
 
     logger.setLevel(log_level)
 
@@ -114,17 +113,27 @@ def setup_logger(
 
     logger.addHandler(handler)
 
-    if logfile_name:
-        is_containerized = os.path.exists("/.dockerenv")
-        file_name_template = (
-            "/var/log/{name}.log" if is_containerized else "./log/{name}.log"
-        )
-        file_handler = logging.FileHandler(file_name_template.format(name=logfile_name))
-        logger.addHandler(file_handler)
+    is_containerized = os.path.exists("/.dockerenv")
+    if LOG_FILE_NAME and (is_containerized or DEV_LOGGING_ENABLED):
+        log_levels = ["debug", "info", "notice"]
+        for level in log_levels:
+            file_name = (
+                f"/var/log/{LOG_FILE_NAME}_{level}.log"
+                if is_containerized
+                else f"./log/{LOG_FILE_NAME}_{level}.log"
+            )
+            file_handler = logging.handlers.RotatingFileHandler(
+                file_name,
+                maxBytes=25 * 1024 * 1024,  # 25 MB
+                backupCount=5,  # Keep 5 backup files
+            )
+            file_handler.setLevel(get_log_level_from_str(level))
+            file_handler.setFormatter(formatter)
+            logger.addHandler(file_handler)
 
     logger.notice = lambda msg, *args, **kwargs: logger.log(logging.getLevelName("NOTICE"), msg, *args, **kwargs)  # type: ignore
 
-    return _IndexAttemptLoggingAdapter(logger)
+    return DanswerLoggingAdapter(logger)
 
 
 def setup_uvicorn_logger() -> None:
diff --git a/backend/shared_configs/configs.py b/backend/shared_configs/configs.py
index 2311daca5..3e69eae01 100644
--- a/backend/shared_configs/configs.py
+++ b/backend/shared_configs/configs.py
@@ -36,7 +36,6 @@ DISABLE_RERANK_FOR_STREAMING = (
     os.environ.get("DISABLE_RERANK_FOR_STREAMING", "").lower() == "true"
 )
 
-
 # This controls the minimum number of pytorch "threads" to allocate to the embedding
 # model. If torch finds more threads on its own, this value is not used.
 MIN_THREADS_ML_MODELS = int(os.environ.get("MIN_THREADS_ML_MODELS") or 1)
 
@@ -45,5 +44,11 @@ MIN_THREADS_ML_MODELS = int(os.environ.get("MIN_THREADS_ML_MODELS") or 1)
 # or intent classification
 INDEXING_ONLY = os.environ.get("INDEXING_ONLY", "").lower() == "true"
 
-# notset, debug, info, warning, error, or critical
+# The name of the log files that this process writes to. Each process started
+# via supervisord overrides this so its logs land in separate files.
+LOG_FILE_NAME = os.environ.get("LOG_FILE_NAME") or "danswer"
+
+# Enable generating persistent log files for local dev environments
+DEV_LOGGING_ENABLED = os.environ.get("DEV_LOGGING_ENABLED", "").lower() == "true"
+# notset, debug, info, notice, warning, error, or critical
 LOG_LEVEL = os.environ.get("LOG_LEVEL", "info")
diff --git a/backend/supervisord.conf b/backend/supervisord.conf
index 6d725cf77..289bd8df8 100644
--- a/backend/supervisord.conf
+++ b/backend/supervisord.conf
@@ -1,15 +1,14 @@
 [supervisord]
 nodaemon=true
+user=root
 logfile=/var/log/supervisord.log
 
 # Indexing is the heaviest job, also requires some CPU intensive steps
 # Cannot place this in Celery for now because Celery must run as a single process (see note below)
 # Indexing uses multi-processing to speed things up
 [program:document_indexing]
-environment=CURRENT_PROCESS_IS_AN_INDEXING_JOB=true
+environment=CURRENT_PROCESS_IS_AN_INDEXING_JOB=true,LOG_FILE_NAME=document_indexing
 command=python danswer/background/update.py
-stdout_logfile=/var/log/update.log
-stdout_logfile_maxbytes=52428800
 redirect_stderr=true
 autorestart=true
 
@@ -26,16 +25,14 @@
 # Vespa / Postgres)
 [program:celery_worker]
 command=celery -A danswer.background.celery.celery_run:celery_app worker --pool=threads --concurrency=6 --loglevel=INFO --logfile=/var/log/celery_worker.log
-stdout_logfile=/var/log/celery_worker_supervisor.log
-stdout_logfile_maxbytes=52428800
+environment=LOG_FILE_NAME=celery_worker
 redirect_stderr=true
 autorestart=true
 
 # Job scheduler for periodic tasks
 [program:celery_beat]
 command=celery -A danswer.background.celery.celery_run:celery_app beat --loglevel=INFO --logfile=/var/log/celery_beat.log
-stdout_logfile=/var/log/celery_beat_supervisor.log
-stdout_logfile_maxbytes=52428800
+environment=LOG_FILE_NAME=celery_beat
 redirect_stderr=true
 autorestart=true
 
@@ -42,22 +39,11 @@ autorestart=true
 # Listens for Slack messages and responds with answers
 # for all channels that the DanswerBot has been added to.
 # If not setup, this will just fail 5 times and then stop.
 # More details on setup here: https://docs.danswer.dev/slack_bot_setup
-[program:slack_bot_listener]
+[program:slack_bot]
 command=python danswer/danswerbot/slack/listener.py
-stdout_logfile=/var/log/slack_bot_listener.log
-stdout_logfile_maxbytes=52428800
+environment=LOG_FILE_NAME=slack_bot
 redirect_stderr=true
 autorestart=true
 startretries=5
 startsecs=60
-
-# Pushes all logs from the above programs to stdout
-# No log rotation here, since it's stdout it's handled by the Docker container loglevel
-# To be standard across all the services
-[program:log-redirect-handler]
-command=tail -qF /var/log/update.log /var/log/celery_worker.log /var/log/celery_worker_supervisor.log /var/log/celery_beat.log /var/log/celery_beat_supervisor.log /var/log/slack_bot_listener.log
-stdout_logfile=/dev/stdout
-stdout_logfile_maxbytes=0
-redirect_stderr=true
-autorestart=true
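
The core mechanism of this patch is the per-level RotatingFileHandler block added to setup_logger. Below is a minimal, self-contained sketch of the same pattern; the "myservice" name and the ./log directory are illustrative stand-ins, since the real code derives the path from LOG_FILE_NAME and from whether /.dockerenv exists.

import logging
import logging.handlers
import os

os.makedirs("./log", exist_ok=True)  # the patched code assumes this directory exists

logger = logging.getLogger("myservice")
logger.setLevel(logging.DEBUG)

# One rotating file per severity: each handler only records messages at or
# above its own level, so the debug file is the most verbose.
for level_name in ("debug", "info"):
    file_handler = logging.handlers.RotatingFileHandler(
        f"./log/myservice_{level_name}.log",
        maxBytes=25 * 1024 * 1024,  # rotate once the file reaches 25 MB
        backupCount=5,  # keep at most 5 rotated files per level
    )
    file_handler.setLevel(getattr(logging, level_name.upper()))
    logger.addHandler(file_handler)

logger.info("lands in both myservice_debug.log and myservice_info.log")
logger.debug("lands only in myservice_debug.log")

The rotation settings mirror the patch: at 25 MB per file with 5 backups, each level is capped at roughly 150 MB, which is what makes it reasonable to keep these logs inside the container instead of tailing everything to stdout as the removed log-redirect-handler did.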
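
The diff resolves its custom severity with logging.getLevelName("NOTICE"), which only returns a number once the level has been registered; that registration lives elsewhere in logger.py and is not shown in this diff. A sketch of what such a registration looks like, assuming NOTICE sits between INFO (20) and WARNING (30):

import logging

# Register the custom level. The exact numeric value danswer uses is not
# visible in this diff, so 25 here is an assumption.
logging.addLevelName(logging.INFO + 5, "NOTICE")

# Once registered, getLevelName also maps the name back to its number.
notice = logging.getLevelName("NOTICE")
assert notice == 25

logging.basicConfig(level=notice)
logging.log(notice, "emitted: NOTICE meets the NOTICE threshold")
logging.info("suppressed: INFO sits below the NOTICE threshold")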
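
The stacklevel=2 added to DanswerLoggingAdapter.notice matters because format attributes such as %(funcName)s and %(filename)s are resolved by walking the call stack; without it, every notice call would be attributed to the adapter method in logger.py rather than to its caller. A runnable illustration (the function and logger names here are made up):

import logging

logging.basicConfig(format="%(funcName)s: %(message)s", level=logging.INFO)
logger = logging.getLogger("stacklevel_demo")


def notice_without(msg: str) -> None:
    logger.log(logging.INFO, msg)  # %(funcName)s reports notice_without


def notice_with(msg: str) -> None:
    logger.log(logging.INFO, msg, stacklevel=2)  # %(funcName)s reports the caller


def handler_code() -> None:
    notice_without("prints as: notice_without: ...")
    notice_with("prints as: handler_code: ...")


handler_code()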