Add log files to containers (#2164)

Yuhong Sun authored on 2024-08-18 19:18:28 -07:00 (committed by GitHub)
parent 12fccfeffd
commit 119aefba88
6 changed files with 45 additions and 45 deletions

View File

@@ -12,8 +12,8 @@ from sqlalchemy.dialects import postgresql
 # revision identifiers, used by Alembic.
 revision = "c5b692fa265c"
 down_revision = "4a951134c801"
-branch_labels = None
-depends_on = None
+branch_labels: None = None
+depends_on: None = None
 def upgrade() -> None:

View File

@@ -46,6 +46,7 @@ from shared_configs.configs import INDEXING_MODEL_SERVER_HOST
 from shared_configs.configs import LOG_LEVEL
 from shared_configs.configs import MODEL_SERVER_PORT
 logger = setup_logger()
 # If the indexing dies, it's most likely due to resource constraints,

View File

@@ -254,10 +254,10 @@ async def lifespan(app: FastAPI) -> AsyncGenerator:
     verify_auth()
     if OAUTH_CLIENT_ID and OAUTH_CLIENT_SECRET:
-        logger.info("Both OAuth Client ID and Secret are configured.")
+        logger.notice("Both OAuth Client ID and Secret are configured.")
     if DISABLE_GENERATIVE_AI:
-        logger.info("Generative AI Q&A disabled")
+        logger.notice("Generative AI Q&A disabled")
     # fill up Postgres connection pools
     await warm_up_connections()
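The info → notice switch above relies on a custom NOTICE level already being registered with Python's logging module elsewhere in the codebase; that registration is not part of this diff. A minimal sketch of how such a level is typically wired up (the numeric value 25 is an assumption for illustration):

```python
import logging

# Hypothetical registration of a NOTICE level between INFO (20) and WARNING (30).
# Once registered, logging.getLevelName("NOTICE") resolves back to the integer,
# which is how the calls in this commit look the level up.
NOTICE_LEVEL = logging.INFO + 5  # 25; exact value is an assumption

logging.addLevelName(NOTICE_LEVEL, "NOTICE")

logger = logging.getLogger("example")
logger.setLevel(logging.DEBUG)
logger.addHandler(logging.StreamHandler())
logger.log(logging.getLevelName("NOTICE"), "OAuth client configured")  # emitted at NOTICE
```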

View File

@@ -3,6 +3,8 @@ import os
 from collections.abc import MutableMapping
 from typing import Any
+from shared_configs.configs import DEV_LOGGING_ENABLED
+from shared_configs.configs import LOG_FILE_NAME
 from shared_configs.configs import LOG_LEVEL
@@ -39,16 +41,12 @@ def get_log_level_from_str(log_level_str: str = LOG_LEVEL) -> int:
     return log_level_dict.get(log_level_str.upper(), logging.getLevelName("NOTICE"))
-class _IndexAttemptLoggingAdapter(logging.LoggerAdapter):
-    """This is used to globally add the index attempt id to all log messages
-    during indexing by workers. This is done so that the logs can be filtered
-    by index attempt ID to get a better idea of what happened during a specific
-    indexing attempt. If the index attempt ID is not set, then this adapter
-    is a no-op."""
+class DanswerLoggingAdapter(logging.LoggerAdapter):
     def process(
         self, msg: str, kwargs: MutableMapping[str, Any]
     ) -> tuple[str, MutableMapping[str, Any]]:
-        # If this is an indexing job, add the attempt ID to the log message
-        # This helps filter the logs for this specific indexing
         attempt_id = IndexAttemptSingleton.get_index_attempt_id()
         if attempt_id is None:
             return msg, kwargs
@@ -56,7 +54,8 @@ class _IndexAttemptLoggingAdapter(logging.LoggerAdapter):
return f"[Attempt ID: {attempt_id}] {msg}", kwargs return f"[Attempt ID: {attempt_id}] {msg}", kwargs
def notice(self, msg: str, *args: Any, **kwargs: Any) -> None: def notice(self, msg: str, *args: Any, **kwargs: Any) -> None:
self.log(logging.getLevelName("NOTICE"), msg, *args, **kwargs) # Stacklevel is set to 2 to point to the actual caller of notice instead of here
self.log(logging.getLevelName("NOTICE"), msg, *args, **kwargs, stacklevel=2)
class ColoredFormatter(logging.Formatter): class ColoredFormatter(logging.Formatter):
@@ -96,13 +95,12 @@ def get_standard_formatter() -> ColoredFormatter:
 def setup_logger(
     name: str = __name__,
     log_level: int = get_log_level_from_str(),
-    logfile_name: str | None = None,
-) -> _IndexAttemptLoggingAdapter:
+) -> DanswerLoggingAdapter:
     logger = logging.getLogger(name)
     # If the logger already has handlers, assume it was already configured and return it.
     if logger.handlers:
-        return _IndexAttemptLoggingAdapter(logger)
+        return DanswerLoggingAdapter(logger)
     logger.setLevel(log_level)
@@ -114,17 +112,27 @@ def setup_logger(
     logger.addHandler(handler)
-    if logfile_name:
-        is_containerized = os.path.exists("/.dockerenv")
-        file_name_template = (
-            "/var/log/{name}.log" if is_containerized else "./log/{name}.log"
-        )
-        file_handler = logging.FileHandler(file_name_template.format(name=logfile_name))
-        logger.addHandler(file_handler)
+    is_containerized = os.path.exists("/.dockerenv")
+    if LOG_FILE_NAME and (is_containerized or DEV_LOGGING_ENABLED):
+        log_levels = ["debug", "info", "notice"]
+        for level in log_levels:
+            file_name = (
+                f"/var/log/{LOG_FILE_NAME}_{level}.log"
+                if is_containerized
+                else f"./log/{LOG_FILE_NAME}_{level}.log"
+            )
+            file_handler = logging.handlers.RotatingFileHandler(
+                file_name,
+                maxBytes=25 * 1024 * 1024,  # 25 MB
+                backupCount=5,  # Keep 5 backup files
+            )
+            file_handler.setLevel(get_log_level_from_str(level))
+            file_handler.setFormatter(formatter)
+            logger.addHandler(file_handler)
     logger.notice = lambda msg, *args, **kwargs: logger.log(logging.getLevelName("NOTICE"), msg, *args, **kwargs)  # type: ignore
-    return _IndexAttemptLoggingAdapter(logger)
+    return DanswerLoggingAdapter(logger)
 def setup_uvicorn_logger() -> None:
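To make the new handler layout above easier to follow: instead of a single FileHandler gated on a logfile_name argument, setup_logger now attaches one rotating file per level whenever LOG_FILE_NAME is set and the process is containerized (or dev logging is enabled). A standalone sketch of the same pattern, assuming a local ./log directory and NOTICE = INFO + 5 (both assumptions here):

```python
import logging
import logging.handlers
import os

# Standalone illustration of the per-level rotating file layout. Because a
# handler set to INFO also receives NOTICE/WARNING/ERROR records, the "debug"
# file is the most verbose and the "notice" file the most terse.
os.makedirs("log", exist_ok=True)  # FileHandler does not create directories

logger = logging.getLogger("example")
logger.setLevel(logging.DEBUG)

for level_name, level_value in [
    ("debug", logging.DEBUG),
    ("info", logging.INFO),
    ("notice", logging.INFO + 5),  # assumed NOTICE value
]:
    handler = logging.handlers.RotatingFileHandler(
        f"./log/danswer_{level_name}.log",
        maxBytes=25 * 1024 * 1024,  # rotate at 25 MB, matching the diff
        backupCount=5,              # keep 5 rotated backups per file
    )
    handler.setLevel(level_value)
    logger.addHandler(handler)

logger.debug("lands only in danswer_debug.log")
logger.info("lands in danswer_debug.log and danswer_info.log")
```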

View File

@@ -36,7 +36,6 @@ DISABLE_RERANK_FOR_STREAMING = (
os.environ.get("DISABLE_RERANK_FOR_STREAMING", "").lower() == "true" os.environ.get("DISABLE_RERANK_FOR_STREAMING", "").lower() == "true"
) )
# This controls the minimum number of pytorch "threads" to allocate to the embedding # This controls the minimum number of pytorch "threads" to allocate to the embedding
# model. If torch finds more threads on its own, this value is not used. # model. If torch finds more threads on its own, this value is not used.
MIN_THREADS_ML_MODELS = int(os.environ.get("MIN_THREADS_ML_MODELS") or 1) MIN_THREADS_ML_MODELS = int(os.environ.get("MIN_THREADS_ML_MODELS") or 1)
@@ -45,5 +44,11 @@ MIN_THREADS_ML_MODELS = int(os.environ.get("MIN_THREADS_ML_MODELS") or 1)
 # or intent classification
 INDEXING_ONLY = os.environ.get("INDEXING_ONLY", "").lower() == "true"
-# notset, debug, info, warning, error, or critical
+# The process needs to have this for the log file to write to
+# otherwise, it will not create additional log files
+LOG_FILE_NAME = os.environ.get("LOG_FILE_NAME") or "danswer"
+# Enable generating persistent log files for local dev environments
+DEV_LOGGING_ENABLED = os.environ.get("DEV_LOGGING_ENABLED", "").lower() == "true"
+# notset, debug, info, notice, warning, error, or critical
 LOG_LEVEL = os.environ.get("LOG_LEVEL", "info")
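For a local (non-Docker) run, these two variables are what turn persistent log files on, since the /.dockerenv check in the logger change above is false. A hedged usage sketch; the danswer.utils.logger import path is assumed from context, and the variables must be set before the config module is first imported because they are read at import time:

```python
import os

os.environ["DEV_LOGGING_ENABLED"] = "true"
os.environ["LOG_FILE_NAME"] = "danswer"   # also the default; an unset or empty value falls back to "danswer"
os.makedirs("log", exist_ok=True)         # ./log must exist before the file handlers open

from danswer.utils.logger import setup_logger  # module path assumed, not shown in this diff

logger = setup_logger()
logger.info("written to stdout plus ./log/danswer_debug.log and ./log/danswer_info.log")
```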

View File

@@ -1,15 +1,14 @@
 [supervisord]
 nodaemon=true
+user=root
 logfile=/var/log/supervisord.log
 # Indexing is the heaviest job, also requires some CPU intensive steps
 # Cannot place this in Celery for now because Celery must run as a single process (see note below)
 # Indexing uses multi-processing to speed things up
 [program:document_indexing]
-environment=CURRENT_PROCESS_IS_AN_INDEXING_JOB=true
+environment=CURRENT_PROCESS_IS_AN_INDEXING_JOB=true,LOG_FILE_NAME=document_indexing
 command=python danswer/background/update.py
-stdout_logfile=/var/log/update.log
-stdout_logfile_maxbytes=52428800
 redirect_stderr=true
 autorestart=true
@@ -26,16 +25,14 @@ autorestart=true
 # Vespa / Postgres)
 [program:celery_worker]
 command=celery -A danswer.background.celery.celery_run:celery_app worker --pool=threads --concurrency=6 --loglevel=INFO --logfile=/var/log/celery_worker.log
-stdout_logfile=/var/log/celery_worker_supervisor.log
-stdout_logfile_maxbytes=52428800
+environment=LOG_FILE_NAME=celery_worker
 redirect_stderr=true
 autorestart=true
 # Job scheduler for periodic tasks
 [program:celery_beat]
 command=celery -A danswer.background.celery.celery_run:celery_app beat --loglevel=INFO --logfile=/var/log/celery_beat.log
-stdout_logfile=/var/log/celery_beat_supervisor.log
-stdout_logfile_maxbytes=52428800
+environment=LOG_FILE_NAME=celery_beat
 redirect_stderr=true
 autorestart=true
@@ -43,21 +40,10 @@ autorestart=true
 # for all channels that the DanswerBot has been added to.
 # If not setup, this will just fail 5 times and then stop.
 # More details on setup here: https://docs.danswer.dev/slack_bot_setup
-[program:slack_bot_listener]
+[program:slack_bot]
 command=python danswer/danswerbot/slack/listener.py
-stdout_logfile=/var/log/slack_bot_listener.log
-stdout_logfile_maxbytes=52428800
+environment=LOG_FILE_NAME=slack_bot
 redirect_stderr=true
 autorestart=true
 startretries=5
 startsecs=60
-# Pushes all logs from the above programs to stdout
-# No log rotation here, since it's stdout it's handled by the Docker container loglevel
-# To be standard across all the services
-[program:log-redirect-handler]
-command=tail -qF /var/log/update.log /var/log/celery_worker.log /var/log/celery_worker_supervisor.log /var/log/celery_beat.log /var/log/celery_beat_supervisor.log /var/log/slack_bot_listener.log
-stdout_logfile=/dev/stdout
-stdout_logfile_maxbytes=0
-redirect_stderr=true
-autorestart=true