Add log files to containers (#2164)

Yuhong Sun 2024-08-18 19:18:28 -07:00 committed by GitHub
parent 12fccfeffd
commit 119aefba88
6 changed files with 45 additions and 45 deletions

View File

@@ -12,8 +12,8 @@ from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = "c5b692fa265c"
down_revision = "4a951134c801"
branch_labels = None
depends_on = None
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:

View File

@@ -46,6 +46,7 @@ from shared_configs.configs import INDEXING_MODEL_SERVER_HOST
from shared_configs.configs import LOG_LEVEL
from shared_configs.configs import MODEL_SERVER_PORT
logger = setup_logger()
# If the indexing dies, it's most likely due to resource constraints,

View File

@@ -254,10 +254,10 @@ async def lifespan(app: FastAPI) -> AsyncGenerator:
verify_auth()
if OAUTH_CLIENT_ID and OAUTH_CLIENT_SECRET:
logger.info("Both OAuth Client ID and Secret are configured.")
logger.notice("Both OAuth Client ID and Secret are configured.")
if DISABLE_GENERATIVE_AI:
logger.info("Generative AI Q&A disabled")
logger.notice("Generative AI Q&A disabled")
# fill up Postgres connection pools
await warm_up_connections()
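
The change above switches these startup messages from logger.info to logger.notice, a level the standard logging module does not define. A minimal sketch of registering such a level, assuming a numeric value of 25 (between INFO and WARNING; the value actually used by the project is not shown in this diff):

import logging

# Assumed value: NOTICE sits between INFO (20) and WARNING (30).
logging.addLevelName(25, "NOTICE")

logging.basicConfig(level=logging.DEBUG, format="%(levelname)s: %(message)s")
logger = logging.getLogger("danswer")

# Once registered, getLevelName("NOTICE") maps back to 25, so the
# logger.log(logging.getLevelName("NOTICE"), ...) calls in this diff resolve correctly.
logger.log(logging.getLevelName("NOTICE"), "Both OAuth Client ID and Secret are configured.")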

View File

@@ -3,6 +3,8 @@ import os
from collections.abc import MutableMapping
from typing import Any
from shared_configs.configs import DEV_LOGGING_ENABLED
from shared_configs.configs import LOG_FILE_NAME
from shared_configs.configs import LOG_LEVEL
@@ -39,16 +41,12 @@ def get_log_level_from_str(log_level_str: str = LOG_LEVEL) -> int:
return log_level_dict.get(log_level_str.upper(), logging.getLevelName("NOTICE"))
class _IndexAttemptLoggingAdapter(logging.LoggerAdapter):
"""This is used to globally add the index attempt id to all log messages
during indexing by workers. This is done so that the logs can be filtered
by index attempt ID to get a better idea of what happened during a specific
indexing attempt. If the index attempt ID is not set, then this adapter
is a no-op."""
class DanswerLoggingAdapter(logging.LoggerAdapter):
def process(
self, msg: str, kwargs: MutableMapping[str, Any]
) -> tuple[str, MutableMapping[str, Any]]:
# If this is an indexing job, add the attempt ID to the log message
# This helps filter the logs for this specific indexing
attempt_id = IndexAttemptSingleton.get_index_attempt_id()
if attempt_id is None:
return msg, kwargs
@@ -56,7 +54,8 @@ class _IndexAttemptLoggingAdapter(logging.LoggerAdapter):
return f"[Attempt ID: {attempt_id}] {msg}", kwargs
def notice(self, msg: str, *args: Any, **kwargs: Any) -> None:
self.log(logging.getLevelName("NOTICE"), msg, *args, **kwargs)
# Stacklevel is set to 2 to point to the actual caller of notice instead of here
self.log(logging.getLevelName("NOTICE"), msg, *args, **kwargs, stacklevel=2)
class ColoredFormatter(logging.Formatter):
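
The stacklevel=2 added above makes the emitted record's funcName and lineno point at the code that called notice() rather than at the wrapper itself. A small standalone illustration of that behavior (names here are illustrative, not from the repo):

import logging

logging.addLevelName(25, "NOTICE")  # assumed NOTICE value, as in the sketch above
logging.basicConfig(format="%(levelname)s %(funcName)s:%(lineno)d %(message)s", level=logging.DEBUG)
log = logging.getLogger("stacklevel_demo")

def notice(msg: str) -> None:
    # With the default stacklevel=1 the record would name this wrapper;
    # stacklevel=2 (Python 3.8+) skips one frame so it names the caller instead.
    log.log(logging.getLevelName("NOTICE"), msg, stacklevel=2)

def do_work() -> None:
    notice("finished a unit of work")  # record reports funcName=do_work

do_work()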
@@ -96,13 +95,12 @@ def get_standard_formatter() -> ColoredFormatter:
def setup_logger(
name: str = __name__,
log_level: int = get_log_level_from_str(),
logfile_name: str | None = None,
) -> _IndexAttemptLoggingAdapter:
) -> DanswerLoggingAdapter:
logger = logging.getLogger(name)
# If the logger already has handlers, assume it was already configured and return it.
if logger.handlers:
return _IndexAttemptLoggingAdapter(logger)
return DanswerLoggingAdapter(logger)
logger.setLevel(log_level)
@@ -114,17 +112,27 @@ def setup_logger(
logger.addHandler(handler)
if logfile_name:
is_containerized = os.path.exists("/.dockerenv")
file_name_template = (
"/var/log/{name}.log" if is_containerized else "./log/{name}.log"
)
file_handler = logging.FileHandler(file_name_template.format(name=logfile_name))
logger.addHandler(file_handler)
is_containerized = os.path.exists("/.dockerenv")
if LOG_FILE_NAME and (is_containerized or DEV_LOGGING_ENABLED):
log_levels = ["debug", "info", "notice"]
for level in log_levels:
file_name = (
f"/var/log/{LOG_FILE_NAME}_{level}.log"
if is_containerized
else f"./log/{LOG_FILE_NAME}_{level}.log"
)
file_handler = logging.handlers.RotatingFileHandler(
file_name,
maxBytes=25 * 1024 * 1024, # 25 MB
backupCount=5, # Keep 5 backup files
)
file_handler.setLevel(get_log_level_from_str(level))
file_handler.setFormatter(formatter)
logger.addHandler(file_handler)
logger.notice = lambda msg, *args, **kwargs: logger.log(logging.getLevelName("NOTICE"), msg, *args, **kwargs) # type: ignore
return _IndexAttemptLoggingAdapter(logger)
return DanswerLoggingAdapter(logger)
def setup_uvicorn_logger() -> None:
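
The handler block added above fans output into one rotating file per level, each capped at 25 MB with 5 backups. A condensed, self-contained sketch of the same pattern; the ./log/ path and the NOTICE value of 25 are assumptions, and the real code derives the path from LOG_FILE_NAME and the /.dockerenv check:

import logging
import os
from logging.handlers import RotatingFileHandler

logging.addLevelName(25, "NOTICE")  # assumed value
os.makedirs("./log", exist_ok=True)

logger = logging.getLogger("rotating_demo")
logger.setLevel(logging.DEBUG)

# One rotating file per level threshold, mirroring the loop in setup_logger.
for level_name, level_no in [("debug", logging.DEBUG), ("info", logging.INFO), ("notice", 25)]:
    handler = RotatingFileHandler(
        f"./log/danswer_{level_name}.log",
        maxBytes=25 * 1024 * 1024,  # rotate after 25 MB
        backupCount=5,              # keep danswer_<level>.log.1 through .5
    )
    handler.setLevel(level_no)  # e.g. the "notice" file only receives NOTICE and above
    logger.addHandler(handler)

logger.debug("written only to the debug file")
logger.info("written to the debug and info files")
logger.log(25, "written to all three files")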

View File

@@ -36,7 +36,6 @@ DISABLE_RERANK_FOR_STREAMING = (
os.environ.get("DISABLE_RERANK_FOR_STREAMING", "").lower() == "true"
)
# This controls the minimum number of pytorch "threads" to allocate to the embedding
# model. If torch finds more threads on its own, this value is not used.
MIN_THREADS_ML_MODELS = int(os.environ.get("MIN_THREADS_ML_MODELS") or 1)
@@ -45,5 +44,11 @@ MIN_THREADS_ML_MODELS = int(os.environ.get("MIN_THREADS_ML_MODELS") or 1)
# or intent classification
INDEXING_ONLY = os.environ.get("INDEXING_ONLY", "").lower() == "true"
# notset, debug, info, warning, error, or critical
# The process needs this set for log files to be written;
# otherwise, no additional log files are created
LOG_FILE_NAME = os.environ.get("LOG_FILE_NAME") or "danswer"
# Enable generating persistent log files for local dev environments
DEV_LOGGING_ENABLED = os.environ.get("DEV_LOGGING_ENABLED", "").lower() == "true"
# notset, debug, info, notice, warning, error, or critical
LOG_LEVEL = os.environ.get("LOG_LEVEL", "info")
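
Taken together, the two new settings gate whether the per-level files shown earlier are created at all: LOG_FILE_NAME must be non-empty, and the process must either run inside a container or have DEV_LOGGING_ENABLED set. A short sketch of that gating logic as it appears to be wired into setup_logger:

import os

LOG_FILE_NAME = os.environ.get("LOG_FILE_NAME") or "danswer"
DEV_LOGGING_ENABLED = os.environ.get("DEV_LOGGING_ENABLED", "").lower() == "true"

# /.dockerenv exists inside Docker containers, which is how the code detects containerization.
is_containerized = os.path.exists("/.dockerenv")

# File handlers are only attached when this is true; otherwise only the stream handler logs.
write_log_files = bool(LOG_FILE_NAME) and (is_containerized or DEV_LOGGING_ENABLED)

# Containers write to /var/log/<LOG_FILE_NAME>_<level>.log,
# local dev (DEV_LOGGING_ENABLED=true) writes to ./log/<LOG_FILE_NAME>_<level>.log.
print(f"file logging enabled: {write_log_files}")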

View File

@@ -1,15 +1,14 @@
[supervisord]
nodaemon=true
user=root
logfile=/var/log/supervisord.log
# Indexing is the heaviest job, also requires some CPU intensive steps
# Cannot place this in Celery for now because Celery must run as a single process (see note below)
# Indexing uses multi-processing to speed things up
[program:document_indexing]
environment=CURRENT_PROCESS_IS_AN_INDEXING_JOB=true
environment=CURRENT_PROCESS_IS_AN_INDEXING_JOB=true,LOG_FILE_NAME=document_indexing
command=python danswer/background/update.py
stdout_logfile=/var/log/update.log
stdout_logfile_maxbytes=52428800
redirect_stderr=true
autorestart=true
@@ -26,16 +25,14 @@ autorestart=true
# Vespa / Postgres)
[program:celery_worker]
command=celery -A danswer.background.celery.celery_run:celery_app worker --pool=threads --concurrency=6 --loglevel=INFO --logfile=/var/log/celery_worker.log
stdout_logfile=/var/log/celery_worker_supervisor.log
stdout_logfile_maxbytes=52428800
environment=LOG_FILE_NAME=celery_worker
redirect_stderr=true
autorestart=true
# Job scheduler for periodic tasks
[program:celery_beat]
command=celery -A danswer.background.celery.celery_run:celery_app beat --loglevel=INFO --logfile=/var/log/celery_beat.log
stdout_logfile=/var/log/celery_beat_supervisor.log
stdout_logfile_maxbytes=52428800
environment=LOG_FILE_NAME=celery_beat
redirect_stderr=true
autorestart=true
@@ -43,21 +40,10 @@ autorestart=true
# for all channels that the DanswerBot has been added to.
# If not set up, this will just fail 5 times and then stop.
# More details on setup here: https://docs.danswer.dev/slack_bot_setup
[program:slack_bot_listener]
[program:slack_bot]
command=python danswer/danswerbot/slack/listener.py
stdout_logfile=/var/log/slack_bot_listener.log
stdout_logfile_maxbytes=52428800
environment=LOG_FILE_NAME=slack_bot
redirect_stderr=true
autorestart=true
startretries=5
startsecs=60
# Pushes all logs from the above programs to stdout
# No log rotation here; since this is stdout, it is handled by the Docker container's log configuration
# Keeps log handling consistent across all the services
[program:log-redirect-handler]
command=tail -qF /var/log/update.log /var/log/celery_worker.log /var/log/celery_worker_supervisor.log /var/log/celery_beat.log /var/log/celery_beat_supervisor.log /var/log/slack_bot_listener.log
stdout_logfile=/dev/stdout
stdout_logfile_maxbytes=0
redirect_stderr=true
autorestart=true
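
Each [program] block now exports LOG_FILE_NAME, so every supervised process writes its own rotating files in addition to whatever goes to stdout. A rough usage sketch for the document_indexing program, assuming the logger module shown earlier is importable as danswer.utils.logger (the path is not visible in this diff):

# Hypothetical entry-point sketch for a process started under [program:document_indexing].
# supervisord exports LOG_FILE_NAME=document_indexing, so setup_logger (from the diff above)
# attaches rotating handlers for /var/log/document_indexing_{debug,info,notice}.log.
from danswer.utils.logger import setup_logger  # import path is an assumption

logger = setup_logger()
logger.notice("Starting document indexing")  # lands in the notice, info, and debug files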