From 119aefba88d9935716b2916287b467379de09fab Mon Sep 17 00:00:00 2001
From: Yuhong Sun
Date: Sun, 18 Aug 2024 19:18:28 -0700
Subject: [PATCH] Add log files to containers (#2164)

---
 ...92fa265c_add_index_attempt_errors_table.py |  4 +-
 backend/danswer/background/update.py          |  1 +
 backend/danswer/main.py                       |  4 +-
 backend/danswer/utils/logger.py               | 47 +++++++++++--------
 backend/shared_configs/configs.py             |  9 +++-
 backend/supervisord.conf                      | 26 +++--------
 6 files changed, 46 insertions(+), 45 deletions(-)

diff --git a/backend/alembic/versions/c5b692fa265c_add_index_attempt_errors_table.py b/backend/alembic/versions/c5b692fa265c_add_index_attempt_errors_table.py
index c49fea219..e4808042a 100644
--- a/backend/alembic/versions/c5b692fa265c_add_index_attempt_errors_table.py
+++ b/backend/alembic/versions/c5b692fa265c_add_index_attempt_errors_table.py
@@ -12,8 +12,8 @@ from sqlalchemy.dialects import postgresql
 # revision identifiers, used by Alembic.
 revision = "c5b692fa265c"
 down_revision = "4a951134c801"
-branch_labels = None
-depends_on = None
+branch_labels: None = None
+depends_on: None = None
 
 
 def upgrade() -> None:
diff --git a/backend/danswer/background/update.py b/backend/danswer/background/update.py
index 1949f7905..e05cfbca6 100755
--- a/backend/danswer/background/update.py
+++ b/backend/danswer/background/update.py
@@ -46,6 +46,7 @@ from shared_configs.configs import INDEXING_MODEL_SERVER_HOST
 from shared_configs.configs import LOG_LEVEL
 from shared_configs.configs import MODEL_SERVER_PORT
 
+
 logger = setup_logger()
 
 # If the indexing dies, it's most likely due to resource constraints,
diff --git a/backend/danswer/main.py b/backend/danswer/main.py
index 76789e474..5e67bfd8d 100644
--- a/backend/danswer/main.py
+++ b/backend/danswer/main.py
@@ -254,10 +254,10 @@ async def lifespan(app: FastAPI) -> AsyncGenerator:
         verify_auth()
 
     if OAUTH_CLIENT_ID and OAUTH_CLIENT_SECRET:
-        logger.info("Both OAuth Client ID and Secret are configured.")
+        logger.notice("Both OAuth Client ID and Secret are configured.")
 
     if DISABLE_GENERATIVE_AI:
-        logger.info("Generative AI Q&A disabled")
+        logger.notice("Generative AI Q&A disabled")
 
     # fill up Postgres connection pools
     await warm_up_connections()
diff --git a/backend/danswer/utils/logger.py b/backend/danswer/utils/logger.py
index 8938b822d..19fb14ae7 100644
--- a/backend/danswer/utils/logger.py
+++ b/backend/danswer/utils/logger.py
@@ -1,8 +1,11 @@
 import logging
+import logging.handlers  # RotatingFileHandler lives in the handlers submodule
 import os
 from collections.abc import MutableMapping
 from typing import Any
 
+from shared_configs.configs import DEV_LOGGING_ENABLED
+from shared_configs.configs import LOG_FILE_NAME
 from shared_configs.configs import LOG_LEVEL
 
 
@@ -39,16 +42,12 @@ def get_log_level_from_str(log_level_str: str = LOG_LEVEL) -> int:
     return log_level_dict.get(log_level_str.upper(), logging.getLevelName("NOTICE"))
 
 
-class _IndexAttemptLoggingAdapter(logging.LoggerAdapter):
-    """This is used to globally add the index attempt id to all log messages
-    during indexing by workers. This is done so that the logs can be filtered
-    by index attempt ID to get a better idea of what happened during a specific
-    indexing attempt. If the index attempt ID is not set, then this adapter
-    is a no-op."""
-
+class DanswerLoggingAdapter(logging.LoggerAdapter):
     def process(
         self, msg: str, kwargs: MutableMapping[str, Any]
     ) -> tuple[str, MutableMapping[str, Any]]:
+        # If this is an indexing job, add the attempt ID to the log message
+        # This helps filter the logs for this specific indexing attempt
         attempt_id = IndexAttemptSingleton.get_index_attempt_id()
         if attempt_id is None:
             return msg, kwargs
@@ -56,7 +55,8 @@ class _IndexAttemptLoggingAdapter(logging.LoggerAdapter):
         return f"[Attempt ID: {attempt_id}] {msg}", kwargs
 
     def notice(self, msg: str, *args: Any, **kwargs: Any) -> None:
-        self.log(logging.getLevelName("NOTICE"), msg, *args, **kwargs)
+        # Stacklevel is set to 2 to point to the actual caller of notice instead of here
+        self.log(logging.getLevelName("NOTICE"), msg, *args, **kwargs, stacklevel=2)
 
 
 class ColoredFormatter(logging.Formatter):
@@ -96,13 +96,12 @@ def get_standard_formatter() -> ColoredFormatter:
 def setup_logger(
     name: str = __name__,
     log_level: int = get_log_level_from_str(),
-    logfile_name: str | None = None,
-) -> _IndexAttemptLoggingAdapter:
+) -> DanswerLoggingAdapter:
     logger = logging.getLogger(name)
 
     # If the logger already has handlers, assume it was already configured and return it.
     if logger.handlers:
-        return _IndexAttemptLoggingAdapter(logger)
+        return DanswerLoggingAdapter(logger)
 
     logger.setLevel(log_level)
 
@@ -114,17 +113,27 @@ def setup_logger(
 
     logger.addHandler(handler)
 
-    if logfile_name:
-        is_containerized = os.path.exists("/.dockerenv")
-        file_name_template = (
-            "/var/log/{name}.log" if is_containerized else "./log/{name}.log"
-        )
-        file_handler = logging.FileHandler(file_name_template.format(name=logfile_name))
-        logger.addHandler(file_handler)
+    is_containerized = os.path.exists("/.dockerenv")
+    if LOG_FILE_NAME and (is_containerized or DEV_LOGGING_ENABLED):
+        log_levels = ["debug", "info", "notice"]
+        for level in log_levels:
+            file_name = (
+                f"/var/log/{LOG_FILE_NAME}_{level}.log"
+                if is_containerized
+                else f"./log/{LOG_FILE_NAME}_{level}.log"
+            )
+            file_handler = logging.handlers.RotatingFileHandler(
+                file_name,
+                maxBytes=25 * 1024 * 1024,  # 25 MB
+                backupCount=5,  # Keep 5 backup files
+            )
+            file_handler.setLevel(get_log_level_from_str(level))
+            file_handler.setFormatter(formatter)
+            logger.addHandler(file_handler)
 
     logger.notice = lambda msg, *args, **kwargs: logger.log(logging.getLevelName("NOTICE"), msg, *args, **kwargs)  # type: ignore
 
-    return _IndexAttemptLoggingAdapter(logger)
+    return DanswerLoggingAdapter(logger)
 
 
 def setup_uvicorn_logger() -> None:
diff --git a/backend/shared_configs/configs.py b/backend/shared_configs/configs.py
index 2311daca5..3e69eae01 100644
--- a/backend/shared_configs/configs.py
+++ b/backend/shared_configs/configs.py
@@ -36,7 +36,6 @@ DISABLE_RERANK_FOR_STREAMING = (
     os.environ.get("DISABLE_RERANK_FOR_STREAMING", "").lower() == "true"
 )
 
-
 # This controls the minimum number of pytorch "threads" to allocate to the embedding
 # model. If torch finds more threads on its own, this value is not used.
 MIN_THREADS_ML_MODELS = int(os.environ.get("MIN_THREADS_ML_MODELS") or 1)
 
@@ -45,5 +44,11 @@ MIN_THREADS_ML_MODELS = int(os.environ.get("MIN_THREADS_ML_MODELS") or 1)
 # or intent classification
 INDEXING_ONLY = os.environ.get("INDEXING_ONLY", "").lower() == "true"
 
-# notset, debug, info, warning, error, or critical
+# The name of the log files that this process writes to. Each process started
+# via supervisord overrides this so its logs land in separate files.
+LOG_FILE_NAME = os.environ.get("LOG_FILE_NAME") or "danswer"
+
+# Enable generating persistent log files for local dev environments
+DEV_LOGGING_ENABLED = os.environ.get("DEV_LOGGING_ENABLED", "").lower() == "true"
+# notset, debug, info, notice, warning, error, or critical
 LOG_LEVEL = os.environ.get("LOG_LEVEL", "info")
diff --git a/backend/supervisord.conf b/backend/supervisord.conf
index 6d725cf77..289bd8df8 100644
--- a/backend/supervisord.conf
+++ b/backend/supervisord.conf
@@ -1,15 +1,14 @@
 [supervisord]
 nodaemon=true
+user=root
 logfile=/var/log/supervisord.log
 
 # Indexing is the heaviest job, also requires some CPU intensive steps
 # Cannot place this in Celery for now because Celery must run as a single process (see note below)
 # Indexing uses multi-processing to speed things up
 [program:document_indexing]
-environment=CURRENT_PROCESS_IS_AN_INDEXING_JOB=true
+environment=CURRENT_PROCESS_IS_AN_INDEXING_JOB=true,LOG_FILE_NAME=document_indexing
 command=python danswer/background/update.py
-stdout_logfile=/var/log/update.log
-stdout_logfile_maxbytes=52428800
 redirect_stderr=true
 autorestart=true
 
@@ -26,16 +25,14 @@
 # Vespa / Postgres)
 [program:celery_worker]
 command=celery -A danswer.background.celery.celery_run:celery_app worker --pool=threads --concurrency=6 --loglevel=INFO --logfile=/var/log/celery_worker.log
-stdout_logfile=/var/log/celery_worker_supervisor.log
-stdout_logfile_maxbytes=52428800
+environment=LOG_FILE_NAME=celery_worker
 redirect_stderr=true
 autorestart=true
 
 # Job scheduler for periodic tasks
 [program:celery_beat]
 command=celery -A danswer.background.celery.celery_run:celery_app beat --loglevel=INFO --logfile=/var/log/celery_beat.log
-stdout_logfile=/var/log/celery_beat_supervisor.log
-stdout_logfile_maxbytes=52428800
+environment=LOG_FILE_NAME=celery_beat
 redirect_stderr=true
 autorestart=true
 
@@ -42,22 +39,11 @@ autorestart=true
 # Listens for Slack messages and responds with answers
 # for all channels that the DanswerBot has been added to.
 # If not setup, this will just fail 5 times and then stop.
 # More details on setup here: https://docs.danswer.dev/slack_bot_setup
-[program:slack_bot_listener]
+[program:slack_bot]
 command=python danswer/danswerbot/slack/listener.py
-stdout_logfile=/var/log/slack_bot_listener.log
-stdout_logfile_maxbytes=52428800
+environment=LOG_FILE_NAME=slack_bot
 redirect_stderr=true
 autorestart=true
 startretries=5
 startsecs=60
-
-# Pushes all logs from the above programs to stdout
-# No log rotation here, since it's stdout it's handled by the Docker container loglevel
-# To be standard across all the services
-[program:log-redirect-handler]
-command=tail -qF /var/log/update.log /var/log/celery_worker.log /var/log/celery_worker_supervisor.log /var/log/celery_beat.log /var/log/celery_beat_supervisor.log /var/log/slack_bot_listener.log
-stdout_logfile=/dev/stdout
-stdout_logfile_maxbytes=0
-redirect_stderr=true
-autorestart=true
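
The core mechanism of this patch is the per-level RotatingFileHandler block added to setup_logger. Below is a minimal, self-contained sketch of the same pattern; the "myservice" name and the ./log directory are illustrative stand-ins, since the real code derives the path from LOG_FILE_NAME and from whether /.dockerenv exists.

import logging
import logging.handlers
import os

os.makedirs("./log", exist_ok=True)  # the patched code assumes this directory exists

logger = logging.getLogger("myservice")
logger.setLevel(logging.DEBUG)

# One rotating file per severity: each handler only records messages at or
# above its own level, so the debug file is the most verbose.
for level_name in ("debug", "info"):
    file_handler = logging.handlers.RotatingFileHandler(
        f"./log/myservice_{level_name}.log",
        maxBytes=25 * 1024 * 1024,  # rotate once the file reaches 25 MB
        backupCount=5,  # keep at most 5 rotated files per level
    )
    file_handler.setLevel(getattr(logging, level_name.upper()))
    logger.addHandler(file_handler)

logger.info("lands in both myservice_debug.log and myservice_info.log")
logger.debug("lands only in myservice_debug.log")

The rotation settings mirror the patch: at 25 MB per file with 5 backups, each level is capped at roughly 150 MB, which is what makes it reasonable to keep these logs inside the container instead of tailing everything to stdout as the removed log-redirect-handler did.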
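
The diff resolves its custom severity with logging.getLevelName("NOTICE"), which only returns a number once the level has been registered; that registration lives elsewhere in logger.py and is not shown in this diff. A sketch of what such a registration looks like, assuming NOTICE sits between INFO (20) and WARNING (30):

import logging

# Register the custom level. The exact numeric value danswer uses is not
# visible in this diff, so 25 here is an assumption.
logging.addLevelName(logging.INFO + 5, "NOTICE")

# Once registered, getLevelName also maps the name back to its number.
notice = logging.getLevelName("NOTICE")
assert notice == 25

logging.basicConfig(level=notice)
logging.log(notice, "emitted: NOTICE meets the NOTICE threshold")
logging.info("suppressed: INFO sits below the NOTICE threshold")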
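
The stacklevel=2 added to DanswerLoggingAdapter.notice matters because format attributes such as %(funcName)s and %(filename)s are resolved by walking the call stack; without it, every notice call would be attributed to the adapter method in logger.py rather than to its caller. A runnable illustration (the function and logger names here are made up):

import logging

logging.basicConfig(format="%(funcName)s: %(message)s", level=logging.INFO)
logger = logging.getLogger("stacklevel_demo")


def notice_without(msg: str) -> None:
    logger.log(logging.INFO, msg)  # %(funcName)s reports notice_without


def notice_with(msg: str) -> None:
    logger.log(logging.INFO, msg, stacklevel=2)  # %(funcName)s reports the caller


def handler_code() -> None:
    notice_without("prints as: notice_without: ...")
    notice_with("prints as: handler_code: ...")


handler_code()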