Add log files to containers (#2164)

Yuhong Sun authored on 2024-08-18 19:18:28 -07:00 (committed by GitHub)
parent 12fccfeffd
commit 119aefba88
6 changed files with 45 additions and 45 deletions

View File

@@ -12,8 +12,8 @@ from sqlalchemy.dialects import postgresql
 # revision identifiers, used by Alembic.
 revision = "c5b692fa265c"
 down_revision = "4a951134c801"
-branch_labels = None
-depends_on = None
+branch_labels: None = None
+depends_on: None = None
 def upgrade() -> None:

View File

@@ -46,6 +46,7 @@ from shared_configs.configs import INDEXING_MODEL_SERVER_HOST
 from shared_configs.configs import LOG_LEVEL
 from shared_configs.configs import MODEL_SERVER_PORT
 logger = setup_logger()
 # If the indexing dies, it's most likely due to resource constraints,

View File

@@ -254,10 +254,10 @@ async def lifespan(app: FastAPI) -> AsyncGenerator:
     verify_auth()
     if OAUTH_CLIENT_ID and OAUTH_CLIENT_SECRET:
-        logger.info("Both OAuth Client ID and Secret are configured.")
+        logger.notice("Both OAuth Client ID and Secret are configured.")
     if DISABLE_GENERATIVE_AI:
-        logger.info("Generative AI Q&A disabled")
+        logger.notice("Generative AI Q&A disabled")
     # fill up Postgres connection pools
     await warm_up_connections()
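The info → notice switch above relies on a custom NOTICE level already being registered with Python's logging module elsewhere in the codebase; that registration is not part of this diff. A minimal sketch of how such a level is typically wired up (the numeric value 25 is an assumption for illustration):

```python
import logging

# Hypothetical registration of a NOTICE level between INFO (20) and WARNING (30).
# Once registered, logging.getLevelName("NOTICE") resolves back to the integer,
# which is how the calls in this commit look the level up.
NOTICE_LEVEL = logging.INFO + 5  # 25; exact value is an assumption

logging.addLevelName(NOTICE_LEVEL, "NOTICE")

logger = logging.getLogger("example")
logger.setLevel(logging.DEBUG)
logger.addHandler(logging.StreamHandler())
logger.log(logging.getLevelName("NOTICE"), "OAuth client configured")  # emitted at NOTICE
```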

View File

@@ -3,6 +3,8 @@ import os
 from collections.abc import MutableMapping
 from typing import Any
+from shared_configs.configs import DEV_LOGGING_ENABLED
+from shared_configs.configs import LOG_FILE_NAME
 from shared_configs.configs import LOG_LEVEL
@@ -39,16 +41,12 @@ def get_log_level_from_str(log_level_str: str = LOG_LEVEL) -> int:
     return log_level_dict.get(log_level_str.upper(), logging.getLevelName("NOTICE"))
-class _IndexAttemptLoggingAdapter(logging.LoggerAdapter):
-    """This is used to globally add the index attempt id to all log messages
-    during indexing by workers. This is done so that the logs can be filtered
-    by index attempt ID to get a better idea of what happened during a specific
-    indexing attempt. If the index attempt ID is not set, then this adapter
-    is a no-op."""
+class DanswerLoggingAdapter(logging.LoggerAdapter):
     def process(
         self, msg: str, kwargs: MutableMapping[str, Any]
     ) -> tuple[str, MutableMapping[str, Any]]:
-        # If this is an indexing job, add the attempt ID to the log message
-        # This helps filter the logs for this specific indexing
         attempt_id = IndexAttemptSingleton.get_index_attempt_id()
         if attempt_id is None:
             return msg, kwargs
@@ -56,7 +54,8 @@ class _IndexAttemptLoggingAdapter(logging.LoggerAdapter):
return f"[Attempt ID: {attempt_id}] {msg}", kwargs return f"[Attempt ID: {attempt_id}] {msg}", kwargs
def notice(self, msg: str, *args: Any, **kwargs: Any) -> None: def notice(self, msg: str, *args: Any, **kwargs: Any) -> None:
self.log(logging.getLevelName("NOTICE"), msg, *args, **kwargs) # Stacklevel is set to 2 to point to the actual caller of notice instead of here
self.log(logging.getLevelName("NOTICE"), msg, *args, **kwargs, stacklevel=2)
class ColoredFormatter(logging.Formatter): class ColoredFormatter(logging.Formatter):
@@ -96,13 +95,12 @@ def get_standard_formatter() -> ColoredFormatter:
 def setup_logger(
     name: str = __name__,
     log_level: int = get_log_level_from_str(),
-    logfile_name: str | None = None,
-) -> _IndexAttemptLoggingAdapter:
+) -> DanswerLoggingAdapter:
     logger = logging.getLogger(name)
     # If the logger already has handlers, assume it was already configured and return it.
     if logger.handlers:
-        return _IndexAttemptLoggingAdapter(logger)
+        return DanswerLoggingAdapter(logger)
     logger.setLevel(log_level)
@@ -114,17 +112,27 @@ def setup_logger(
     logger.addHandler(handler)
-    if logfile_name:
-        is_containerized = os.path.exists("/.dockerenv")
-        file_name_template = (
-            "/var/log/{name}.log" if is_containerized else "./log/{name}.log"
-        )
-        file_handler = logging.FileHandler(file_name_template.format(name=logfile_name))
-        logger.addHandler(file_handler)
+    is_containerized = os.path.exists("/.dockerenv")
+    if LOG_FILE_NAME and (is_containerized or DEV_LOGGING_ENABLED):
+        log_levels = ["debug", "info", "notice"]
+        for level in log_levels:
+            file_name = (
+                f"/var/log/{LOG_FILE_NAME}_{level}.log"
+                if is_containerized
+                else f"./log/{LOG_FILE_NAME}_{level}.log"
+            )
+            file_handler = logging.handlers.RotatingFileHandler(
+                file_name,
+                maxBytes=25 * 1024 * 1024,  # 25 MB
+                backupCount=5,  # Keep 5 backup files
+            )
+            file_handler.setLevel(get_log_level_from_str(level))
+            file_handler.setFormatter(formatter)
+            logger.addHandler(file_handler)
     logger.notice = lambda msg, *args, **kwargs: logger.log(logging.getLevelName("NOTICE"), msg, *args, **kwargs)  # type: ignore
-    return _IndexAttemptLoggingAdapter(logger)
+    return DanswerLoggingAdapter(logger)
 def setup_uvicorn_logger() -> None:
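To make the new handler layout above easier to follow: instead of a single FileHandler gated on a logfile_name argument, setup_logger now attaches one rotating file per level whenever LOG_FILE_NAME is set and the process is containerized (or dev logging is enabled). A standalone sketch of the same pattern, assuming a local ./log directory and NOTICE = INFO + 5 (both assumptions here):

```python
import logging
import logging.handlers
import os

# Standalone illustration of the per-level rotating file layout. Because a
# handler set to INFO also receives NOTICE/WARNING/ERROR records, the "debug"
# file is the most verbose and the "notice" file the most terse.
os.makedirs("log", exist_ok=True)  # FileHandler does not create directories

logger = logging.getLogger("example")
logger.setLevel(logging.DEBUG)

for level_name, level_value in [
    ("debug", logging.DEBUG),
    ("info", logging.INFO),
    ("notice", logging.INFO + 5),  # assumed NOTICE value
]:
    handler = logging.handlers.RotatingFileHandler(
        f"./log/danswer_{level_name}.log",
        maxBytes=25 * 1024 * 1024,  # rotate at 25 MB, matching the diff
        backupCount=5,              # keep 5 rotated backups per file
    )
    handler.setLevel(level_value)
    logger.addHandler(handler)

logger.debug("lands only in danswer_debug.log")
logger.info("lands in danswer_debug.log and danswer_info.log")
```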

View File

@@ -36,7 +36,6 @@ DISABLE_RERANK_FOR_STREAMING = (
os.environ.get("DISABLE_RERANK_FOR_STREAMING", "").lower() == "true" os.environ.get("DISABLE_RERANK_FOR_STREAMING", "").lower() == "true"
) )
# This controls the minimum number of pytorch "threads" to allocate to the embedding # This controls the minimum number of pytorch "threads" to allocate to the embedding
# model. If torch finds more threads on its own, this value is not used. # model. If torch finds more threads on its own, this value is not used.
MIN_THREADS_ML_MODELS = int(os.environ.get("MIN_THREADS_ML_MODELS") or 1) MIN_THREADS_ML_MODELS = int(os.environ.get("MIN_THREADS_ML_MODELS") or 1)
@@ -45,5 +44,11 @@ MIN_THREADS_ML_MODELS = int(os.environ.get("MIN_THREADS_ML_MODELS") or 1)
 # or intent classification
 INDEXING_ONLY = os.environ.get("INDEXING_ONLY", "").lower() == "true"
-# notset, debug, info, warning, error, or critical
+# The process needs to have this for the log file to write to
+# otherwise, it will not create additional log files
+LOG_FILE_NAME = os.environ.get("LOG_FILE_NAME") or "danswer"
+# Enable generating persistent log files for local dev environments
+DEV_LOGGING_ENABLED = os.environ.get("DEV_LOGGING_ENABLED", "").lower() == "true"
+# notset, debug, info, notice, warning, error, or critical
 LOG_LEVEL = os.environ.get("LOG_LEVEL", "info")
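For a local (non-Docker) run, these two variables are what turn persistent log files on, since the /.dockerenv check in the logger change above is false. A hedged usage sketch; the danswer.utils.logger import path is assumed from context, and the variables must be set before the config module is first imported because they are read at import time:

```python
import os

os.environ["DEV_LOGGING_ENABLED"] = "true"
os.environ["LOG_FILE_NAME"] = "danswer"   # also the default; an unset or empty value falls back to "danswer"
os.makedirs("log", exist_ok=True)         # ./log must exist before the file handlers open

from danswer.utils.logger import setup_logger  # module path assumed, not shown in this diff

logger = setup_logger()
logger.info("written to stdout plus ./log/danswer_debug.log and ./log/danswer_info.log")
```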

View File

@@ -1,15 +1,14 @@
 [supervisord]
 nodaemon=true
+user=root
 logfile=/var/log/supervisord.log
 # Indexing is the heaviest job, also requires some CPU intensive steps
 # Cannot place this in Celery for now because Celery must run as a single process (see note below)
 # Indexing uses multi-processing to speed things up
 [program:document_indexing]
-environment=CURRENT_PROCESS_IS_AN_INDEXING_JOB=true
+environment=CURRENT_PROCESS_IS_AN_INDEXING_JOB=true,LOG_FILE_NAME=document_indexing
 command=python danswer/background/update.py
-stdout_logfile=/var/log/update.log
-stdout_logfile_maxbytes=52428800
 redirect_stderr=true
 autorestart=true
@@ -26,16 +25,14 @@ autorestart=true
 # Vespa / Postgres)
 [program:celery_worker]
 command=celery -A danswer.background.celery.celery_run:celery_app worker --pool=threads --concurrency=6 --loglevel=INFO --logfile=/var/log/celery_worker.log
-stdout_logfile=/var/log/celery_worker_supervisor.log
-stdout_logfile_maxbytes=52428800
+environment=LOG_FILE_NAME=celery_worker
 redirect_stderr=true
 autorestart=true
 # Job scheduler for periodic tasks
 [program:celery_beat]
 command=celery -A danswer.background.celery.celery_run:celery_app beat --loglevel=INFO --logfile=/var/log/celery_beat.log
-stdout_logfile=/var/log/celery_beat_supervisor.log
-stdout_logfile_maxbytes=52428800
+environment=LOG_FILE_NAME=celery_beat
 redirect_stderr=true
 autorestart=true
@@ -43,21 +40,10 @@ autorestart=true
 # for all channels that the DanswerBot has been added to.
 # If not setup, this will just fail 5 times and then stop.
 # More details on setup here: https://docs.danswer.dev/slack_bot_setup
-[program:slack_bot_listener]
+[program:slack_bot]
 command=python danswer/danswerbot/slack/listener.py
-stdout_logfile=/var/log/slack_bot_listener.log
-stdout_logfile_maxbytes=52428800
+environment=LOG_FILE_NAME=slack_bot
 redirect_stderr=true
 autorestart=true
 startretries=5
 startsecs=60
-# Pushes all logs from the above programs to stdout
-# No log rotation here, since it's stdout it's handled by the Docker container loglevel
-# To be standard across all the services
-[program:log-redirect-handler]
-command=tail -qF /var/log/update.log /var/log/celery_worker.log /var/log/celery_worker_supervisor.log /var/log/celery_beat.log /var/log/celery_beat_supervisor.log /var/log/slack_bot_listener.log
-stdout_logfile=/dev/stdout
-stdout_logfile_maxbytes=0
-redirect_stderr=true
-autorestart=true