Mirror of https://github.com/danswer-ai/danswer.git

Add log files to containers (#2164)
commit 119aefba88
parent 12fccfeffd
@@ -12,8 +12,8 @@ from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = "c5b692fa265c"
down_revision = "4a951134c801"
-branch_labels = None
-depends_on = None
+branch_labels: None = None
+depends_on: None = None


def upgrade() -> None:
@@ -46,6 +46,7 @@ from shared_configs.configs import INDEXING_MODEL_SERVER_HOST
from shared_configs.configs import LOG_LEVEL
from shared_configs.configs import MODEL_SERVER_PORT


logger = setup_logger()

# If the indexing dies, it's most likely due to resource constraints,
@@ -254,10 +254,10 @@ async def lifespan(app: FastAPI) -> AsyncGenerator:
    verify_auth()

    if OAUTH_CLIENT_ID and OAUTH_CLIENT_SECRET:
-        logger.info("Both OAuth Client ID and Secret are configured.")
+        logger.notice("Both OAuth Client ID and Secret are configured.")

    if DISABLE_GENERATIVE_AI:
-        logger.info("Generative AI Q&A disabled")
+        logger.notice("Generative AI Q&A disabled")

    # fill up Postgres connection pools
    await warm_up_connections()
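The logger.info calls above are promoted to a custom NOTICE severity. A minimal, self-contained sketch of how such a level is typically registered and resolved; the numeric value INFO + 5 is an assumption for illustration, since this diff only shows the level being looked up by name:

# Illustrative sketch, not part of the commit: registering a custom NOTICE level.
import logging

NOTICE = logging.INFO + 5  # assumed value, between INFO (20) and WARNING (30)
logging.addLevelName(NOTICE, "NOTICE")

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("example")

# Once registered, getLevelName("NOTICE") resolves the name back to its number,
# which is what the diff's logger.notice(...) helpers rely on.
logger.log(logging.getLevelName("NOTICE"), "Both OAuth Client ID and Secret are configured.")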
@@ -3,6 +3,8 @@ import os
from collections.abc import MutableMapping
from typing import Any

+from shared_configs.configs import DEV_LOGGING_ENABLED
+from shared_configs.configs import LOG_FILE_NAME
from shared_configs.configs import LOG_LEVEL


@@ -39,16 +41,12 @@ def get_log_level_from_str(log_level_str: str = LOG_LEVEL) -> int:
    return log_level_dict.get(log_level_str.upper(), logging.getLevelName("NOTICE"))


-class _IndexAttemptLoggingAdapter(logging.LoggerAdapter):
-    """This is used to globally add the index attempt id to all log messages
-    during indexing by workers. This is done so that the logs can be filtered
-    by index attempt ID to get a better idea of what happened during a specific
-    indexing attempt. If the index attempt ID is not set, then this adapter
-    is a no-op."""
-
+class DanswerLoggingAdapter(logging.LoggerAdapter):
    def process(
        self, msg: str, kwargs: MutableMapping[str, Any]
    ) -> tuple[str, MutableMapping[str, Any]]:
+        # If this is an indexing job, add the attempt ID to the log message
+        # This helps filter the logs for this specific indexing
        attempt_id = IndexAttemptSingleton.get_index_attempt_id()
        if attempt_id is None:
            return msg, kwargs
@@ -56,7 +54,8 @@ class _IndexAttemptLoggingAdapter(logging.LoggerAdapter):
        return f"[Attempt ID: {attempt_id}] {msg}", kwargs

    def notice(self, msg: str, *args: Any, **kwargs: Any) -> None:
-        self.log(logging.getLevelName("NOTICE"), msg, *args, **kwargs)
+        # Stacklevel is set to 2 to point to the actual caller of notice instead of here
+        self.log(logging.getLevelName("NOTICE"), msg, *args, **kwargs, stacklevel=2)


class ColoredFormatter(logging.Formatter):
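The added stacklevel=2 makes the emitted record point at the code that called notice() rather than at the wrapper itself (on Python 3.11+, logging-internal frames are not counted toward stacklevel). A standalone sketch of the same pattern with illustrative names:

# Illustrative sketch, not part of the commit: attributing records to the caller
# of a logging wrapper via stacklevel.
import logging
from typing import Any

logging.addLevelName(logging.INFO + 5, "NOTICE")  # assumed NOTICE value
logging.basicConfig(format="%(filename)s:%(lineno)d %(levelname)s %(message)s", level=logging.INFO)


class ExampleAdapter(logging.LoggerAdapter):
    def notice(self, msg: str, *args: Any, **kwargs: Any) -> None:
        # stacklevel=2 skips this wrapper frame, so filename/lineno belong to the caller
        self.log(logging.getLevelName("NOTICE"), msg, *args, **kwargs, stacklevel=2)


adapter = ExampleAdapter(logging.getLogger("example"), {})
adapter.notice("record is attributed to this line, not to ExampleAdapter.notice")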
@@ -96,13 +95,12 @@ def get_standard_formatter() -> ColoredFormatter:
def setup_logger(
    name: str = __name__,
    log_level: int = get_log_level_from_str(),
-    logfile_name: str | None = None,
-) -> _IndexAttemptLoggingAdapter:
+) -> DanswerLoggingAdapter:
    logger = logging.getLogger(name)

    # If the logger already has handlers, assume it was already configured and return it.
    if logger.handlers:
-        return _IndexAttemptLoggingAdapter(logger)
+        return DanswerLoggingAdapter(logger)

    logger.setLevel(log_level)
@@ -114,17 +112,27 @@ def setup_logger(
    logger.addHandler(handler)

-    if logfile_name:
-        is_containerized = os.path.exists("/.dockerenv")
-        file_name_template = (
-            "/var/log/{name}.log" if is_containerized else "./log/{name}.log"
-        )
-        file_handler = logging.FileHandler(file_name_template.format(name=logfile_name))
-        logger.addHandler(file_handler)
+    is_containerized = os.path.exists("/.dockerenv")
+    if LOG_FILE_NAME and (is_containerized or DEV_LOGGING_ENABLED):
+        log_levels = ["debug", "info", "notice"]
+        for level in log_levels:
+            file_name = (
+                f"/var/log/{LOG_FILE_NAME}_{level}.log"
+                if is_containerized
+                else f"./log/{LOG_FILE_NAME}_{level}.log"
+            )
+            file_handler = logging.handlers.RotatingFileHandler(
+                file_name,
+                maxBytes=25 * 1024 * 1024,  # 25 MB
+                backupCount=5,  # Keep 5 backup files
+            )
+            file_handler.setLevel(get_log_level_from_str(level))
+            file_handler.setFormatter(formatter)
+            logger.addHandler(file_handler)

    logger.notice = lambda msg, *args, **kwargs: logger.log(logging.getLevelName("NOTICE"), msg, *args, **kwargs)  # type: ignore

-    return _IndexAttemptLoggingAdapter(logger)
+    return DanswerLoggingAdapter(logger)


def setup_uvicorn_logger() -> None:
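Instead of a single FileHandler gated on a logfile_name argument, the logger now attaches one size-rotated file per severity. A self-contained sketch of that pattern; the prefix, directory, formatter, and NOTICE value are placeholders, not taken from the commit:

# Illustrative sketch, not part of the commit: one RotatingFileHandler per level,
# each capped at 25 MB with 5 backups, mirroring the approach in the diff.
import logging
import logging.handlers
import os

logging.addLevelName(logging.INFO + 5, "NOTICE")  # assumed NOTICE value

LEVELS = {"debug": logging.DEBUG, "info": logging.INFO, "notice": logging.getLevelName("NOTICE")}


def add_rotating_handlers(logger: logging.Logger, prefix: str, log_dir: str = "./log") -> None:
    os.makedirs(log_dir, exist_ok=True)
    formatter = logging.Formatter("%(asctime)s %(levelname)s %(name)s: %(message)s")
    for level_name, level in LEVELS.items():
        handler = logging.handlers.RotatingFileHandler(
            os.path.join(log_dir, f"{prefix}_{level_name}.log"),
            maxBytes=25 * 1024 * 1024,  # 25 MB per file
            backupCount=5,  # keep 5 rotated backups
        )
        handler.setLevel(level)  # each file receives records at this level and above
        handler.setFormatter(formatter)
        logger.addHandler(handler)


logger = logging.getLogger("example")
logger.setLevel(logging.DEBUG)
add_rotating_handlers(logger, prefix="danswer")
logger.info("lands in danswer_debug.log and danswer_info.log, but not danswer_notice.log")

Because each handler filters with setLevel, the debug file collects everything while the info and notice files only receive records at their severity or above.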
@@ -36,7 +36,6 @@ DISABLE_RERANK_FOR_STREAMING = (
    os.environ.get("DISABLE_RERANK_FOR_STREAMING", "").lower() == "true"
)

-
# This controls the minimum number of pytorch "threads" to allocate to the embedding
# model. If torch finds more threads on its own, this value is not used.
MIN_THREADS_ML_MODELS = int(os.environ.get("MIN_THREADS_ML_MODELS") or 1)
@@ -45,5 +44,11 @@ MIN_THREADS_ML_MODELS = int(os.environ.get("MIN_THREADS_ML_MODELS") or 1)
# or intent classification
INDEXING_ONLY = os.environ.get("INDEXING_ONLY", "").lower() == "true"

-# notset, debug, info, warning, error, or critical
+# The process needs to have this for the log file to write to
+# otherwise, it will not create additional log files
+LOG_FILE_NAME = os.environ.get("LOG_FILE_NAME") or "danswer"
+
+# Enable generating persistent log files for local dev environments
+DEV_LOGGING_ENABLED = os.environ.get("DEV_LOGGING_ENABLED", "").lower() == "true"
+# notset, debug, info, notice, warning, error, or critical
LOG_LEVEL = os.environ.get("LOG_LEVEL", "info")
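With these variables, persistent log files are opt-in outside containers. A minimal usage sketch; the import path danswer.utils.logger and a pre-existing ./log directory are assumptions, since the diff does not name the modified files:

# Illustrative sketch, not part of the commit: turning on persistent dev logging.
import os

# These must be set before the config/logger modules are imported, since they
# read the environment at import time.
os.environ["DEV_LOGGING_ENABLED"] = "true"  # write files even without /.dockerenv
os.environ["LOG_FILE_NAME"] = "dev_run"     # per-process file name prefix
os.environ["LOG_LEVEL"] = "debug"

from danswer.utils.logger import setup_logger  # assumed module path

logger = setup_logger()
logger.debug("ends up in ./log/dev_run_debug.log (assuming ./log exists)")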
@@ -1,15 +1,14 @@
[supervisord]
nodaemon=true
user=root
+logfile=/var/log/supervisord.log

# Indexing is the heaviest job, also requires some CPU intensive steps
# Cannot place this in Celery for now because Celery must run as a single process (see note below)
# Indexing uses multi-processing to speed things up
[program:document_indexing]
-environment=CURRENT_PROCESS_IS_AN_INDEXING_JOB=true
+environment=CURRENT_PROCESS_IS_AN_INDEXING_JOB=true,LOG_FILE_NAME=document_indexing
command=python danswer/background/update.py
-stdout_logfile=/var/log/update.log
-stdout_logfile_maxbytes=52428800
redirect_stderr=true
autorestart=true

@@ -26,16 +25,14 @@ autorestart=true
# Vespa / Postgres)
[program:celery_worker]
command=celery -A danswer.background.celery.celery_run:celery_app worker --pool=threads --concurrency=6 --loglevel=INFO --logfile=/var/log/celery_worker.log
-stdout_logfile=/var/log/celery_worker_supervisor.log
-stdout_logfile_maxbytes=52428800
+environment=LOG_FILE_NAME=celery_worker
redirect_stderr=true
autorestart=true

# Job scheduler for periodic tasks
[program:celery_beat]
command=celery -A danswer.background.celery.celery_run:celery_app beat --loglevel=INFO --logfile=/var/log/celery_beat.log
-stdout_logfile=/var/log/celery_beat_supervisor.log
-stdout_logfile_maxbytes=52428800
+environment=LOG_FILE_NAME=celery_beat
redirect_stderr=true
autorestart=true

@@ -43,21 +40,10 @@ autorestart=true
# for all channels that the DanswerBot has been added to.
# If not setup, this will just fail 5 times and then stop.
# More details on setup here: https://docs.danswer.dev/slack_bot_setup
-[program:slack_bot_listener]
+[program:slack_bot]
command=python danswer/danswerbot/slack/listener.py
-stdout_logfile=/var/log/slack_bot_listener.log
-stdout_logfile_maxbytes=52428800
+environment=LOG_FILE_NAME=slack_bot
redirect_stderr=true
autorestart=true
startretries=5
startsecs=60
-
-# Pushes all logs from the above programs to stdout
-# No log rotation here, since it's stdout it's handled by the Docker container loglevel
-# To be standard across all the services
-[program:log-redirect-handler]
-command=tail -qF /var/log/update.log /var/log/celery_worker.log /var/log/celery_worker_supervisor.log /var/log/celery_beat.log /var/log/celery_beat_supervisor.log /var/log/slack_bot_listener.log
-stdout_logfile=/dev/stdout
-stdout_logfile_maxbytes=0
-redirect_stderr=true
-autorestart=true
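Taken together with the logger changes, each supervised program now exports its own LOG_FILE_NAME and therefore writes its own rotating files under /var/log inside the container. A small sketch of what one program observes, using the values from the [program:document_indexing] entry above:

# Illustrative sketch, not part of the commit: the environment a program such as
# document_indexing sees when launched by supervisord with the config above.
import os

print(os.environ.get("LOG_FILE_NAME"))  # "document_indexing" under supervisord

# Because /.dockerenv exists inside the container, setup_logger() is expected to
# create, per level, 25 MB x 5 rotating files:
#   /var/log/document_indexing_debug.log
#   /var/log/document_indexing_info.log
#   /var/log/document_indexing_notice.log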