mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-10-10 13:15:18 +02:00
Add basic memory logging (#4234)
* Add basic memory logging * Small tweaks * Switch to monotonic
This commit is contained in:
60
backend/onyx/background/celery/memory_monitoring.py
Normal file
60
backend/onyx/background/celery/memory_monitoring.py
Normal file
@@ -0,0 +1,60 @@
|
|||||||
|
# backend/onyx/background/celery/memory_monitoring.py
import logging
import os
from logging.handlers import RotatingFileHandler

import psutil

from onyx.utils.logger import setup_logger

# Regular application logger
logger = setup_logger()

# Set up a dedicated memory monitoring logger that writes to a persisted
# location so the data survives container restarts.
MEMORY_LOG_DIR = "/var/log/persisted-logs/memory"
MEMORY_LOG_FILE = os.path.join(MEMORY_LOG_DIR, "memory_usage.log")
MEMORY_LOG_MAX_BYTES = 10 * 1024 * 1024  # 10MB
MEMORY_LOG_BACKUP_COUNT = 5  # Keep 5 backup files

# Ensure log directory exists
os.makedirs(MEMORY_LOG_DIR, exist_ok=True)

# Create a dedicated logger for memory monitoring
memory_logger = logging.getLogger("memory_monitoring")
memory_logger.setLevel(logging.INFO)
# Memory records should go only to the dedicated rotating file below; without
# this they would also propagate to the root logger's handlers and show up
# (duplicated) in the regular application logs.
memory_logger.propagate = False

# Guard against attaching a second handler if this module is executed again
# (e.g. via importlib.reload) — logging.getLogger returns the same singleton,
# so an unconditional addHandler would emit every record twice.
if not memory_logger.handlers:
    # Rotating file handler keeps disk usage bounded:
    # MEMORY_LOG_MAX_BYTES per file, MEMORY_LOG_BACKUP_COUNT backups.
    memory_handler = RotatingFileHandler(
        MEMORY_LOG_FILE, maxBytes=MEMORY_LOG_MAX_BYTES, backupCount=MEMORY_LOG_BACKUP_COUNT
    )
    # Formatter includes timestamp and level; the message itself carries the
    # key=value metrics emitted by emit_process_memory.
    memory_formatter = logging.Formatter(
        "%(asctime)s [%(levelname)s] %(message)s", datefmt="%Y-%m-%d %H:%M:%S"
    )
    memory_handler.setFormatter(memory_formatter)
    memory_logger.addHandler(memory_handler)
|
def emit_process_memory(
    pid: int, process_name: str, additional_metadata: dict[str, str | int]
) -> None:
    """Write a one-line memory/CPU snapshot for *pid* to the memory log.

    The line is a flat key=value record (rss_mb, vms_mb, cpu, plus any
    entries from ``additional_metadata``) so it is easy to grep/parse.
    Any failure — e.g. the process has already exited — is logged via the
    application logger and swallowed so monitoring never breaks the caller.
    """
    try:
        proc = psutil.Process(pid)
        mem = proc.memory_info()
        # NOTE: cpu_percent blocks for the 0.1s sampling interval.
        cpu = proc.cpu_percent(interval=0.1)

        # Flatten the metadata dict into space-separated key=value pairs,
        # prefixed with a single space only when non-empty.
        extras = " ".join(f"{key}={value}" for key, value in additional_metadata.items())
        suffix = f" {extras}" if extras else ""

        rss_mb = mem.rss / (1024 * 1024)
        vms_mb = mem.vms / (1024 * 1024)
        memory_logger.info(
            f"PROCESS_MEMORY process_name={process_name} pid={pid} "
            f"rss_mb={rss_mb:.2f} "
            f"vms_mb={vms_mb:.2f} "
            f"cpu={cpu:.2f}{suffix}"
        )
    except Exception:
        logger.exception("Error monitoring process memory.")
|
@@ -23,6 +23,7 @@ from sqlalchemy.orm import Session
|
|||||||
|
|
||||||
from onyx.background.celery.apps.app_base import task_logger
|
from onyx.background.celery.apps.app_base import task_logger
|
||||||
from onyx.background.celery.celery_utils import httpx_init_vespa_pool
|
from onyx.background.celery.celery_utils import httpx_init_vespa_pool
|
||||||
|
from onyx.background.celery.memory_monitoring import emit_process_memory
|
||||||
from onyx.background.celery.tasks.indexing.utils import get_unfenced_index_attempt_ids
|
from onyx.background.celery.tasks.indexing.utils import get_unfenced_index_attempt_ids
|
||||||
from onyx.background.celery.tasks.indexing.utils import IndexingCallback
|
from onyx.background.celery.tasks.indexing.utils import IndexingCallback
|
||||||
from onyx.background.celery.tasks.indexing.utils import should_index
|
from onyx.background.celery.tasks.indexing.utils import should_index
|
||||||
@@ -984,6 +985,9 @@ def connector_indexing_proxy_task(
|
|||||||
redis_connector = RedisConnector(tenant_id, cc_pair_id)
|
redis_connector = RedisConnector(tenant_id, cc_pair_id)
|
||||||
redis_connector_index = redis_connector.new_index(search_settings_id)
|
redis_connector_index = redis_connector.new_index(search_settings_id)
|
||||||
|
|
||||||
|
# Track the last time memory info was emitted
|
||||||
|
last_memory_emit_time = 0.0
|
||||||
|
|
||||||
try:
|
try:
|
||||||
with get_session_with_current_tenant() as db_session:
|
with get_session_with_current_tenant() as db_session:
|
||||||
index_attempt = get_index_attempt(
|
index_attempt = get_index_attempt(
|
||||||
@@ -1024,6 +1028,23 @@ def connector_indexing_proxy_task(
|
|||||||
job.release()
|
job.release()
|
||||||
break
|
break
|
||||||
|
|
||||||
|
# log the memory usage for tracking down memory leaks / connector-specific memory issues
|
||||||
|
pid = job.process.pid
|
||||||
|
if pid is not None:
|
||||||
|
# Only emit memory info once per minute (60 seconds)
|
||||||
|
current_time = time.monotonic()
|
||||||
|
if current_time - last_memory_emit_time >= 60.0:
|
||||||
|
emit_process_memory(
|
||||||
|
pid,
|
||||||
|
"indexing_worker",
|
||||||
|
{
|
||||||
|
"cc_pair_id": cc_pair_id,
|
||||||
|
"search_settings_id": search_settings_id,
|
||||||
|
"index_attempt_id": index_attempt_id,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
last_memory_emit_time = current_time
|
||||||
|
|
||||||
# if a termination signal is detected, break (exit point will clean up)
|
# if a termination signal is detected, break (exit point will clean up)
|
||||||
if self.request.id and redis_connector_index.terminating(self.request.id):
|
if self.request.id and redis_connector_index.terminating(self.request.id):
|
||||||
task_logger.warning(
|
task_logger.warning(
|
||||||
|
@@ -108,6 +108,7 @@ command=tail -qF
|
|||||||
/var/log/celery_worker_light.log
|
/var/log/celery_worker_light.log
|
||||||
/var/log/celery_worker_heavy.log
|
/var/log/celery_worker_heavy.log
|
||||||
/var/log/celery_worker_indexing.log
|
/var/log/celery_worker_indexing.log
|
||||||
|
/var/log/celery_worker_monitoring.log
|
||||||
/var/log/slack_bot.log
|
/var/log/slack_bot.log
|
||||||
stdout_logfile=/dev/stdout
|
stdout_logfile=/dev/stdout
|
||||||
stdout_logfile_maxbytes = 0 # must be set to 0 when stdout_logfile=/dev/stdout
|
stdout_logfile_maxbytes = 0 # must be set to 0 when stdout_logfile=/dev/stdout
|
||||||
|
@@ -254,6 +254,9 @@ services:
|
|||||||
# - ./bundle.pem:/app/bundle.pem:ro
|
# - ./bundle.pem:/app/bundle.pem:ro
|
||||||
extra_hosts:
|
extra_hosts:
|
||||||
- "host.docker.internal:host-gateway"
|
- "host.docker.internal:host-gateway"
|
||||||
|
# optional, only for debugging purposes
|
||||||
|
volumes:
|
||||||
|
- log_store:/var/log/persisted-logs
|
||||||
logging:
|
logging:
|
||||||
driver: json-file
|
driver: json-file
|
||||||
options:
|
options:
|
||||||
@@ -431,3 +434,4 @@ volumes:
|
|||||||
|
|
||||||
model_cache_huggingface:
|
model_cache_huggingface:
|
||||||
indexing_huggingface_model_cache:
|
indexing_huggingface_model_cache:
|
||||||
|
log_store: # for logs that we don't want to lose on container restarts
|
||||||
|
@@ -209,6 +209,9 @@ services:
|
|||||||
# - ./bundle.pem:/app/bundle.pem:ro
|
# - ./bundle.pem:/app/bundle.pem:ro
|
||||||
extra_hosts:
|
extra_hosts:
|
||||||
- "host.docker.internal:host-gateway"
|
- "host.docker.internal:host-gateway"
|
||||||
|
# optional, only for debugging purposes
|
||||||
|
volumes:
|
||||||
|
- log_store:/var/log/persisted-logs
|
||||||
logging:
|
logging:
|
||||||
driver: json-file
|
driver: json-file
|
||||||
options:
|
options:
|
||||||
@@ -384,3 +387,4 @@ volumes:
|
|||||||
# Created by the container itself
|
# Created by the container itself
|
||||||
model_cache_huggingface:
|
model_cache_huggingface:
|
||||||
indexing_huggingface_model_cache:
|
indexing_huggingface_model_cache:
|
||||||
|
log_store: # for logs that we don't want to lose on container restarts
|
||||||
|
@@ -244,6 +244,8 @@ services:
|
|||||||
# - ./bundle.pem:/app/bundle.pem:ro
|
# - ./bundle.pem:/app/bundle.pem:ro
|
||||||
extra_hosts:
|
extra_hosts:
|
||||||
- "host.docker.internal:host-gateway"
|
- "host.docker.internal:host-gateway"
|
||||||
|
volumes:
|
||||||
|
- log_store:/var/log/persisted-logs
|
||||||
logging:
|
logging:
|
||||||
driver: json-file
|
driver: json-file
|
||||||
options:
|
options:
|
||||||
@@ -421,3 +423,4 @@ volumes:
|
|||||||
|
|
||||||
model_cache_huggingface:
|
model_cache_huggingface:
|
||||||
indexing_huggingface_model_cache:
|
indexing_huggingface_model_cache:
|
||||||
|
log_store: # for logs that we don't want to lose on container restarts
|
||||||
|
@@ -54,6 +54,9 @@ services:
|
|||||||
- INDEXING_MODEL_SERVER_HOST=${INDEXING_MODEL_SERVER_HOST:-indexing_model_server}
|
- INDEXING_MODEL_SERVER_HOST=${INDEXING_MODEL_SERVER_HOST:-indexing_model_server}
|
||||||
extra_hosts:
|
extra_hosts:
|
||||||
- "host.docker.internal:host-gateway"
|
- "host.docker.internal:host-gateway"
|
||||||
|
# optional, only for debugging purposes
|
||||||
|
volumes:
|
||||||
|
- log_store:/var/log/persisted-logs
|
||||||
logging:
|
logging:
|
||||||
driver: json-file
|
driver: json-file
|
||||||
options:
|
options:
|
||||||
@@ -233,3 +236,4 @@ volumes:
|
|||||||
# Created by the container itself
|
# Created by the container itself
|
||||||
model_cache_huggingface:
|
model_cache_huggingface:
|
||||||
indexing_huggingface_model_cache:
|
indexing_huggingface_model_cache:
|
||||||
|
log_store: # for logs that we don't want to lose on container restarts
|
||||||
|
@@ -68,6 +68,8 @@ services:
|
|||||||
# - ./bundle.pem:/app/bundle.pem:ro
|
# - ./bundle.pem:/app/bundle.pem:ro
|
||||||
extra_hosts:
|
extra_hosts:
|
||||||
- "host.docker.internal:host-gateway"
|
- "host.docker.internal:host-gateway"
|
||||||
|
volumes:
|
||||||
|
- log_store:/var/log/persisted-logs
|
||||||
logging:
|
logging:
|
||||||
driver: json-file
|
driver: json-file
|
||||||
options:
|
options:
|
||||||
@@ -229,3 +231,4 @@ volumes:
|
|||||||
# Created by the container itself
|
# Created by the container itself
|
||||||
model_cache_huggingface:
|
model_cache_huggingface:
|
||||||
indexing_huggingface_model_cache:
|
indexing_huggingface_model_cache:
|
||||||
|
log_store: # for logs that we don't want to lose on container restarts
|
||||||
|
@@ -32,6 +32,8 @@ services:
|
|||||||
# - ./bundle.pem:/app/bundle.pem:ro
|
# - ./bundle.pem:/app/bundle.pem:ro
|
||||||
extra_hosts:
|
extra_hosts:
|
||||||
- "host.docker.internal:host-gateway"
|
- "host.docker.internal:host-gateway"
|
||||||
|
volumes:
|
||||||
|
- log_store:/var/log/persisted-logs
|
||||||
logging:
|
logging:
|
||||||
driver: json-file
|
driver: json-file
|
||||||
options:
|
options:
|
||||||
@@ -73,6 +75,8 @@ services:
|
|||||||
# - ./bundle.pem:/app/bundle.pem:ro
|
# - ./bundle.pem:/app/bundle.pem:ro
|
||||||
extra_hosts:
|
extra_hosts:
|
||||||
- "host.docker.internal:host-gateway"
|
- "host.docker.internal:host-gateway"
|
||||||
|
volumes:
|
||||||
|
- log_store:/var/log/persisted-logs
|
||||||
logging:
|
logging:
|
||||||
driver: json-file
|
driver: json-file
|
||||||
options:
|
options:
|
||||||
@@ -260,3 +264,4 @@ volumes:
|
|||||||
# Created by the container itself
|
# Created by the container itself
|
||||||
model_cache_huggingface:
|
model_cache_huggingface:
|
||||||
indexing_huggingface_model_cache:
|
indexing_huggingface_model_cache:
|
||||||
|
log_store: # for logs that we don't want to lose on container restarts
|
||||||
|
@@ -62,6 +62,8 @@ services:
|
|||||||
- ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=True
|
- ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=True
|
||||||
extra_hosts:
|
extra_hosts:
|
||||||
- "host.docker.internal:host-gateway"
|
- "host.docker.internal:host-gateway"
|
||||||
|
volumes:
|
||||||
|
- log_store:/var/log/persisted-logs
|
||||||
logging:
|
logging:
|
||||||
driver: json-file
|
driver: json-file
|
||||||
options:
|
options:
|
||||||
@@ -221,3 +223,4 @@ volumes:
|
|||||||
type: none
|
type: none
|
||||||
o: bind
|
o: bind
|
||||||
device: ${DANSWER_VESPA_DATA_DIR:-./vespa_data}
|
device: ${DANSWER_VESPA_DATA_DIR:-./vespa_data}
|
||||||
|
log_store: # for logs that we don't want to lose on container restarts
|
||||||
|
Reference in New Issue
Block a user