Mirror of https://github.com/danswer-ai/danswer.git (synced 2025-05-30 01:30:21 +02:00)
Feature/postgres connection names (#1998)
* avoid reindexing secondary indexes after they succeed
* use postgres application names to facilitate connection debugging
* centralize all postgres application_name constants in the constants file
* missed a couple of files
* mypy fixes
* update dev background script
Parent: 40226678af
Commit: 7c283b090d
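
The main point of the commit is that every Danswer process now tags its Postgres connections with an application_name, so open connections can be attributed to a specific component. As a rough illustration (not part of this commit; the DSN below is a placeholder), the names can be inspected via pg_stat_activity:

# Minimal sketch: group open connections by application_name.
# Assumes a reachable Postgres instance; the DSN is a placeholder.
from sqlalchemy import create_engine, text

engine = create_engine("postgresql+psycopg2://postgres:password@localhost:5432/postgres")

with engine.connect() as conn:
    rows = conn.execute(
        text(
            "SELECT application_name, count(*) AS open_conns "
            "FROM pg_stat_activity GROUP BY application_name"
        )
    )
    for application_name, open_conns in rows:
        # After this change, names such as 'celery', 'indexer_sync', or
        # 'web_sync' should show up here instead of a driver default.
        print(application_name, open_conns)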
@@ -14,6 +14,7 @@ from danswer.background.task_utils import name_cc_cleanup_task
 from danswer.background.task_utils import name_cc_prune_task
 from danswer.background.task_utils import name_document_set_sync_task
 from danswer.configs.app_configs import JOB_TIMEOUT
+from danswer.configs.constants import POSTGRES_CELERY_APP_NAME
 from danswer.connectors.factory import instantiate_connector
 from danswer.connectors.models import InputType
 from danswer.db.connector_credential_pair import get_connector_credential_pair
@@ -38,7 +39,9 @@ from danswer.utils.logger import setup_logger

 logger = setup_logger()

-connection_string = build_connection_string(db_api=SYNC_DB_API)
+connection_string = build_connection_string(
+    db_api=SYNC_DB_API, app_name=POSTGRES_CELERY_APP_NAME
+)
 celery_broker_url = f"sqla+{connection_string}"
 celery_backend_url = f"db+{connection_string}"
 celery_app = Celery(__name__, broker=celery_broker_url, backend=celery_backend_url)
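
For reference, with the change above the Celery broker and result-backend URLs now carry the application_name as a query parameter. A sketch of the resulting strings, with placeholder credentials, host, and database name:

# Placeholder values; the real string comes from build_connection_string().
connection_string = (
    "postgresql+psycopg2://postgres:password@localhost:5432/danswer"
    "?application_name=celery"
)
celery_broker_url = f"sqla+{connection_string}"  # sqla+postgresql+psycopg2://...
celery_backend_url = f"db+{connection_string}"   # db+postgresql+psycopg2://...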
@@ -17,12 +17,14 @@ from danswer.configs.app_configs import DASK_JOB_CLIENT_ENABLED
 from danswer.configs.app_configs import DISABLE_INDEX_UPDATE_ON_SWAP
 from danswer.configs.app_configs import NUM_INDEXING_WORKERS
 from danswer.configs.app_configs import NUM_SECONDARY_INDEXING_WORKERS
+from danswer.configs.constants import POSTGRES_INDEXER_APP_NAME
 from danswer.db.connector import fetch_connectors
 from danswer.db.connector_credential_pair import fetch_connector_credential_pairs
 from danswer.db.embedding_model import get_current_db_embedding_model
 from danswer.db.embedding_model import get_secondary_db_embedding_model
 from danswer.db.engine import get_db_current_time
 from danswer.db.engine import get_sqlalchemy_engine
+from danswer.db.engine import init_sqlalchemy_engine
 from danswer.db.index_attempt import create_index_attempt
 from danswer.db.index_attempt import get_index_attempt
 from danswer.db.index_attempt import get_inprogress_index_attempts
@@ -418,6 +420,7 @@ def update_loop(

 def update__main() -> None:
     set_is_ee_based_on_env_variable()
+    init_sqlalchemy_engine(POSTGRES_INDEXER_APP_NAME)

     logger.info("Starting Indexing Loop")
     update_loop()
@@ -59,6 +59,14 @@ DISABLED_GEN_AI_MSG = (
     "You can still use Danswer as a search engine."
 )

+# Postgres connection constants for application_name
+POSTGRES_WEB_APP_NAME = "web"
+POSTGRES_INDEXER_APP_NAME = "indexer"
+POSTGRES_CELERY_APP_NAME = "celery"
+POSTGRES_CELERY_BEAT_APP_NAME = "celery_beat"
+POSTGRES_CELERY_WORKER_APP_NAME = "celery_worker"
+POSTGRES_PERMISSIONS_APP_NAME = "permissions"
+POSTGRES_UNKNOWN_APP_NAME = "unknown"

 # API Keys
 DANSWER_API_KEY_PREFIX = "API_KEY__"
@@ -18,6 +18,7 @@ from danswer.configs.app_configs import POSTGRES_HOST
 from danswer.configs.app_configs import POSTGRES_PASSWORD
 from danswer.configs.app_configs import POSTGRES_PORT
 from danswer.configs.app_configs import POSTGRES_USER
+from danswer.configs.constants import POSTGRES_UNKNOWN_APP_NAME
 from danswer.utils.logger import setup_logger

 logger = setup_logger()
@@ -25,12 +26,18 @@ logger = setup_logger()
 SYNC_DB_API = "psycopg2"
 ASYNC_DB_API = "asyncpg"

+POSTGRES_APP_NAME = (
+    POSTGRES_UNKNOWN_APP_NAME  # helps to diagnose open connections in postgres
+)
+
 # global so we don't create more than one engine per process
 # outside of being best practice, this is needed so we can properly pool
 # connections and not create a new pool on every request
 _SYNC_ENGINE: Engine | None = None
 _ASYNC_ENGINE: AsyncEngine | None = None

+SessionFactory = None
+

 def get_db_current_time(db_session: Session) -> datetime:
     """Get the current time from Postgres representing the start of the transaction
@@ -51,14 +58,25 @@ def build_connection_string(
     host: str = POSTGRES_HOST,
     port: str = POSTGRES_PORT,
     db: str = POSTGRES_DB,
+    app_name: str | None = None,
 ) -> str:
+    if app_name:
+        return f"postgresql+{db_api}://{user}:{password}@{host}:{port}/{db}?application_name={app_name}"
+
     return f"postgresql+{db_api}://{user}:{password}@{host}:{port}/{db}"


+def init_sqlalchemy_engine(app_name: str) -> None:
+    global POSTGRES_APP_NAME
+    POSTGRES_APP_NAME = app_name
+
+
 def get_sqlalchemy_engine() -> Engine:
     global _SYNC_ENGINE
     if _SYNC_ENGINE is None:
-        connection_string = build_connection_string(db_api=SYNC_DB_API)
+        connection_string = build_connection_string(
+            db_api=SYNC_DB_API, app_name=POSTGRES_APP_NAME + "_sync"
+        )
         _SYNC_ENGINE = create_engine(connection_string, pool_size=40, max_overflow=10)
     return _SYNC_ENGINE

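
One ordering detail: the sync engine is created lazily and cached, so init_sqlalchemy_engine() only affects the application_name if it runs before the first get_sqlalchemy_engine() call; each entry point in this commit calls it at process start for exactly that reason. A minimal sketch of the intended call order (the entry-point function here is illustrative, not from this commit):

from danswer.configs.constants import POSTGRES_INDEXER_APP_NAME
from danswer.db.engine import get_sqlalchemy_engine
from danswer.db.engine import init_sqlalchemy_engine


def hypothetical_entrypoint() -> None:
    # Set the process-wide name first...
    init_sqlalchemy_engine(POSTGRES_INDEXER_APP_NAME)
    # ...then the first engine build picks it up as "indexer_sync".
    engine = get_sqlalchemy_engine()
    print(engine.url)
    # Calling init_sqlalchemy_engine() again after this point would not
    # rename connections from the already-created engine.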
@@ -66,9 +84,16 @@ def get_sqlalchemy_engine() -> Engine:
 def get_sqlalchemy_async_engine() -> AsyncEngine:
     global _ASYNC_ENGINE
     if _ASYNC_ENGINE is None:
+        # underlying asyncpg cannot accept application_name directly in the connection string
+        # https://github.com/MagicStack/asyncpg/issues/798
         connection_string = build_connection_string()
         _ASYNC_ENGINE = create_async_engine(
-            connection_string, pool_size=40, max_overflow=10
+            connection_string,
+            connect_args={
+                "server_settings": {"application_name": POSTGRES_APP_NAME + "_async"}
+            },
+            pool_size=40,
+            max_overflow=10,
         )
     return _ASYNC_ENGINE

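
asyncpg does not accept application_name as a URL query parameter (see the issue linked in the comment), so the async engine passes it through connect_args as a Postgres server setting instead. A standalone sketch of the same pattern, with a placeholder DSN and name:

from sqlalchemy.ext.asyncio import create_async_engine

# Placeholder DSN and name; the real code derives both at runtime.
async_engine = create_async_engine(
    "postgresql+asyncpg://postgres:password@localhost:5432/danswer",
    connect_args={
        # asyncpg forwards server_settings to Postgres at connect time,
        # so application_name still shows up in pg_stat_activity.
        "server_settings": {"application_name": "unknown_async"}
    },
    pool_size=40,
    max_overflow=10,
)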
@@ -115,4 +140,8 @@ async def warm_up_connections(
         await async_conn.close()


-SessionFactory = sessionmaker(bind=get_sqlalchemy_engine())
+def get_session_factory() -> sessionmaker[Session]:
+    global SessionFactory
+    if SessionFactory is None:
+        SessionFactory = sessionmaker(bind=get_sqlalchemy_engine())
+    return SessionFactory
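
Because the session factory is now built on first use instead of at import time (after init_sqlalchemy_engine has had a chance to run), callers go through get_session_factory(). A short usage sketch; the query inside the session is illustrative:

from sqlalchemy import text

from danswer.db.engine import get_session_factory

factory = get_session_factory()
session = factory()
try:
    session.execute(text("SELECT 1"))  # any ORM/Core work would go here
finally:
    session.close()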
@@ -8,7 +8,7 @@ from typing import cast
 from filelock import FileLock
 from sqlalchemy.orm import Session

-from danswer.db.engine import SessionFactory
+from danswer.db.engine import get_session_factory
 from danswer.db.models import KVStore
 from danswer.dynamic_configs.interface import ConfigNotFoundError
 from danswer.dynamic_configs.interface import DynamicConfigStore
@@ -56,7 +56,8 @@ class FileSystemBackedDynamicConfigStore(DynamicConfigStore):
 class PostgresBackedDynamicConfigStore(DynamicConfigStore):
     @contextmanager
     def get_session(self) -> Iterator[Session]:
-        session: Session = SessionFactory()
+        factory = get_session_factory()
+        session: Session = factory()
         try:
             yield session
         finally:
@@ -34,6 +34,7 @@ from danswer.configs.app_configs import USER_AUTH_SECRET
 from danswer.configs.app_configs import WEB_DOMAIN
 from danswer.configs.chat_configs import MULTILINGUAL_QUERY_EXPANSION
 from danswer.configs.constants import AuthType
+from danswer.configs.constants import POSTGRES_WEB_APP_NAME
 from danswer.db.connector import create_initial_default_connector
 from danswer.db.connector_credential_pair import associate_default_cc_pair
 from danswer.db.connector_credential_pair import get_connector_credential_pairs
@@ -42,6 +43,7 @@ from danswer.db.credentials import create_initial_public_credential
 from danswer.db.embedding_model import get_current_db_embedding_model
 from danswer.db.embedding_model import get_secondary_db_embedding_model
 from danswer.db.engine import get_sqlalchemy_engine
+from danswer.db.engine import init_sqlalchemy_engine
 from danswer.db.engine import warm_up_connections
 from danswer.db.index_attempt import cancel_indexing_attempts_past_model
 from danswer.db.index_attempt import expire_index_attempts
@@ -154,6 +156,7 @@ def include_router_with_global_prefix_prepended(

 @asynccontextmanager
 async def lifespan(app: FastAPI) -> AsyncGenerator:
+    init_sqlalchemy_engine(POSTGRES_WEB_APP_NAME)
     engine = get_sqlalchemy_engine()

     verify_auth = fetch_versioned_implementation(
@@ -1,12 +1,18 @@
 from datetime import timedelta
+from typing import Any

+from celery.signals import beat_init
+from celery.signals import worker_init
 from sqlalchemy.orm import Session

 from danswer.background.celery.celery_app import celery_app
 from danswer.background.task_utils import build_celery_task_wrapper
 from danswer.configs.app_configs import JOB_TIMEOUT
+from danswer.configs.constants import POSTGRES_CELERY_BEAT_APP_NAME
+from danswer.configs.constants import POSTGRES_CELERY_WORKER_APP_NAME
 from danswer.db.chat import delete_chat_sessions_older_than
 from danswer.db.engine import get_sqlalchemy_engine
+from danswer.db.engine import init_sqlalchemy_engine
 from danswer.server.settings.store import load_settings
 from danswer.utils.logger import setup_logger
 from danswer.utils.variable_functionality import global_version
@@ -95,6 +101,16 @@ def autogenerate_usage_report_task() -> None:
     )


+@beat_init.connect
+def on_beat_init(sender: Any, **kwargs: Any) -> None:
+    init_sqlalchemy_engine(POSTGRES_CELERY_BEAT_APP_NAME)
+
+
+@worker_init.connect
+def on_worker_init(sender: Any, **kwargs: Any) -> None:
+    init_sqlalchemy_engine(POSTGRES_CELERY_WORKER_APP_NAME)
+
+
 #####
 # Celery Beat (Periodic Tasks) Settings
 #####
@@ -14,7 +14,9 @@ from danswer.background.indexing.job_client import SimpleJobClient
 from danswer.configs.app_configs import CLEANUP_INDEXING_JOBS_TIMEOUT
 from danswer.configs.app_configs import DASK_JOB_CLIENT_ENABLED
 from danswer.configs.constants import DocumentSource
+from danswer.configs.constants import POSTGRES_PERMISSIONS_APP_NAME
 from danswer.db.engine import get_sqlalchemy_engine
+from danswer.db.engine import init_sqlalchemy_engine
 from danswer.db.models import PermissionSyncStatus
 from danswer.utils.logger import setup_logger
 from ee.danswer.configs.app_configs import NUM_PERMISSION_WORKERS
@@ -214,6 +216,7 @@ def permission_loop(delay: int = 60, num_workers: int = NUM_PERMISSION_WORKERS)

 def update__main() -> None:
     logger.info("Starting Permission Syncing Loop")
+    init_sqlalchemy_engine(POSTGRES_PERMISSIONS_APP_NAME)
     permission_loop()

@@ -24,7 +24,7 @@ def run_jobs(exclude_indexing: bool) -> None:
         "ee.danswer.background.celery.celery_app",
         "worker",
         "--pool=threads",
-        "--concurrency=16",
+        "--concurrency=6",
         "--loglevel=INFO",
     ]