mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-06-22 22:11:03 +02:00
Feature/postgres connection names (#1998)
* Avoid reindexing secondary indexes after they succeed
* Use Postgres application names to facilitate connection debugging
* Centralize all Postgres application_name constants in the constants file
* Missed a couple of files
* mypy fixes
* Update dev background script
This commit is contained in:
parent
40226678af
commit
7c283b090d
@ -14,6 +14,7 @@ from danswer.background.task_utils import name_cc_cleanup_task
|
|||||||
from danswer.background.task_utils import name_cc_prune_task
|
from danswer.background.task_utils import name_cc_prune_task
|
||||||
from danswer.background.task_utils import name_document_set_sync_task
|
from danswer.background.task_utils import name_document_set_sync_task
|
||||||
from danswer.configs.app_configs import JOB_TIMEOUT
|
from danswer.configs.app_configs import JOB_TIMEOUT
|
||||||
|
from danswer.configs.constants import POSTGRES_CELERY_APP_NAME
|
||||||
from danswer.connectors.factory import instantiate_connector
|
from danswer.connectors.factory import instantiate_connector
|
||||||
from danswer.connectors.models import InputType
|
from danswer.connectors.models import InputType
|
||||||
from danswer.db.connector_credential_pair import get_connector_credential_pair
|
from danswer.db.connector_credential_pair import get_connector_credential_pair
|
||||||
@ -38,7 +39,9 @@ from danswer.utils.logger import setup_logger
|
|||||||
|
|
||||||
logger = setup_logger()
|
logger = setup_logger()
|
||||||
|
|
||||||
connection_string = build_connection_string(db_api=SYNC_DB_API)
|
connection_string = build_connection_string(
|
||||||
|
db_api=SYNC_DB_API, app_name=POSTGRES_CELERY_APP_NAME
|
||||||
|
)
|
||||||
celery_broker_url = f"sqla+{connection_string}"
|
celery_broker_url = f"sqla+{connection_string}"
|
||||||
celery_backend_url = f"db+{connection_string}"
|
celery_backend_url = f"db+{connection_string}"
|
||||||
celery_app = Celery(__name__, broker=celery_broker_url, backend=celery_backend_url)
|
celery_app = Celery(__name__, broker=celery_broker_url, backend=celery_backend_url)
|
||||||
|
@ -17,12 +17,14 @@ from danswer.configs.app_configs import DASK_JOB_CLIENT_ENABLED
|
|||||||
from danswer.configs.app_configs import DISABLE_INDEX_UPDATE_ON_SWAP
|
from danswer.configs.app_configs import DISABLE_INDEX_UPDATE_ON_SWAP
|
||||||
from danswer.configs.app_configs import NUM_INDEXING_WORKERS
|
from danswer.configs.app_configs import NUM_INDEXING_WORKERS
|
||||||
from danswer.configs.app_configs import NUM_SECONDARY_INDEXING_WORKERS
|
from danswer.configs.app_configs import NUM_SECONDARY_INDEXING_WORKERS
|
||||||
|
from danswer.configs.constants import POSTGRES_INDEXER_APP_NAME
|
||||||
from danswer.db.connector import fetch_connectors
|
from danswer.db.connector import fetch_connectors
|
||||||
from danswer.db.connector_credential_pair import fetch_connector_credential_pairs
|
from danswer.db.connector_credential_pair import fetch_connector_credential_pairs
|
||||||
from danswer.db.embedding_model import get_current_db_embedding_model
|
from danswer.db.embedding_model import get_current_db_embedding_model
|
||||||
from danswer.db.embedding_model import get_secondary_db_embedding_model
|
from danswer.db.embedding_model import get_secondary_db_embedding_model
|
||||||
from danswer.db.engine import get_db_current_time
|
from danswer.db.engine import get_db_current_time
|
||||||
from danswer.db.engine import get_sqlalchemy_engine
|
from danswer.db.engine import get_sqlalchemy_engine
|
||||||
|
from danswer.db.engine import init_sqlalchemy_engine
|
||||||
from danswer.db.index_attempt import create_index_attempt
|
from danswer.db.index_attempt import create_index_attempt
|
||||||
from danswer.db.index_attempt import get_index_attempt
|
from danswer.db.index_attempt import get_index_attempt
|
||||||
from danswer.db.index_attempt import get_inprogress_index_attempts
|
from danswer.db.index_attempt import get_inprogress_index_attempts
|
||||||
@ -418,6 +420,7 @@ def update_loop(
|
|||||||
|
|
||||||
def update__main() -> None:
|
def update__main() -> None:
|
||||||
set_is_ee_based_on_env_variable()
|
set_is_ee_based_on_env_variable()
|
||||||
|
init_sqlalchemy_engine(POSTGRES_INDEXER_APP_NAME)
|
||||||
|
|
||||||
logger.info("Starting Indexing Loop")
|
logger.info("Starting Indexing Loop")
|
||||||
update_loop()
|
update_loop()
|
||||||
|
@ -59,6 +59,14 @@ DISABLED_GEN_AI_MSG = (
|
|||||||
"You can still use Danswer as a search engine."
|
"You can still use Danswer as a search engine."
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Labels sent to Postgres as the connection's application_name.
# One per process type, so open connections can be attributed to their owner.
POSTGRES_WEB_APP_NAME = "web"
POSTGRES_INDEXER_APP_NAME = "indexer"
POSTGRES_CELERY_APP_NAME = "celery"
POSTGRES_CELERY_BEAT_APP_NAME = "celery_beat"
POSTGRES_CELERY_WORKER_APP_NAME = "celery_worker"
POSTGRES_PERMISSIONS_APP_NAME = "permissions"
# Fallback used when a process never registers its own name.
POSTGRES_UNKNOWN_APP_NAME = "unknown"
|
||||||
|
|
||||||
# API Keys
|
# API Keys
|
||||||
DANSWER_API_KEY_PREFIX = "API_KEY__"
|
DANSWER_API_KEY_PREFIX = "API_KEY__"
|
||||||
|
@ -18,6 +18,7 @@ from danswer.configs.app_configs import POSTGRES_HOST
|
|||||||
from danswer.configs.app_configs import POSTGRES_PASSWORD
|
from danswer.configs.app_configs import POSTGRES_PASSWORD
|
||||||
from danswer.configs.app_configs import POSTGRES_PORT
|
from danswer.configs.app_configs import POSTGRES_PORT
|
||||||
from danswer.configs.app_configs import POSTGRES_USER
|
from danswer.configs.app_configs import POSTGRES_USER
|
||||||
|
from danswer.configs.constants import POSTGRES_UNKNOWN_APP_NAME
|
||||||
from danswer.utils.logger import setup_logger
|
from danswer.utils.logger import setup_logger
|
||||||
|
|
||||||
logger = setup_logger()
|
logger = setup_logger()
|
||||||
@ -25,12 +26,18 @@ logger = setup_logger()
|
|||||||
SYNC_DB_API = "psycopg2"
|
SYNC_DB_API = "psycopg2"
|
||||||
ASYNC_DB_API = "asyncpg"
|
ASYNC_DB_API = "asyncpg"
|
||||||
|
|
||||||
|
POSTGRES_APP_NAME = (
|
||||||
|
POSTGRES_UNKNOWN_APP_NAME # helps to diagnose open connections in postgres
|
||||||
|
)
|
||||||
|
|
||||||
# global so we don't create more than one engine per process
|
# global so we don't create more than one engine per process
|
||||||
# outside of being best practice, this is needed so we can properly pool
|
# outside of being best practice, this is needed so we can properly pool
|
||||||
# connections and not create a new pool on every request
|
# connections and not create a new pool on every request
|
||||||
_SYNC_ENGINE: Engine | None = None
|
_SYNC_ENGINE: Engine | None = None
|
||||||
_ASYNC_ENGINE: AsyncEngine | None = None
|
_ASYNC_ENGINE: AsyncEngine | None = None
|
||||||
|
|
||||||
|
SessionFactory = None
|
||||||
|
|
||||||
|
|
||||||
def get_db_current_time(db_session: Session) -> datetime:
|
def get_db_current_time(db_session: Session) -> datetime:
|
||||||
"""Get the current time from Postgres representing the start of the transaction
|
"""Get the current time from Postgres representing the start of the transaction
|
||||||
@ -51,14 +58,25 @@ def build_connection_string(
|
|||||||
host: str = POSTGRES_HOST,
|
host: str = POSTGRES_HOST,
|
||||||
port: str = POSTGRES_PORT,
|
port: str = POSTGRES_PORT,
|
||||||
db: str = POSTGRES_DB,
|
db: str = POSTGRES_DB,
|
||||||
|
app_name: str | None = None,
|
||||||
) -> str:
|
) -> str:
|
||||||
|
if app_name:
|
||||||
|
return f"postgresql+{db_api}://{user}:{password}@{host}:{port}/{db}?application_name={app_name}"
|
||||||
|
|
||||||
return f"postgresql+{db_api}://{user}:{password}@{host}:{port}/{db}"
|
return f"postgresql+{db_api}://{user}:{password}@{host}:{port}/{db}"
|
||||||
|
|
||||||
|
|
||||||
|
def init_sqlalchemy_engine(app_name: str) -> None:
    """Record the Postgres ``application_name`` used by this process's engines.

    The value is stored in the module-level ``POSTGRES_APP_NAME``. The engine
    getters in this module read it only when they build their (cached) engine
    for the first time, so this should be called before any engine is created.

    Args:
        app_name: Label identifying the calling process (e.g. one of the
            ``POSTGRES_*_APP_NAME`` constants).
    """
    global POSTGRES_APP_NAME

    # An engine that already exists keeps the name it was created with; make
    # that visible instead of letting the late call silently do nothing for it.
    if _SYNC_ENGINE is not None or _ASYNC_ENGINE is not None:
        logger.warning(
            "init_sqlalchemy_engine(%r) called after an engine was created; "
            "existing connections keep application_name prefix %r",
            app_name,
            POSTGRES_APP_NAME,
        )

    POSTGRES_APP_NAME = app_name
|
||||||
|
|
||||||
|
|
||||||
def get_sqlalchemy_engine() -> Engine:
    """Return the process-wide synchronous engine, building it on first use.

    The engine is cached in ``_SYNC_ENGINE``. Its connection string carries
    ``POSTGRES_APP_NAME`` with a ``_sync`` suffix as the application name, as
    it is set at the time of the first call.
    """
    global _SYNC_ENGINE
    if _SYNC_ENGINE is not None:
        return _SYNC_ENGINE

    sync_app_name = POSTGRES_APP_NAME + "_sync"
    connection_string = build_connection_string(
        db_api=SYNC_DB_API, app_name=sync_app_name
    )
    _SYNC_ENGINE = create_engine(connection_string, pool_size=40, max_overflow=10)
    return _SYNC_ENGINE
|
||||||
|
|
||||||
@ -66,9 +84,16 @@ def get_sqlalchemy_engine() -> Engine:
|
|||||||
def get_sqlalchemy_async_engine() -> AsyncEngine:
    """Return the process-wide async engine, building it on first use.

    The engine is cached in ``_ASYNC_ENGINE``. The application name is
    ``POSTGRES_APP_NAME`` with an ``_async`` suffix, as it is set at the time
    of the first call.
    """
    global _ASYNC_ENGINE
    if _ASYNC_ENGINE is not None:
        return _ASYNC_ENGINE

    # underlying asyncpg cannot accept application_name directly in the
    # connection string, so it is passed through server_settings instead
    # https://github.com/MagicStack/asyncpg/issues/798
    connection_string = build_connection_string()
    async_app_name = POSTGRES_APP_NAME + "_async"
    _ASYNC_ENGINE = create_async_engine(
        connection_string,
        connect_args={"server_settings": {"application_name": async_app_name}},
        pool_size=40,
        max_overflow=10,
    )
    return _ASYNC_ENGINE
|
||||||
|
|
||||||
@ -115,4 +140,8 @@ async def warm_up_connections(
|
|||||||
await async_conn.close()
|
await async_conn.close()
|
||||||
|
|
||||||
|
|
||||||
SessionFactory = sessionmaker(bind=get_sqlalchemy_engine())
|
def get_session_factory() -> sessionmaker[Session]:
    """Return the shared ``sessionmaker``, creating it on first use.

    The factory is cached in the module-level ``SessionFactory`` and is bound
    to the engine returned by ``get_sqlalchemy_engine()``. Building it lazily
    means the engine is not created as a side effect of importing this module.
    """
    global SessionFactory
    if SessionFactory is not None:
        return SessionFactory

    SessionFactory = sessionmaker(bind=get_sqlalchemy_engine())
    return SessionFactory
|
||||||
|
@ -8,7 +8,7 @@ from typing import cast
|
|||||||
from filelock import FileLock
|
from filelock import FileLock
|
||||||
from sqlalchemy.orm import Session
|
from sqlalchemy.orm import Session
|
||||||
|
|
||||||
from danswer.db.engine import SessionFactory
|
from danswer.db.engine import get_session_factory
|
||||||
from danswer.db.models import KVStore
|
from danswer.db.models import KVStore
|
||||||
from danswer.dynamic_configs.interface import ConfigNotFoundError
|
from danswer.dynamic_configs.interface import ConfigNotFoundError
|
||||||
from danswer.dynamic_configs.interface import DynamicConfigStore
|
from danswer.dynamic_configs.interface import DynamicConfigStore
|
||||||
@ -56,7 +56,8 @@ class FileSystemBackedDynamicConfigStore(DynamicConfigStore):
|
|||||||
class PostgresBackedDynamicConfigStore(DynamicConfigStore):
|
class PostgresBackedDynamicConfigStore(DynamicConfigStore):
|
||||||
@contextmanager
|
@contextmanager
|
||||||
def get_session(self) -> Iterator[Session]:
|
def get_session(self) -> Iterator[Session]:
|
||||||
session: Session = SessionFactory()
|
factory = get_session_factory()
|
||||||
|
session: Session = factory()
|
||||||
try:
|
try:
|
||||||
yield session
|
yield session
|
||||||
finally:
|
finally:
|
||||||
|
@ -34,6 +34,7 @@ from danswer.configs.app_configs import USER_AUTH_SECRET
|
|||||||
from danswer.configs.app_configs import WEB_DOMAIN
|
from danswer.configs.app_configs import WEB_DOMAIN
|
||||||
from danswer.configs.chat_configs import MULTILINGUAL_QUERY_EXPANSION
|
from danswer.configs.chat_configs import MULTILINGUAL_QUERY_EXPANSION
|
||||||
from danswer.configs.constants import AuthType
|
from danswer.configs.constants import AuthType
|
||||||
|
from danswer.configs.constants import POSTGRES_WEB_APP_NAME
|
||||||
from danswer.db.connector import create_initial_default_connector
|
from danswer.db.connector import create_initial_default_connector
|
||||||
from danswer.db.connector_credential_pair import associate_default_cc_pair
|
from danswer.db.connector_credential_pair import associate_default_cc_pair
|
||||||
from danswer.db.connector_credential_pair import get_connector_credential_pairs
|
from danswer.db.connector_credential_pair import get_connector_credential_pairs
|
||||||
@ -42,6 +43,7 @@ from danswer.db.credentials import create_initial_public_credential
|
|||||||
from danswer.db.embedding_model import get_current_db_embedding_model
|
from danswer.db.embedding_model import get_current_db_embedding_model
|
||||||
from danswer.db.embedding_model import get_secondary_db_embedding_model
|
from danswer.db.embedding_model import get_secondary_db_embedding_model
|
||||||
from danswer.db.engine import get_sqlalchemy_engine
|
from danswer.db.engine import get_sqlalchemy_engine
|
||||||
|
from danswer.db.engine import init_sqlalchemy_engine
|
||||||
from danswer.db.engine import warm_up_connections
|
from danswer.db.engine import warm_up_connections
|
||||||
from danswer.db.index_attempt import cancel_indexing_attempts_past_model
|
from danswer.db.index_attempt import cancel_indexing_attempts_past_model
|
||||||
from danswer.db.index_attempt import expire_index_attempts
|
from danswer.db.index_attempt import expire_index_attempts
|
||||||
@ -154,6 +156,7 @@ def include_router_with_global_prefix_prepended(
|
|||||||
|
|
||||||
@asynccontextmanager
|
@asynccontextmanager
|
||||||
async def lifespan(app: FastAPI) -> AsyncGenerator:
|
async def lifespan(app: FastAPI) -> AsyncGenerator:
|
||||||
|
init_sqlalchemy_engine(POSTGRES_WEB_APP_NAME)
|
||||||
engine = get_sqlalchemy_engine()
|
engine = get_sqlalchemy_engine()
|
||||||
|
|
||||||
verify_auth = fetch_versioned_implementation(
|
verify_auth = fetch_versioned_implementation(
|
||||||
|
@ -1,12 +1,18 @@
|
|||||||
from datetime import timedelta
|
from datetime import timedelta
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from celery.signals import beat_init
|
||||||
|
from celery.signals import worker_init
|
||||||
from sqlalchemy.orm import Session
|
from sqlalchemy.orm import Session
|
||||||
|
|
||||||
from danswer.background.celery.celery_app import celery_app
|
from danswer.background.celery.celery_app import celery_app
|
||||||
from danswer.background.task_utils import build_celery_task_wrapper
|
from danswer.background.task_utils import build_celery_task_wrapper
|
||||||
from danswer.configs.app_configs import JOB_TIMEOUT
|
from danswer.configs.app_configs import JOB_TIMEOUT
|
||||||
|
from danswer.configs.constants import POSTGRES_CELERY_BEAT_APP_NAME
|
||||||
|
from danswer.configs.constants import POSTGRES_CELERY_WORKER_APP_NAME
|
||||||
from danswer.db.chat import delete_chat_sessions_older_than
|
from danswer.db.chat import delete_chat_sessions_older_than
|
||||||
from danswer.db.engine import get_sqlalchemy_engine
|
from danswer.db.engine import get_sqlalchemy_engine
|
||||||
|
from danswer.db.engine import init_sqlalchemy_engine
|
||||||
from danswer.server.settings.store import load_settings
|
from danswer.server.settings.store import load_settings
|
||||||
from danswer.utils.logger import setup_logger
|
from danswer.utils.logger import setup_logger
|
||||||
from danswer.utils.variable_functionality import global_version
|
from danswer.utils.variable_functionality import global_version
|
||||||
@ -95,6 +101,16 @@ def autogenerate_usage_report_task() -> None:
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@beat_init.connect
def on_beat_init(sender: Any, **kwargs: Any) -> None:
    """Handler for Celery's ``beat_init`` signal.

    Registers ``POSTGRES_CELERY_BEAT_APP_NAME`` as the Postgres application
    name for this process. ``sender`` and ``kwargs`` are accepted to match the
    signal signature and are not used.
    """
    init_sqlalchemy_engine(app_name=POSTGRES_CELERY_BEAT_APP_NAME)
|
||||||
|
|
||||||
|
|
||||||
|
@worker_init.connect
def on_worker_init(sender: Any, **kwargs: Any) -> None:
    """Handler for Celery's ``worker_init`` signal.

    Registers ``POSTGRES_CELERY_WORKER_APP_NAME`` as the Postgres application
    name for this process. ``sender`` and ``kwargs`` are accepted to match the
    signal signature and are not used.
    """
    init_sqlalchemy_engine(app_name=POSTGRES_CELERY_WORKER_APP_NAME)
|
||||||
|
|
||||||
|
|
||||||
#####
|
#####
|
||||||
# Celery Beat (Periodic Tasks) Settings
|
# Celery Beat (Periodic Tasks) Settings
|
||||||
#####
|
#####
|
||||||
|
@ -14,7 +14,9 @@ from danswer.background.indexing.job_client import SimpleJobClient
|
|||||||
from danswer.configs.app_configs import CLEANUP_INDEXING_JOBS_TIMEOUT
|
from danswer.configs.app_configs import CLEANUP_INDEXING_JOBS_TIMEOUT
|
||||||
from danswer.configs.app_configs import DASK_JOB_CLIENT_ENABLED
|
from danswer.configs.app_configs import DASK_JOB_CLIENT_ENABLED
|
||||||
from danswer.configs.constants import DocumentSource
|
from danswer.configs.constants import DocumentSource
|
||||||
|
from danswer.configs.constants import POSTGRES_PERMISSIONS_APP_NAME
|
||||||
from danswer.db.engine import get_sqlalchemy_engine
|
from danswer.db.engine import get_sqlalchemy_engine
|
||||||
|
from danswer.db.engine import init_sqlalchemy_engine
|
||||||
from danswer.db.models import PermissionSyncStatus
|
from danswer.db.models import PermissionSyncStatus
|
||||||
from danswer.utils.logger import setup_logger
|
from danswer.utils.logger import setup_logger
|
||||||
from ee.danswer.configs.app_configs import NUM_PERMISSION_WORKERS
|
from ee.danswer.configs.app_configs import NUM_PERMISSION_WORKERS
|
||||||
@ -214,6 +216,7 @@ def permission_loop(delay: int = 60, num_workers: int = NUM_PERMISSION_WORKERS)
|
|||||||
|
|
||||||
def update__main() -> None:
    """Entry point for the permission syncing process.

    Logs the start-up message, registers ``POSTGRES_PERMISSIONS_APP_NAME`` as
    the Postgres application name, then hands control to ``permission_loop()``.
    """
    logger.info("Starting Permission Syncing Loop")
    init_sqlalchemy_engine(app_name=POSTGRES_PERMISSIONS_APP_NAME)
    permission_loop()
|
||||||
|
|
||||||
|
|
||||||
|
@ -24,7 +24,7 @@ def run_jobs(exclude_indexing: bool) -> None:
|
|||||||
"ee.danswer.background.celery.celery_app",
|
"ee.danswer.background.celery.celery_app",
|
||||||
"worker",
|
"worker",
|
||||||
"--pool=threads",
|
"--pool=threads",
|
||||||
"--concurrency=16",
|
"--concurrency=6",
|
||||||
"--loglevel=INFO",
|
"--loglevel=INFO",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user