Add tenant context (#2596)

* add proper tenant context to background tasks

* update for new session logic

* remove unnecessary functions

* add additional tenant context

* update ports

* proper format / directory structure

* update ports

* ensure tenant context properly passed to ee bg tasks

* add user provisioning

* nit

* validated for multi tenant

* auth

* nit

* nit

* nit

* nit

* validate pruning

* evaluate integration tests

* at long last, validated celery beat

* nit: minor edge case patched

* minor

* validate update

* nit

Author: pablodanswer
Date: 2024-10-10 09:34:32 -07:00
Committed by: GitHub
Parent: 9be54a2b4c
Commit: f40c5ca9bd
52 changed files with 1319 additions and 389 deletions


@@ -1,21 +1,22 @@
+ from typing import Any
import asyncio
from logging.config import fileConfig
from alembic import context
- from danswer.db.engine import build_connection_string
- from danswer.db.models import Base
from sqlalchemy import pool
from sqlalchemy.engine import Connection
from sqlalchemy.ext.asyncio import create_async_engine
- from celery.backends.database.session import ResultModelBase # type: ignore
- from sqlalchemy.schema import SchemaItem
from sqlalchemy.sql import text
+ from danswer.configs.app_configs import MULTI_TENANT
+ from danswer.db.engine import build_connection_string
+ from danswer.db.models import Base
+ from celery.backends.database.session import ResultModelBase # type: ignore
# Alembic Config object
config = context.config
# Interpret the config file for Python logging.
# This line sets up loggers basically.
if config.config_file_name is not None and config.attributes.get(
"configure_logger", True
):
@@ -35,8 +36,7 @@ def get_schema_options() -> tuple[str, bool]:
for pair in arg.split(","):
if "=" in pair:
key, value = pair.split("=", 1)
- x_args[key] = value
+ x_args[key.strip()] = value.strip()
schema_name = x_args.get("schema", "public")
create_schema = x_args.get("create_schema", "true").lower() == "true"
return schema_name, create_schema
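
For context on how these options reach the migration environment: Alembic passes anything given with its -x flag through context.get_x_argument(), and the loop above turns a value such as schema=tenant_abc,create_schema=true into a small dict with whitespace stripped. A minimal standalone sketch of that parsing, using a made-up tenant name (an illustration, not code from this PR):

# Standalone sketch of the x-argument parsing above. The raw string stands in
# for what a caller would pass via "alembic -x ..."; the tenant name is made up.
raw = "schema=tenant_abc,create_schema=false"

x_args: dict[str, str] = {}
for pair in raw.split(","):
    if "=" in pair:
        key, value = pair.split("=", 1)
        x_args[key.strip()] = value.strip()

schema_name = x_args.get("schema", "public")
create_schema = x_args.get("create_schema", "true").lower() == "true"
print(schema_name, create_schema)  # tenant_abc False
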
@@ -46,11 +46,7 @@ EXCLUDE_TABLES = {"kombu_queue", "kombu_message"}
def include_object(
- object: SchemaItem,
- name: str,
- type_: str,
- reflected: bool,
- compare_to: SchemaItem | None,
+ object: Any, name: str, type_: str, reflected: bool, compare_to: Any
) -> bool:
if type_ == "table" and name in EXCLUDE_TABLES:
return False
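
The signature above is only loosened to Any; the filtering itself is unchanged, and it keeps Celery/kombu's SQLAlchemy tables out of Alembic's hands. A quick standalone sketch of the predicate, assuming the trailing return True that the hunk does not show and using a made-up table name:

# Standalone sketch of the table filter. The final "return True" is implied by
# how include_object is used but falls outside the hunk shown above;
# "chat_session" is just a stand-in table name.
EXCLUDE_TABLES = {"kombu_queue", "kombu_message"}

def include_object(
    object: object, name: str, type_: str, reflected: bool, compare_to: object
) -> bool:
    if type_ == "table" and name in EXCLUDE_TABLES:
        return False
    return True

print(include_object(None, "kombu_message", "table", True, None))  # False
print(include_object(None, "chat_session", "table", True, None))   # True
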
@@ -59,7 +55,6 @@ def include_object(
def run_migrations_offline() -> None:
"""Run migrations in 'offline' mode.
This configures the context with just a URL
and not an Engine, though an Engine is acceptable
here as well. By skipping the Engine creation
@@ -67,17 +62,18 @@ def run_migrations_offline() -> None:
Calls to context.execute() here emit the given string to the
script output.
"""
+ schema_name, _ = get_schema_options()
url = build_connection_string()
- schema, _ = get_schema_options()
context.configure(
url=url,
target_metadata=target_metadata, # type: ignore
literal_binds=True,
include_object=include_object,
- dialect_opts={"paramstyle": "named"},
- version_table_schema=schema,
+ version_table_schema=schema_name,
include_schemas=True,
script_location=config.get_main_option("script_location"),
+ dialect_opts={"paramstyle": "named"},
)
with context.begin_transaction():
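
In offline mode this only renders SQL, with the Alembic version table pinned to whatever schema the x-arguments name. One way a caller might generate tenant-scoped SQL is sketched below; the tenant schema name is invented and an alembic.ini in the working directory is assumed, so treat it as an illustration rather than project tooling:

import subprocess

# Hypothetical helper: render offline ("--sql") migration SQL for one tenant
# schema by forwarding x-arguments to the alembic CLI. The schema name is made
# up, and an alembic.ini in the working directory is assumed.
subprocess.run(
    ["alembic", "-x", "schema=tenant_abc", "upgrade", "head", "--sql"],
    check=True,
)
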
@@ -85,20 +81,30 @@ def run_migrations_offline() -> None:
def do_run_migrations(connection: Connection) -> None:
- schema, create_schema = get_schema_options()
+ schema_name, create_schema = get_schema_options()
+ if MULTI_TENANT and schema_name == "public":
+ raise ValueError(
+ "Cannot run default migrations in public schema when multi-tenancy is enabled. "
+ "Please specify a tenant-specific schema."
+ )
if create_schema:
- connection.execute(text(f'CREATE SCHEMA IF NOT EXISTS "{schema}"'))
+ connection.execute(text(f'CREATE SCHEMA IF NOT EXISTS "{schema_name}"'))
connection.execute(text("COMMIT"))
- connection.execute(text(f'SET search_path TO "{schema}"'))
+ # Set search_path to the target schema
+ connection.execute(text(f'SET search_path TO "{schema_name}"'))
context.configure(
connection=connection,
target_metadata=target_metadata, # type: ignore
- version_table_schema=schema,
include_object=include_object,
+ version_table_schema=schema_name,
include_schemas=True,
compare_type=True,
compare_server_default=True,
script_location=config.get_main_option("script_location"),
)
with context.begin_transaction():
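
Because get_schema_options() defaults to "public", the guard above makes a bare upgrade fail as soon as MULTI_TENANT is enabled, forcing callers to name a tenant schema. A rough sketch of how a driver might loop over tenants and apply migrations one schema at a time (the schema list and the subprocess approach are assumptions, not code from this PR):

import subprocess

# Hypothetical multi-tenant migration driver. The guard in do_run_migrations
# rejects a bare "public" run when MULTI_TENANT is enabled, so each tenant
# schema is migrated explicitly. Schema names below are made up.
TENANT_SCHEMAS = ["tenant_4f1a", "tenant_9c2b"]

for schema in TENANT_SCHEMAS:
    subprocess.run(
        ["alembic", "-x", f"schema={schema},create_schema=true", "upgrade", "head"],
        check=True,
    )
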
@@ -106,7 +112,6 @@ def do_run_migrations(connection: Connection) -> None:
async def run_async_migrations() -> None:
"""Run migrations in 'online' mode."""
connectable = create_async_engine(
build_connection_string(),
poolclass=pool.NullPool,
@@ -119,7 +124,6 @@ async def run_async_migrations() -> None:
def run_migrations_online() -> None:
"""Run migrations in 'online' mode."""
asyncio.run(run_async_migrations())