From 599db71238ca2d52470daefb5a3a1213a8b48038 Mon Sep 17 00:00:00 2001 From: Yuhong Sun Date: Sun, 14 Apr 2024 23:29:32 -0700 Subject: [PATCH] Permission Sync Models (#1334) --- ...c9929a46_permission_auto_sync_framework.py | 81 ++++++++++++++++++ backend/danswer/db/models.py | 84 +++++++++++++++++++ 2 files changed, 165 insertions(+) create mode 100644 backend/alembic/versions/72bdc9929a46_permission_auto_sync_framework.py diff --git a/backend/alembic/versions/72bdc9929a46_permission_auto_sync_framework.py b/backend/alembic/versions/72bdc9929a46_permission_auto_sync_framework.py new file mode 100644 index 000000000..0e04478f2 --- /dev/null +++ b/backend/alembic/versions/72bdc9929a46_permission_auto_sync_framework.py @@ -0,0 +1,81 @@ +"""Permission Auto Sync Framework + +Revision ID: 72bdc9929a46 +Revises: 475fcefe8826 +Create Date: 2024-04-14 21:15:28.659634 + +""" +from alembic import op +import sqlalchemy as sa + +# revision identifiers, used by Alembic. +revision = "72bdc9929a46" +down_revision = "475fcefe8826" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + op.create_table( + "email_to_external_user_cache", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column("external_user_id", sa.String(), nullable=False), + sa.Column("user_id", sa.UUID(), nullable=True), + sa.Column("user_email", sa.String(), nullable=False), + sa.ForeignKeyConstraint( + ["user_id"], + ["user.id"], + ), + sa.PrimaryKeyConstraint("id"), + ) + op.create_table( + "external_permission", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column("user_id", sa.UUID(), nullable=True), + sa.Column("user_email", sa.String(), nullable=False), + sa.Column( + "source_type", + sa.String(), + nullable=False, + ), + sa.Column("external_permission_group", sa.String(), nullable=False), + sa.ForeignKeyConstraint( + ["user_id"], + ["user.id"], + ), + sa.PrimaryKeyConstraint("id"), + ) + op.create_table( + "permission_sync_run", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column( + "source_type", + sa.String(), + nullable=False, + ), + sa.Column("update_type", sa.String(), nullable=False), + sa.Column("cc_pair_id", sa.Integer(), nullable=True), + sa.Column( + "status", + sa.String(), + nullable=False, + ), + sa.Column("error_msg", sa.Text(), nullable=True), + sa.Column( + "updated_at", + sa.DateTime(timezone=True), + server_default=sa.text("now()"), + nullable=False, + ), + sa.ForeignKeyConstraint( + ["cc_pair_id"], + ["connector_credential_pair.id"], + ), + sa.PrimaryKeyConstraint("id"), + ) + + +def downgrade() -> None: + op.drop_table("permission_sync_run") + op.drop_table("external_permission") + op.drop_table("email_to_external_user_cache") diff --git a/backend/danswer/db/models.py b/backend/danswer/db/models.py index a4bb525d7..004025d7e 100644 --- a/backend/danswer/db/models.py +++ b/backend/danswer/db/models.py @@ -1041,3 +1041,87 @@ class UserGroup(Base): secondary=DocumentSet__UserGroup.__table__, viewonly=True, ) + + +"""Tables related to Permission Sync""" + + +class PermissionSyncStatus(str, PyEnum): + IN_PROGRESS = "in_progress" + SUCCESS = "success" + FAILED = "failed" + + +class PermissionSyncJobType(str, PyEnum): + USER_LEVEL = "user_level" + GROUP_LEVEL = "group_level" + + +class PermissionSyncRun(Base): + """Represents one run of a permission sync job. For some given cc_pair, it is either sync-ing + the users or it is sync-ing the groups""" + + __tablename__ = "permission_sync_run" + + id: Mapped[int] = mapped_column(Integer, primary_key=True) + # Not strictly needed but makes it easy to use without fetching from cc_pair + source_type: Mapped[DocumentSource] = mapped_column(Enum(DocumentSource)) + # Currently all sync jobs are handled as a group permission sync or a user permission sync + update_type: Mapped[PermissionSyncJobType] = mapped_column( + Enum(PermissionSyncJobType) + ) + cc_pair_id: Mapped[int | None] = mapped_column( + ForeignKey("connector_credential_pair.id"), nullable=True + ) + status: Mapped[PermissionSyncStatus] = mapped_column(Enum(PermissionSyncStatus)) + error_msg: Mapped[str | None] = mapped_column(Text, default=None) + updated_at: Mapped[datetime.datetime] = mapped_column( + DateTime(timezone=True), server_default=func.now(), onupdate=func.now() + ) + + cc_pair: Mapped[ConnectorCredentialPair] = relationship("ConnectorCredentialPair") + + +class ExternalPermission(Base): + """Maps user info both internal and external to the name of the external group + This maps the user to all of their external groups so that the external group name can be + attached to the ACL list matching during query time. User level permissions can be handled by + directly adding the Danswer user to the doc ACL list""" + + __tablename__ = "external_permission" + + id: Mapped[int] = mapped_column(Integer, primary_key=True) + user_id: Mapped[UUID | None] = mapped_column(ForeignKey("user.id"), nullable=True) + # Email is needed because we want to keep track of users not in Danswer to simplify process + # when the user joins + user_email: Mapped[str] = mapped_column(String) + source_type: Mapped[DocumentSource] = mapped_column(Enum(DocumentSource)) + external_permission_group: Mapped[str] = mapped_column(String) + user = relationship("User") + + +class EmailToExternalUserCache(Base): + """A way to map users IDs in the external tool to a user in Danswer or at least an email for + when the user joins. Used as a cache for when fetching external groups which have their own + user ids, this can easily be mapped back to users already known in Danswer without needing + to call external APIs to get the user emails. + + This way when groups are updated in the external tool and we need to update the mapping of + internal users to the groups, we can sync the internal users to the external groups they are + part of using this. + + Ie. User Chris is part of groups alpha, beta, and we can update this if Chris is no longer + part of alpha in some external tool. + """ + + __tablename__ = "email_to_external_user_cache" + + id: Mapped[int] = mapped_column(Integer, primary_key=True) + external_user_id: Mapped[str] = mapped_column(String) + user_id: Mapped[UUID | None] = mapped_column(ForeignKey("user.id"), nullable=True) + # Email is needed because we want to keep track of users not in Danswer to simplify process + # when the user joins + user_email: Mapped[str] = mapped_column(String) + source_type: Mapped[DocumentSource] = mapped_column(Enum(DocumentSource)) + + user = relationship("User")