Auto-populate ACL fields on server startup

This commit is contained in:
Weves 2023-09-26 22:14:05 -07:00 committed by Chris Weaver
parent c09f00990e
commit 7afcf3489f
3 changed files with 48 additions and 40 deletions

View File

@ -44,6 +44,7 @@ from danswer.server.manage import router as admin_router
from danswer.server.search_backend import router as backend_router
from danswer.server.slack_bot_management import router as slack_bot_management_router
from danswer.server.users import router as user_router
from danswer.utils.acl import set_acl_for_vespa
from danswer.utils.logger import setup_logger
@ -213,6 +214,10 @@ def get_application() -> FastAPI:
logger.info("Verifying Document Index(s) is/are available.")
get_default_document_index().ensure_indices_exist()
# TODO: remove this once everyone is migrated to ACL
logger.info("Populating Access Control List fields in Vespa")
set_acl_for_vespa()
return application

View File

@ -0,0 +1,38 @@
from sqlalchemy import select
from sqlalchemy.orm import Session
from danswer.access.models import DocumentAccess
from danswer.datastores.document_index import get_default_document_index
from danswer.datastores.interfaces import UpdateRequest
from danswer.datastores.vespa.store import VespaIndex
from danswer.db.document import get_acccess_info_for_documents
from danswer.db.engine import get_sqlalchemy_engine
from danswer.db.models import Document
from danswer.utils.logger import setup_logger
logger = setup_logger()
def set_acl_for_vespa() -> None:
    """Update the ACL of all documents in Vespa based on the state of Postgres.

    Reads the ids of all documents stored in Postgres, looks up their access
    information and sends one update request per returned access entry to the
    Vespa index.

    Raises:
        ValueError: If the default document index is not a `VespaIndex`.
    """
    vespa_index = get_default_document_index()
    if not isinstance(vespa_index, VespaIndex):
        raise ValueError("This script is only for Vespa indexes")

    with Session(get_sqlalchemy_engine()) as db_session:
        # Only the ids are used below, so select that single column instead
        # of loading a full ORM object for every document.
        document_ids = list(db_session.scalars(select(Document.id)).all())
        if not document_ids:
            # Nothing is indexed yet, so there is nothing to update.
            return

        document_access_info = get_acccess_info_for_documents(
            db_session=db_session,
            document_ids=document_ids,
        )

        # for all documents, set the `access_control_list` field appropriately
        # based on the state of Postgres
        update_requests = [
            UpdateRequest(
                document_ids=[document_id],
                access=DocumentAccess.build(user_ids, is_public),
            )
            for document_id, user_ids, is_public in document_access_info
        ]
        vespa_index.update(update_requests=update_requests)

View File

@ -1,44 +1,9 @@
"""Script which updates Vespa to align with the access described in Postgres.
Should be run when a user who has docs already indexed switches over to the new
access control system. This allows them to not have to re-index all documents."""
from sqlalchemy import select
from sqlalchemy.orm import Session
from danswer.access.models import DocumentAccess
from danswer.datastores.document_index import get_default_document_index
from danswer.datastores.interfaces import UpdateRequest
from danswer.datastores.vespa.store import VespaIndex
from danswer.db.document import get_acccess_info_for_documents
from danswer.db.engine import get_sqlalchemy_engine
from danswer.db.models import Document
from danswer.utils.logger import setup_logger
logger = setup_logger()
def _migrate_vespa_to_acl() -> None:
    """Push the access information stored in Postgres to every Vespa document.

    Raises:
        ValueError: If the default document index is not a `VespaIndex`.
    """
    index = get_default_document_index()
    if not isinstance(index, VespaIndex):
        raise ValueError("This script is only for Vespa indexes")

    engine = get_sqlalchemy_engine()
    with Session(engine) as session:
        # Collect the id of every document known to Postgres.
        all_documents = session.scalars(select(Document)).all()
        ids = []
        for doc in all_documents:
            ids.append(doc.id)

        access_rows = get_acccess_info_for_documents(
            db_session=session,
            document_ids=ids,
        )

        # Build one update request per access entry so that the
        # `access_control_list` field reflects the state of Postgres.
        pending_updates = []
        for doc_id, allowed_user_ids, public in access_rows:
            access = DocumentAccess.build(allowed_user_ids, public)
            pending_updates.append(
                UpdateRequest(document_ids=[doc_id], access=access)
            )

        index.update(update_requests=pending_updates)
Should be run when a user who has docs already indexed switches over to the new
access control system. This allows them to not have to re-index all documents.
NOTE: this is auto-run on server startup, so should not be necessary in most cases."""
from danswer.utils.acl import set_acl_for_vespa
# Run the ACL update only when this file is executed directly as a script.
if __name__ == "__main__":
    # NOTE(review): two calls are listed here. `_migrate_vespa_to_acl` looks
    # like the superseded script-local entry point and `set_acl_for_vespa`
    # (imported from danswer.utils.acl) its replacement -- confirm that only
    # one of them is meant to remain.
    _migrate_vespa_to_acl()
    set_acl_for_vespa()