mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-09-26 03:48:49 +02:00
Auto-populate ACL fields on server startup
This commit is contained in:
@@ -44,6 +44,7 @@ from danswer.server.manage import router as admin_router
|
|||||||
from danswer.server.search_backend import router as backend_router
|
from danswer.server.search_backend import router as backend_router
|
||||||
from danswer.server.slack_bot_management import router as slack_bot_management_router
|
from danswer.server.slack_bot_management import router as slack_bot_management_router
|
||||||
from danswer.server.users import router as user_router
|
from danswer.server.users import router as user_router
|
||||||
|
from danswer.utils.acl import set_acl_for_vespa
|
||||||
from danswer.utils.logger import setup_logger
|
from danswer.utils.logger import setup_logger
|
||||||
|
|
||||||
|
|
||||||
@@ -213,6 +214,10 @@ def get_application() -> FastAPI:
|
|||||||
logger.info("Verifying Document Index(s) is/are available.")
|
logger.info("Verifying Document Index(s) is/are available.")
|
||||||
get_default_document_index().ensure_indices_exist()
|
get_default_document_index().ensure_indices_exist()
|
||||||
|
|
||||||
|
# TODO: remove this once everyone is migrated to ACL
|
||||||
|
logger.info("Populating Access Control List fields in Vespa")
|
||||||
|
set_acl_for_vespa()
|
||||||
|
|
||||||
return application
|
return application
|
||||||
|
|
||||||
|
|
||||||
|
38
backend/danswer/utils/acl.py
Normal file
38
backend/danswer/utils/acl.py
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
from sqlalchemy import select
|
||||||
|
from sqlalchemy.orm import Session
|
||||||
|
|
||||||
|
from danswer.access.models import DocumentAccess
|
||||||
|
from danswer.datastores.document_index import get_default_document_index
|
||||||
|
from danswer.datastores.interfaces import UpdateRequest
|
||||||
|
from danswer.datastores.vespa.store import VespaIndex
|
||||||
|
from danswer.db.document import get_acccess_info_for_documents
|
||||||
|
from danswer.db.engine import get_sqlalchemy_engine
|
||||||
|
from danswer.db.models import Document
|
||||||
|
from danswer.utils.logger import setup_logger
|
||||||
|
|
||||||
|
logger = setup_logger()
|
||||||
|
|
||||||
|
|
||||||
|
def set_acl_for_vespa() -> None:
    """Updates the ACL for all documents based on the state of Postgres.

    Collects the id of every document stored in Postgres, looks up the access
    info for those ids, and pushes one `UpdateRequest` per returned access
    entry to the Vespa index.

    Raises:
        ValueError: if the default document index is not a `VespaIndex`.
    """
    vespa_index = get_default_document_index()
    if not isinstance(vespa_index, VespaIndex):
        raise ValueError("This script is only for Vespa indexes")

    with Session(get_sqlalchemy_engine()) as db_session:
        # Only the ids are needed below, so select just that column instead of
        # hydrating every full `Document` row (this runs on server startup).
        document_ids = list(db_session.scalars(select(Document.id)).all())

        # for all documents, set the `access_control_list` field appropriately
        # based on the state of Postgres
        document_access_info = get_acccess_info_for_documents(
            db_session=db_session,
            document_ids=document_ids,
        )
        vespa_index.update(
            update_requests=[
                UpdateRequest(
                    document_ids=[document_id],
                    access=DocumentAccess.build(user_ids, is_public),
                )
                for document_id, user_ids, is_public in document_access_info
            ],
        )
|
@@ -1,44 +1,9 @@
|
|||||||
"""Script which updates Vespa to align with the access described in Postgres.
|
"""Script which updates Vespa to align with the access described in Postgres.
|
||||||
Should be run wehn a user who has docs already indexed switches over to the new
|
Should be run when a user who has docs already indexed switches over to the new
|
||||||
access control system. This allows them to not have to re-index all documents."""
|
access control system. This allows them to not have to re-index all documents.
|
||||||
from sqlalchemy import select
|
NOTE: this is auto-run on server startup, so should not be necessary in most cases."""
|
||||||
from sqlalchemy.orm import Session
|
from danswer.utils.acl import set_acl_for_vespa
|
||||||
|
|
||||||
from danswer.access.models import DocumentAccess
|
|
||||||
from danswer.datastores.document_index import get_default_document_index
|
|
||||||
from danswer.datastores.interfaces import UpdateRequest
|
|
||||||
from danswer.datastores.vespa.store import VespaIndex
|
|
||||||
from danswer.db.document import get_acccess_info_for_documents
|
|
||||||
from danswer.db.engine import get_sqlalchemy_engine
|
|
||||||
from danswer.db.models import Document
|
|
||||||
from danswer.utils.logger import setup_logger
|
|
||||||
|
|
||||||
logger = setup_logger()
|
|
||||||
|
|
||||||
|
|
||||||
def _migrate_vespa_to_acl() -> None:
|
|
||||||
vespa_index = get_default_document_index()
|
|
||||||
if not isinstance(vespa_index, VespaIndex):
|
|
||||||
raise ValueError("This script is only for Vespa indexes")
|
|
||||||
|
|
||||||
with Session(get_sqlalchemy_engine()) as db_session:
|
|
||||||
# for all documents, set the `access_control_list` field apporpriately
|
|
||||||
# based on the state of Postgres
|
|
||||||
documents = db_session.scalars(select(Document)).all()
|
|
||||||
document_access_info = get_acccess_info_for_documents(
|
|
||||||
db_session=db_session,
|
|
||||||
document_ids=[document.id for document in documents],
|
|
||||||
)
|
|
||||||
vespa_index.update(
|
|
||||||
update_requests=[
|
|
||||||
UpdateRequest(
|
|
||||||
document_ids=[document_id],
|
|
||||||
access=DocumentAccess.build(user_ids, is_public),
|
|
||||||
)
|
|
||||||
for document_id, user_ids, is_public in document_access_info
|
|
||||||
],
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
_migrate_vespa_to_acl()
|
set_acl_for_vespa()
|
||||||
|
Reference in New Issue
Block a user