mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-04-08 03:48:14 +02:00
Auto-populate ACL fields on server startup
This commit is contained in:
parent
c09f00990e
commit
7afcf3489f
@ -44,6 +44,7 @@ from danswer.server.manage import router as admin_router
|
||||
from danswer.server.search_backend import router as backend_router
|
||||
from danswer.server.slack_bot_management import router as slack_bot_management_router
|
||||
from danswer.server.users import router as user_router
|
||||
from danswer.utils.acl import set_acl_for_vespa
|
||||
from danswer.utils.logger import setup_logger
|
||||
|
||||
|
||||
@ -213,6 +214,10 @@ def get_application() -> FastAPI:
|
||||
logger.info("Verifying Document Index(s) is/are available.")
|
||||
get_default_document_index().ensure_indices_exist()
|
||||
|
||||
# TODO: remove this once everyone is migrated to ACL
|
||||
logger.info("Populating Access Control List fields in Vespa")
|
||||
set_acl_for_vespa()
|
||||
|
||||
return application
|
||||
|
||||
|
||||
|
38
backend/danswer/utils/acl.py
Normal file
38
backend/danswer/utils/acl.py
Normal file
@ -0,0 +1,38 @@
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from danswer.access.models import DocumentAccess
|
||||
from danswer.datastores.document_index import get_default_document_index
|
||||
from danswer.datastores.interfaces import UpdateRequest
|
||||
from danswer.datastores.vespa.store import VespaIndex
|
||||
from danswer.db.document import get_acccess_info_for_documents
|
||||
from danswer.db.engine import get_sqlalchemy_engine
|
||||
from danswer.db.models import Document
|
||||
from danswer.utils.logger import setup_logger
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
def set_acl_for_vespa() -> None:
    """Updates the ACL for all documents based on the state of Postgres.

    Reads the id of every `Document` row, fetches the access information for
    those ids via `get_acccess_info_for_documents`, and sends one
    `UpdateRequest` per document to the default document index.

    Raises:
        ValueError: if the default document index is not a `VespaIndex`.
    """
    vespa_index = get_default_document_index()
    if not isinstance(vespa_index, VespaIndex):
        raise ValueError("This script is only for Vespa indexes")

    with Session(get_sqlalchemy_engine()) as db_session:
        # Only the ids are used below, so select that single column instead
        # of hydrating a full ORM object for every document.
        document_ids = list(db_session.scalars(select(Document.id)).all())
        if not document_ids:
            # No documents in Postgres: there is nothing to look up or to
            # update, so skip both round trips.
            return

        # for all documents, set the `access_control_list` field appropriately
        # based on the state of Postgres
        document_access_info = get_acccess_info_for_documents(
            db_session=db_session,
            document_ids=document_ids,
        )
        vespa_index.update(
            update_requests=[
                UpdateRequest(
                    document_ids=[document_id],
                    access=DocumentAccess.build(user_ids, is_public),
                )
                for document_id, user_ids, is_public in document_access_info
            ],
        )
|
@ -1,44 +1,9 @@
|
||||
"""Script which updates Vespa to align with the access described in Postgres.
|
||||
Should be run when a user who has docs already indexed switches over to the new
|
||||
access control system. This allows them to not have to re-index all documents."""
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from danswer.access.models import DocumentAccess
|
||||
from danswer.datastores.document_index import get_default_document_index
|
||||
from danswer.datastores.interfaces import UpdateRequest
|
||||
from danswer.datastores.vespa.store import VespaIndex
|
||||
from danswer.db.document import get_acccess_info_for_documents
|
||||
from danswer.db.engine import get_sqlalchemy_engine
|
||||
from danswer.db.models import Document
|
||||
from danswer.utils.logger import setup_logger
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
def _migrate_vespa_to_acl() -> None:
    """Bring the access info stored in the document index in line with Postgres.

    Every `Document` row is loaded, its access information is fetched with
    `get_acccess_info_for_documents`, and a single-document `UpdateRequest`
    is built for each result before all of them are sent in one `update` call.

    Raises:
        ValueError: if the default document index is not a `VespaIndex`.
    """
    index = get_default_document_index()
    if not isinstance(index, VespaIndex):
        raise ValueError("This script is only for Vespa indexes")

    with Session(get_sqlalchemy_engine()) as session:
        # Gather the id of every document currently recorded in Postgres.
        all_document_ids = []
        for doc in session.scalars(select(Document)).all():
            all_document_ids.append(doc.id)

        access_rows = get_acccess_info_for_documents(
            db_session=session,
            document_ids=all_document_ids,
        )

        # One request per document, carrying the access rebuilt from Postgres
        # (this is what sets the `access_control_list` field).
        requests = []
        for doc_id, user_ids, is_public in access_rows:
            rebuilt_access = DocumentAccess.build(user_ids, is_public)
            requests.append(
                UpdateRequest(document_ids=[doc_id], access=rebuilt_access)
            )

        index.update(update_requests=requests)
|
||||
Should be run when a user who has docs already indexed switches over to the new
|
||||
access control system. This allows them to not have to re-index all documents.
|
||||
NOTE: this is auto-run on server startup, so should not be necessary in most cases."""
|
||||
from danswer.utils.acl import set_acl_for_vespa
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
_migrate_vespa_to_acl()
|
||||
set_acl_for_vespa()
|
||||
|
Loading…
x
Reference in New Issue
Block a user