diff --git a/backend/danswer/background/update.py b/backend/danswer/background/update.py
index 5ec8b4770..586ed2d8d 100755
--- a/backend/danswer/background/update.py
+++ b/backend/danswer/background/update.py
@@ -37,6 +37,7 @@ from danswer.db.models import Connector
 from danswer.db.models import IndexAttempt
 from danswer.db.models import IndexingStatus
 from danswer.search.search_utils import warm_up_models
+from danswer.utils.acl import set_acl_for_vespa_nonblocking
 from danswer.utils.logger import IndexAttemptSingleton
 from danswer.utils.logger import setup_logger
 
@@ -452,6 +453,12 @@ def update_loop(delay: int = 10, num_workers: int = NUM_INDEXING_WORKERS) -> Non
         # This ensures that bad states get cleaned up
         mark_all_in_progress_cc_pairs_failed(db_session)
 
+    # TODO: remove this once everyone is migrated to ACL
+    # does nothing if this has been successfully run before
+    # NOTE: is done in another thread, to not block indexing runs from
+    # getting kicked off
+    set_acl_for_vespa_nonblocking(should_check_if_already_done=True)
+
     while True:
         start = time.time()
         start_time_utc = datetime.utcfromtimestamp(start).strftime("%Y-%m-%d %H:%M:%S")
diff --git a/backend/danswer/main.py b/backend/danswer/main.py
index 5b2d1a06e..24d4e99fc 100644
--- a/backend/danswer/main.py
+++ b/backend/danswer/main.py
@@ -43,7 +43,6 @@ from danswer.server.search_backend import router as backend_router
 from danswer.server.slack_bot_management import router as slack_bot_management_router
 from danswer.server.state import router as state_router
 from danswer.server.users import router as user_router
-from danswer.utils.acl import set_acl_for_vespa
 from danswer.utils.logger import setup_logger
 from danswer.utils.variable_functionality import fetch_versioned_implementation
 
@@ -197,11 +196,6 @@ def get_application() -> FastAPI:
         logger.info("Verifying Document Index(s) is/are available.")
         get_default_document_index().ensure_indices_exist()
 
-        # TODO: remove this once everyone is migrated to ACL
-        logger.info("Populating Access Control List fields in Vespa")
-        # does nothing if this has been successfully run before
-        set_acl_for_vespa(should_check_if_already_done=True)
-
     application.add_middleware(
         CORSMiddleware,
         allow_origins=["*"],  # Change this to the list of allowed origins if needed
diff --git a/backend/danswer/utils/acl.py b/backend/danswer/utils/acl.py
index d88b1cff8..bed2683f8 100644
--- a/backend/danswer/utils/acl.py
+++ b/backend/danswer/utils/acl.py
@@ -1,3 +1,5 @@
+from threading import Thread
+
 from sqlalchemy import select
 from sqlalchemy.orm import Session
 
@@ -33,6 +35,7 @@ def set_acl_for_vespa(should_check_if_already_done: bool = False) -> None:
     if not isinstance(vespa_index, VespaIndex):
         raise ValueError("This script is only for Vespa indexes")
 
+    logger.info("Populating Access Control List fields in Vespa")
     with Session(get_sqlalchemy_engine()) as db_session:
         # for all documents, set the `access_control_list` field apporpriately
         # based on the state of Postgres
@@ -52,3 +55,21 @@ def set_acl_for_vespa(should_check_if_already_done: bool = False) -> None:
         )
 
     dynamic_config_store.store(_COMPLETED_ACL_UPDATE_KEY, True)
+
+
+def set_acl_for_vespa_nonblocking(should_check_if_already_done: bool = False) -> None:
+    """Kick off the ACL update in a separate thread so that other work can continue.
+
+    Returns as soon as the thread has been started. A failure inside the thread
+    is reported through the module logger instead of being left to the default
+    thread excepthook, so it shows up alongside the rest of the logs.
+    """
+
+    def _run() -> None:
+        try:
+            set_acl_for_vespa(should_check_if_already_done)
+        except Exception:
+            # nothing joins this thread, so nobody could observe a re-raise
+            logger.exception("Failed to populate Access Control List fields in Vespa")
+
+    Thread(target=_run, name="set-acl-for-vespa").start()