Move auto-ACL update to background job

This commit is contained in:
Weves 2023-10-02 00:05:17 -07:00 committed by Chris Weaver
parent 829d04c904
commit dbe33959c0
3 changed files with 18 additions and 6 deletions

View File

@ -37,6 +37,7 @@ from danswer.db.models import Connector
from danswer.db.models import IndexAttempt
from danswer.db.models import IndexingStatus
from danswer.search.search_utils import warm_up_models
from danswer.utils.acl import set_acl_for_vespa_nonblocking
from danswer.utils.logger import IndexAttemptSingleton
from danswer.utils.logger import setup_logger
@ -452,6 +453,12 @@ def update_loop(delay: int = 10, num_workers: int = NUM_INDEXING_WORKERS) -> Non
# This ensures that bad states get cleaned up
mark_all_in_progress_cc_pairs_failed(db_session)
# TODO: remove this once everyone is migrated to ACL
# does nothing if this has been successfully run before
# NOTE: is done in another thread, to not block indexing runs from
# getting kicked off
set_acl_for_vespa_nonblocking(should_check_if_already_done=True)
while True:
start = time.time()
start_time_utc = datetime.utcfromtimestamp(start).strftime("%Y-%m-%d %H:%M:%S")

View File

@ -43,7 +43,6 @@ from danswer.server.search_backend import router as backend_router
from danswer.server.slack_bot_management import router as slack_bot_management_router
from danswer.server.state import router as state_router
from danswer.server.users import router as user_router
from danswer.utils.acl import set_acl_for_vespa
from danswer.utils.logger import setup_logger
from danswer.utils.variable_functionality import fetch_versioned_implementation
@ -197,11 +196,6 @@ def get_application() -> FastAPI:
logger.info("Verifying Document Index(s) is/are available.")
get_default_document_index().ensure_indices_exist()
# TODO: remove this once everyone is migrated to ACL
logger.info("Populating Access Control List fields in Vespa")
# does nothing if this has been successfully run before
set_acl_for_vespa(should_check_if_already_done=True)
application.add_middleware(
CORSMiddleware,
allow_origins=["*"], # Change this to the list of allowed origins if needed

View File

@ -1,3 +1,5 @@
from threading import Thread
from sqlalchemy import select
from sqlalchemy.orm import Session
@ -33,6 +35,7 @@ def set_acl_for_vespa(should_check_if_already_done: bool = False) -> None:
if not isinstance(vespa_index, VespaIndex):
raise ValueError("This script is only for Vespa indexes")
logger.info("Populating Access Control List fields in Vespa")
with Session(get_sqlalchemy_engine()) as db_session:
# for all documents, set the `access_control_list` field appropriately
# based on the state of Postgres
@ -52,3 +55,11 @@ def set_acl_for_vespa(should_check_if_already_done: bool = False) -> None:
)
dynamic_config_store.store(_COMPLETED_ACL_UPDATE_KEY, True)
def set_acl_for_vespa_nonblocking(should_check_if_already_done: bool = False) -> None:
    """Start `set_acl_for_vespa` on its own thread and return immediately.

    The thread is started but never joined here, so the caller is not held up
    while the ACL update runs. `should_check_if_already_done` is forwarded
    unchanged as the single positional argument of `set_acl_for_vespa`.
    """
    acl_update_thread = Thread(
        target=set_acl_for_vespa,
        args=(should_check_if_already_done,),
    )
    acl_update_thread.start()