diff --git a/backend/danswer/auth/invited_users.py b/backend/danswer/auth/invited_users.py index 56a02fc60..efce858f2 100644 --- a/backend/danswer/auth/invited_users.py +++ b/backend/danswer/auth/invited_users.py @@ -1,21 +1,20 @@ from typing import cast +from danswer.configs.constants import KV_USER_STORE_KEY from danswer.dynamic_configs.factory import get_dynamic_config_store from danswer.dynamic_configs.interface import ConfigNotFoundError from danswer.dynamic_configs.interface import JSON_ro -USER_STORE_KEY = "INVITED_USERS" - def get_invited_users() -> list[str]: try: store = get_dynamic_config_store() - return cast(list, store.load(USER_STORE_KEY)) + return cast(list, store.load(KV_USER_STORE_KEY)) except ConfigNotFoundError: return list() def write_invited_users(emails: list[str]) -> int: store = get_dynamic_config_store() - store.store(USER_STORE_KEY, cast(JSON_ro, emails)) + store.store(KV_USER_STORE_KEY, cast(JSON_ro, emails)) return len(emails) diff --git a/backend/danswer/auth/noauth_user.py b/backend/danswer/auth/noauth_user.py index 4744c4a64..55fdbe4a5 100644 --- a/backend/danswer/auth/noauth_user.py +++ b/backend/danswer/auth/noauth_user.py @@ -3,25 +3,23 @@ from typing import Any from typing import cast from danswer.auth.schemas import UserRole +from danswer.configs.constants import KV_NO_AUTH_USER_PREFERENCES_KEY from danswer.dynamic_configs.store import ConfigNotFoundError from danswer.dynamic_configs.store import DynamicConfigStore from danswer.server.manage.models import UserInfo from danswer.server.manage.models import UserPreferences -NO_AUTH_USER_PREFERENCES_KEY = "no_auth_user_preferences" - - def set_no_auth_user_preferences( store: DynamicConfigStore, preferences: UserPreferences ) -> None: - store.store(NO_AUTH_USER_PREFERENCES_KEY, preferences.dict()) + store.store(KV_NO_AUTH_USER_PREFERENCES_KEY, preferences.dict()) def load_no_auth_user_preferences(store: DynamicConfigStore) -> UserPreferences: try: preferences_data = cast( - Mapping[str, Any], store.load(NO_AUTH_USER_PREFERENCES_KEY) + Mapping[str, Any], store.load(KV_NO_AUTH_USER_PREFERENCES_KEY) ) return UserPreferences(**preferences_data) except ConfigNotFoundError: diff --git a/backend/danswer/auth/users.py b/backend/danswer/auth/users.py index 8c9570c46..319d066be 100644 --- a/backend/danswer/auth/users.py +++ b/backend/danswer/auth/users.py @@ -67,6 +67,14 @@ from danswer.utils.variable_functionality import ( logger = setup_logger() +def is_user_admin(user: User | None) -> bool: + if AUTH_TYPE == AuthType.DISABLED: + return True + if user and user.role == UserRole.ADMIN: + return True + return False + + def verify_auth_setting() -> None: if AUTH_TYPE not in [AuthType.DISABLED, AuthType.BASIC, AuthType.GOOGLE_OAUTH]: raise ValueError( diff --git a/backend/danswer/configs/constants.py b/backend/danswer/configs/constants.py index 1351dd788..4cdc726a0 100644 --- a/backend/danswer/configs/constants.py +++ b/backend/danswer/configs/constants.py @@ -73,6 +73,22 @@ DANSWER_API_KEY_PREFIX = "API_KEY__" DANSWER_API_KEY_DUMMY_EMAIL_DOMAIN = "danswerapikey.ai" UNNAMED_KEY_PLACEHOLDER = "Unnamed" +# Key-Value store keys +KV_REINDEX_KEY = "needs_reindexing" +KV_USER_STORE_KEY = "INVITED_USERS" +KV_NO_AUTH_USER_PREFERENCES_KEY = "no_auth_user_preferences" +KV_CRED_KEY = "credential_id_{}" +KV_GMAIL_CRED_KEY = "gmail_app_credential" +KV_GMAIL_SERVICE_ACCOUNT_KEY = "gmail_service_account_key" +KV_GOOGLE_DRIVE_CRED_KEY = "google_drive_app_credential" +KV_GOOGLE_DRIVE_SERVICE_ACCOUNT_KEY = "google_drive_service_account_key" +KV_SLACK_BOT_TOKENS_CONFIG_KEY = "slack_bot_tokens_config_key" +KV_GEN_AI_KEY_CHECK_TIME = "genai_api_key_last_check_time" +KV_SETTINGS_KEY = "danswer_settings" +KV_CUSTOMER_UUID_KEY = "customer_uuid" +KV_ENTERPRISE_SETTINGS_KEY = "danswer_enterprise_settings" +KV_CUSTOM_ANALYTICS_SCRIPT_KEY = "__custom_analytics_script__" + class DocumentSource(str, Enum): # Special case, document passed in via Danswer APIs without specifying a source type diff --git a/backend/danswer/connectors/gmail/connector_auth.py b/backend/danswer/connectors/gmail/connector_auth.py index 20b47f452..9d01ac5f4 100644 --- a/backend/danswer/connectors/gmail/connector_auth.py +++ b/backend/danswer/connectors/gmail/connector_auth.py @@ -12,16 +12,16 @@ from sqlalchemy.orm import Session from danswer.configs.app_configs import WEB_DOMAIN from danswer.configs.constants import DocumentSource -from danswer.connectors.gmail.constants import CRED_KEY +from danswer.configs.constants import KV_CRED_KEY +from danswer.configs.constants import KV_GMAIL_CRED_KEY +from danswer.configs.constants import KV_GMAIL_SERVICE_ACCOUNT_KEY from danswer.connectors.gmail.constants import ( DB_CREDENTIALS_DICT_DELEGATED_USER_KEY, ) from danswer.connectors.gmail.constants import DB_CREDENTIALS_DICT_TOKEN_KEY -from danswer.connectors.gmail.constants import GMAIL_CRED_KEY from danswer.connectors.gmail.constants import ( GMAIL_DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY, ) -from danswer.connectors.gmail.constants import GMAIL_SERVICE_ACCOUNT_KEY from danswer.connectors.gmail.constants import SCOPES from danswer.db.credentials import update_credential_json from danswer.db.models import User @@ -72,7 +72,7 @@ def get_gmail_creds_for_service_account( def verify_csrf(credential_id: int, state: str) -> None: - csrf = get_dynamic_config_store().load(CRED_KEY.format(str(credential_id))) + csrf = get_dynamic_config_store().load(KV_CRED_KEY.format(str(credential_id))) if csrf != state: raise PermissionError( "State from Gmail Connector callback does not match expected" @@ -80,7 +80,7 @@ def verify_csrf(credential_id: int, state: str) -> None: def get_gmail_auth_url(credential_id: int) -> str: - creds_str = str(get_dynamic_config_store().load(GMAIL_CRED_KEY)) + creds_str = str(get_dynamic_config_store().load(KV_GMAIL_CRED_KEY)) credential_json = json.loads(creds_str) flow = InstalledAppFlow.from_client_config( credential_json, @@ -92,12 +92,14 @@ def get_gmail_auth_url(credential_id: int) -> str: parsed_url = cast(ParseResult, urlparse(auth_url)) params = parse_qs(parsed_url.query) - get_dynamic_config_store().store(CRED_KEY.format(credential_id), params.get("state", [None])[0], encrypt=True) # type: ignore + get_dynamic_config_store().store( + KV_CRED_KEY.format(credential_id), params.get("state", [None])[0], encrypt=True + ) # type: ignore return str(auth_url) def get_auth_url(credential_id: int) -> str: - creds_str = str(get_dynamic_config_store().load(GMAIL_CRED_KEY)) + creds_str = str(get_dynamic_config_store().load(KV_GMAIL_CRED_KEY)) credential_json = json.loads(creds_str) flow = InstalledAppFlow.from_client_config( credential_json, @@ -109,7 +111,9 @@ def get_auth_url(credential_id: int) -> str: parsed_url = cast(ParseResult, urlparse(auth_url)) params = parse_qs(parsed_url.query) - get_dynamic_config_store().store(CRED_KEY.format(credential_id), params.get("state", [None])[0], encrypt=True) # type: ignore + get_dynamic_config_store().store( + KV_CRED_KEY.format(credential_id), params.get("state", [None])[0], encrypt=True + ) # type: ignore return str(auth_url) @@ -154,22 +158,22 @@ def build_service_account_creds( def get_google_app_gmail_cred() -> GoogleAppCredentials: - creds_str = str(get_dynamic_config_store().load(GMAIL_CRED_KEY)) + creds_str = str(get_dynamic_config_store().load(KV_GMAIL_CRED_KEY)) return GoogleAppCredentials(**json.loads(creds_str)) def upsert_google_app_gmail_cred(app_credentials: GoogleAppCredentials) -> None: get_dynamic_config_store().store( - GMAIL_CRED_KEY, app_credentials.json(), encrypt=True + KV_GMAIL_CRED_KEY, app_credentials.json(), encrypt=True ) def delete_google_app_gmail_cred() -> None: - get_dynamic_config_store().delete(GMAIL_CRED_KEY) + get_dynamic_config_store().delete(KV_GMAIL_CRED_KEY) def get_gmail_service_account_key() -> GoogleServiceAccountKey: - creds_str = str(get_dynamic_config_store().load(GMAIL_SERVICE_ACCOUNT_KEY)) + creds_str = str(get_dynamic_config_store().load(KV_GMAIL_SERVICE_ACCOUNT_KEY)) return GoogleServiceAccountKey(**json.loads(creds_str)) @@ -177,19 +181,19 @@ def upsert_gmail_service_account_key( service_account_key: GoogleServiceAccountKey, ) -> None: get_dynamic_config_store().store( - GMAIL_SERVICE_ACCOUNT_KEY, service_account_key.json(), encrypt=True + KV_GMAIL_SERVICE_ACCOUNT_KEY, service_account_key.json(), encrypt=True ) def upsert_service_account_key(service_account_key: GoogleServiceAccountKey) -> None: get_dynamic_config_store().store( - GMAIL_SERVICE_ACCOUNT_KEY, service_account_key.json(), encrypt=True + KV_GMAIL_SERVICE_ACCOUNT_KEY, service_account_key.json(), encrypt=True ) def delete_gmail_service_account_key() -> None: - get_dynamic_config_store().delete(GMAIL_SERVICE_ACCOUNT_KEY) + get_dynamic_config_store().delete(KV_GMAIL_SERVICE_ACCOUNT_KEY) def delete_service_account_key() -> None: - get_dynamic_config_store().delete(GMAIL_SERVICE_ACCOUNT_KEY) + get_dynamic_config_store().delete(KV_GMAIL_SERVICE_ACCOUNT_KEY) diff --git a/backend/danswer/connectors/gmail/constants.py b/backend/danswer/connectors/gmail/constants.py index 1660f54be..36eff0818 100644 --- a/backend/danswer/connectors/gmail/constants.py +++ b/backend/danswer/connectors/gmail/constants.py @@ -1,7 +1,4 @@ DB_CREDENTIALS_DICT_TOKEN_KEY = "gmail_tokens" GMAIL_DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY = "gmail_service_account_key" DB_CREDENTIALS_DICT_DELEGATED_USER_KEY = "gmail_delegated_user" -CRED_KEY = "credential_id_{}" -GMAIL_CRED_KEY = "gmail_app_credential" -GMAIL_SERVICE_ACCOUNT_KEY = "gmail_service_account_key" SCOPES = ["https://www.googleapis.com/auth/gmail.readonly"] diff --git a/backend/danswer/connectors/google_drive/connector_auth.py b/backend/danswer/connectors/google_drive/connector_auth.py index 94cf24f79..2aae8401a 100644 --- a/backend/danswer/connectors/google_drive/connector_auth.py +++ b/backend/danswer/connectors/google_drive/connector_auth.py @@ -12,7 +12,9 @@ from sqlalchemy.orm import Session from danswer.configs.app_configs import WEB_DOMAIN from danswer.configs.constants import DocumentSource -from danswer.connectors.google_drive.constants import CRED_KEY +from danswer.configs.constants import KV_CRED_KEY +from danswer.configs.constants import KV_GOOGLE_DRIVE_CRED_KEY +from danswer.configs.constants import KV_GOOGLE_DRIVE_SERVICE_ACCOUNT_KEY from danswer.connectors.google_drive.constants import ( DB_CREDENTIALS_DICT_DELEGATED_USER_KEY, ) @@ -20,8 +22,6 @@ from danswer.connectors.google_drive.constants import ( DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY, ) from danswer.connectors.google_drive.constants import DB_CREDENTIALS_DICT_TOKEN_KEY -from danswer.connectors.google_drive.constants import GOOGLE_DRIVE_CRED_KEY -from danswer.connectors.google_drive.constants import GOOGLE_DRIVE_SERVICE_ACCOUNT_KEY from danswer.connectors.google_drive.constants import SCOPES from danswer.db.credentials import update_credential_json from danswer.db.models import User @@ -72,7 +72,7 @@ def get_google_drive_creds_for_service_account( def verify_csrf(credential_id: int, state: str) -> None: - csrf = get_dynamic_config_store().load(CRED_KEY.format(str(credential_id))) + csrf = get_dynamic_config_store().load(KV_CRED_KEY.format(str(credential_id))) if csrf != state: raise PermissionError( "State from Google Drive Connector callback does not match expected" @@ -80,7 +80,7 @@ def verify_csrf(credential_id: int, state: str) -> None: def get_auth_url(credential_id: int) -> str: - creds_str = str(get_dynamic_config_store().load(GOOGLE_DRIVE_CRED_KEY)) + creds_str = str(get_dynamic_config_store().load(KV_GOOGLE_DRIVE_CRED_KEY)) credential_json = json.loads(creds_str) flow = InstalledAppFlow.from_client_config( credential_json, @@ -92,7 +92,9 @@ def get_auth_url(credential_id: int) -> str: parsed_url = cast(ParseResult, urlparse(auth_url)) params = parse_qs(parsed_url.query) - get_dynamic_config_store().store(CRED_KEY.format(credential_id), params.get("state", [None])[0], encrypt=True) # type: ignore + get_dynamic_config_store().store( + KV_CRED_KEY.format(credential_id), params.get("state", [None])[0], encrypt=True + ) # type: ignore return str(auth_url) @@ -138,30 +140,32 @@ def build_service_account_creds( def get_google_app_cred() -> GoogleAppCredentials: - creds_str = str(get_dynamic_config_store().load(GOOGLE_DRIVE_CRED_KEY)) + creds_str = str(get_dynamic_config_store().load(KV_GOOGLE_DRIVE_CRED_KEY)) return GoogleAppCredentials(**json.loads(creds_str)) def upsert_google_app_cred(app_credentials: GoogleAppCredentials) -> None: get_dynamic_config_store().store( - GOOGLE_DRIVE_CRED_KEY, app_credentials.json(), encrypt=True + KV_GOOGLE_DRIVE_CRED_KEY, app_credentials.json(), encrypt=True ) def delete_google_app_cred() -> None: - get_dynamic_config_store().delete(GOOGLE_DRIVE_CRED_KEY) + get_dynamic_config_store().delete(KV_GOOGLE_DRIVE_CRED_KEY) def get_service_account_key() -> GoogleServiceAccountKey: - creds_str = str(get_dynamic_config_store().load(GOOGLE_DRIVE_SERVICE_ACCOUNT_KEY)) + creds_str = str( + get_dynamic_config_store().load(KV_GOOGLE_DRIVE_SERVICE_ACCOUNT_KEY) + ) return GoogleServiceAccountKey(**json.loads(creds_str)) def upsert_service_account_key(service_account_key: GoogleServiceAccountKey) -> None: get_dynamic_config_store().store( - GOOGLE_DRIVE_SERVICE_ACCOUNT_KEY, service_account_key.json(), encrypt=True + KV_GOOGLE_DRIVE_SERVICE_ACCOUNT_KEY, service_account_key.json(), encrypt=True ) def delete_service_account_key() -> None: - get_dynamic_config_store().delete(GOOGLE_DRIVE_SERVICE_ACCOUNT_KEY) + get_dynamic_config_store().delete(KV_GOOGLE_DRIVE_SERVICE_ACCOUNT_KEY) diff --git a/backend/danswer/connectors/google_drive/constants.py b/backend/danswer/connectors/google_drive/constants.py index 47dc402a3..214bfd5cb 100644 --- a/backend/danswer/connectors/google_drive/constants.py +++ b/backend/danswer/connectors/google_drive/constants.py @@ -1,9 +1,6 @@ DB_CREDENTIALS_DICT_TOKEN_KEY = "google_drive_tokens" DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY = "google_drive_service_account_key" DB_CREDENTIALS_DICT_DELEGATED_USER_KEY = "google_drive_delegated_user" -CRED_KEY = "credential_id_{}" -GOOGLE_DRIVE_CRED_KEY = "google_drive_app_credential" -GOOGLE_DRIVE_SERVICE_ACCOUNT_KEY = "google_drive_service_account_key" SCOPES = [ "https://www.googleapis.com/auth/drive.readonly", "https://www.googleapis.com/auth/drive.metadata.readonly", diff --git a/backend/danswer/danswerbot/slack/tokens.py b/backend/danswer/danswerbot/slack/tokens.py index 16014574a..5de3a6a01 100644 --- a/backend/danswer/danswerbot/slack/tokens.py +++ b/backend/danswer/danswerbot/slack/tokens.py @@ -1,13 +1,11 @@ import os from typing import cast +from danswer.configs.constants import KV_SLACK_BOT_TOKENS_CONFIG_KEY from danswer.dynamic_configs.factory import get_dynamic_config_store from danswer.server.manage.models import SlackBotTokens -_SLACK_BOT_TOKENS_CONFIG_KEY = "slack_bot_tokens_config_key" - - def fetch_tokens() -> SlackBotTokens: # first check env variables app_token = os.environ.get("DANSWER_BOT_SLACK_APP_TOKEN") @@ -17,7 +15,7 @@ def fetch_tokens() -> SlackBotTokens: dynamic_config_store = get_dynamic_config_store() return SlackBotTokens( - **cast(dict, dynamic_config_store.load(key=_SLACK_BOT_TOKENS_CONFIG_KEY)) + **cast(dict, dynamic_config_store.load(key=KV_SLACK_BOT_TOKENS_CONFIG_KEY)) ) @@ -26,5 +24,5 @@ def save_tokens( ) -> None: dynamic_config_store = get_dynamic_config_store() dynamic_config_store.store( - key=_SLACK_BOT_TOKENS_CONFIG_KEY, val=dict(tokens), encrypt=True + key=KV_SLACK_BOT_TOKENS_CONFIG_KEY, val=dict(tokens), encrypt=True ) diff --git a/backend/danswer/db/connector.py b/backend/danswer/db/connector.py index e63d6c048..4dd1f421f 100644 --- a/backend/danswer/db/connector.py +++ b/backend/danswer/db/connector.py @@ -2,6 +2,7 @@ from typing import cast from fastapi import HTTPException from sqlalchemy import and_ +from sqlalchemy import exists from sqlalchemy import func from sqlalchemy import select from sqlalchemy.orm import aliased @@ -21,6 +22,14 @@ from danswer.utils.logger import setup_logger logger = setup_logger() +def check_connectors_exist(db_session: Session) -> bool: + # Connector 0 is created on server startup as a default for ingestion + # it will always exist and we don't need to count it for this + stmt = select(exists(Connector).where(Connector.id > 0)) + result = db_session.execute(stmt) + return result.scalar() or False + + def fetch_connectors( db_session: Session, sources: list[DocumentSource] | None = None, diff --git a/backend/danswer/db/document.py b/backend/danswer/db/document.py index 80281c38b..9cf63720a 100644 --- a/backend/danswer/db/document.py +++ b/backend/danswer/db/document.py @@ -7,6 +7,7 @@ from uuid import UUID from sqlalchemy import and_ from sqlalchemy import delete +from sqlalchemy import exists from sqlalchemy import func from sqlalchemy import or_ from sqlalchemy import select @@ -30,6 +31,12 @@ from danswer.utils.logger import setup_logger logger = setup_logger() +def check_docs_exist(db_session: Session) -> bool: + stmt = select(exists(DbDocument)) + result = db_session.execute(stmt) + return result.scalar() or False + + def get_documents_for_connector_credential_pair( db_session: Session, connector_id: int, credential_id: int, limit: int | None = None ) -> Sequence[DbDocument]: diff --git a/backend/danswer/dynamic_configs/port_configs.py b/backend/danswer/dynamic_configs/port_configs.py deleted file mode 100644 index ce1092bdb..000000000 --- a/backend/danswer/dynamic_configs/port_configs.py +++ /dev/null @@ -1,117 +0,0 @@ -import json -from pathlib import Path -from typing import cast - -from danswer.configs.constants import GEN_AI_API_KEY_STORAGE_KEY -from danswer.configs.model_configs import FAST_GEN_AI_MODEL_VERSION -from danswer.configs.model_configs import GEN_AI_API_ENDPOINT -from danswer.configs.model_configs import GEN_AI_API_KEY -from danswer.configs.model_configs import GEN_AI_API_VERSION -from danswer.configs.model_configs import GEN_AI_MODEL_PROVIDER -from danswer.configs.model_configs import GEN_AI_MODEL_VERSION -from danswer.db.engine import get_session_context_manager -from danswer.db.llm import fetch_existing_llm_providers -from danswer.db.llm import update_default_provider -from danswer.db.llm import upsert_llm_provider -from danswer.dynamic_configs.factory import get_dynamic_config_store -from danswer.dynamic_configs.factory import PostgresBackedDynamicConfigStore -from danswer.dynamic_configs.interface import ConfigNotFoundError -from danswer.server.manage.llm.models import LLMProviderUpsertRequest -from danswer.utils.logger import setup_logger - - -logger = setup_logger() - - -def read_file_system_store(directory_path: str) -> dict: - store = {} - base_path = Path(directory_path) - for file_path in base_path.iterdir(): - if file_path.is_file() and "." not in file_path.name: - with open(file_path, "r") as file: - key = file_path.stem - value = json.load(file) - - if value: - store[key] = value - return store - - -def insert_into_postgres(store_data: dict) -> None: - port_once_key = "file_store_ported" - config_store = PostgresBackedDynamicConfigStore() - try: - config_store.load(port_once_key) - return - except ConfigNotFoundError: - pass - - for key, value in store_data.items(): - config_store.store(key, value) - - config_store.store(port_once_key, True) - - -def port_filesystem_to_postgres(directory_path: str) -> None: - store_data = read_file_system_store(directory_path) - insert_into_postgres(store_data) - - -def port_api_key_to_postgres() -> None: - # can't port over custom, no longer supported - if GEN_AI_MODEL_PROVIDER == "custom": - return - - with get_session_context_manager() as db_session: - # if we already have ported things over / setup providers in the db, don't do anything - if len(fetch_existing_llm_providers(db_session)) > 0: - return - - api_key = GEN_AI_API_KEY - try: - api_key = cast( - str, get_dynamic_config_store().load(GEN_AI_API_KEY_STORAGE_KEY) - ) - except ConfigNotFoundError: - pass - - # if no API key set, don't port anything over - if not api_key: - return - - default_model_name = GEN_AI_MODEL_VERSION - if GEN_AI_MODEL_PROVIDER == "openai" and not default_model_name: - default_model_name = "gpt-4" - - # if no default model name found, don't port anything over - if not default_model_name: - return - - default_fast_model_name = FAST_GEN_AI_MODEL_VERSION - if GEN_AI_MODEL_PROVIDER == "openai" and not default_fast_model_name: - default_fast_model_name = "gpt-3.5-turbo" - - llm_provider_upsert = LLMProviderUpsertRequest( - name=GEN_AI_MODEL_PROVIDER, - provider=GEN_AI_MODEL_PROVIDER, - api_key=api_key, - api_base=GEN_AI_API_ENDPOINT, - api_version=GEN_AI_API_VERSION, - # can't port over any custom configs, since we don't know - # all the possible keys and values that could be in there - custom_config=None, - default_model_name=default_model_name, - fast_default_model_name=default_fast_model_name, - model_names=None, - display_model_names=[], - is_public=True, - ) - llm_provider = upsert_llm_provider(db_session, llm_provider_upsert) - update_default_provider(db_session, llm_provider.id) - logger.info(f"Ported over LLM provider:\n\n{llm_provider}") - - # delete the old API key - try: - get_dynamic_config_store().delete(GEN_AI_API_KEY_STORAGE_KEY) - except ConfigNotFoundError: - pass diff --git a/backend/danswer/main.py b/backend/danswer/main.py index 52e6d92f1..da41e6393 100644 --- a/backend/danswer/main.py +++ b/backend/danswer/main.py @@ -34,6 +34,7 @@ from danswer.configs.app_configs import USER_AUTH_SECRET from danswer.configs.app_configs import WEB_DOMAIN from danswer.configs.chat_configs import MULTILINGUAL_QUERY_EXPANSION from danswer.configs.constants import AuthType +from danswer.configs.constants import KV_REINDEX_KEY from danswer.configs.constants import POSTGRES_WEB_APP_NAME from danswer.db.connector import create_initial_default_connector from danswer.db.connector_credential_pair import associate_default_cc_pair @@ -53,6 +54,8 @@ from danswer.db.standard_answer import create_initial_default_standard_answer_ca from danswer.db.swap_index import check_index_swap from danswer.document_index.factory import get_default_document_index from danswer.document_index.interfaces import DocumentIndex +from danswer.dynamic_configs.factory import get_dynamic_config_store +from danswer.dynamic_configs.interface import ConfigNotFoundError from danswer.llm.llm_initialization import load_llm_providers from danswer.natural_language_processing.search_nlp_models import warm_up_encoders from danswer.search.retrieval.search_runner import download_nltk_data @@ -182,6 +185,26 @@ def setup_postgres(db_session: Session) -> None: auto_add_search_tool_to_personas(db_session) +def mark_reindex_flag(db_session: Session) -> None: + kv_store = get_dynamic_config_store() + try: + kv_store.load(KV_REINDEX_KEY) + return + except ConfigNotFoundError: + # Only need to update the flag if it hasn't been set + pass + + # If their first deployment is after the changes, it will + # TODO enable this when the other changes go in, need to avoid + # this being set to False, then the user indexes things on the old version + # docs_exist = check_docs_exist(db_session) + # connectors_exist = check_connectors_exist(db_session) + # if docs_exist or connectors_exist: + # kv_store.store(KV_REINDEX_KEY, True) + # else: + # kv_store.store(KV_REINDEX_KEY, False) + + def setup_vespa( document_index: DocumentIndex, db_embedding_model: EmbeddingModel, @@ -261,6 +284,10 @@ async def lifespan(app: FastAPI) -> AsyncGenerator: # setup Postgres with default credential, llm providers, etc. setup_postgres(db_session) + # Does the user need to trigger a reindexing to bring the document index + # into a good state, marked in the kv store + mark_reindex_flag(db_session) + # ensure Vespa is setup correctly logger.info("Verifying Document Index(s) is/are available.") document_index = get_default_document_index( diff --git a/backend/danswer/server/manage/administrative.py b/backend/danswer/server/manage/administrative.py index d6a52917f..e2ec1d2a5 100644 --- a/backend/danswer/server/manage/administrative.py +++ b/backend/danswer/server/manage/administrative.py @@ -11,6 +11,7 @@ from sqlalchemy.orm import Session from danswer.auth.users import current_admin_user from danswer.configs.app_configs import GENERATIVE_MODEL_ACCESS_CHECK_FREQ from danswer.configs.constants import DocumentSource +from danswer.configs.constants import KV_GEN_AI_KEY_CHECK_TIME from danswer.db.connector_credential_pair import get_connector_credential_pair from danswer.db.deletion_attempt import check_deletion_attempt_is_allowed from danswer.db.engine import get_session @@ -35,8 +36,6 @@ from danswer.utils.logger import setup_logger router = APIRouter(prefix="/manage") logger = setup_logger() -GEN_AI_KEY_CHECK_TIME = "genai_api_key_last_check_time" - """Admin only API endpoints""" @@ -116,7 +115,7 @@ def validate_existing_genai_api_key( curr_time = datetime.now(tz=timezone.utc) try: last_check = datetime.fromtimestamp( - cast(float, kv_store.load(GEN_AI_KEY_CHECK_TIME)), tz=timezone.utc + cast(float, kv_store.load(KV_GEN_AI_KEY_CHECK_TIME)), tz=timezone.utc ) check_freq_sec = timedelta(seconds=GENERATIVE_MODEL_ACCESS_CHECK_FREQ) if curr_time - last_check < check_freq_sec: @@ -136,7 +135,7 @@ def validate_existing_genai_api_key( # Mark check as successful curr_time = datetime.now(tz=timezone.utc) - kv_store.store(GEN_AI_KEY_CHECK_TIME, curr_time.timestamp()) + kv_store.store(KV_GEN_AI_KEY_CHECK_TIME, curr_time.timestamp()) @router.post("/admin/deletion-attempt") diff --git a/backend/danswer/server/settings/api.py b/backend/danswer/server/settings/api.py index 422e268c1..25cf1b6c6 100644 --- a/backend/danswer/server/settings/api.py +++ b/backend/danswer/server/settings/api.py @@ -1,13 +1,25 @@ +from typing import cast + from fastapi import APIRouter from fastapi import Depends from fastapi import HTTPException from danswer.auth.users import current_admin_user from danswer.auth.users import current_user +from danswer.auth.users import is_user_admin +from danswer.configs.constants import KV_REINDEX_KEY from danswer.db.models import User +from danswer.dynamic_configs.factory import get_dynamic_config_store +from danswer.dynamic_configs.interface import ConfigNotFoundError +from danswer.server.settings.models import Notification from danswer.server.settings.models import Settings +from danswer.server.settings.models import UserSettings from danswer.server.settings.store import load_settings from danswer.server.settings.store import store_settings +from danswer.utils.logger import setup_logger + + +logger = setup_logger() admin_router = APIRouter(prefix="/admin/settings") @@ -26,5 +38,23 @@ def put_settings( @basic_router.get("") -def fetch_settings(_: User | None = Depends(current_user)) -> Settings: - return load_settings() +def fetch_settings(user: User | None = Depends(current_user)) -> UserSettings: + general_settings = load_settings() + user_notifications = get_user_notifications(user) + return UserSettings(**general_settings.dict(), **user_notifications.dict()) + + +def get_user_notifications(user: User | None) -> Notification: + """Get any notification names, currently the only one is the reindexing flag""" + is_admin = is_user_admin(user) + if not is_admin: + return Notification(notif_name=None) + kv_store = get_dynamic_config_store() + try: + need_index = cast(bool, kv_store.load(KV_REINDEX_KEY)) + return Notification(notif_name=KV_REINDEX_KEY if need_index else None) + except ConfigNotFoundError: + # If something goes wrong and the flag is gone, better to not start a reindexing + # it's a heavyweight long running job and maybe this flag is cleaned up later + logger.warning("Could not find reindex flag") + return Notification(notif_name=None) diff --git a/backend/danswer/server/settings/models.py b/backend/danswer/server/settings/models.py index 9afacf5ad..1547c469b 100644 --- a/backend/danswer/server/settings/models.py +++ b/backend/danswer/server/settings/models.py @@ -8,6 +8,10 @@ class PageType(str, Enum): SEARCH = "search" +class Notification(BaseModel): + notif_name: str | None + + class Settings(BaseModel): """General settings""" @@ -35,3 +39,7 @@ class Settings(BaseModel): raise ValueError( "The default page cannot be 'search' if the search page is disabled." ) + + +class UserSettings(Notification, Settings): + """User-specific settings combining Notification and general Settings""" diff --git a/backend/danswer/server/settings/store.py b/backend/danswer/server/settings/store.py index ead1e3652..dcf31c46f 100644 --- a/backend/danswer/server/settings/store.py +++ b/backend/danswer/server/settings/store.py @@ -1,23 +1,21 @@ from typing import cast +from danswer.configs.constants import KV_SETTINGS_KEY from danswer.dynamic_configs.factory import get_dynamic_config_store from danswer.dynamic_configs.interface import ConfigNotFoundError from danswer.server.settings.models import Settings -_SETTINGS_KEY = "danswer_settings" - - def load_settings() -> Settings: dynamic_config_store = get_dynamic_config_store() try: - settings = Settings(**cast(dict, dynamic_config_store.load(_SETTINGS_KEY))) + settings = Settings(**cast(dict, dynamic_config_store.load(KV_SETTINGS_KEY))) except ConfigNotFoundError: settings = Settings() - dynamic_config_store.store(_SETTINGS_KEY, settings.dict()) + dynamic_config_store.store(KV_SETTINGS_KEY, settings.dict()) return settings def store_settings(settings: Settings) -> None: - get_dynamic_config_store().store(_SETTINGS_KEY, settings.dict()) + get_dynamic_config_store().store(KV_SETTINGS_KEY, settings.dict()) diff --git a/backend/danswer/utils/acl.py b/backend/danswer/utils/acl.py deleted file mode 100644 index 5608530fa..000000000 --- a/backend/danswer/utils/acl.py +++ /dev/null @@ -1,69 +0,0 @@ -from threading import Thread - -from sqlalchemy import select -from sqlalchemy.orm import Session - -from danswer.access.access import get_access_for_documents -from danswer.db.engine import get_sqlalchemy_engine -from danswer.db.models import Document -from danswer.document_index.document_index_utils import get_both_index_names -from danswer.document_index.factory import get_default_document_index -from danswer.document_index.interfaces import UpdateRequest -from danswer.document_index.vespa.index import VespaIndex -from danswer.dynamic_configs.factory import get_dynamic_config_store -from danswer.dynamic_configs.interface import ConfigNotFoundError -from danswer.utils.logger import setup_logger - -logger = setup_logger() - - -_COMPLETED_ACL_UPDATE_KEY = "completed_acl_update" - - -def set_acl_for_vespa(should_check_if_already_done: bool = False) -> None: - """Updates the ACL for all documents based on the state of Postgres.""" - dynamic_config_store = get_dynamic_config_store() - if should_check_if_already_done: - try: - # if entry is found, then we've already done this - dynamic_config_store.load(_COMPLETED_ACL_UPDATE_KEY) - return - except ConfigNotFoundError: - pass - - logger.info("Populating Access Control List fields in Vespa") - with Session(get_sqlalchemy_engine()) as db_session: - # for all documents, set the `access_control_list` field appropriately - # based on the state of Postgres - documents = db_session.scalars(select(Document)).all() - document_access_dict = get_access_for_documents( - db_session=db_session, - document_ids=[document.id for document in documents], - ) - - curr_ind_name, sec_ind_name = get_both_index_names(db_session) - vespa_index = get_default_document_index( - primary_index_name=curr_ind_name, secondary_index_name=sec_ind_name - ) - - if not isinstance(vespa_index, VespaIndex): - raise ValueError("This script is only for Vespa indexes") - - update_requests = [ - UpdateRequest( - document_ids=[document_id], - access=access, - ) - for document_id, access in document_access_dict.items() - ] - vespa_index.update(update_requests=update_requests) - - dynamic_config_store.store(_COMPLETED_ACL_UPDATE_KEY, True) - - -def set_acl_for_vespa_nonblocking(should_check_if_already_done: bool = False) -> None: - """Kick off the ACL update in a separate thread so that other work can continue.""" - Thread( - target=set_acl_for_vespa, - args=[should_check_if_already_done], - ).start() diff --git a/backend/danswer/utils/telemetry.py b/backend/danswer/utils/telemetry.py index f2dbb8915..80fcba65a 100644 --- a/backend/danswer/utils/telemetry.py +++ b/backend/danswer/utils/telemetry.py @@ -6,10 +6,10 @@ from typing import cast import requests from danswer.configs.app_configs import DISABLE_TELEMETRY +from danswer.configs.constants import KV_CUSTOMER_UUID_KEY from danswer.dynamic_configs.factory import get_dynamic_config_store from danswer.dynamic_configs.interface import ConfigNotFoundError -CUSTOMER_UUID_KEY = "customer_uuid" DANSWER_TELEMETRY_ENDPOINT = "https://telemetry.danswer.ai/anonymous_telemetry" @@ -24,10 +24,10 @@ class RecordType(str, Enum): def get_or_generate_uuid() -> str: kv_store = get_dynamic_config_store() try: - return cast(str, kv_store.load(CUSTOMER_UUID_KEY)) + return cast(str, kv_store.load(KV_CUSTOMER_UUID_KEY)) except ConfigNotFoundError: customer_id = str(uuid.uuid4()) - kv_store.store(CUSTOMER_UUID_KEY, customer_id, encrypt=True) + kv_store.store(KV_CUSTOMER_UUID_KEY, customer_id, encrypt=True) return customer_id diff --git a/backend/ee/danswer/server/enterprise_settings/store.py b/backend/ee/danswer/server/enterprise_settings/store.py index 99fb1cc90..e1418f022 100644 --- a/backend/ee/danswer/server/enterprise_settings/store.py +++ b/backend/ee/danswer/server/enterprise_settings/store.py @@ -9,6 +9,8 @@ from fastapi import UploadFile from sqlalchemy.orm import Session from danswer.configs.constants import FileOrigin +from danswer.configs.constants import KV_CUSTOM_ANALYTICS_SCRIPT_KEY +from danswer.configs.constants import KV_ENTERPRISE_SETTINGS_KEY from danswer.dynamic_configs.factory import get_dynamic_config_store from danswer.dynamic_configs.interface import ConfigNotFoundError from danswer.file_store.file_store import get_default_file_store @@ -17,7 +19,6 @@ from ee.danswer.server.enterprise_settings.models import AnalyticsScriptUpload from ee.danswer.server.enterprise_settings.models import EnterpriseSettings -_ENTERPRISE_SETTINGS_KEY = "danswer_enterprise_settings" logger = setup_logger() @@ -25,27 +26,26 @@ def load_settings() -> EnterpriseSettings: dynamic_config_store = get_dynamic_config_store() try: settings = EnterpriseSettings( - **cast(dict, dynamic_config_store.load(_ENTERPRISE_SETTINGS_KEY)) + **cast(dict, dynamic_config_store.load(KV_ENTERPRISE_SETTINGS_KEY)) ) except ConfigNotFoundError: settings = EnterpriseSettings() - dynamic_config_store.store(_ENTERPRISE_SETTINGS_KEY, settings.dict()) + dynamic_config_store.store(KV_ENTERPRISE_SETTINGS_KEY, settings.dict()) return settings def store_settings(settings: EnterpriseSettings) -> None: - get_dynamic_config_store().store(_ENTERPRISE_SETTINGS_KEY, settings.dict()) + get_dynamic_config_store().store(KV_ENTERPRISE_SETTINGS_KEY, settings.dict()) -_CUSTOM_ANALYTICS_SCRIPT_KEY = "__custom_analytics_script__" _CUSTOM_ANALYTICS_SECRET_KEY = os.environ.get("CUSTOM_ANALYTICS_SECRET_KEY") def load_analytics_script() -> str | None: dynamic_config_store = get_dynamic_config_store() try: - return cast(str, dynamic_config_store.load(_CUSTOM_ANALYTICS_SCRIPT_KEY)) + return cast(str, dynamic_config_store.load(KV_CUSTOM_ANALYTICS_SCRIPT_KEY)) except ConfigNotFoundError: return None @@ -58,7 +58,7 @@ def store_analytics_script(analytics_script_upload: AnalyticsScriptUpload) -> No raise ValueError("Invalid secret key") get_dynamic_config_store().store( - _CUSTOM_ANALYTICS_SCRIPT_KEY, analytics_script_upload.script + KV_CUSTOM_ANALYTICS_SCRIPT_KEY, analytics_script_upload.script ) diff --git a/backend/scripts/migrate_vespa_to_acl.py b/backend/scripts/migrate_vespa_to_acl.py deleted file mode 100644 index a0dce3361..000000000 --- a/backend/scripts/migrate_vespa_to_acl.py +++ /dev/null @@ -1,9 +0,0 @@ -"""Script which updates Vespa to align with the access described in Postgres. -Should be run when a user who has docs already indexed switches over to the new -access control system. This allows them to not have to re-index all documents. -NOTE: this is auto-run on server startup, so should not be necessary in most cases.""" -from danswer.utils.acl import set_acl_for_vespa - - -if __name__ == "__main__": - set_acl_for_vespa()