diff --git a/.github/workflows/run-it.yml b/.github/workflows/run-it.yml index 45d57493b9ad..9941c1172007 100644 --- a/.github/workflows/run-it.yml +++ b/.github/workflows/run-it.yml @@ -94,6 +94,7 @@ jobs: ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true \ AUTH_TYPE=basic \ REQUIRE_EMAIL_VERIFICATION=false \ + DISABLE_TELEMETRY=true \ IMAGE_TAG=it \ docker compose -f docker-compose.dev.yml -p danswer-stack up -d --build id: start_docker diff --git a/backend/danswer/configs/constants.py b/backend/danswer/configs/constants.py index 64c162d7bef5..eff8ee30a63f 100644 --- a/backend/danswer/configs/constants.py +++ b/backend/danswer/configs/constants.py @@ -57,6 +57,7 @@ KV_SLACK_BOT_TOKENS_CONFIG_KEY = "slack_bot_tokens_config_key" KV_GEN_AI_KEY_CHECK_TIME = "genai_api_key_last_check_time" KV_SETTINGS_KEY = "danswer_settings" KV_CUSTOMER_UUID_KEY = "customer_uuid" +KV_INSTANCE_DOMAIN_KEY = "instance_domain" KV_ENTERPRISE_SETTINGS_KEY = "danswer_enterprise_settings" KV_CUSTOM_ANALYTICS_SCRIPT_KEY = "__custom_analytics_script__" diff --git a/backend/danswer/main.py b/backend/danswer/main.py index 64cd005782ef..df1995c2c42c 100644 --- a/backend/danswer/main.py +++ b/backend/danswer/main.py @@ -112,6 +112,7 @@ from danswer.tools.built_in_tools import load_builtin_tools from danswer.tools.built_in_tools import refresh_built_in_tools_cache from danswer.utils.gpu_utils import gpu_status_request from danswer.utils.logger import setup_logger +from danswer.utils.telemetry import get_or_generate_uuid from danswer.utils.telemetry import optional_telemetry from danswer.utils.telemetry import RecordType from danswer.utils.variable_functionality import fetch_versioned_implementation @@ -324,6 +325,9 @@ async def lifespan(app: FastAPI) -> AsyncGenerator: # fill up Postgres connection pools await warm_up_connections() + # We cache this at the beginning so there is no delay in the first telemetry + get_or_generate_uuid() + with Session(engine) as db_session: 
check_index_swap(db_session=db_session) search_settings = get_current_search_settings(db_session) diff --git a/backend/danswer/utils/telemetry.py b/backend/danswer/utils/telemetry.py index 80fcba65a169..d8a021877e6b 100644 --- a/backend/danswer/utils/telemetry.py +++ b/backend/danswer/utils/telemetry.py @@ -4,13 +4,20 @@ from enum import Enum from typing import cast import requests +from sqlalchemy.orm import Session from danswer.configs.app_configs import DISABLE_TELEMETRY +from danswer.configs.app_configs import ENTERPRISE_EDITION_ENABLED from danswer.configs.constants import KV_CUSTOMER_UUID_KEY +from danswer.configs.constants import KV_INSTANCE_DOMAIN_KEY +from danswer.db.engine import get_sqlalchemy_engine +from danswer.db.models import User from danswer.dynamic_configs.factory import get_dynamic_config_store from danswer.dynamic_configs.interface import ConfigNotFoundError -DANSWER_TELEMETRY_ENDPOINT = "https://telemetry.danswer.ai/anonymous_telemetry" +_DANSWER_TELEMETRY_ENDPOINT = "https://telemetry.danswer.ai/anonymous_telemetry" +_CACHED_UUID: str | None = None +_CACHED_INSTANCE_DOMAIN: str | None = None class RecordType(str, Enum): @@ -22,13 +29,42 @@ class RecordType(str, Enum): def get_or_generate_uuid() -> str: + global _CACHED_UUID + + if _CACHED_UUID is not None: + return _CACHED_UUID + kv_store = get_dynamic_config_store() + try: - return cast(str, kv_store.load(KV_CUSTOMER_UUID_KEY)) + _CACHED_UUID = cast(str, kv_store.load(KV_CUSTOMER_UUID_KEY)) except ConfigNotFoundError: - customer_id = str(uuid.uuid4()) - kv_store.store(KV_CUSTOMER_UUID_KEY, customer_id, encrypt=True) - return customer_id + _CACHED_UUID = str(uuid.uuid4()) + kv_store.store(KV_CUSTOMER_UUID_KEY, _CACHED_UUID, encrypt=True) + + return _CACHED_UUID + + +def _get_or_generate_instance_domain() -> str | None: + global _CACHED_INSTANCE_DOMAIN + + if _CACHED_INSTANCE_DOMAIN is not None: + return _CACHED_INSTANCE_DOMAIN + + kv_store = get_dynamic_config_store() + + try: + 
_CACHED_INSTANCE_DOMAIN = cast(str, kv_store.load(KV_INSTANCE_DOMAIN_KEY)) + except ConfigNotFoundError: + with Session(get_sqlalchemy_engine()) as db_session: + first_user = db_session.query(User).first() + if first_user: + _CACHED_INSTANCE_DOMAIN = first_user.email.split("@")[-1] + kv_store.store( + KV_INSTANCE_DOMAIN_KEY, _CACHED_INSTANCE_DOMAIN, encrypt=True + ) + + return _CACHED_INSTANCE_DOMAIN def optional_telemetry( @@ -41,16 +77,19 @@ def optional_telemetry( def telemetry_logic() -> None: try: + customer_uuid = get_or_generate_uuid() payload = { "data": data, "record": record_type, # If None then it's a flow that doesn't include a user # For cases where the User itself is None, a string is provided instead "user_id": user_id, - "customer_uuid": get_or_generate_uuid(), + "customer_uuid": customer_uuid, } + if ENTERPRISE_EDITION_ENABLED: + payload["instance_domain"] = _get_or_generate_instance_domain() requests.post( - DANSWER_TELEMETRY_ENDPOINT, + _DANSWER_TELEMETRY_ENDPOINT, headers={"Content-Type": "application/json"}, json=payload, )