Added instance domain to telemetry (#2310)

This commit is contained in:
hagen-danswer
2024-09-03 21:04:40 -07:00
committed by GitHub
parent 2b7915f33b
commit c358c91e4c
4 changed files with 52 additions and 7 deletions

View File

@@ -94,6 +94,7 @@ jobs:
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true \ ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true \
AUTH_TYPE=basic \ AUTH_TYPE=basic \
REQUIRE_EMAIL_VERIFICATION=false \ REQUIRE_EMAIL_VERIFICATION=false \
DISABLE_TELEMETRY=true \
IMAGE_TAG=it \ IMAGE_TAG=it \
docker compose -f docker-compose.dev.yml -p danswer-stack up -d --build docker compose -f docker-compose.dev.yml -p danswer-stack up -d --build
id: start_docker id: start_docker

View File

@@ -57,6 +57,7 @@ KV_SLACK_BOT_TOKENS_CONFIG_KEY = "slack_bot_tokens_config_key"
KV_GEN_AI_KEY_CHECK_TIME = "genai_api_key_last_check_time" KV_GEN_AI_KEY_CHECK_TIME = "genai_api_key_last_check_time"
KV_SETTINGS_KEY = "danswer_settings" KV_SETTINGS_KEY = "danswer_settings"
KV_CUSTOMER_UUID_KEY = "customer_uuid" KV_CUSTOMER_UUID_KEY = "customer_uuid"
KV_INSTANCE_DOMAIN_KEY = "instance_domain"
KV_ENTERPRISE_SETTINGS_KEY = "danswer_enterprise_settings" KV_ENTERPRISE_SETTINGS_KEY = "danswer_enterprise_settings"
KV_CUSTOM_ANALYTICS_SCRIPT_KEY = "__custom_analytics_script__" KV_CUSTOM_ANALYTICS_SCRIPT_KEY = "__custom_analytics_script__"

View File

@@ -112,6 +112,7 @@ from danswer.tools.built_in_tools import load_builtin_tools
from danswer.tools.built_in_tools import refresh_built_in_tools_cache from danswer.tools.built_in_tools import refresh_built_in_tools_cache
from danswer.utils.gpu_utils import gpu_status_request from danswer.utils.gpu_utils import gpu_status_request
from danswer.utils.logger import setup_logger from danswer.utils.logger import setup_logger
from danswer.utils.telemetry import get_or_generate_uuid
from danswer.utils.telemetry import optional_telemetry from danswer.utils.telemetry import optional_telemetry
from danswer.utils.telemetry import RecordType from danswer.utils.telemetry import RecordType
from danswer.utils.variable_functionality import fetch_versioned_implementation from danswer.utils.variable_functionality import fetch_versioned_implementation
@@ -324,6 +325,9 @@ async def lifespan(app: FastAPI) -> AsyncGenerator:
# fill up Postgres connection pools # fill up Postgres connection pools
await warm_up_connections() await warm_up_connections()
# We cache this at the beginning so there is no delay in the first telemetry
get_or_generate_uuid()
with Session(engine) as db_session: with Session(engine) as db_session:
check_index_swap(db_session=db_session) check_index_swap(db_session=db_session)
search_settings = get_current_search_settings(db_session) search_settings = get_current_search_settings(db_session)

View File

@@ -4,13 +4,20 @@ from enum import Enum
from typing import cast from typing import cast
import requests import requests
from sqlalchemy.orm import Session
from danswer.configs.app_configs import DISABLE_TELEMETRY from danswer.configs.app_configs import DISABLE_TELEMETRY
from danswer.configs.app_configs import ENTERPRISE_EDITION_ENABLED
from danswer.configs.constants import KV_CUSTOMER_UUID_KEY from danswer.configs.constants import KV_CUSTOMER_UUID_KEY
from danswer.configs.constants import KV_INSTANCE_DOMAIN_KEY
from danswer.db.engine import get_sqlalchemy_engine
from danswer.db.models import User
from danswer.dynamic_configs.factory import get_dynamic_config_store from danswer.dynamic_configs.factory import get_dynamic_config_store
from danswer.dynamic_configs.interface import ConfigNotFoundError from danswer.dynamic_configs.interface import ConfigNotFoundError
DANSWER_TELEMETRY_ENDPOINT = "https://telemetry.danswer.ai/anonymous_telemetry" _DANSWER_TELEMETRY_ENDPOINT = "https://telemetry.danswer.ai/anonymous_telemetry"
_CACHED_UUID: str | None = None
_CACHED_INSTANCE_DOMAIN: str | None = None
class RecordType(str, Enum): class RecordType(str, Enum):
@@ -22,13 +29,42 @@ class RecordType(str, Enum):
def get_or_generate_uuid() -> str: def get_or_generate_uuid() -> str:
global _CACHED_UUID
if _CACHED_UUID is not None:
return _CACHED_UUID
kv_store = get_dynamic_config_store() kv_store = get_dynamic_config_store()
try: try:
return cast(str, kv_store.load(KV_CUSTOMER_UUID_KEY)) _CACHED_UUID = cast(str, kv_store.load(KV_CUSTOMER_UUID_KEY))
except ConfigNotFoundError: except ConfigNotFoundError:
customer_id = str(uuid.uuid4()) _CACHED_UUID = str(uuid.uuid4())
kv_store.store(KV_CUSTOMER_UUID_KEY, customer_id, encrypt=True) kv_store.store(KV_CUSTOMER_UUID_KEY, _CACHED_UUID, encrypt=True)
return customer_id
return _CACHED_UUID
def _get_or_generate_instance_domain() -> str | None:
    """Return the instance domain, resolving it on first use.

    Resolution order:
      1. The module-level ``_CACHED_INSTANCE_DOMAIN`` value, if already set.
      2. The value stored under ``KV_INSTANCE_DOMAIN_KEY`` in the dynamic
         config store.
      3. The part after the last "@" in the email of the first ``User`` row
         returned by the database; this value is then written back to the
         config store (encrypted) for later calls.

    Returns:
        The domain string, or ``None`` when nothing is stored yet and the
        query returns no user.
    """
    global _CACHED_INSTANCE_DOMAIN

    if _CACHED_INSTANCE_DOMAIN is None:
        config_store = get_dynamic_config_store()
        try:
            stored_domain = config_store.load(KV_INSTANCE_DOMAIN_KEY)
            _CACHED_INSTANCE_DOMAIN = cast(str, stored_domain)
        except ConfigNotFoundError:
            # Nothing persisted yet: derive the domain from a user's email.
            with Session(get_sqlalchemy_engine()) as db_session:
                user_row = db_session.query(User).first()
                if user_row:
                    # Set the cache before persisting, same order as before.
                    _CACHED_INSTANCE_DOMAIN = user_row.email.rsplit("@", 1)[-1]
                    config_store.store(
                        KV_INSTANCE_DOMAIN_KEY,
                        _CACHED_INSTANCE_DOMAIN,
                        encrypt=True,
                    )

    return _CACHED_INSTANCE_DOMAIN
def optional_telemetry( def optional_telemetry(
@@ -41,16 +77,19 @@ def optional_telemetry(
def telemetry_logic() -> None: def telemetry_logic() -> None:
try: try:
customer_uuid = get_or_generate_uuid()
payload = { payload = {
"data": data, "data": data,
"record": record_type, "record": record_type,
# If None then it's a flow that doesn't include a user # If None then it's a flow that doesn't include a user
# For cases where the User itself is None, a string is provided instead # For cases where the User itself is None, a string is provided instead
"user_id": user_id, "user_id": user_id,
"customer_uuid": get_or_generate_uuid(), "customer_uuid": customer_uuid,
} }
if ENTERPRISE_EDITION_ENABLED:
payload["instance_domain"] = _get_or_generate_instance_domain()
requests.post( requests.post(
DANSWER_TELEMETRY_ENDPOINT, _DANSWER_TELEMETRY_ENDPOINT,
headers={"Content-Type": "application/json"}, headers={"Content-Type": "application/json"},
json=payload, json=payload,
) )