diff --git a/backend/danswer/auth/users.py b/backend/danswer/auth/users.py
index c02ee7a46a..6b0cfe57b4 100644
--- a/backend/danswer/auth/users.py
+++ b/backend/danswer/auth/users.py
@@ -48,6 +48,8 @@ from danswer.db.engine import get_session
 from danswer.db.models import AccessToken
 from danswer.db.models import User
 from danswer.utils.logger import setup_logger
+from danswer.utils.telemetry import optional_telemetry
+from danswer.utils.telemetry import RecordType
 from danswer.utils.variable_functionality import fetch_versioned_implementation
 
 
@@ -170,6 +172,7 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
         self, user: User, request: Optional[Request] = None
     ) -> None:
         logger.info(f"User {user.id} has registered.")
+        optional_telemetry(record_type=RecordType.SIGN_UP, data={"user": "create"})
 
     async def on_after_forgot_password(
         self, user: User, token: str, request: Optional[Request] = None
diff --git a/backend/danswer/configs/app_configs.py b/backend/danswer/configs/app_configs.py
index 8c731e9d81..dc7faeb1f4 100644
--- a/backend/danswer/configs/app_configs.py
+++ b/backend/danswer/configs/app_configs.py
@@ -238,5 +238,7 @@ JOB_TIMEOUT = 60 * 60 * 6  # 6 hours default
 LOG_ALL_MODEL_INTERACTIONS = (
     os.environ.get("LOG_ALL_MODEL_INTERACTIONS", "").lower() == "true"
 )
+# Anonymous usage telemetry
+DISABLE_TELEMETRY = os.environ.get("DISABLE_TELEMETRY", "").lower() == "true"
 # notset, debug, info, warning, error, or critical
 LOG_LEVEL = os.environ.get("LOG_LEVEL", "info")
diff --git a/backend/danswer/db/index_attempt.py b/backend/danswer/db/index_attempt.py
index ce67f84d4c..527b151639 100644
--- a/backend/danswer/db/index_attempt.py
+++ b/backend/danswer/db/index_attempt.py
@@ -13,7 +13,8 @@ from danswer.db.models import IndexAttempt
 from danswer.db.models import IndexingStatus
 from danswer.server.models import ConnectorCredentialPairIdentifier
 from danswer.utils.logger import setup_logger
-
+from danswer.utils.telemetry import optional_telemetry
+from danswer.utils.telemetry import RecordType
 
 logger = setup_logger()
 
@@ -88,6 +89,9 @@ def mark_attempt_failed(
     db_session.add(index_attempt)
     db_session.commit()
 
+    source = index_attempt.connector.source
+    optional_telemetry(record_type=RecordType.FAILURE, data={"connector": source})
+
 
 def update_docs_indexed(
     db_session: Session,
diff --git a/backend/danswer/main.py b/backend/danswer/main.py
index 11131a1042..7e37bb0970 100644
--- a/backend/danswer/main.py
+++ b/backend/danswer/main.py
@@ -48,6 +48,8 @@ from danswer.server.slack_bot_management import router as slack_bot_management_r
 from danswer.server.state import router as state_router
 from danswer.server.users import router as user_router
 from danswer.utils.logger import setup_logger
+from danswer.utils.telemetry import optional_telemetry
+from danswer.utils.telemetry import RecordType
 from danswer.utils.variable_functionality import fetch_versioned_implementation
 
 
@@ -212,6 +214,10 @@ def get_application() -> FastAPI:
         logger.info("Verifying Document Index(s) is/are available.")
         get_default_document_index().ensure_indices_exist()
 
+        optional_telemetry(
+            record_type=RecordType.VERSION, data={"version": __version__}
+        )
+
     application.add_middleware(
         CORSMiddleware,
         allow_origins=["*"],  # Change this to the list of allowed origins if needed
diff --git a/backend/danswer/utils/telemetry.py b/backend/danswer/utils/telemetry.py
new file mode 100644
index 0000000000..8311ce2823
--- /dev/null
+++ b/backend/danswer/utils/telemetry.py
@@ -0,0 +1,71 @@
+import threading
+import uuid
+from enum import Enum
+from typing import cast
+
+import requests
+
+from danswer.configs.app_configs import DISABLE_TELEMETRY
+from danswer.dynamic_configs import get_dynamic_config_store
+from danswer.dynamic_configs.interface import ConfigNotFoundError
+
+CUSTOMER_UUID_KEY = "customer_uuid"
+DANSWER_TELEMETRY_ENDPOINT = "https://telemetry.danswer.ai/anonymous_telemetry"
+# Upper bound on one telemetry request so worker threads cannot hang forever
+TELEMETRY_TIMEOUT_SECONDS = 5
+
+
+class RecordType(str, Enum):
+    """Categories of telemetry records sent to the telemetry endpoint."""
+
+    VERSION = "version"
+    SIGN_UP = "sign_up"
+    LATENCY = "latency"
+    FAILURE = "failure"
+
+
+def get_or_generate_uuid() -> str:
+    """Return the stored customer UUID, creating and persisting one if absent."""
+    kv_store = get_dynamic_config_store()
+    try:
+        return cast(str, kv_store.load(CUSTOMER_UUID_KEY))
+    except ConfigNotFoundError:
+        customer_id = str(uuid.uuid4())
+        kv_store.store(CUSTOMER_UUID_KEY, customer_id)
+        return customer_id
+
+
+def optional_telemetry(record_type: RecordType, data: dict) -> None:
+    """Post a telemetry record from a daemon thread, unless telemetry is disabled.
+
+    Best-effort: every failure is swallowed so the caller is never affected.
+    """
+    if DISABLE_TELEMETRY:
+        return
+
+    def telemetry_logic() -> None:
+        # Exceptions raised in a worker thread never reach the code that started
+        # it, so the guard has to live inside the thread target itself.
+        try:
+            payload = {
+                "data": data,
+                "record": record_type,
+                "customer_uuid": get_or_generate_uuid(),
+            }
+            requests.post(
+                DANSWER_TELEMETRY_ENDPOINT,
+                headers={"Content-Type": "application/json"},
+                json=payload,
+                timeout=TELEMETRY_TIMEOUT_SECONDS,
+            )
+        except Exception:
+            # Should never interfere with normal functions of Danswer
+            pass
+
+    try:
+        # Run in separate thread to have minimal overhead in main flows
+        thread = threading.Thread(target=telemetry_logic, daemon=True)
+        thread.start()
+    except Exception:
+        # Failing to spawn the thread must not break the caller either
+        pass
diff --git a/backend/danswer/utils/timing.py b/backend/danswer/utils/timing.py
index f01ec33c89..192cf1dc62 100644
--- a/backend/danswer/utils/timing.py
+++ b/backend/danswer/utils/timing.py
@@ -8,6 +8,8 @@ from typing import cast
 from typing import TypeVar
 
 from danswer.utils.logger import setup_logger
+from danswer.utils.telemetry import optional_telemetry
+from danswer.utils.telemetry import RecordType
 
 logger = setup_logger()
 
@@ -21,8 +23,13 @@ def log_function_time(func_name: str | None = None) -> Callable[[F], F]:
         def wrapped_func(*args: Any, **kwargs: Any) -> Any:
             start_time = time.time()
             result = func(*args, **kwargs)
-            elapsed_time = time.time() - start_time
-            logger.info(f"{func_name or func.__name__} took {elapsed_time} seconds")
+            elapsed_time_str = str(time.time() - start_time)
+            log_name = func_name or func.__name__
+            logger.info(f"{log_name} took {elapsed_time_str} seconds")
+            optional_telemetry(
+                record_type=RecordType.LATENCY,
+                data={"function": log_name, "latency": elapsed_time_str},
+            )
             return result
 
         return cast(F, wrapped_func)
@@ -44,8 +51,13 @@ def log_generator_function_time(func_name: str | None = None) -> Callable[[FG],
             except StopIteration:
                 pass
             finally:
-                elapsed_time = time.time() - start_time
-                logger.info(f"{func_name or func.__name__} took {elapsed_time} seconds")
+                elapsed_time_str = str(time.time() - start_time)
+                log_name = func_name or func.__name__
+                logger.info(f"{log_name} took {elapsed_time_str} seconds")
+                optional_telemetry(
+                    record_type=RecordType.LATENCY,
+                    data={"function": log_name, "latency": elapsed_time_str},
+                )
 
         return cast(FG, wrapped_func)
 
diff --git a/deployment/docker_compose/docker-compose.dev.yml b/deployment/docker_compose/docker-compose.dev.yml
index caaebd23b9..df2f999528 100644
--- a/deployment/docker_compose/docker-compose.dev.yml
+++ b/deployment/docker_compose/docker-compose.dev.yml
@@ -48,6 +48,9 @@ services:
       - EDIT_KEYWORD_QUERY=${EDIT_KEYWORD_QUERY:-}
       - MODEL_SERVER_HOST=${MODEL_SERVER_HOST:-}
       - MODEL_SERVER_PORT=${MODEL_SERVER_PORT:-}
+      # Leave this on pretty please? Nothing sensitive is collected!
+      # https://docs.danswer.dev/more/telemetry
+      - DISABLE_TELEMETRY=${DISABLE_TELEMETRY:-}
       # Set to debug to get more fine-grained logs
       - LOG_LEVEL=${LOG_LEVEL:-info}
     volumes: