diff --git a/backend/danswer/auth/users.py b/backend/danswer/auth/users.py index 04e75694a711..02234c30242b 100644 --- a/backend/danswer/auth/users.py +++ b/backend/danswer/auth/users.py @@ -184,7 +184,11 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]): self, user: User, request: Optional[Request] = None ) -> None: logger.info(f"User {user.id} has registered.") - optional_telemetry(record_type=RecordType.SIGN_UP, data={"user": "create"}) + optional_telemetry( + record_type=RecordType.SIGN_UP, + data={"action": "create"}, + user_id=str(user.id), + ) async def on_after_forgot_password( self, user: User, token: str, request: Optional[Request] = None diff --git a/backend/danswer/danswerbot/slack/handlers/handle_message.py b/backend/danswer/danswerbot/slack/handlers/handle_message.py index 0c76ac34f20b..2bd7c7e5b80c 100644 --- a/backend/danswer/danswerbot/slack/handlers/handle_message.py +++ b/backend/danswer/danswerbot/slack/handlers/handle_message.py @@ -28,6 +28,7 @@ from danswer.danswerbot.slack.models import SlackMessageInfo from danswer.danswerbot.slack.utils import ChannelIdAdapter from danswer.danswerbot.slack.utils import fetch_userids_from_emails from danswer.danswerbot.slack.utils import respond_in_thread +from danswer.danswerbot.slack.utils import slack_usage_report from danswer.danswerbot.slack.utils import SlackRateLimiter from danswer.danswerbot.slack.utils import update_emote_react from danswer.db.engine import get_sqlalchemy_engine @@ -39,8 +40,6 @@ from danswer.search.models import BaseFilters from danswer.search.models import OptionalSearchSetting from danswer.search.models import RetrievalDetails from danswer.utils.logger import setup_logger -from danswer.utils.telemetry import optional_telemetry -from danswer.utils.telemetry import RecordType logger_base = setup_logger() @@ -121,8 +120,6 @@ def handle_message( is_bot_msg = message_info.is_bot_msg is_bot_dm = message_info.is_bot_dm - engine = get_sqlalchemy_engine() - document_set_names: list[str] | None = None persona = channel_config.persona if channel_config else None prompt = None @@ -215,12 +212,10 @@ def handle_message( action = "slack_tag_message" elif is_bot_dm: action = "slack_dm_message" - optional_telemetry( - record_type=RecordType.USAGE, - data={"action": action}, - ) - with Session(engine, expire_on_commit=False) as db_session: + slack_usage_report(action=action, sender_id=sender_id, client=client) + + with Session(get_sqlalchemy_engine()) as db_session: # This also handles creating the query event in postgres answer = get_search_answer( query_req=new_message_request, diff --git a/backend/danswer/danswerbot/slack/utils.py b/backend/danswer/danswerbot/slack/utils.py index b54c14a426a7..b4ebc05cc1fc 100644 --- a/backend/danswer/danswerbot/slack/utils.py +++ b/backend/danswer/danswerbot/slack/utils.py @@ -13,7 +13,9 @@ from slack_sdk import WebClient from slack_sdk.errors import SlackApiError from slack_sdk.models.blocks import Block from slack_sdk.models.metadata import Metadata +from sqlalchemy.orm import Session +from danswer.configs.app_configs import DISABLE_TELEMETRY from danswer.configs.constants import ID_SEPARATOR from danswer.configs.constants import MessageType from danswer.configs.danswerbot_configs import DANSWER_BOT_MAX_QPM @@ -23,8 +25,12 @@ from danswer.connectors.slack.utils import make_slack_api_rate_limited from danswer.connectors.slack.utils import SlackTextCleaner from danswer.danswerbot.slack.constants import SLACK_CHANNEL_ID from danswer.danswerbot.slack.tokens import fetch_tokens +from danswer.db.engine import get_sqlalchemy_engine +from danswer.db.users import get_user_by_email from danswer.one_shot_answer.models import ThreadMessage from danswer.utils.logger import setup_logger +from danswer.utils.telemetry import optional_telemetry +from danswer.utils.telemetry import RecordType from danswer.utils.text_processing import replace_whitespaces_w_space logger = setup_logger() @@ -353,6 +359,28 @@ def read_slack_thread( return thread_messages +def slack_usage_report(action: str, sender_id: str | None, client: WebClient) -> None: + if DISABLE_TELEMETRY: + return + + danswer_user = None + sender_email = None + try: + sender_email = client.users_info(user=sender_id).data["user"]["profile"]["email"] # type: ignore + except Exception: + logger.warning("Unable to find sender email") + + if sender_email is not None: + with Session(get_sqlalchemy_engine()) as db_session: + danswer_user = get_user_by_email(email=sender_email, db_session=db_session) + + optional_telemetry( + record_type=RecordType.USAGE, + data={"action": action}, + user_id=str(danswer_user.id) if danswer_user else "Non-Danswer-Or-No-Auth-User", + ) + + class SlackRateLimiter: def __init__(self) -> None: self.max_qpm: int | None = DANSWER_BOT_MAX_QPM diff --git a/backend/danswer/db/users.py b/backend/danswer/db/users.py index c5f5a5c73099..fa15aa4b789b 100644 --- a/backend/danswer/db/users.py +++ b/backend/danswer/db/users.py @@ -10,3 +10,9 @@ def list_users(db_session: Session) -> Sequence[User]: """List all users. No pagination as of now, as the # of users is assumed to be relatively small (<< 1 million)""" return db_session.scalars(select(User)).unique().all() + + +def get_user_by_email(email: str, db_session: Session) -> User | None: + user = db_session.query(User).filter(User.email == email).first() # type: ignore + + return user diff --git a/backend/danswer/utils/telemetry.py b/backend/danswer/utils/telemetry.py index 6079ddc14e11..65e9f4709fe5 100644 --- a/backend/danswer/utils/telemetry.py +++ b/backend/danswer/utils/telemetry.py @@ -31,7 +31,9 @@ def get_or_generate_uuid() -> str: return customer_id -def optional_telemetry(record_type: RecordType, data: dict) -> None: +def optional_telemetry( + record_type: RecordType, data: dict, user_id: str | None = None +) -> None: if DISABLE_TELEMETRY: return @@ -42,6 +44,9 @@ def optional_telemetry(record_type: RecordType, data: dict) -> None: payload = { "data": data, "record": record_type, + # If None then it's a flow that doesn't include a user + # For cases where the User itself is None, a string is provided instead + "user_id": user_id, "customer_uuid": get_or_generate_uuid(), } requests.post( diff --git a/backend/danswer/utils/timing.py b/backend/danswer/utils/timing.py index 22aa52f64b2e..664656aa799a 100644 --- a/backend/danswer/utils/timing.py +++ b/backend/danswer/utils/timing.py @@ -24,6 +24,7 @@ def log_function_time( @wraps(func) def wrapped_func(*args: Any, **kwargs: Any) -> Any: start_time = time.time() + user = kwargs.get("user") result = func(*args, **kwargs) elapsed_time_str = str(time.time() - start_time) log_name = func_name or func.__name__ @@ -33,6 +34,7 @@ def log_function_time( optional_telemetry( record_type=RecordType.LATENCY, data={"function": log_name, "latency": str(elapsed_time_str)}, + user_id=str(user.id) if user else "Unknown", ) return result @@ -49,6 +51,7 @@ def log_generator_function_time( @wraps(func) def wrapped_func(*args: Any, **kwargs: Any) -> Any: start_time = time.time() + user = kwargs.get("user") gen = func(*args, **kwargs) try: value = next(gen) @@ -65,6 +68,7 @@ def log_generator_function_time( optional_telemetry( record_type=RecordType.LATENCY, data={"function": log_name, "latency": str(elapsed_time_str)}, + user_id=str(user.id) if user else "Unknown", ) return cast(FG, wrapped_func)