Anonymous User Telem (#1041)

This commit is contained in:
Yuhong Sun
2024-02-04 13:41:19 -08:00
committed by GitHub
parent e54ce779fd
commit b3b88f05d3
6 changed files with 53 additions and 11 deletions

View File

@@ -184,7 +184,11 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
self, user: User, request: Optional[Request] = None
) -> None:
logger.info(f"User {user.id} has registered.")
optional_telemetry(record_type=RecordType.SIGN_UP, data={"user": "create"})
optional_telemetry(
record_type=RecordType.SIGN_UP,
data={"action": "create"},
user_id=str(user.id),
)
async def on_after_forgot_password(
self, user: User, token: str, request: Optional[Request] = None

View File

@@ -28,6 +28,7 @@ from danswer.danswerbot.slack.models import SlackMessageInfo
from danswer.danswerbot.slack.utils import ChannelIdAdapter
from danswer.danswerbot.slack.utils import fetch_userids_from_emails
from danswer.danswerbot.slack.utils import respond_in_thread
from danswer.danswerbot.slack.utils import slack_usage_report
from danswer.danswerbot.slack.utils import SlackRateLimiter
from danswer.danswerbot.slack.utils import update_emote_react
from danswer.db.engine import get_sqlalchemy_engine
@@ -39,8 +40,6 @@ from danswer.search.models import BaseFilters
from danswer.search.models import OptionalSearchSetting
from danswer.search.models import RetrievalDetails
from danswer.utils.logger import setup_logger
from danswer.utils.telemetry import optional_telemetry
from danswer.utils.telemetry import RecordType
logger_base = setup_logger()
@@ -121,8 +120,6 @@ def handle_message(
is_bot_msg = message_info.is_bot_msg
is_bot_dm = message_info.is_bot_dm
engine = get_sqlalchemy_engine()
document_set_names: list[str] | None = None
persona = channel_config.persona if channel_config else None
prompt = None
@@ -215,12 +212,10 @@ def handle_message(
action = "slack_tag_message"
elif is_bot_dm:
action = "slack_dm_message"
optional_telemetry(
record_type=RecordType.USAGE,
data={"action": action},
)
with Session(engine, expire_on_commit=False) as db_session:
slack_usage_report(action=action, sender_id=sender_id, client=client)
with Session(get_sqlalchemy_engine()) as db_session:
# This also handles creating the query event in postgres
answer = get_search_answer(
query_req=new_message_request,

View File

@@ -13,7 +13,9 @@ from slack_sdk import WebClient
from slack_sdk.errors import SlackApiError
from slack_sdk.models.blocks import Block
from slack_sdk.models.metadata import Metadata
from sqlalchemy.orm import Session
from danswer.configs.app_configs import DISABLE_TELEMETRY
from danswer.configs.constants import ID_SEPARATOR
from danswer.configs.constants import MessageType
from danswer.configs.danswerbot_configs import DANSWER_BOT_MAX_QPM
@@ -23,8 +25,12 @@ from danswer.connectors.slack.utils import make_slack_api_rate_limited
from danswer.connectors.slack.utils import SlackTextCleaner
from danswer.danswerbot.slack.constants import SLACK_CHANNEL_ID
from danswer.danswerbot.slack.tokens import fetch_tokens
from danswer.db.engine import get_sqlalchemy_engine
from danswer.db.users import get_user_by_email
from danswer.one_shot_answer.models import ThreadMessage
from danswer.utils.logger import setup_logger
from danswer.utils.telemetry import optional_telemetry
from danswer.utils.telemetry import RecordType
from danswer.utils.text_processing import replace_whitespaces_w_space
logger = setup_logger()
@@ -353,6 +359,28 @@ def read_slack_thread(
return thread_messages
def slack_usage_report(action: str, sender_id: str | None, client: WebClient) -> None:
if DISABLE_TELEMETRY:
return
danswer_user = None
sender_email = None
try:
sender_email = client.users_info(user=sender_id).data["user"]["profile"]["email"] # type: ignore
except Exception:
logger.warning("Unable to find sender email")
if sender_email is not None:
with Session(get_sqlalchemy_engine()) as db_session:
danswer_user = get_user_by_email(email=sender_email, db_session=db_session)
optional_telemetry(
record_type=RecordType.USAGE,
data={"action": action},
user_id=str(danswer_user.id) if danswer_user else "Non-Danswer-Or-No-Auth-User",
)
class SlackRateLimiter:
def __init__(self) -> None:
self.max_qpm: int | None = DANSWER_BOT_MAX_QPM

View File

@@ -10,3 +10,9 @@ def list_users(db_session: Session) -> Sequence[User]:
"""List all users. No pagination as of now, as the # of users
is assumed to be relatively small (<< 1 million)"""
return db_session.scalars(select(User)).unique().all()
def get_user_by_email(email: str, db_session: Session) -> User | None:
user = db_session.query(User).filter(User.email == email).first() # type: ignore
return user

View File

@@ -31,7 +31,9 @@ def get_or_generate_uuid() -> str:
return customer_id
def optional_telemetry(record_type: RecordType, data: dict) -> None:
def optional_telemetry(
record_type: RecordType, data: dict, user_id: str | None = None
) -> None:
if DISABLE_TELEMETRY:
return
@@ -42,6 +44,9 @@ def optional_telemetry(record_type: RecordType, data: dict) -> None:
payload = {
"data": data,
"record": record_type,
# If None then it's a flow that doesn't include a user
# For cases where the User itself is None, a string is provided instead
"user_id": user_id,
"customer_uuid": get_or_generate_uuid(),
}
requests.post(

View File

@@ -24,6 +24,7 @@ def log_function_time(
@wraps(func)
def wrapped_func(*args: Any, **kwargs: Any) -> Any:
start_time = time.time()
user = kwargs.get("user")
result = func(*args, **kwargs)
elapsed_time_str = str(time.time() - start_time)
log_name = func_name or func.__name__
@@ -33,6 +34,7 @@ def log_function_time(
optional_telemetry(
record_type=RecordType.LATENCY,
data={"function": log_name, "latency": str(elapsed_time_str)},
user_id=str(user.id) if user else "Unknown",
)
return result
@@ -49,6 +51,7 @@ def log_generator_function_time(
@wraps(func)
def wrapped_func(*args: Any, **kwargs: Any) -> Any:
start_time = time.time()
user = kwargs.get("user")
gen = func(*args, **kwargs)
try:
value = next(gen)
@@ -65,6 +68,7 @@ def log_generator_function_time(
optional_telemetry(
record_type=RecordType.LATENCY,
data={"function": log_name, "latency": str(elapsed_time_str)},
user_id=str(user.id) if user else "Unknown",
)
return cast(FG, wrapped_func)