Optional Anonymous Telemetry (#727)

This commit is contained in:
Yuhong Sun 2023-11-16 09:22:36 -08:00 committed by GitHub
parent 0ed8f14015
commit be5ef77896
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 91 additions and 5 deletions

View File

@ -48,6 +48,8 @@ from danswer.db.engine import get_session
from danswer.db.models import AccessToken
from danswer.db.models import User
from danswer.utils.logger import setup_logger
from danswer.utils.telemetry import optional_telemetry
from danswer.utils.telemetry import RecordType
from danswer.utils.variable_functionality import fetch_versioned_implementation
@ -170,6 +172,7 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
self, user: User, request: Optional[Request] = None
) -> None:
logger.info(f"User {user.id} has registered.")
optional_telemetry(record_type=RecordType.SIGN_UP, data={"user": "create"})
async def on_after_forgot_password(
self, user: User, token: str, request: Optional[Request] = None

View File

@ -238,5 +238,7 @@ JOB_TIMEOUT = 60 * 60 * 6 # 6 hours default
LOG_ALL_MODEL_INTERACTIONS = (
os.environ.get("LOG_ALL_MODEL_INTERACTIONS", "").lower() == "true"
)
# Anonymous usage telemetry
DISABLE_TELEMETRY = os.environ.get("DISABLE_TELEMETRY", "").lower() == "true"
# notset, debug, info, warning, error, or critical
LOG_LEVEL = os.environ.get("LOG_LEVEL", "info")

View File

@ -13,7 +13,8 @@ from danswer.db.models import IndexAttempt
from danswer.db.models import IndexingStatus
from danswer.server.models import ConnectorCredentialPairIdentifier
from danswer.utils.logger import setup_logger
from danswer.utils.telemetry import optional_telemetry
from danswer.utils.telemetry import RecordType
logger = setup_logger()
@ -88,6 +89,9 @@ def mark_attempt_failed(
db_session.add(index_attempt)
db_session.commit()
source = index_attempt.connector.source
optional_telemetry(record_type=RecordType.FAILURE, data={"connector": source})
def update_docs_indexed(
db_session: Session,

View File

@ -48,6 +48,8 @@ from danswer.server.slack_bot_management import router as slack_bot_management_r
from danswer.server.state import router as state_router
from danswer.server.users import router as user_router
from danswer.utils.logger import setup_logger
from danswer.utils.telemetry import optional_telemetry
from danswer.utils.telemetry import RecordType
from danswer.utils.variable_functionality import fetch_versioned_implementation
@ -212,6 +214,10 @@ def get_application() -> FastAPI:
logger.info("Verifying Document Index(s) is/are available.")
get_default_document_index().ensure_indices_exist()
optional_telemetry(
record_type=RecordType.VERSION, data={"version": __version__}
)
application.add_middleware(
CORSMiddleware,
allow_origins=["*"], # Change this to the list of allowed origins if needed

View File

@ -0,0 +1,56 @@
import threading
import uuid
from enum import Enum
from typing import cast
import requests
from danswer.configs.app_configs import DISABLE_TELEMETRY
from danswer.dynamic_configs import get_dynamic_config_store
from danswer.dynamic_configs.interface import ConfigNotFoundError
# Key under which this deployment's generated UUID is persisted in the
# dynamic config store (read and written by get_or_generate_uuid).
CUSTOMER_UUID_KEY = "customer_uuid"
# URL that optional_telemetry POSTs its JSON payloads to.
DANSWER_TELEMETRY_ENDPOINT = "https://telemetry.danswer.ai/anonymous_telemetry"
class RecordType(str, Enum):
    """Category tag attached to every telemetry event.

    The member is sent as the ``"record"`` field of the payload built in
    ``optional_telemetry``. Mixing in ``str`` makes each member a plain
    string equal to its value.
    """

    # Reported with the running application version.
    VERSION = "version"
    # Reported when a new user registers.
    SIGN_UP = "sign_up"
    # Reported with a function name and its measured wall-clock duration.
    LATENCY = "latency"
    # Reported when something fails (e.g. an indexing attempt).
    FAILURE = "failure"
def get_or_generate_uuid() -> str:
    """Return the identifier stored under ``CUSTOMER_UUID_KEY``.

    The dynamic config store is consulted first. If no value exists yet
    (the store raises ``ConfigNotFoundError``), a fresh random UUID4 string
    is generated, written back to the store, and returned.
    """
    store = get_dynamic_config_store()
    try:
        stored_value = store.load(CUSTOMER_UUID_KEY)
    except ConfigNotFoundError:
        # First use: mint an identifier and persist it for later calls.
        new_id = str(uuid.uuid4())
        store.store(CUSTOMER_UUID_KEY, new_id)
        return new_id
    else:
        return cast(str, stored_value)
def optional_telemetry(record_type: RecordType, data: dict) -> None:
    """Send one telemetry event in the background, best-effort.

    Does nothing when ``DISABLE_TELEMETRY`` is set. Otherwise a daemon
    thread is started that builds the payload (event ``data``, the
    ``record_type`` and the UUID from ``get_or_generate_uuid``) and POSTs it
    as JSON to ``DANSWER_TELEMETRY_ENDPOINT``.

    Any failure, whether starting the thread, loading the UUID, or
    performing the HTTP request, is swallowed on purpose: telemetry must
    never interfere with the calling code path.

    Args:
        record_type: Category of the event being reported.
        data: Event-specific fields; must be JSON-serializable.
    """
    if DISABLE_TELEMETRY:
        return

    def telemetry_logic() -> None:
        # The guard must live *inside* the worker: exceptions raised on the
        # worker thread are not seen by a try/except around Thread.start()
        # and would otherwise be dumped to stderr by threading.excepthook.
        try:
            payload = {
                "data": data,
                "record": record_type,
                "customer_uuid": get_or_generate_uuid(),
            }
            requests.post(
                DANSWER_TELEMETRY_ENDPOINT,
                headers={"Content-Type": "application/json"},
                json=payload,
                # Without a timeout an unresponsive endpoint would keep this
                # thread blocked indefinitely; bound the wait instead.
                timeout=5,
            )
        except Exception:
            # Deliberately silent: telemetry is strictly best-effort.
            pass

    try:
        # Run in separate thread to have minimal overhead in main flows
        thread = threading.Thread(target=telemetry_logic, daemon=True)
        thread.start()
    except Exception:
        # Should never interfere with normal functions of Danswer
        # (e.g. the interpreter being unable to start a new thread).
        pass

View File

@ -8,6 +8,8 @@ from typing import cast
from typing import TypeVar
from danswer.utils.logger import setup_logger
from danswer.utils.telemetry import optional_telemetry
from danswer.utils.telemetry import RecordType
logger = setup_logger()
@ -21,8 +23,13 @@ def log_function_time(func_name: str | None = None) -> Callable[[F], F]:
def wrapped_func(*args: Any, **kwargs: Any) -> Any:
start_time = time.time()
result = func(*args, **kwargs)
elapsed_time = time.time() - start_time
logger.info(f"{func_name or func.__name__} took {elapsed_time} seconds")
elapsed_time_str = str(time.time() - start_time)
log_name = func_name or func.__name__
logger.info(f"{log_name} took {elapsed_time_str} seconds")
optional_telemetry(
record_type=RecordType.LATENCY,
data={"function": log_name, "latency": str(elapsed_time_str)},
)
return result
return cast(F, wrapped_func)
@ -44,8 +51,13 @@ def log_generator_function_time(func_name: str | None = None) -> Callable[[FG],
except StopIteration:
pass
finally:
elapsed_time = time.time() - start_time
logger.info(f"{func_name or func.__name__} took {elapsed_time} seconds")
elapsed_time_str = str(time.time() - start_time)
log_name = func_name or func.__name__
logger.info(f"{log_name} took {elapsed_time_str} seconds")
optional_telemetry(
record_type=RecordType.LATENCY,
data={"function": log_name, "latency": str(elapsed_time_str)},
)
return cast(FG, wrapped_func)

View File

@ -48,6 +48,9 @@ services:
- EDIT_KEYWORD_QUERY=${EDIT_KEYWORD_QUERY:-}
- MODEL_SERVER_HOST=${MODEL_SERVER_HOST:-}
- MODEL_SERVER_PORT=${MODEL_SERVER_PORT:-}
# Leave this on pretty please? Nothing sensitive is collected!
# https://docs.danswer.dev/more/telemetry
- DISABLE_TELEMETRY=${DISABLE_TELEMETRY:-}
# Set to debug to get more fine-grained logs
- LOG_LEVEL=${LOG_LEVEL:-info}
volumes: