Add is_cloud info to telemetry + get consistent customer_uuid's for a given tenant (#3684)

* Add is_cloud info to telemetry + get consistent customer_uuid's for a given tenant

* Address Richard's comments
This commit is contained in:
Chris Weaver
2025-01-15 18:43:21 -08:00
committed by GitHub
parent 8a4d762798
commit a05addec19
4 changed files with 33 additions and 7 deletions

View File

@@ -67,7 +67,7 @@ class Metric(BaseModel):
}
task_logger.info(json.dumps(data))
def emit(self) -> None:
def emit(self, tenant_id: str | None) -> None:
# Convert value to appropriate type
float_value = (
float(self.value) if isinstance(self.value, (int, float)) else None
@@ -104,6 +104,7 @@ class Metric(BaseModel):
optional_telemetry(
record_type=RecordType.METRIC,
data=data,
tenant_id=tenant_id,
)
@@ -432,7 +433,7 @@ def monitor_background_processes(self: Task, *, tenant_id: str | None) -> None:
metrics = metric_fn()
for metric in metrics:
metric.log()
metric.emit()
metric.emit(tenant_id)
if metric.key:
_mark_metric_as_emitted(redis_std, metric.key)

View File

@@ -215,7 +215,11 @@ async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]:
else:
setup_multitenant_onyx()
optional_telemetry(record_type=RecordType.VERSION, data={"version": __version__})
if not MULTI_TENANT:
# don't emit a metric for every pod rollover/restart
optional_telemetry(
record_type=RecordType.VERSION, data={"version": __version__}
)
if AUTH_RATE_LIMITING_ENABLED:
await setup_auth_limiter()

View File

@@ -590,6 +590,7 @@ def slack_usage_report(
record_type=RecordType.USAGE,
data={"action": action},
user_id=str(onyx_user.id) if onyx_user else "Non-Onyx-Or-No-Auth-User",
tenant_id=tenant_id,
)

View File

@@ -21,6 +21,7 @@ from onyx.utils.variable_functionality import (
)
from onyx.utils.variable_functionality import noop_fallback
from shared_configs.configs import MULTI_TENANT
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
_DANSWER_TELEMETRY_ENDPOINT = "https://telemetry.onyx.app/anonymous_telemetry"
_CACHED_UUID: str | None = None
@@ -36,7 +37,16 @@ class RecordType(str, Enum):
METRIC = "metric"
def get_or_generate_uuid() -> str:
# Build the customer id reported by telemetry when running multi-tenant.
# uuid5 is a name-based (hash-derived) UUID, so the same tenant_id always maps
# to the same string; nothing is generated randomly or stored here.
# NOTE(review): tenant_id must be a real string -- the caller falls back to a
# context variable when its own tenant_id is unset; confirm that is never None.
def _get_or_generate_customer_id_mt(tenant_id: str) -> str:
return str(uuid.uuid5(uuid.NAMESPACE_X500, tenant_id))
def get_or_generate_uuid(tenant_id: str | None = None) -> str:
# TODO: split out the whole "instance UUID" generation logic into a separate
# utility function. Telemetry should not be aware at all of how the UUID is
# generated/stored.
# TODO: handle potential race condition for UUID generation. Doesn't matter for
# the telemetry case, but if this is used generally it should be handled.
global _CACHED_UUID
if _CACHED_UUID is not None:
@@ -53,7 +63,7 @@ def get_or_generate_uuid() -> str:
return _CACHED_UUID
def _get_or_generate_instance_domain() -> str | None:
def _get_or_generate_instance_domain() -> str | None: #
global _CACHED_INSTANCE_DOMAIN
if _CACHED_INSTANCE_DOMAIN is not None:
@@ -76,7 +86,10 @@ def _get_or_generate_instance_domain() -> str | None:
def optional_telemetry(
record_type: RecordType, data: dict, user_id: str | None = None
record_type: RecordType,
data: dict,
user_id: str | None = None,
tenant_id: str | None = None,
) -> None:
if DISABLE_TELEMETRY:
return
@@ -85,7 +98,13 @@ def optional_telemetry(
def telemetry_logic() -> None:
try:
customer_uuid = get_or_generate_uuid()
customer_uuid = (
_get_or_generate_customer_id_mt(
tenant_id or CURRENT_TENANT_ID_CONTEXTVAR.get()
)
if MULTI_TENANT
else get_or_generate_uuid()
)
payload = {
"data": data,
"record": record_type,
@@ -93,6 +112,7 @@ def optional_telemetry(
# For cases where the User itself is None, a string is provided instead
"user_id": user_id,
"customer_uuid": customer_uuid,
"is_cloud": MULTI_TENANT,
}
if ENTERPRISE_EDITION_ENABLED:
payload["instance_domain"] = _get_or_generate_instance_domain()