mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-08-03 13:43:18 +02:00
Fix airtable connector w/ mt cloud + move telem logic to match new st… (#3868)
* Fix airtable connector w/ mt cloud + move telem logic to match new standard * Address Greptile comment * Small fixes/improvements * Revert back monitoring frequency * Small monitoring fix
This commit is contained in:
@@ -39,6 +39,7 @@ from onyx.redis.redis_pool import get_redis_client
|
|||||||
from onyx.redis.redis_pool import redis_lock_dump
|
from onyx.redis.redis_pool import redis_lock_dump
|
||||||
from onyx.utils.telemetry import optional_telemetry
|
from onyx.utils.telemetry import optional_telemetry
|
||||||
from onyx.utils.telemetry import RecordType
|
from onyx.utils.telemetry import RecordType
|
||||||
|
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
|
||||||
|
|
||||||
|
|
||||||
_MONITORING_SOFT_TIME_LIMIT = 60 * 5 # 5 minutes
|
_MONITORING_SOFT_TIME_LIMIT = 60 * 5 # 5 minutes
|
||||||
@@ -657,6 +658,9 @@ def monitor_background_processes(self: Task, *, tenant_id: str | None) -> None:
|
|||||||
- Syncing speed metrics
|
- Syncing speed metrics
|
||||||
- Worker status and task counts
|
- Worker status and task counts
|
||||||
"""
|
"""
|
||||||
|
if tenant_id is not None:
|
||||||
|
CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)
|
||||||
|
|
||||||
task_logger.info("Starting background monitoring")
|
task_logger.info("Starting background monitoring")
|
||||||
r = get_redis_client(tenant_id=tenant_id)
|
r = get_redis_client(tenant_id=tenant_id)
|
||||||
|
|
||||||
@@ -688,11 +692,13 @@ def monitor_background_processes(self: Task, *, tenant_id: str | None) -> None:
|
|||||||
metrics = metric_fn()
|
metrics = metric_fn()
|
||||||
for metric in metrics:
|
for metric in metrics:
|
||||||
# double check to make sure we aren't double-emitting metrics
|
# double check to make sure we aren't double-emitting metrics
|
||||||
if metric.key is not None and not _has_metric_been_emitted(
|
if metric.key is None or not _has_metric_been_emitted(
|
||||||
redis_std, metric.key
|
redis_std, metric.key
|
||||||
):
|
):
|
||||||
metric.log()
|
metric.log()
|
||||||
metric.emit(tenant_id)
|
metric.emit(tenant_id)
|
||||||
|
|
||||||
|
if metric.key is not None:
|
||||||
_mark_metric_as_emitted(redis_std, metric.key)
|
_mark_metric_as_emitted(redis_std, metric.key)
|
||||||
|
|
||||||
task_logger.info("Successfully collected background metrics")
|
task_logger.info("Successfully collected background metrics")
|
||||||
|
@@ -1,4 +1,6 @@
|
|||||||
|
import contextvars
|
||||||
from concurrent.futures import as_completed
|
from concurrent.futures import as_completed
|
||||||
|
from concurrent.futures import Future
|
||||||
from concurrent.futures import ThreadPoolExecutor
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
from typing import Any
|
from typing import Any
|
||||||
@@ -347,15 +349,19 @@ class AirtableConnector(LoadConnector):
|
|||||||
|
|
||||||
with ThreadPoolExecutor(max_workers=max_workers) as executor:
|
with ThreadPoolExecutor(max_workers=max_workers) as executor:
|
||||||
# Submit batch tasks
|
# Submit batch tasks
|
||||||
future_to_record = {
|
future_to_record: dict[Future, RecordDict] = {}
|
||||||
executor.submit(
|
for record in batch_records:
|
||||||
self._process_record,
|
# Capture the current context so that the thread gets the current tenant ID
|
||||||
record=record,
|
current_context = contextvars.copy_context()
|
||||||
table_schema=table_schema,
|
future_to_record[
|
||||||
primary_field_name=primary_field_name,
|
executor.submit(
|
||||||
): record
|
current_context.run,
|
||||||
for record in batch_records
|
self._process_record,
|
||||||
}
|
record=record,
|
||||||
|
table_schema=table_schema,
|
||||||
|
primary_field_name=primary_field_name,
|
||||||
|
)
|
||||||
|
] = record
|
||||||
|
|
||||||
# Wait for all tasks in this batch to complete
|
# Wait for all tasks in this batch to complete
|
||||||
for future in as_completed(future_to_record):
|
for future in as_completed(future_to_record):
|
||||||
|
@@ -2,7 +2,7 @@ from onyx.key_value_store.interface import KeyValueStore
|
|||||||
from onyx.key_value_store.store import PgRedisKVStore
|
from onyx.key_value_store.store import PgRedisKVStore
|
||||||
|
|
||||||
|
|
||||||
def get_kv_store(tenant_id: str | None = None) -> KeyValueStore:
|
def get_kv_store() -> KeyValueStore:
|
||||||
# In the Multi Tenant case, the tenant context is picked up automatically, it does not need to be passed in
|
# In the Multi Tenant case, the tenant context is picked up automatically, it does not need to be passed in
|
||||||
# It's read from the global thread level variable
|
# It's read from the global thread level variable
|
||||||
return PgRedisKVStore(tenant_id=tenant_id)
|
return PgRedisKVStore()
|
||||||
|
@@ -18,7 +18,7 @@ from onyx.utils.logger import setup_logger
|
|||||||
from onyx.utils.special_types import JSON_ro
|
from onyx.utils.special_types import JSON_ro
|
||||||
from shared_configs.configs import MULTI_TENANT
|
from shared_configs.configs import MULTI_TENANT
|
||||||
from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA
|
from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA
|
||||||
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
|
from shared_configs.contextvars import get_current_tenant_id
|
||||||
|
|
||||||
logger = setup_logger()
|
logger = setup_logger()
|
||||||
|
|
||||||
@@ -28,10 +28,8 @@ KV_REDIS_KEY_EXPIRATION = 60 * 60 * 24 # 1 Day
|
|||||||
|
|
||||||
|
|
||||||
class PgRedisKVStore(KeyValueStore):
|
class PgRedisKVStore(KeyValueStore):
|
||||||
def __init__(
|
def __init__(self, redis_client: Redis | None = None) -> None:
|
||||||
self, redis_client: Redis | None = None, tenant_id: str | None = None
|
self.tenant_id = get_current_tenant_id()
|
||||||
) -> None:
|
|
||||||
self.tenant_id = tenant_id or CURRENT_TENANT_ID_CONTEXTVAR.get()
|
|
||||||
|
|
||||||
# If no redis_client is provided, fall back to the context var
|
# If no redis_client is provided, fall back to the context var
|
||||||
if redis_client is not None:
|
if redis_client is not None:
|
||||||
|
@@ -109,7 +109,9 @@ from onyx.utils.variable_functionality import global_version
|
|||||||
from onyx.utils.variable_functionality import set_is_ee_based_on_env_variable
|
from onyx.utils.variable_functionality import set_is_ee_based_on_env_variable
|
||||||
from shared_configs.configs import CORS_ALLOWED_ORIGIN
|
from shared_configs.configs import CORS_ALLOWED_ORIGIN
|
||||||
from shared_configs.configs import MULTI_TENANT
|
from shared_configs.configs import MULTI_TENANT
|
||||||
|
from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA
|
||||||
from shared_configs.configs import SENTRY_DSN
|
from shared_configs.configs import SENTRY_DSN
|
||||||
|
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
|
||||||
|
|
||||||
logger = setup_logger()
|
logger = setup_logger()
|
||||||
|
|
||||||
@@ -212,7 +214,8 @@ async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]:
|
|||||||
|
|
||||||
if not MULTI_TENANT:
|
if not MULTI_TENANT:
|
||||||
# We cache this at the beginning so there is no delay in the first telemetry
|
# We cache this at the beginning so there is no delay in the first telemetry
|
||||||
get_or_generate_uuid(tenant_id=None)
|
CURRENT_TENANT_ID_CONTEXTVAR.set(POSTGRES_DEFAULT_SCHEMA)
|
||||||
|
get_or_generate_uuid()
|
||||||
|
|
||||||
# If we are multi-tenant, we need to only set up initial public tables
|
# If we are multi-tenant, we need to only set up initial public tables
|
||||||
with Session(engine) as db_session:
|
with Session(engine) as db_session:
|
||||||
|
@@ -1,3 +1,4 @@
|
|||||||
|
import contextvars
|
||||||
import threading
|
import threading
|
||||||
import uuid
|
import uuid
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
@@ -41,7 +42,7 @@ def _get_or_generate_customer_id_mt(tenant_id: str) -> str:
|
|||||||
return str(uuid.uuid5(uuid.NAMESPACE_X500, tenant_id))
|
return str(uuid.uuid5(uuid.NAMESPACE_X500, tenant_id))
|
||||||
|
|
||||||
|
|
||||||
def get_or_generate_uuid(tenant_id: str | None) -> str:
|
def get_or_generate_uuid() -> str:
|
||||||
# TODO: split out the whole "instance UUID" generation logic into a separate
|
# TODO: split out the whole "instance UUID" generation logic into a separate
|
||||||
# utility function. Telemetry should not be aware at all of how the UUID is
|
# utility function. Telemetry should not be aware at all of how the UUID is
|
||||||
# generated/stored.
|
# generated/stored.
|
||||||
@@ -52,7 +53,7 @@ def get_or_generate_uuid(tenant_id: str | None) -> str:
|
|||||||
if _CACHED_UUID is not None:
|
if _CACHED_UUID is not None:
|
||||||
return _CACHED_UUID
|
return _CACHED_UUID
|
||||||
|
|
||||||
kv_store = get_kv_store(tenant_id=tenant_id)
|
kv_store = get_kv_store()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
_CACHED_UUID = cast(str, kv_store.load(KV_CUSTOMER_UUID_KEY))
|
_CACHED_UUID = cast(str, kv_store.load(KV_CUSTOMER_UUID_KEY))
|
||||||
@@ -63,18 +64,18 @@ def get_or_generate_uuid(tenant_id: str | None) -> str:
|
|||||||
return _CACHED_UUID
|
return _CACHED_UUID
|
||||||
|
|
||||||
|
|
||||||
def _get_or_generate_instance_domain(tenant_id: str | None = None) -> str | None: #
|
def _get_or_generate_instance_domain() -> str | None: #
|
||||||
global _CACHED_INSTANCE_DOMAIN
|
global _CACHED_INSTANCE_DOMAIN
|
||||||
|
|
||||||
if _CACHED_INSTANCE_DOMAIN is not None:
|
if _CACHED_INSTANCE_DOMAIN is not None:
|
||||||
return _CACHED_INSTANCE_DOMAIN
|
return _CACHED_INSTANCE_DOMAIN
|
||||||
|
|
||||||
kv_store = get_kv_store(tenant_id=tenant_id)
|
kv_store = get_kv_store()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
_CACHED_INSTANCE_DOMAIN = cast(str, kv_store.load(KV_INSTANCE_DOMAIN_KEY))
|
_CACHED_INSTANCE_DOMAIN = cast(str, kv_store.load(KV_INSTANCE_DOMAIN_KEY))
|
||||||
except KvKeyNotFoundError:
|
except KvKeyNotFoundError:
|
||||||
with get_session_with_tenant(tenant_id=tenant_id) as db_session:
|
with get_session_with_tenant() as db_session:
|
||||||
first_user = db_session.query(User).first()
|
first_user = db_session.query(User).first()
|
||||||
if first_user:
|
if first_user:
|
||||||
_CACHED_INSTANCE_DOMAIN = first_user.email.split("@")[-1]
|
_CACHED_INSTANCE_DOMAIN = first_user.email.split("@")[-1]
|
||||||
@@ -103,7 +104,7 @@ def optional_telemetry(
|
|||||||
customer_uuid = (
|
customer_uuid = (
|
||||||
_get_or_generate_customer_id_mt(tenant_id)
|
_get_or_generate_customer_id_mt(tenant_id)
|
||||||
if MULTI_TENANT
|
if MULTI_TENANT
|
||||||
else get_or_generate_uuid(tenant_id)
|
else get_or_generate_uuid()
|
||||||
)
|
)
|
||||||
payload = {
|
payload = {
|
||||||
"data": data,
|
"data": data,
|
||||||
@@ -115,9 +116,7 @@ def optional_telemetry(
|
|||||||
"is_cloud": MULTI_TENANT,
|
"is_cloud": MULTI_TENANT,
|
||||||
}
|
}
|
||||||
if ENTERPRISE_EDITION_ENABLED:
|
if ENTERPRISE_EDITION_ENABLED:
|
||||||
payload["instance_domain"] = _get_or_generate_instance_domain(
|
payload["instance_domain"] = _get_or_generate_instance_domain()
|
||||||
tenant_id
|
|
||||||
)
|
|
||||||
requests.post(
|
requests.post(
|
||||||
_DANSWER_TELEMETRY_ENDPOINT,
|
_DANSWER_TELEMETRY_ENDPOINT,
|
||||||
headers={"Content-Type": "application/json"},
|
headers={"Content-Type": "application/json"},
|
||||||
@@ -128,8 +127,12 @@ def optional_telemetry(
|
|||||||
# This way it silences all thread level logging as well
|
# This way it silences all thread level logging as well
|
||||||
pass
|
pass
|
||||||
|
|
||||||
# Run in separate thread to have minimal overhead in main flows
|
# Run in separate thread with the same context as the current thread
|
||||||
thread = threading.Thread(target=telemetry_logic, daemon=True)
|
# This is to ensure that the thread gets the current tenant ID
|
||||||
|
current_context = contextvars.copy_context()
|
||||||
|
thread = threading.Thread(
|
||||||
|
target=lambda: current_context.run(telemetry_logic), daemon=True
|
||||||
|
)
|
||||||
thread.start()
|
thread.start()
|
||||||
except Exception:
|
except Exception:
|
||||||
# Should never interfere with normal functions of Onyx
|
# Should never interfere with normal functions of Onyx
|
||||||
|
@@ -6,3 +6,13 @@ from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA
|
|||||||
CURRENT_TENANT_ID_CONTEXTVAR = contextvars.ContextVar(
|
CURRENT_TENANT_ID_CONTEXTVAR = contextvars.ContextVar(
|
||||||
"current_tenant_id", default=POSTGRES_DEFAULT_SCHEMA
|
"current_tenant_id", default=POSTGRES_DEFAULT_SCHEMA
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
"""Utils related to contextvars"""
|
||||||
|
|
||||||
|
|
||||||
|
def get_current_tenant_id() -> str:
|
||||||
|
tenant_id = CURRENT_TENANT_ID_CONTEXTVAR.get()
|
||||||
|
if tenant_id is None:
|
||||||
|
raise RuntimeError("Tenant ID is not set. This should never happen.")
|
||||||
|
return tenant_id
|
||||||
|
Reference in New Issue
Block a user