first cut at anonymizing query history (#4123)

Co-authored-by: Richard Kuo <rkuo@rkuo.com>
This commit is contained in:
rkuo-danswer
2025-02-26 13:32:01 -08:00
committed by GitHub
parent ad0721ecd8
commit 25389c5120
8 changed files with 91 additions and 16 deletions

View File

@@ -2,6 +2,7 @@ import csv
import io import io
from datetime import datetime from datetime import datetime
from datetime import timezone from datetime import timezone
from http import HTTPStatus
from uuid import UUID from uuid import UUID
from fastapi import APIRouter from fastapi import APIRouter
@@ -21,8 +22,10 @@ from ee.onyx.server.query_history.models import QuestionAnswerPairSnapshot
from onyx.auth.users import current_admin_user from onyx.auth.users import current_admin_user
from onyx.auth.users import get_display_email from onyx.auth.users import get_display_email
from onyx.chat.chat_utils import create_chat_chain from onyx.chat.chat_utils import create_chat_chain
from onyx.configs.app_configs import ONYX_QUERY_HISTORY_TYPE
from onyx.configs.constants import MessageType from onyx.configs.constants import MessageType
from onyx.configs.constants import QAFeedbackType from onyx.configs.constants import QAFeedbackType
from onyx.configs.constants import QueryHistoryType
from onyx.configs.constants import SessionType from onyx.configs.constants import SessionType
from onyx.db.chat import get_chat_session_by_id from onyx.db.chat import get_chat_session_by_id
from onyx.db.chat import get_chat_sessions_by_user from onyx.db.chat import get_chat_sessions_by_user
@@ -35,6 +38,8 @@ from onyx.server.query_and_chat.models import ChatSessionsResponse
router = APIRouter() router = APIRouter()
# Placeholder that replaces `user_email` in query-history responses when
# ONYX_QUERY_HISTORY_TYPE is QueryHistoryType.ANONYMIZED. The ".invalid"
# top-level domain is reserved, so this can never collide with a real address.
ONYX_ANONYMIZED_EMAIL = "anonymous@anonymous.invalid"
def fetch_and_process_chat_session_history( def fetch_and_process_chat_session_history(
db_session: Session, db_session: Session,
@@ -107,6 +112,17 @@ def get_user_chat_sessions(
_: User | None = Depends(current_admin_user), _: User | None = Depends(current_admin_user),
db_session: Session = Depends(get_session), db_session: Session = Depends(get_session),
) -> ChatSessionsResponse: ) -> ChatSessionsResponse:
# we specifically don't allow this endpoint if "anonymized" since
# this is a direct query on the user id
if ONYX_QUERY_HISTORY_TYPE in [
QueryHistoryType.DISABLED,
QueryHistoryType.ANONYMIZED,
]:
raise HTTPException(
status_code=HTTPStatus.FORBIDDEN,
detail="Per user query history has been disabled by the administrator.",
)
try: try:
chat_sessions = get_chat_sessions_by_user( chat_sessions = get_chat_sessions_by_user(
user_id=user_id, deleted=False, db_session=db_session, limit=0 user_id=user_id, deleted=False, db_session=db_session, limit=0
@@ -141,6 +157,12 @@ def get_chat_session_history(
_: User | None = Depends(current_admin_user), _: User | None = Depends(current_admin_user),
db_session: Session = Depends(get_session), db_session: Session = Depends(get_session),
) -> PaginatedReturn[ChatSessionMinimal]: ) -> PaginatedReturn[ChatSessionMinimal]:
if ONYX_QUERY_HISTORY_TYPE == QueryHistoryType.DISABLED:
raise HTTPException(
status_code=HTTPStatus.FORBIDDEN,
detail="Query history has been disabled by the administrator.",
)
page_of_chat_sessions = get_page_of_chat_sessions( page_of_chat_sessions = get_page_of_chat_sessions(
page_num=page_num, page_num=page_num,
page_size=page_size, page_size=page_size,
@@ -157,11 +179,16 @@ def get_chat_session_history(
feedback_filter=feedback_type, feedback_filter=feedback_type,
) )
minimal_chat_sessions: list[ChatSessionMinimal] = []
for chat_session in page_of_chat_sessions:
minimal_chat_session = ChatSessionMinimal.from_chat_session(chat_session)
if ONYX_QUERY_HISTORY_TYPE == QueryHistoryType.ANONYMIZED:
minimal_chat_session.user_email = ONYX_ANONYMIZED_EMAIL
minimal_chat_sessions.append(minimal_chat_session)
return PaginatedReturn( return PaginatedReturn(
items=[ items=minimal_chat_sessions,
ChatSessionMinimal.from_chat_session(chat_session)
for chat_session in page_of_chat_sessions
],
total_items=total_filtered_chat_sessions_count, total_items=total_filtered_chat_sessions_count,
) )
@@ -172,6 +199,12 @@ def get_chat_session_admin(
_: User | None = Depends(current_admin_user), _: User | None = Depends(current_admin_user),
db_session: Session = Depends(get_session), db_session: Session = Depends(get_session),
) -> ChatSessionSnapshot: ) -> ChatSessionSnapshot:
if ONYX_QUERY_HISTORY_TYPE == QueryHistoryType.DISABLED:
raise HTTPException(
status_code=HTTPStatus.FORBIDDEN,
detail="Query history has been disabled by the administrator.",
)
try: try:
chat_session = get_chat_session_by_id( chat_session = get_chat_session_by_id(
chat_session_id=chat_session_id, chat_session_id=chat_session_id,
@@ -193,6 +226,9 @@ def get_chat_session_admin(
f"Could not create snapshot for chat session with id '{chat_session_id}'", f"Could not create snapshot for chat session with id '{chat_session_id}'",
) )
if ONYX_QUERY_HISTORY_TYPE == QueryHistoryType.ANONYMIZED:
snapshot.user_email = ONYX_ANONYMIZED_EMAIL
return snapshot return snapshot
@@ -203,6 +239,12 @@ def get_query_history_as_csv(
end: datetime | None = None, end: datetime | None = None,
db_session: Session = Depends(get_session), db_session: Session = Depends(get_session),
) -> StreamingResponse: ) -> StreamingResponse:
if ONYX_QUERY_HISTORY_TYPE == QueryHistoryType.DISABLED:
raise HTTPException(
status_code=HTTPStatus.FORBIDDEN,
detail="Query history has been disabled by the administrator.",
)
complete_chat_session_history = fetch_and_process_chat_session_history( complete_chat_session_history = fetch_and_process_chat_session_history(
db_session=db_session, db_session=db_session,
start=start or datetime.fromtimestamp(0, tz=timezone.utc), start=start or datetime.fromtimestamp(0, tz=timezone.utc),
@@ -213,6 +255,9 @@ def get_query_history_as_csv(
question_answer_pairs: list[QuestionAnswerPairSnapshot] = [] question_answer_pairs: list[QuestionAnswerPairSnapshot] = []
for chat_session_snapshot in complete_chat_session_history: for chat_session_snapshot in complete_chat_session_history:
if ONYX_QUERY_HISTORY_TYPE == QueryHistoryType.ANONYMIZED:
chat_session_snapshot.user_email = ONYX_ANONYMIZED_EMAIL
question_answer_pairs.extend( question_answer_pairs.extend(
QuestionAnswerPairSnapshot.from_chat_session_snapshot(chat_session_snapshot) QuestionAnswerPairSnapshot.from_chat_session_snapshot(chat_session_snapshot)
) )

View File

@@ -6,6 +6,7 @@ from typing import cast
from onyx.auth.schemas import AuthBackend from onyx.auth.schemas import AuthBackend
from onyx.configs.constants import AuthType from onyx.configs.constants import AuthType
from onyx.configs.constants import DocumentIndexType from onyx.configs.constants import DocumentIndexType
from onyx.configs.constants import QueryHistoryType
from onyx.file_processing.enums import HtmlBasedConnectorTransformLinksStrategy from onyx.file_processing.enums import HtmlBasedConnectorTransformLinksStrategy
##### #####
@@ -29,6 +30,9 @@ GENERATIVE_MODEL_ACCESS_CHECK_FREQ = int(
) # 1 day ) # 1 day
DISABLE_GENERATIVE_AI = os.environ.get("DISABLE_GENERATIVE_AI", "").lower() == "true" DISABLE_GENERATIVE_AI = os.environ.get("DISABLE_GENERATIVE_AI", "").lower() == "true"
# Query-history mode, taken from the ONYX_QUERY_HISTORY_TYPE environment
# variable (matched case-insensitively via .lower()). An unset or empty
# variable falls back to QueryHistoryType.NORMAL. Any other string that is not
# a QueryHistoryType value makes the enum lookup raise ValueError when this
# statement runs.
ONYX_QUERY_HISTORY_TYPE = QueryHistoryType(
    (os.environ.get("ONYX_QUERY_HISTORY_TYPE") or QueryHistoryType.NORMAL.value).lower()
)
##### #####
# Web Configs # Web Configs

View File

@@ -213,6 +213,12 @@ class AuthType(str, Enum):
CLOUD = "cloud" CLOUD = "cloud"
class QueryHistoryType(str, Enum):
    """Modes for the admin query-history feature.

    The string values are what the ONYX_QUERY_HISTORY_TYPE setting is parsed
    into, and what is exposed to the frontend through `Settings`.
    """

    # Query history is turned off; the query-history endpoints answer 403.
    DISABLED = "disabled"
    # History is available, but user emails are replaced with a placeholder
    # and the per-user session lookup is refused.
    ANONYMIZED = "anonymized"
    # Default mode: history is served without anonymization.
    NORMAL = "normal"
# Special characters for password validation # Special characters for password validation
PASSWORD_SPECIAL_CHARS = "!@#$%^&*()_+-=[]{}|;:,.<>?" PASSWORD_SPECIAL_CHARS = "!@#$%^&*()_+-=[]{}|;:,.<>?"

View File

@@ -4,6 +4,7 @@ from enum import Enum
from pydantic import BaseModel from pydantic import BaseModel
from onyx.configs.constants import NotificationType from onyx.configs.constants import NotificationType
from onyx.configs.constants import QueryHistoryType
from onyx.db.models import Notification as NotificationDBModel from onyx.db.models import Notification as NotificationDBModel
from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA
@@ -50,6 +51,7 @@ class Settings(BaseModel):
temperature_override_enabled: bool | None = False temperature_override_enabled: bool | None = False
auto_scroll: bool | None = False auto_scroll: bool | None = False
query_history_type: QueryHistoryType | None = None
class UserSettings(Settings): class UserSettings(Settings):

View File

@@ -1,3 +1,4 @@
from onyx.configs.app_configs import ONYX_QUERY_HISTORY_TYPE
from onyx.configs.constants import KV_SETTINGS_KEY from onyx.configs.constants import KV_SETTINGS_KEY
from onyx.configs.constants import OnyxRedisLocks from onyx.configs.constants import OnyxRedisLocks
from onyx.key_value_store.factory import get_kv_store from onyx.key_value_store.factory import get_kv_store
@@ -45,6 +46,7 @@ def load_settings() -> Settings:
anonymous_user_enabled = False anonymous_user_enabled = False
settings.anonymous_user_enabled = anonymous_user_enabled settings.anonymous_user_enabled = anonymous_user_enabled
settings.query_history_type = ONYX_QUERY_HISTORY_TYPE
return settings return settings

View File

@@ -4,6 +4,12 @@ export enum ApplicationStatus {
ACTIVE = "active", ACTIVE = "active",
} }
/**
 * Query-history mode reported by the backend in `Settings.query_history_type`.
 * The string values match the backend `QueryHistoryType` enum.
 */
export enum QueryHistoryType {
  // Query history is turned off.
  DISABLED = "disabled",
  // Query history is available with user emails anonymized.
  ANONYMIZED = "anonymized",
  // Query history is available without anonymization.
  NORMAL = "normal",
}
export interface Settings { export interface Settings {
anonymous_user_enabled: boolean; anonymous_user_enabled: boolean;
maximum_chat_retention_days: number | null; maximum_chat_retention_days: number | null;
@@ -14,6 +20,7 @@ export interface Settings {
application_status: ApplicationStatus; application_status: ApplicationStatus;
auto_scroll: boolean; auto_scroll: boolean;
temperature_override_enabled: boolean; temperature_override_enabled: boolean;
query_history_type: QueryHistoryType;
} }
export enum NotificationType { export enum NotificationType {

View File

@@ -359,6 +359,9 @@ export function ClientLayout({
), ),
link: "/admin/performance/usage", link: "/admin/performance/usage",
}, },
...(settings?.settings.query_history_type !==
"disabled"
? [
{ {
name: ( name: (
<div className="flex"> <div className="flex">
@@ -366,11 +369,15 @@ export function ClientLayout({
className="text-text-700" className="text-text-700"
size={18} size={18}
/> />
<div className="ml-1">Query History</div> <div className="ml-1">
Query History
</div>
</div> </div>
), ),
link: "/admin/performance/query-history", link: "/admin/performance/query-history",
}, },
]
: []),
{ {
name: ( name: (
<div className="flex"> <div className="flex">

View File

@@ -3,6 +3,7 @@ import {
EnterpriseSettings, EnterpriseSettings,
ApplicationStatus, ApplicationStatus,
Settings, Settings,
QueryHistoryType,
} from "@/app/admin/settings/interfaces"; } from "@/app/admin/settings/interfaces";
import { import {
CUSTOM_ANALYTICS_ENABLED, CUSTOM_ANALYTICS_ENABLED,
@@ -53,6 +54,7 @@ export async function fetchSettingsSS(): Promise<CombinedSettings | null> {
anonymous_user_enabled: false, anonymous_user_enabled: false,
pro_search_enabled: true, pro_search_enabled: true,
temperature_override_enabled: true, temperature_override_enabled: true,
query_history_type: QueryHistoryType.NORMAL,
}; };
} else { } else {
throw new Error( throw new Error(