From 2b9a751b96f7c730890c8dda6714ddcbd4402b45 Mon Sep 17 00:00:00 2001 From: rkuo-danswer Date: Thu, 24 Oct 2024 12:50:09 -0700 Subject: [PATCH] working chat feedback dump script (with api addition) (#2891) * working chat feedback dump script (with api addition) * mypy fix * comment out pydantic models (but leave for reference) * small code review tweaks * bump to clear vercel issue? --- .../ee/danswer/server/query_history/api.py | 33 +++ backend/scripts/chat_feedback_dump.py | 239 ++++++++++++++++++ 2 files changed, 272 insertions(+) create mode 100644 backend/scripts/chat_feedback_dump.py diff --git a/backend/ee/danswer/server/query_history/api.py b/backend/ee/danswer/server/query_history/api.py index 1411f973e..f50b9cc52 100644 --- a/backend/ee/danswer/server/query_history/api.py +++ b/backend/ee/danswer/server/query_history/api.py @@ -20,10 +20,13 @@ from danswer.configs.constants import MessageType from danswer.configs.constants import QAFeedbackType from danswer.configs.constants import SessionType from danswer.db.chat import get_chat_session_by_id +from danswer.db.chat import get_chat_sessions_by_user from danswer.db.engine import get_session from danswer.db.models import ChatMessage from danswer.db.models import ChatSession from danswer.db.models import User +from danswer.server.query_and_chat.models import ChatSessionDetails +from danswer.server.query_and_chat.models import ChatSessionsResponse from ee.danswer.db.query_history import fetch_chat_sessions_eagerly_by_time router = APIRouter() @@ -330,6 +333,36 @@ def snapshot_from_chat_session( ) +@router.get("/admin/chat-sessions") +def get_user_chat_sessions( + user_id: UUID, + _: User | None = Depends(current_admin_user), + db_session: Session = Depends(get_session), +) -> ChatSessionsResponse: + try: + chat_sessions = get_chat_sessions_by_user( + user_id=user_id, deleted=False, db_session=db_session, limit=0 + ) + + except ValueError: + raise ValueError("Chat session does not exist or has been deleted") + + return ChatSessionsResponse( + sessions=[ + ChatSessionDetails( + id=chat.id, + name=chat.description, + persona_id=chat.persona_id, + time_created=chat.time_created.isoformat(), + shared_status=chat.shared_status, + folder_id=chat.folder_id, + current_alternate_model=chat.current_alternate_model, + ) + for chat in chat_sessions + ] + ) + + @router.get("/admin/chat-session-history") def get_chat_session_history( feedback_type: QAFeedbackType | None = None, diff --git a/backend/scripts/chat_feedback_dump.py b/backend/scripts/chat_feedback_dump.py new file mode 100644 index 000000000..f0d6d3cbb --- /dev/null +++ b/backend/scripts/chat_feedback_dump.py @@ -0,0 +1,239 @@ +# This file is used to demonstrate how to use the backend APIs directly +# to query out feedback for all messages +import argparse +import logging +from logging import getLogger +from typing import Any +from uuid import UUID + +import requests + +from danswer.server.manage.models import AllUsersResponse +from danswer.server.query_and_chat.models import ChatSessionsResponse +from ee.danswer.server.query_history.api import ChatSessionSnapshot + +# Configure the logger +logging.basicConfig( + level=logging.INFO, # Set the log level (DEBUG, INFO, WARNING, ERROR, CRITICAL) + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", # Log format + handlers=[logging.StreamHandler()], # Output logs to console +) + +logger = getLogger(__name__) + +# uncomment the following pydantic models if you need the script to be independent +# from pydantic import BaseModel +# from datetime import datetime +# from enum import Enum + +# class UserRole(str, Enum): +# """ +# User roles +# - Basic can't perform any admin actions +# - Admin can perform all admin actions +# - Curator can perform admin actions for +# groups they are curators of +# - Global Curator can perform admin actions +# for all groups they are a member of +# """ + +# BASIC = "basic" +# ADMIN = "admin" +# CURATOR = "curator" +# GLOBAL_CURATOR = "global_curator" + + +# class UserStatus(str, Enum): +# LIVE = "live" +# INVITED = "invited" +# DEACTIVATED = "deactivated" + + +# class FullUserSnapshot(BaseModel): +# id: UUID +# email: str +# role: UserRole +# status: UserStatus + + +# class InvitedUserSnapshot(BaseModel): +# email: str + + +# class AllUsersResponse(BaseModel): +# accepted: list[FullUserSnapshot] +# invited: list[InvitedUserSnapshot] +# accepted_pages: int +# invited_pages: int + + +# class ChatSessionSharedStatus(str, Enum): +# PUBLIC = "public" +# PRIVATE = "private" + + +# class ChatSessionDetails(BaseModel): +# id: UUID +# name: str +# persona_id: int | None = None +# time_created: str +# shared_status: ChatSessionSharedStatus +# folder_id: int | None = None +# current_alternate_model: str | None = None + + +# class ChatSessionsResponse(BaseModel): +# sessions: list[ChatSessionDetails] + + +# class SessionType(str, Enum): +# CHAT = "Chat" +# SEARCH = "Search" +# SLACK = "Slack" + + +# class AbridgedSearchDoc(BaseModel): +# """A subset of the info present in `SearchDoc`""" + +# document_id: str +# semantic_identifier: str +# link: str | None + + +# class QAFeedbackType(str, Enum): +# LIKE = "like" # User likes the answer, used for metrics +# DISLIKE = "dislike" # User dislikes the answer, used for metrics + + +# class MessageType(str, Enum): +# # Using OpenAI standards, Langchain equivalent shown in comment +# # System message is always constructed on the fly, not saved +# SYSTEM = "system" # SystemMessage +# USER = "user" # HumanMessage +# ASSISTANT = "assistant" # AIMessage + + +# class MessageSnapshot(BaseModel): +# message: str +# message_type: MessageType +# documents: list[AbridgedSearchDoc] +# feedback_type: QAFeedbackType | None +# feedback_text: str | None +# time_created: datetime + + +# class ChatSessionSnapshot(BaseModel): +# id: UUID +# user_email: str +# name: str | None +# messages: list[MessageSnapshot] +# persona_name: str | None +# time_created: datetime +# flow_type: SessionType + + +def create_new_chat_session(danswer_url: str, api_key: str | None) -> int: + headers = {"Authorization": f"Bearer {api_key}"} if api_key else None + session_endpoint = danswer_url + "/api/chat/create-chat-session" + + response = requests.get(session_endpoint, headers=headers) + response.raise_for_status() + + new_session_id = response.json()["chat_session_id"] + return new_session_id + + +def manage_users(danswer_url: str, headers: dict[str, str] | None) -> AllUsersResponse: + endpoint = danswer_url + "/manage/users" + + response = requests.get( + endpoint, + headers=headers, + ) + response.raise_for_status() + + all_users = AllUsersResponse(**response.json()) + return all_users + + +def get_chat_sessions( + danswer_url: str, headers: dict[str, str] | None, user_id: UUID +) -> ChatSessionsResponse: + endpoint = danswer_url + "/admin/chat-sessions" + + params: dict[str, Any] = {"user_id": user_id} + response = requests.get( + endpoint, + params=params, + headers=headers, + ) + response.raise_for_status() + + sessions = ChatSessionsResponse(**response.json()) + return sessions + + +def get_session_history( + danswer_url: str, headers: dict[str, str] | None, session_id: UUID +) -> ChatSessionSnapshot: + endpoint = danswer_url + f"/admin/chat-session-history/{session_id}" + + response = requests.get( + endpoint, + headers=headers, + ) + response.raise_for_status() + + sessions = ChatSessionSnapshot(**response.json()) + return sessions + + +def process_all_chat_feedback(danswer_url: str, api_key: str | None) -> None: + headers = {"Authorization": f"Bearer {api_key}"} if api_key else None + + all_users = manage_users(danswer_url, headers) + if not all_users: + raise RuntimeError("manage_users returned None") + + logger.info(f"Accepted users: {len(all_users.accepted)}") + + user_ids: list[UUID] = [user.id for user in all_users.accepted] + + for user_id in user_ids: + r_sessions = get_chat_sessions(danswer_url, headers, user_id) + logger.info(f"user={user_id} num_sessions={len(r_sessions.sessions)}") + for session in r_sessions.sessions: + try: + s = get_session_history(danswer_url, headers, session.id) + except requests.exceptions.HTTPError: + logger.exception("get_session_history failed.") + + for m in s.messages: + logger.info( + f"user={user_id} " + f"session={session.id} " + f"message={m.message} " + f"feedback_type={m.feedback_type} " + f"feedback_text={m.feedback_text}" + ) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Sample API Usage - Chat Feedback") + parser.add_argument( + "--url", + type=str, + default="http://localhost:8080", + help="Danswer URL, should point to Danswer nginx.", + ) + + # Not needed if Auth is disabled? + # Or for Danswer MIT Edition API key must be replaced with session cookie + parser.add_argument( + "--api-key", + type=str, + help="Danswer Admin Level API key", + ) + + args = parser.parse_args() + process_all_chat_feedback(danswer_url=args.url, api_key=args.api_key)