working chat feedback dump script (with api addition) (#2891)

* working chat feedback dump script (with api addition)

* mypy fix

* comment out pydantic models (but leave for reference)

* small code review tweaks

* bump to clear vercel issue?
This commit is contained in:
rkuo-danswer 2024-10-24 12:50:09 -07:00 committed by GitHub
parent 1b6b134722
commit 2b9a751b96
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 272 additions and 0 deletions

View File

@ -20,10 +20,13 @@ from danswer.configs.constants import MessageType
from danswer.configs.constants import QAFeedbackType
from danswer.configs.constants import SessionType
from danswer.db.chat import get_chat_session_by_id
from danswer.db.chat import get_chat_sessions_by_user
from danswer.db.engine import get_session
from danswer.db.models import ChatMessage
from danswer.db.models import ChatSession
from danswer.db.models import User
from danswer.server.query_and_chat.models import ChatSessionDetails
from danswer.server.query_and_chat.models import ChatSessionsResponse
from ee.danswer.db.query_history import fetch_chat_sessions_eagerly_by_time
router = APIRouter()
@ -330,6 +333,36 @@ def snapshot_from_chat_session(
)
@router.get("/admin/chat-sessions")
def get_user_chat_sessions(
user_id: UUID,
_: User | None = Depends(current_admin_user),
db_session: Session = Depends(get_session),
) -> ChatSessionsResponse:
try:
chat_sessions = get_chat_sessions_by_user(
user_id=user_id, deleted=False, db_session=db_session, limit=0
)
except ValueError:
raise ValueError("Chat session does not exist or has been deleted")
return ChatSessionsResponse(
sessions=[
ChatSessionDetails(
id=chat.id,
name=chat.description,
persona_id=chat.persona_id,
time_created=chat.time_created.isoformat(),
shared_status=chat.shared_status,
folder_id=chat.folder_id,
current_alternate_model=chat.current_alternate_model,
)
for chat in chat_sessions
]
)
@router.get("/admin/chat-session-history")
def get_chat_session_history(
feedback_type: QAFeedbackType | None = None,

View File

@ -0,0 +1,239 @@
# This file is used to demonstrate how to use the backend APIs directly
# to query out feedback for all messages
import argparse
import logging
from logging import getLogger
from typing import Any
from uuid import UUID
import requests
from danswer.server.manage.models import AllUsersResponse
from danswer.server.query_and_chat.models import ChatSessionsResponse
from ee.danswer.server.query_history.api import ChatSessionSnapshot
# Configure the logger
logging.basicConfig(
level=logging.INFO, # Set the log level (DEBUG, INFO, WARNING, ERROR, CRITICAL)
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", # Log format
handlers=[logging.StreamHandler()], # Output logs to console
)
logger = getLogger(__name__)
# uncomment the following pydantic models if you need the script to be independent
# from pydantic import BaseModel
# from datetime import datetime
# from enum import Enum
# class UserRole(str, Enum):
# """
# User roles
# - Basic can't perform any admin actions
# - Admin can perform all admin actions
# - Curator can perform admin actions for
# groups they are curators of
# - Global Curator can perform admin actions
# for all groups they are a member of
# """
# BASIC = "basic"
# ADMIN = "admin"
# CURATOR = "curator"
# GLOBAL_CURATOR = "global_curator"
# class UserStatus(str, Enum):
# LIVE = "live"
# INVITED = "invited"
# DEACTIVATED = "deactivated"
# class FullUserSnapshot(BaseModel):
# id: UUID
# email: str
# role: UserRole
# status: UserStatus
# class InvitedUserSnapshot(BaseModel):
# email: str
# class AllUsersResponse(BaseModel):
# accepted: list[FullUserSnapshot]
# invited: list[InvitedUserSnapshot]
# accepted_pages: int
# invited_pages: int
# class ChatSessionSharedStatus(str, Enum):
# PUBLIC = "public"
# PRIVATE = "private"
# class ChatSessionDetails(BaseModel):
# id: UUID
# name: str
# persona_id: int | None = None
# time_created: str
# shared_status: ChatSessionSharedStatus
# folder_id: int | None = None
# current_alternate_model: str | None = None
# class ChatSessionsResponse(BaseModel):
# sessions: list[ChatSessionDetails]
# class SessionType(str, Enum):
# CHAT = "Chat"
# SEARCH = "Search"
# SLACK = "Slack"
# class AbridgedSearchDoc(BaseModel):
# """A subset of the info present in `SearchDoc`"""
# document_id: str
# semantic_identifier: str
# link: str | None
# class QAFeedbackType(str, Enum):
# LIKE = "like" # User likes the answer, used for metrics
# DISLIKE = "dislike" # User dislikes the answer, used for metrics
# class MessageType(str, Enum):
# # Using OpenAI standards, Langchain equivalent shown in comment
# # System message is always constructed on the fly, not saved
# SYSTEM = "system" # SystemMessage
# USER = "user" # HumanMessage
# ASSISTANT = "assistant" # AIMessage
# class MessageSnapshot(BaseModel):
# message: str
# message_type: MessageType
# documents: list[AbridgedSearchDoc]
# feedback_type: QAFeedbackType | None
# feedback_text: str | None
# time_created: datetime
# class ChatSessionSnapshot(BaseModel):
# id: UUID
# user_email: str
# name: str | None
# messages: list[MessageSnapshot]
# persona_name: str | None
# time_created: datetime
# flow_type: SessionType
def create_new_chat_session(danswer_url: str, api_key: str | None) -> int:
headers = {"Authorization": f"Bearer {api_key}"} if api_key else None
session_endpoint = danswer_url + "/api/chat/create-chat-session"
response = requests.get(session_endpoint, headers=headers)
response.raise_for_status()
new_session_id = response.json()["chat_session_id"]
return new_session_id
def manage_users(danswer_url: str, headers: dict[str, str] | None) -> AllUsersResponse:
endpoint = danswer_url + "/manage/users"
response = requests.get(
endpoint,
headers=headers,
)
response.raise_for_status()
all_users = AllUsersResponse(**response.json())
return all_users
def get_chat_sessions(
danswer_url: str, headers: dict[str, str] | None, user_id: UUID
) -> ChatSessionsResponse:
endpoint = danswer_url + "/admin/chat-sessions"
params: dict[str, Any] = {"user_id": user_id}
response = requests.get(
endpoint,
params=params,
headers=headers,
)
response.raise_for_status()
sessions = ChatSessionsResponse(**response.json())
return sessions
def get_session_history(
danswer_url: str, headers: dict[str, str] | None, session_id: UUID
) -> ChatSessionSnapshot:
endpoint = danswer_url + f"/admin/chat-session-history/{session_id}"
response = requests.get(
endpoint,
headers=headers,
)
response.raise_for_status()
sessions = ChatSessionSnapshot(**response.json())
return sessions
def process_all_chat_feedback(danswer_url: str, api_key: str | None) -> None:
headers = {"Authorization": f"Bearer {api_key}"} if api_key else None
all_users = manage_users(danswer_url, headers)
if not all_users:
raise RuntimeError("manage_users returned None")
logger.info(f"Accepted users: {len(all_users.accepted)}")
user_ids: list[UUID] = [user.id for user in all_users.accepted]
for user_id in user_ids:
r_sessions = get_chat_sessions(danswer_url, headers, user_id)
logger.info(f"user={user_id} num_sessions={len(r_sessions.sessions)}")
for session in r_sessions.sessions:
try:
s = get_session_history(danswer_url, headers, session.id)
except requests.exceptions.HTTPError:
logger.exception("get_session_history failed.")
for m in s.messages:
logger.info(
f"user={user_id} "
f"session={session.id} "
f"message={m.message} "
f"feedback_type={m.feedback_type} "
f"feedback_text={m.feedback_text}"
)
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Sample API Usage - Chat Feedback")
parser.add_argument(
"--url",
type=str,
default="http://localhost:8080",
help="Danswer URL, should point to Danswer nginx.",
)
# Not needed if Auth is disabled?
# Or for Danswer MIT Edition API key must be replaced with session cookie
parser.add_argument(
"--api-key",
type=str,
help="Danswer Admin Level API key",
)
args = parser.parse_args()
process_all_chat_feedback(danswer_url=args.url, api_key=args.api_key)