Admin Analytics/Query History dashboards (#6)

Chris Weaver
2023-10-21 20:02:59 -07:00
parent 428f5edd21
commit e9f273d99a
24 changed files with 1151 additions and 0 deletions

View File

@@ -0,0 +1,64 @@
import datetime
from collections.abc import Sequence
from uuid import UUID
from sqlalchemy import case
from sqlalchemy import cast
from sqlalchemy import Date
from sqlalchemy import func
from sqlalchemy import select
from sqlalchemy.orm import Session
from danswer.configs.constants import QAFeedbackType
from danswer.db.models import QueryEvent


def fetch_query_analytics(
db_session: Session,
start: datetime.datetime,
end: datetime.datetime,
) -> Sequence[tuple[int, int, int, datetime.date]]:
stmt = (
select(
func.count(QueryEvent.id),
func.sum(case((QueryEvent.feedback == QAFeedbackType.LIKE, 1), else_=0)),
func.sum(case((QueryEvent.feedback == QAFeedbackType.DISLIKE, 1), else_=0)),
cast(QueryEvent.time_created, Date),
)
.where(
QueryEvent.time_created >= start,
)
.where(
QueryEvent.time_created <= end,
)
.group_by(cast(QueryEvent.time_created, Date))
.order_by(cast(QueryEvent.time_created, Date))
)
return db_session.execute(stmt).all() # type: ignore


def fetch_per_user_query_analytics(
db_session: Session,
start: datetime.datetime,
end: datetime.datetime,
) -> Sequence[tuple[int, int, int, datetime.date, UUID]]:
stmt = (
select(
func.count(QueryEvent.id),
func.sum(case((QueryEvent.feedback == QAFeedbackType.LIKE, 1), else_=0)),
func.sum(case((QueryEvent.feedback == QAFeedbackType.DISLIKE, 1), else_=0)),
cast(QueryEvent.time_created, Date),
QueryEvent.user_id,
)
.where(
QueryEvent.time_created >= start,
)
.where(
QueryEvent.time_created <= end,
)
.group_by(cast(QueryEvent.time_created, Date), QueryEvent.user_id)
.order_by(cast(QueryEvent.time_created, Date), QueryEvent.user_id)
)
return db_session.execute(stmt).all() # type: ignore
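
As a rough usage sketch, these two helpers can be called with any SQLAlchemy session; the connection string below is a placeholder and not part of this commit (the API layer instead obtains its session via the get_session dependency):

import datetime

from sqlalchemy import create_engine
from sqlalchemy.orm import Session

from ee.danswer.db.analytics import fetch_query_analytics

# Placeholder connection string for illustration only.
engine = create_engine("postgresql://postgres:password@localhost:5432/danswer")

end = datetime.datetime.utcnow()
start = end - datetime.timedelta(days=7)

with Session(engine) as db_session:
    # One row per calendar day: query count plus like/dislike totals.
    rows = fetch_query_analytics(db_session=db_session, start=start, end=end)
    for total_queries, total_likes, total_dislikes, date in rows:
        print(f"{date}: {total_queries} queries, {total_likes} likes, {total_dislikes} dislikes")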

View File

@@ -0,0 +1,14 @@
from collections.abc import Sequence
from sqlalchemy import select
from sqlalchemy.orm import Session
from danswer.db.models import Document


def fetch_documents_from_ids(
db_session: Session, document_ids: list[str]
) -> Sequence[Document]:
return db_session.scalars(
select(Document).where(Document.id.in_(document_ids))
).all()
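
A minimal, self-contained sketch of calling this lookup helper; the connection string and document ids are hypothetical placeholders:

from sqlalchemy import create_engine
from sqlalchemy.orm import Session

from ee.danswer.db.document import fetch_documents_from_ids

# Placeholder connection string for illustration only.
engine = create_engine("postgresql://postgres:password@localhost:5432/danswer")

with Session(engine) as db_session:
    # Hypothetical ids, e.g. values pulled from QueryEvent.retrieved_document_ids.
    documents = fetch_documents_from_ids(db_session, ["doc-id-1", "doc-id-2"])
    for document in documents:
        print(document.id, document.semantic_id, document.link)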

View File

@@ -0,0 +1,57 @@
import datetime
from collections.abc import Sequence
from typing import cast
from typing import Literal
from sqlalchemy import or_
from sqlalchemy import select
from sqlalchemy.orm import Session
from sqlalchemy.orm.attributes import InstrumentedAttribute
from danswer.configs.constants import QAFeedbackType
from danswer.db.models import QueryEvent

SortByOptions = Literal["time_created", "feedback"]


def fetch_query_history(
db_session: Session,
start: datetime.datetime,
end: datetime.datetime,
query: str | None = None,
feedback_type: QAFeedbackType | None = None,
sort_by_field: SortByOptions = "time_created",
sort_by_direction: Literal["asc", "desc"] = "desc",
offset: int = 0,
limit: int = 500,
) -> Sequence[QueryEvent]:
stmt = (
select(QueryEvent)
.where(
QueryEvent.time_created >= start,
)
.where(
QueryEvent.time_created <= end,
)
)
order_by_field = cast(InstrumentedAttribute, getattr(QueryEvent, sort_by_field))
if sort_by_direction == "asc":
stmt = stmt.order_by(order_by_field.asc())
else:
stmt = stmt.order_by(order_by_field.desc())
stmt = stmt.offset(offset).limit(limit)
if query:
stmt = stmt.where(
or_(
QueryEvent.llm_answer.ilike(f"%{query}%"),
QueryEvent.query.ilike(f"%{query}%"),
)
)
if feedback_type:
stmt = stmt.where(QueryEvent.feedback == feedback_type)
return db_session.scalars(stmt).all()
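
A minimal sketch exercising the filtering, sorting, and pagination options of fetch_query_history; the connection string and search term are placeholders:

import datetime

from sqlalchemy import create_engine
from sqlalchemy.orm import Session

from danswer.configs.constants import QAFeedbackType
from ee.danswer.db.query_history import fetch_query_history

# Placeholder connection string for illustration only.
engine = create_engine("postgresql://postgres:password@localhost:5432/danswer")

end = datetime.datetime.utcnow()
start = end - datetime.timedelta(days=30)

with Session(engine) as db_session:
    # First page of disliked queries whose question or answer mentions "pricing".
    events = fetch_query_history(
        db_session=db_session,
        start=start,
        end=end,
        query="pricing",
        feedback_type=QAFeedbackType.DISLIKE,
        sort_by_field="time_created",
        sort_by_direction="desc",
        offset=0,
        limit=50,
    )
    for event in events:
        print(event.time_created, event.query, event.feedback)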

View File

@@ -16,6 +16,8 @@ from danswer.main import get_application
from danswer.utils.logger import setup_logger
from danswer.utils.variable_functionality import global_version
from ee.danswer.configs.app_configs import OPENID_CONFIG_URL
from ee.danswer.server.analytics.api import router as analytics_router
from ee.danswer.server.query_history.api import router as query_history_router
from ee.danswer.server.saml import router as saml_router
from ee.danswer.server.user_group.api import router as user_group_router
@@ -54,6 +56,9 @@ def get_ee_application() -> FastAPI:
# RBAC / group access control
application.include_router(user_group_router)
# analytics endpoints
application.include_router(analytics_router)
application.include_router(query_history_router)
return application

View File

@@ -0,0 +1,81 @@
import datetime
from collections import defaultdict
from fastapi import APIRouter
from fastapi import Depends
from pydantic import BaseModel
from sqlalchemy.orm import Session
import danswer.db.models as db_models
from danswer.auth.users import current_admin_user
from danswer.db.engine import get_session
from ee.danswer.db.analytics import fetch_per_user_query_analytics
from ee.danswer.db.analytics import fetch_query_analytics

router = APIRouter(prefix="/analytics")


class QueryAnalyticsResponse(BaseModel):
total_queries: int
total_likes: int
total_dislikes: int
date: datetime.date
@router.get("/admin/query")
def get_query_analytics(
start: datetime.datetime | None = None,
end: datetime.datetime | None = None,
_: db_models.User | None = Depends(current_admin_user),
db_session: Session = Depends(get_session),
) -> list[QueryAnalyticsResponse]:
daily_query_usage_info = fetch_query_analytics(
db_session=db_session,
start=start
or (
datetime.datetime.utcnow() - datetime.timedelta(days=30)
), # default is 30d lookback
end=end or datetime.datetime.utcnow(),
)
return [
QueryAnalyticsResponse(
total_queries=total_queries,
total_likes=total_likes,
total_dislikes=total_dislikes,
date=date,
)
for total_queries, total_likes, total_dislikes, date in daily_query_usage_info
]


class UserAnalyticsResponse(BaseModel):
total_active_users: int
date: datetime.date
@router.get("/admin/user")
def get_user_analytics(
start: datetime.datetime | None = None,
end: datetime.datetime | None = None,
_: db_models.User | None = Depends(current_admin_user),
db_session: Session = Depends(get_session),
) -> list[UserAnalyticsResponse]:
daily_query_usage_info_per_user = fetch_per_user_query_analytics(
db_session=db_session,
start=start
or (
datetime.datetime.utcnow() - datetime.timedelta(days=30)
), # default is 30d lookback
end=end or datetime.datetime.utcnow(),
)
    user_analytics: dict[datetime.date, int] = defaultdict(int)
    # rows are grouped by (date, user_id), so each row is one distinct active
    # user on that date; incrementing per row yields daily active user counts
    for __, ___, ____, date, _____ in daily_query_usage_info_per_user:
        user_analytics[date] += 1
return [
UserAnalyticsResponse(
total_active_users=cnt,
date=date,
)
for date, cnt in user_analytics.items()
]
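
A hedged sketch of calling these endpoints over HTTP. The base URL, cookie name, and cookie value are assumptions for illustration; in practice the request must carry whatever admin credentials current_admin_user expects:

import datetime

import requests

BASE_URL = "http://localhost:8080"  # assumed; adjust to your deployment
COOKIES = {"fastapiusersauth": "<admin-session-cookie>"}  # assumed cookie name/value

params = {
    "start": (datetime.datetime.utcnow() - datetime.timedelta(days=7)).isoformat(),
    "end": datetime.datetime.utcnow().isoformat(),
}

# Daily query volume with like/dislike totals.
resp = requests.get(f"{BASE_URL}/analytics/admin/query", params=params, cookies=COOKIES)
resp.raise_for_status()
for day in resp.json():
    print(day["date"], day["total_queries"], day["total_likes"], day["total_dislikes"])

# Daily active users, derived from the per-user analytics rows.
resp = requests.get(f"{BASE_URL}/analytics/admin/user", params=params, cookies=COOKIES)
resp.raise_for_status()
for day in resp.json():
    print(day["date"], day["total_active_users"])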

View File

@@ -0,0 +1,116 @@
from collections.abc import Iterable
from datetime import datetime
from datetime import timedelta
from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException
from pydantic import BaseModel
from sqlalchemy.orm import Session
import danswer.db.models as db_models
from danswer.auth.users import current_admin_user
from danswer.configs.constants import QAFeedbackType
from danswer.db.engine import get_session
from danswer.db.feedback import fetch_query_event_by_id
from danswer.db.models import Document
from ee.danswer.db.document import fetch_documents_from_ids
from ee.danswer.db.query_history import fetch_query_history

router = APIRouter()


class AbridgedSearchDoc(BaseModel):
"""A subset of the info present in `SearchDoc`"""
document_id: str
semantic_identifier: str
link: str | None


class QuerySnapshot(BaseModel):
id: int
query: str
llm_answer: str | None
retrieved_documents: list[AbridgedSearchDoc]
feedback: QAFeedbackType | None
time_created: datetime
@classmethod
def build(
cls, query_event: db_models.QueryEvent, documents: Iterable[Document]
) -> "QuerySnapshot":
return cls(
id=query_event.id,
query=query_event.query,
llm_answer=query_event.llm_answer,
retrieved_documents=[
AbridgedSearchDoc(
document_id=document.id,
semantic_identifier=document.semantic_id,
link=document.link,
)
for document in documents
],
feedback=query_event.feedback,
time_created=query_event.time_created,
)
@router.get("/admin/query-history")
def get_query_history(
feedback_type: QAFeedbackType | None = None,
start: datetime | None = None,
end: datetime | None = None,
_: db_models.User | None = Depends(current_admin_user),
db_session: Session = Depends(get_session),
) -> list[QuerySnapshot]:
query_history = fetch_query_history(
db_session=db_session,
start=start
or (datetime.utcnow() - timedelta(days=30)), # default is 30d lookback
end=end or datetime.utcnow(),
feedback_type=feedback_type,
)
all_relevant_document_ids: set[str] = set()
for query_event in query_history:
all_relevant_document_ids = all_relevant_document_ids.union(
query_event.retrieved_document_ids or []
)
document_id_to_document = {
document.id: document
for document in fetch_documents_from_ids(
db_session, list(all_relevant_document_ids)
)
}
query_snapshots: list[QuerySnapshot] = []
for query_event in query_history:
unique_document_ids = set(query_event.retrieved_document_ids or [])
documents = [
document_id_to_document[doc_id]
for doc_id in unique_document_ids
if doc_id in document_id_to_document
]
query_snapshots.append(
QuerySnapshot.build(query_event=query_event, documents=documents)
)
return query_snapshots
@router.get("/admin/query-history/{query_id}")
def get_query(
query_id: int,
_: db_models.User | None = Depends(current_admin_user),
db_session: Session = Depends(get_session),
) -> QuerySnapshot:
try:
query_event = fetch_query_event_by_id(query_id=query_id, db_session=db_session)
except ValueError:
raise HTTPException(400, f"Query event with id '{query_id}' does not exist.")
documents = fetch_documents_from_ids(
db_session, query_event.retrieved_document_ids or []
)
return QuerySnapshot.build(query_event=query_event, documents=documents)
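
The same hedged HTTP sketch for the query-history endpoints; the base URL and cookie are placeholders as above, and the feedback_type value assumes QAFeedbackType serializes to a string such as "dislike":

import requests

BASE_URL = "http://localhost:8080"  # assumed; adjust to your deployment
COOKIES = {"fastapiusersauth": "<admin-session-cookie>"}  # assumed cookie name/value

# List recent query events that received a dislike (default lookback is 30 days).
resp = requests.get(
    f"{BASE_URL}/admin/query-history",
    params={"feedback_type": "dislike"},  # assumed enum string value
    cookies=COOKIES,
)
resp.raise_for_status()
snapshots = resp.json()
for snapshot in snapshots:
    print(snapshot["id"], snapshot["query"], len(snapshot["retrieved_documents"]))

# Drill into a single query event by id.
if snapshots:
    detail = requests.get(
        f"{BASE_URL}/admin/query-history/{snapshots[0]['id']}", cookies=COOKIES
    )
    detail.raise_for_status()
    print(detail.json()["llm_answer"])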