Mirror of https://github.com/danswer-ai/danswer.git, synced 2025-09-19 12:03:54 +02:00.
Admin Analytics/Query History dashboards (#6)
This commit is contained in:
64
backend/ee/danswer/db/analytics.py
Normal file
64
backend/ee/danswer/db/analytics.py
Normal file
@@ -0,0 +1,64 @@
|
||||
import datetime
|
||||
from collections.abc import Sequence
|
||||
from uuid import UUID
|
||||
|
||||
from sqlalchemy import case
|
||||
from sqlalchemy import cast
|
||||
from sqlalchemy import Date
|
||||
from sqlalchemy import func
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from danswer.configs.constants import QAFeedbackType
|
||||
from danswer.db.models import QueryEvent
|
||||
|
||||
|
||||
def fetch_query_analytics(
    db_session: Session,
    start: datetime.datetime,
    end: datetime.datetime,
) -> Sequence[tuple[int, int, int, datetime.date]]:
    """Aggregate query activity per calendar day.

    Returns one row per day in the inclusive [start, end] window:
    (total_queries, total_likes, total_dislikes, date).
    """
    # Hoist the repeated day-bucket expression so SELECT, GROUP BY, and
    # ORDER BY are guaranteed to use the identical column expression.
    day = cast(QueryEvent.time_created, Date)
    stmt = (
        select(
            func.count(QueryEvent.id),
            func.sum(case((QueryEvent.feedback == QAFeedbackType.LIKE, 1), else_=0)),
            func.sum(case((QueryEvent.feedback == QAFeedbackType.DISLIKE, 1), else_=0)),
            day,
        )
        # Both bounds are inclusive.
        .where(QueryEvent.time_created >= start)
        .where(QueryEvent.time_created <= end)
        .group_by(day)
        .order_by(day)
    )

    return db_session.execute(stmt).all()  # type: ignore
|
||||
|
||||
|
||||
def fetch_per_user_query_analytics(
    db_session: Session,
    start: datetime.datetime,
    end: datetime.datetime,
) -> Sequence[tuple[int, int, int, datetime.date, UUID]]:
    """Aggregate query activity per (calendar day, user).

    Returns one row per (day, user_id) pair in the inclusive [start, end]
    window: (total_queries, total_likes, total_dislikes, date, user_id).
    """
    # Hoist the repeated day-bucket expression so SELECT, GROUP BY, and
    # ORDER BY are guaranteed to use the identical column expression.
    day = cast(QueryEvent.time_created, Date)
    stmt = (
        select(
            func.count(QueryEvent.id),
            func.sum(case((QueryEvent.feedback == QAFeedbackType.LIKE, 1), else_=0)),
            func.sum(case((QueryEvent.feedback == QAFeedbackType.DISLIKE, 1), else_=0)),
            day,
            QueryEvent.user_id,
        )
        # Both bounds are inclusive.
        .where(QueryEvent.time_created >= start)
        .where(QueryEvent.time_created <= end)
        .group_by(day, QueryEvent.user_id)
        .order_by(day, QueryEvent.user_id)
    )

    return db_session.execute(stmt).all()  # type: ignore
|
14
backend/ee/danswer/db/document.py
Normal file
14
backend/ee/danswer/db/document.py
Normal file
@@ -0,0 +1,14 @@
|
||||
from collections.abc import Sequence
|
||||
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from danswer.db.models import Document
|
||||
|
||||
|
||||
def fetch_documents_from_ids(
    db_session: Session, document_ids: list[str]
) -> Sequence[Document]:
    """Fetch all `Document` rows whose id is in `document_ids`.

    Short-circuits on an empty id list to skip the database round-trip
    (an `IN ()` clause can never match anything anyway).
    """
    if not document_ids:
        return []
    return db_session.scalars(
        select(Document).where(Document.id.in_(document_ids))
    ).all()
|
57
backend/ee/danswer/db/query_history.py
Normal file
57
backend/ee/danswer/db/query_history.py
Normal file
@@ -0,0 +1,57 @@
|
||||
import datetime
|
||||
from collections.abc import Sequence
|
||||
from typing import cast
|
||||
from typing import Literal
|
||||
|
||||
from sqlalchemy import or_
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.orm import Session
|
||||
from sqlalchemy.orm.attributes import InstrumentedAttribute
|
||||
|
||||
from danswer.configs.constants import QAFeedbackType
|
||||
from danswer.db.models import QueryEvent
|
||||
|
||||
SortByOptions = Literal["time_created", "feedback"]
|
||||
|
||||
|
||||
def fetch_query_history(
    db_session: Session,
    start: datetime.datetime,
    end: datetime.datetime,
    query: str | None = None,
    feedback_type: QAFeedbackType | None = None,
    sort_by_field: SortByOptions = "time_created",
    sort_by_direction: Literal["asc", "desc"] = "desc",
    offset: int = 0,
    limit: int = 500,
) -> Sequence[QueryEvent]:
    """Fetch a page of `QueryEvent` rows in the inclusive [start, end] window.

    Optional filters: a case-insensitive substring `query` matched against
    both the user query and the LLM answer, and an exact `feedback_type`.
    Results are sorted by `sort_by_field` in `sort_by_direction`, then
    paginated with `offset`/`limit`.
    """
    stmt = (
        select(QueryEvent)
        .where(QueryEvent.time_created >= start)
        .where(QueryEvent.time_created <= end)
    )

    # Apply all filters before ordering and pagination — the original
    # appended them after offset/limit, which compiles to the same SQL but
    # misleadingly reads as filtering the already-limited page.
    if query:
        stmt = stmt.where(
            or_(
                QueryEvent.llm_answer.ilike(f"%{query}%"),
                QueryEvent.query.ilike(f"%{query}%"),
            )
        )
    if feedback_type:
        stmt = stmt.where(QueryEvent.feedback == feedback_type)

    order_by_field = cast(InstrumentedAttribute, getattr(QueryEvent, sort_by_field))
    if sort_by_direction == "asc":
        stmt = stmt.order_by(order_by_field.asc())
    else:
        stmt = stmt.order_by(order_by_field.desc())

    return db_session.scalars(stmt.offset(offset).limit(limit)).all()
|
@@ -16,6 +16,8 @@ from danswer.main import get_application
|
||||
from danswer.utils.logger import setup_logger
|
||||
from danswer.utils.variable_functionality import global_version
|
||||
from ee.danswer.configs.app_configs import OPENID_CONFIG_URL
|
||||
from ee.danswer.server.analytics.api import router as analytics_router
|
||||
from ee.danswer.server.query_history.api import router as query_history_router
|
||||
from ee.danswer.server.saml import router as saml_router
|
||||
from ee.danswer.server.user_group.api import router as user_group_router
|
||||
|
||||
@@ -54,6 +56,9 @@ def get_ee_application() -> FastAPI:
|
||||
|
||||
# RBAC / group access control
|
||||
application.include_router(user_group_router)
|
||||
# analytics endpoints
|
||||
application.include_router(analytics_router)
|
||||
application.include_router(query_history_router)
|
||||
|
||||
return application
|
||||
|
||||
|
81
backend/ee/danswer/server/analytics/api.py
Normal file
81
backend/ee/danswer/server/analytics/api.py
Normal file
@@ -0,0 +1,81 @@
|
||||
import datetime
|
||||
from collections import defaultdict
|
||||
|
||||
from fastapi import APIRouter
|
||||
from fastapi import Depends
|
||||
from pydantic import BaseModel
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
import danswer.db.models as db_models
|
||||
from danswer.auth.users import current_admin_user
|
||||
from danswer.db.engine import get_session
|
||||
from ee.danswer.db.analytics import fetch_per_user_query_analytics
|
||||
from ee.danswer.db.analytics import fetch_query_analytics
|
||||
|
||||
router = APIRouter(prefix="/analytics")
|
||||
|
||||
|
||||
class QueryAnalyticsResponse(BaseModel):
    """One aggregated row per calendar day, as produced by `fetch_query_analytics`."""

    # total number of queries issued on `date`
    total_queries: int
    # number of those queries that received LIKE feedback
    total_likes: int
    # number of those queries that received DISLIKE feedback
    total_dislikes: int
    # the calendar day the counts apply to
    date: datetime.date
|
||||
|
||||
|
||||
@router.get("/admin/query")
def get_query_analytics(
    start: datetime.datetime | None = None,
    end: datetime.datetime | None = None,
    _: db_models.User | None = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> list[QueryAnalyticsResponse]:
    """Admin-only: daily query counts with like/dislike totals.

    `start` defaults to a 30-day lookback and `end` to now.
    NOTE(review): uses naive `utcnow()` — presumably matches how
    `QueryEvent.time_created` is stored; confirm against the model.
    """
    # Resolve defaults up front instead of inline in the call.
    if start is None:
        start = datetime.datetime.utcnow() - datetime.timedelta(days=30)
    if end is None:
        end = datetime.datetime.utcnow()

    daily_query_usage_info = fetch_query_analytics(
        db_session=db_session, start=start, end=end
    )

    responses: list[QueryAnalyticsResponse] = []
    for num_queries, num_likes, num_dislikes, day in daily_query_usage_info:
        responses.append(
            QueryAnalyticsResponse(
                total_queries=num_queries,
                total_likes=num_likes,
                total_dislikes=num_dislikes,
                date=day,
            )
        )
    return responses
|
||||
|
||||
|
||||
class UserAnalyticsResponse(BaseModel):
    """Number of distinct users who issued at least one query on `date`."""

    # count of distinct active users on `date`
    total_active_users: int
    # the calendar day the count applies to
    date: datetime.date
|
||||
|
||||
|
||||
@router.get("/admin/user")
def get_user_analytics(
    start: datetime.datetime | None = None,
    end: datetime.datetime | None = None,
    _: db_models.User | None = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> list[UserAnalyticsResponse]:
    """Admin-only: distinct active users per day.

    `start` defaults to a 30-day lookback and `end` to now.
    """
    daily_query_usage_info_per_user = fetch_per_user_query_analytics(
        db_session=db_session,
        start=start
        or (
            datetime.datetime.utcnow() - datetime.timedelta(days=30)
        ),  # default is 30d lookback
        end=end or datetime.datetime.utcnow(),
    )

    # Rows are grouped by (date, user_id), so each row represents exactly one
    # distinct active user on that date — counting rows per date gives the
    # distinct-active-user total. Named throwaways replace the original's
    # cryptic `__, ___, ____` unpacking.
    user_analytics: dict[datetime.date, int] = defaultdict(int)
    for _count, _likes, _dislikes, date, _user_id in daily_query_usage_info_per_user:
        user_analytics[date] += 1
    return [
        UserAnalyticsResponse(
            total_active_users=cnt,
            date=date,
        )
        for date, cnt in user_analytics.items()
    ]
|
116
backend/ee/danswer/server/query_history/api.py
Normal file
116
backend/ee/danswer/server/query_history/api.py
Normal file
@@ -0,0 +1,116 @@
|
||||
from collections.abc import Iterable
|
||||
from datetime import datetime
|
||||
from datetime import timedelta
|
||||
|
||||
from fastapi import APIRouter
|
||||
from fastapi import Depends
|
||||
from fastapi import HTTPException
|
||||
from pydantic import BaseModel
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
import danswer.db.models as db_models
|
||||
from danswer.auth.users import current_admin_user
|
||||
from danswer.configs.constants import QAFeedbackType
|
||||
from danswer.db.engine import get_session
|
||||
from danswer.db.feedback import fetch_query_event_by_id
|
||||
from danswer.db.models import Document
|
||||
from ee.danswer.db.document import fetch_documents_from_ids
|
||||
from ee.danswer.db.query_history import fetch_query_history
|
||||
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
class AbridgedSearchDoc(BaseModel):
    """A subset of the info present in `SearchDoc`"""

    # stable document identifier (maps to `Document.id`)
    document_id: str
    # human-readable identifier (maps to `Document.semantic_id`)
    semantic_identifier: str
    # source link for the document, if one exists
    link: str | None
|
||||
|
||||
|
||||
class QuerySnapshot(BaseModel):
    """API-facing view of a single `QueryEvent` plus its retrieved documents."""

    id: int
    query: str
    llm_answer: str | None
    retrieved_documents: list[AbridgedSearchDoc]
    feedback: QAFeedbackType | None
    time_created: datetime

    @classmethod
    def build(
        cls, query_event: db_models.QueryEvent, documents: Iterable[Document]
    ) -> "QuerySnapshot":
        """Assemble a snapshot from a `QueryEvent` row and its documents."""
        # Abridge each retrieved document before constructing the snapshot.
        abridged_docs = [
            AbridgedSearchDoc(
                document_id=document.id,
                semantic_identifier=document.semantic_id,
                link=document.link,
            )
            for document in documents
        ]
        return cls(
            id=query_event.id,
            query=query_event.query,
            llm_answer=query_event.llm_answer,
            retrieved_documents=abridged_docs,
            feedback=query_event.feedback,
            time_created=query_event.time_created,
        )
|
||||
|
||||
|
||||
@router.get("/admin/query-history")
def get_query_history(
    feedback_type: QAFeedbackType | None = None,
    start: datetime | None = None,
    end: datetime | None = None,
    _: db_models.User | None = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> list[QuerySnapshot]:
    """Admin-only: list query events in the window with their retrieved docs.

    `start` defaults to a 30-day lookback and `end` to now; results can be
    narrowed to a single `feedback_type`.
    """
    query_history = fetch_query_history(
        db_session=db_session,
        start=start
        or (datetime.utcnow() - timedelta(days=30)),  # default is 30d lookback
        end=end or datetime.utcnow(),
        feedback_type=feedback_type,
    )

    # Collect every referenced document id so all documents can be fetched
    # in one batched query. `update` mutates in place — the original rebuilt
    # the set via `union()` on every event, copying it each iteration.
    all_relevant_document_ids: set[str] = set()
    for query_event in query_history:
        all_relevant_document_ids.update(query_event.retrieved_document_ids or [])
    document_id_to_document = {
        document.id: document
        for document in fetch_documents_from_ids(
            db_session, list(all_relevant_document_ids)
        )
    }

    query_snapshots: list[QuerySnapshot] = []
    for query_event in query_history:
        unique_document_ids = set(query_event.retrieved_document_ids or [])
        # Silently skip ids with no matching Document row (e.g. since deleted).
        documents = [
            document_id_to_document[doc_id]
            for doc_id in unique_document_ids
            if doc_id in document_id_to_document
        ]
        query_snapshots.append(
            QuerySnapshot.build(query_event=query_event, documents=documents)
        )
    return query_snapshots
|
||||
|
||||
|
||||
@router.get("/admin/query-history/{query_id}")
def get_query(
    query_id: int,
    _: db_models.User | None = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> QuerySnapshot:
    """Admin-only: fetch a single query event by id, with its documents."""
    try:
        query_event = fetch_query_event_by_id(query_id=query_id, db_session=db_session)
    except ValueError:
        # NOTE(review): 400 kept for API compatibility; 404 might fit a
        # missing resource better — confirm before changing.
        raise HTTPException(400, f"Query event with id '{query_id}' does not exist.")

    retrieved_ids = query_event.retrieved_document_ids or []
    documents = fetch_documents_from_ids(db_session, retrieved_ids)
    return QuerySnapshot.build(query_event=query_event, documents=documents)
|
Reference in New Issue
Block a user