diff --git a/backend/ee/danswer/db/query_history.py b/backend/ee/danswer/db/query_history.py index 5badca06bd70..6a74290063da 100644 --- a/backend/ee/danswer/db/query_history.py +++ b/backend/ee/danswer/db/query_history.py @@ -4,27 +4,29 @@ from typing import cast from typing import Literal from sqlalchemy import or_ +from sqlalchemy import Select from sqlalchemy import select +from sqlalchemy.orm import aliased from sqlalchemy.orm import Session from sqlalchemy.orm.attributes import InstrumentedAttribute from danswer.configs.constants import QAFeedbackType from danswer.db.models import QueryEvent +from danswer.db.models import User SortByOptions = Literal["time_created", "feedback"] -def fetch_query_history( - db_session: Session, +def build_query_history_query( start: datetime.datetime, end: datetime.datetime, - query: str | None = None, - feedback_type: QAFeedbackType | None = None, - sort_by_field: SortByOptions = "time_created", - sort_by_direction: Literal["asc", "desc"] = "desc", - offset: int = 0, - limit: int = 500, -) -> Sequence[QueryEvent]: + query: str | None, + feedback_type: QAFeedbackType | None, + sort_by_field: SortByOptions, + sort_by_direction: Literal["asc", "desc"], + offset: int, + limit: int | None, +) -> Select[tuple[QueryEvent]]: stmt = ( select(QueryEvent) .where( @@ -41,7 +43,10 @@ def fetch_query_history( else: stmt = stmt.order_by(order_by_field.desc()) - stmt = stmt.offset(offset).limit(limit) + if offset: + stmt = stmt.offset(offset) + if limit: + stmt = stmt.limit(limit) if query: stmt = stmt.where( @@ -54,4 +59,59 @@ def fetch_query_history( if feedback_type: stmt = stmt.where(QueryEvent.feedback == feedback_type) + return stmt + + +def fetch_query_history( + db_session: Session, + start: datetime.datetime, + end: datetime.datetime, + query: str | None = None, + feedback_type: QAFeedbackType | None = None, + sort_by_field: SortByOptions = "time_created", + sort_by_direction: Literal["asc", "desc"] = "desc", + offset: int = 0, + limit: int | None = 500, +) -> Sequence[QueryEvent]: + stmt = build_query_history_query( + start=start, + end=end, + query=query, + feedback_type=feedback_type, + sort_by_field=sort_by_field, + sort_by_direction=sort_by_direction, + offset=offset, + limit=limit, + ) + return db_session.scalars(stmt).all() + + +def fetch_query_history_with_user_email( + db_session: Session, + start: datetime.datetime, + end: datetime.datetime, + query: str | None = None, + feedback_type: QAFeedbackType | None = None, + sort_by_field: SortByOptions = "time_created", + sort_by_direction: Literal["asc", "desc"] = "desc", + offset: int = 0, + limit: int | None = 500, +) -> Sequence[tuple[QueryEvent, str | None]]: + subquery = build_query_history_query( + start=start, + end=end, + query=query, + feedback_type=feedback_type, + sort_by_field=sort_by_field, + sort_by_direction=sort_by_direction, + offset=offset, + limit=limit, + ).subquery() + subquery_alias = aliased(QueryEvent, subquery) + + stmt_with_user_email = select(subquery_alias, User.email).join( # type: ignore + User, subquery_alias.user_id == User.id, isouter=True + ) + + return db_session.execute(stmt_with_user_email).all() # type: ignore diff --git a/backend/ee/danswer/server/query_history/api.py b/backend/ee/danswer/server/query_history/api.py index c1176fbe351b..387e8dfdb282 100644 --- a/backend/ee/danswer/server/query_history/api.py +++ b/backend/ee/danswer/server/query_history/api.py @@ -1,10 +1,14 @@ +import csv +import io from collections.abc import Iterable from datetime import datetime from datetime import timedelta +from datetime import timezone from fastapi import APIRouter from fastapi import Depends from fastapi import HTTPException +from fastapi.responses import StreamingResponse from pydantic import BaseModel from sqlalchemy.orm import Session @@ -15,7 +19,9 @@ from danswer.db.engine import get_session from danswer.db.feedback import fetch_query_event_by_id from danswer.db.models import Document from ee.danswer.db.document import fetch_documents_from_ids -from ee.danswer.db.query_history import fetch_query_history +from ee.danswer.db.query_history import ( + fetch_query_history_with_user_email, +) router = APIRouter() @@ -31,6 +37,7 @@ class AbridgedSearchDoc(BaseModel): class QuerySnapshot(BaseModel): id: int + user_email: str | None query: str llm_answer: str | None retrieved_documents: list[AbridgedSearchDoc] @@ -39,10 +46,14 @@ class QuerySnapshot(BaseModel): @classmethod def build( - cls, query_event: db_models.QueryEvent, documents: Iterable[Document] + cls, + query_event: db_models.QueryEvent, + user_email: str | None, + documents: Iterable[Document], ) -> "QuerySnapshot": return cls( id=query_event.id, + user_email=user_email, query=query_event.query, llm_answer=query_event.llm_answer, retrieved_documents=[ @@ -57,25 +68,43 @@ class QuerySnapshot(BaseModel): time_created=query_event.time_created, ) + def to_json(self) -> dict[str, str]: + return { + "id": str(self.id), + "query": self.query, + "user_email": self.user_email or "", + "llm_answer": self.llm_answer or "", + "retrieved_documents": "|".join( + [ + doc.link or doc.semantic_identifier + for doc in self.retrieved_documents + ] + ), + "feedback": self.feedback.value if self.feedback else "", + "time_created": str(self.time_created), + } -@router.get("/admin/query-history") -def get_query_history( - feedback_type: QAFeedbackType | None = None, - start: datetime | None = None, - end: datetime | None = None, - _: db_models.User | None = Depends(current_admin_user), - db_session: Session = Depends(get_session), + +def fetch_and_process_query_history( + db_session: Session, + start: datetime | None, + end: datetime | None, + feedback_type: QAFeedbackType | None, + limit: int | None = 500, ) -> list[QuerySnapshot]: - query_history = fetch_query_history( + query_history_with_user_email = fetch_query_history_with_user_email( db_session=db_session, start=start - or (datetime.utcnow() - timedelta(days=30)), # default is 30d lookback - end=end or datetime.utcnow(), + or ( + datetime.now(tz=timezone.utc) - timedelta(days=30) + ), # default is 30d lookback + end=end or datetime.now(tz=timezone.utc), feedback_type=feedback_type, + limit=limit, ) all_relevant_document_ids: set[str] = set() - for query_event in query_history: + for query_event, _ in query_history_with_user_email: all_relevant_document_ids = all_relevant_document_ids.union( query_event.retrieved_document_ids or [] ) @@ -87,7 +116,7 @@ def get_query_history( } query_snapshots: list[QuerySnapshot] = [] - for query_event in query_history: + for query_event, user_email in query_history_with_user_email: unique_document_ids = set(query_event.retrieved_document_ids or []) documents = [ document_id_to_document[doc_id] @@ -95,11 +124,29 @@ def get_query_history( if doc_id in document_id_to_document ] query_snapshots.append( - QuerySnapshot.build(query_event=query_event, documents=documents) + QuerySnapshot.build( + query_event=query_event, user_email=user_email, documents=documents + ) ) return query_snapshots +@router.get("/admin/query-history") +def get_query_history( + feedback_type: QAFeedbackType | None = None, + start: datetime | None = None, + end: datetime | None = None, + _: db_models.User | None = Depends(current_admin_user), + db_session: Session = Depends(get_session), +) -> list[QuerySnapshot]: + return fetch_and_process_query_history( + db_session=db_session, + start=start, + end=end, + feedback_type=feedback_type, + ) + + @router.get("/admin/query-history/{query_id}") def get_query( query_id: int, @@ -113,4 +160,40 @@ def get_query( documents = fetch_documents_from_ids( db_session, query_event.retrieved_document_ids or [] ) - return QuerySnapshot.build(query_event=query_event, documents=documents) + return QuerySnapshot.build( + query_event=query_event, + user_email=query_event.user.email if query_event.user else None, + documents=documents, + ) + + +@router.get("/admin/query-history-csv") +def get_query_history_as_csv( + _: db_models.User | None = Depends(current_admin_user), + db_session: Session = Depends(get_session), +) -> StreamingResponse: + complete_query_history = fetch_and_process_query_history( + db_session=db_session, + start=datetime.fromtimestamp(0, tz=timezone.utc), + end=datetime.now(tz=timezone.utc), + feedback_type=None, + limit=None, + ) + + # Create an in-memory text stream + stream = io.StringIO() + writer = csv.DictWriter(stream, fieldnames=list(QuerySnapshot.__fields__.keys())) + writer.writeheader() + for row in complete_query_history: + writer.writerow(row.to_json()) + + # Reset the stream's position to the start + stream.seek(0) + + return StreamingResponse( + iter([stream.getvalue()]), + media_type="text/csv", + headers={ + "Content-Disposition": "attachment;filename=danswer_query_history.csv" + }, + ) diff --git a/web/src/app/ee/admin/performance/analytics/types.ts b/web/src/app/ee/admin/performance/analytics/types.ts index 6e50f466e408..e656851a005c 100644 --- a/web/src/app/ee/admin/performance/analytics/types.ts +++ b/web/src/app/ee/admin/performance/analytics/types.ts @@ -21,6 +21,7 @@ export interface AbridgedSearchDoc { export interface QuerySnapshot { id: number; query: string; + user_email: string | null; llm_answer: string; retrieved_documents: AbridgedSearchDoc[]; time_created: string; diff --git a/web/src/app/ee/admin/performance/query-history/DownloadAsCSV.tsx b/web/src/app/ee/admin/performance/query-history/DownloadAsCSV.tsx new file mode 100644 index 000000000000..550d9d3ceebe --- /dev/null +++ b/web/src/app/ee/admin/performance/query-history/DownloadAsCSV.tsx @@ -0,0 +1,13 @@ +import { FiDownload } from "react-icons/fi"; + +export function DownloadAsCSV() { + return ( + + + Download as CSV + + ); +} diff --git a/web/src/app/ee/admin/performance/query-history/QueryHistoryTable.tsx b/web/src/app/ee/admin/performance/query-history/QueryHistoryTable.tsx index cf7dc092d9c2..a0f18679ef1d 100644 --- a/web/src/app/ee/admin/performance/query-history/QueryHistoryTable.tsx +++ b/web/src/app/ee/admin/performance/query-history/QueryHistoryTable.tsx @@ -14,7 +14,10 @@ import { Divider } from "@tremor/react"; import { Select, SelectItem } from "@tremor/react"; import { ThreeDotsLoader } from "@/components/Loading"; import { QuerySnapshot } from "../analytics/types"; -import { timestampToDateString } from "@/lib/dateUtils"; +import { + timestampToDateString, + timestampToReadableDate, +} from "@/lib/dateUtils"; import { FiBook, FiFrown, FiMinus, FiSmile } from "react-icons/fi"; import { useState } from "react"; import { Feedback } from "@/lib/types"; @@ -22,6 +25,7 @@ import { DateRangeSelector } from "../DateRangeSelector"; import { PageSelector } from "@/components/PageSelector"; import Link from "next/link"; import { FeedbackBadge } from "./FeedbackBadge"; +import { DownloadAsCSV } from "./DownloadAsCSV"; const NUM_IN_PAGE = 20; @@ -54,7 +58,10 @@ function QueryHistoryTableRow({ - {timestampToDateString(querySnapshot.time_created)} + {querySnapshot.user_email || "-"} + + {timestampToReadableDate(querySnapshot.time_created)} + {/* Wrapping in to avoid console warnings */} {queryHistoryData ? ( <> -
- +
+
+ - + +
+ +
@@ -130,6 +144,7 @@ export function QueryHistoryTable() { LLM AnswerRetrieved DocumentsFeedback + UserDate diff --git a/web/src/app/ee/admin/performance/query-history/[id]/page.tsx b/web/src/app/ee/admin/performance/query-history/[id]/page.tsx index 614c124bf1f0..f5d879886363 100644 --- a/web/src/app/ee/admin/performance/query-history/[id]/page.tsx +++ b/web/src/app/ee/admin/performance/query-history/[id]/page.tsx @@ -1,10 +1,11 @@ -import { Bold, Text, Card, Title, Divider } from "@tremor/react"; +import { Bold, Text, Card, Title, Divider, Italic } from "@tremor/react"; import { QuerySnapshot } from "../../analytics/types"; import { buildUrl } from "@/lib/utilsSS"; import { BackButton } from "./BackButton"; import { FiBook } from "react-icons/fi"; import { processCookies } from "@/lib/userSS"; import { cookies } from "next/headers"; +import { timestampToReadableDate } from "@/lib/dateUtils"; export default async function QueryPage({ params, @@ -26,6 +27,11 @@ export default async function QueryPage({ Query Details + + {queryEvent.user_email || "-"},{" "} + {timestampToReadableDate(queryEvent.time_created)} + +
diff --git a/web/src/lib/dateUtils.ts b/web/src/lib/dateUtils.ts index 5693d0d422e2..728d8a7d2cdb 100644 --- a/web/src/lib/dateUtils.ts +++ b/web/src/lib/dateUtils.ts @@ -16,3 +16,26 @@ export const timestampToDateString = (timestamp: string) => { .padStart(2, "0")}`; return formattedDate; }; + +// Options for formatting the date +const dateOptions: Intl.DateTimeFormatOptions = { + year: "numeric", + month: "2-digit", + day: "2-digit", +}; + +// Options for formatting the time +const timeOptions: Intl.DateTimeFormatOptions = { + hour: "numeric", + minute: "2-digit", + hour12: true, // Use 12-hour format with AM/PM +}; + +export const timestampToReadableDate = (timestamp: string) => { + const date = new Date(timestamp); + return ( + date.toLocaleDateString(undefined, dateOptions) + + ", " + + date.toLocaleTimeString(undefined, timeOptions) + ); +};