diff --git a/backend/ee/danswer/db/query_history.py b/backend/ee/danswer/db/query_history.py
index 5badca06bd70..6a74290063da 100644
--- a/backend/ee/danswer/db/query_history.py
+++ b/backend/ee/danswer/db/query_history.py
@@ -4,27 +4,29 @@ from typing import cast
from typing import Literal
from sqlalchemy import or_
+from sqlalchemy import Select
from sqlalchemy import select
+from sqlalchemy.orm import aliased
from sqlalchemy.orm import Session
from sqlalchemy.orm.attributes import InstrumentedAttribute
from danswer.configs.constants import QAFeedbackType
from danswer.db.models import QueryEvent
+from danswer.db.models import User
SortByOptions = Literal["time_created", "feedback"]
-def fetch_query_history(
- db_session: Session,
+def build_query_history_query(
start: datetime.datetime,
end: datetime.datetime,
- query: str | None = None,
- feedback_type: QAFeedbackType | None = None,
- sort_by_field: SortByOptions = "time_created",
- sort_by_direction: Literal["asc", "desc"] = "desc",
- offset: int = 0,
- limit: int = 500,
-) -> Sequence[QueryEvent]:
+ query: str | None,
+ feedback_type: QAFeedbackType | None,
+ sort_by_field: SortByOptions,
+ sort_by_direction: Literal["asc", "desc"],
+ offset: int,
+ limit: int | None,
+) -> Select[tuple[QueryEvent]]:
stmt = (
select(QueryEvent)
.where(
@@ -41,7 +43,10 @@ def fetch_query_history(
else:
stmt = stmt.order_by(order_by_field.desc())
- stmt = stmt.offset(offset).limit(limit)
+ if offset:
+ stmt = stmt.offset(offset)
+    if limit is not None:
+ stmt = stmt.limit(limit)
if query:
stmt = stmt.where(
@@ -54,4 +59,59 @@ def fetch_query_history(
if feedback_type:
stmt = stmt.where(QueryEvent.feedback == feedback_type)
+ return stmt
+
+
+def fetch_query_history(
+ db_session: Session,
+ start: datetime.datetime,
+ end: datetime.datetime,
+ query: str | None = None,
+ feedback_type: QAFeedbackType | None = None,
+ sort_by_field: SortByOptions = "time_created",
+ sort_by_direction: Literal["asc", "desc"] = "desc",
+ offset: int = 0,
+ limit: int | None = 500,
+) -> Sequence[QueryEvent]:
+ stmt = build_query_history_query(
+ start=start,
+ end=end,
+ query=query,
+ feedback_type=feedback_type,
+ sort_by_field=sort_by_field,
+ sort_by_direction=sort_by_direction,
+ offset=offset,
+ limit=limit,
+ )
+
return db_session.scalars(stmt).all()
+
+
+def fetch_query_history_with_user_email(
+ db_session: Session,
+ start: datetime.datetime,
+ end: datetime.datetime,
+ query: str | None = None,
+ feedback_type: QAFeedbackType | None = None,
+ sort_by_field: SortByOptions = "time_created",
+ sort_by_direction: Literal["asc", "desc"] = "desc",
+ offset: int = 0,
+ limit: int | None = 500,
+) -> Sequence[tuple[QueryEvent, str | None]]:
+ subquery = build_query_history_query(
+ start=start,
+ end=end,
+ query=query,
+ feedback_type=feedback_type,
+ sort_by_field=sort_by_field,
+ sort_by_direction=sort_by_direction,
+ offset=offset,
+ limit=limit,
+ ).subquery()
+ subquery_alias = aliased(QueryEvent, subquery)
+
+ stmt_with_user_email = select(subquery_alias, User.email).join( # type: ignore
+ User, subquery_alias.user_id == User.id, isouter=True
+ )
+
+ return db_session.execute(stmt_with_user_email).all() # type: ignore
diff --git a/backend/ee/danswer/server/query_history/api.py b/backend/ee/danswer/server/query_history/api.py
index c1176fbe351b..387e8dfdb282 100644
--- a/backend/ee/danswer/server/query_history/api.py
+++ b/backend/ee/danswer/server/query_history/api.py
@@ -1,10 +1,14 @@
+import csv
+import io
from collections.abc import Iterable
from datetime import datetime
from datetime import timedelta
+from datetime import timezone
from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException
+from fastapi.responses import StreamingResponse
from pydantic import BaseModel
from sqlalchemy.orm import Session
@@ -15,7 +19,9 @@ from danswer.db.engine import get_session
from danswer.db.feedback import fetch_query_event_by_id
from danswer.db.models import Document
from ee.danswer.db.document import fetch_documents_from_ids
-from ee.danswer.db.query_history import fetch_query_history
+from ee.danswer.db.query_history import (
+ fetch_query_history_with_user_email,
+)
router = APIRouter()
@@ -31,6 +37,7 @@ class AbridgedSearchDoc(BaseModel):
class QuerySnapshot(BaseModel):
id: int
+ user_email: str | None
query: str
llm_answer: str | None
retrieved_documents: list[AbridgedSearchDoc]
@@ -39,10 +46,14 @@ class QuerySnapshot(BaseModel):
@classmethod
def build(
- cls, query_event: db_models.QueryEvent, documents: Iterable[Document]
+ cls,
+ query_event: db_models.QueryEvent,
+ user_email: str | None,
+ documents: Iterable[Document],
) -> "QuerySnapshot":
return cls(
id=query_event.id,
+ user_email=user_email,
query=query_event.query,
llm_answer=query_event.llm_answer,
retrieved_documents=[
@@ -57,25 +68,43 @@ class QuerySnapshot(BaseModel):
time_created=query_event.time_created,
)
+ def to_json(self) -> dict[str, str]:
+ return {
+ "id": str(self.id),
+ "query": self.query,
+ "user_email": self.user_email or "",
+ "llm_answer": self.llm_answer or "",
+ "retrieved_documents": "|".join(
+ [
+ doc.link or doc.semantic_identifier
+ for doc in self.retrieved_documents
+ ]
+ ),
+ "feedback": self.feedback.value if self.feedback else "",
+ "time_created": str(self.time_created),
+ }
-@router.get("/admin/query-history")
-def get_query_history(
- feedback_type: QAFeedbackType | None = None,
- start: datetime | None = None,
- end: datetime | None = None,
- _: db_models.User | None = Depends(current_admin_user),
- db_session: Session = Depends(get_session),
+
+def fetch_and_process_query_history(
+ db_session: Session,
+ start: datetime | None,
+ end: datetime | None,
+ feedback_type: QAFeedbackType | None,
+ limit: int | None = 500,
) -> list[QuerySnapshot]:
- query_history = fetch_query_history(
+ query_history_with_user_email = fetch_query_history_with_user_email(
db_session=db_session,
start=start
- or (datetime.utcnow() - timedelta(days=30)), # default is 30d lookback
- end=end or datetime.utcnow(),
+ or (
+ datetime.now(tz=timezone.utc) - timedelta(days=30)
+ ), # default is 30d lookback
+ end=end or datetime.now(tz=timezone.utc),
feedback_type=feedback_type,
+ limit=limit,
)
all_relevant_document_ids: set[str] = set()
- for query_event in query_history:
+ for query_event, _ in query_history_with_user_email:
all_relevant_document_ids = all_relevant_document_ids.union(
query_event.retrieved_document_ids or []
)
@@ -87,7 +116,7 @@ def get_query_history(
}
query_snapshots: list[QuerySnapshot] = []
- for query_event in query_history:
+ for query_event, user_email in query_history_with_user_email:
unique_document_ids = set(query_event.retrieved_document_ids or [])
documents = [
document_id_to_document[doc_id]
@@ -95,11 +124,29 @@ def get_query_history(
if doc_id in document_id_to_document
]
query_snapshots.append(
- QuerySnapshot.build(query_event=query_event, documents=documents)
+ QuerySnapshot.build(
+ query_event=query_event, user_email=user_email, documents=documents
+ )
)
return query_snapshots
+@router.get("/admin/query-history")
+def get_query_history(
+ feedback_type: QAFeedbackType | None = None,
+ start: datetime | None = None,
+ end: datetime | None = None,
+ _: db_models.User | None = Depends(current_admin_user),
+ db_session: Session = Depends(get_session),
+) -> list[QuerySnapshot]:
+ return fetch_and_process_query_history(
+ db_session=db_session,
+ start=start,
+ end=end,
+ feedback_type=feedback_type,
+ )
+
+
@router.get("/admin/query-history/{query_id}")
def get_query(
query_id: int,
@@ -113,4 +160,40 @@ def get_query(
documents = fetch_documents_from_ids(
db_session, query_event.retrieved_document_ids or []
)
- return QuerySnapshot.build(query_event=query_event, documents=documents)
+ return QuerySnapshot.build(
+ query_event=query_event,
+ user_email=query_event.user.email if query_event.user else None,
+ documents=documents,
+ )
+
+
+@router.get("/admin/query-history-csv")
+def get_query_history_as_csv(
+ _: db_models.User | None = Depends(current_admin_user),
+ db_session: Session = Depends(get_session),
+) -> StreamingResponse:
+ complete_query_history = fetch_and_process_query_history(
+ db_session=db_session,
+ start=datetime.fromtimestamp(0, tz=timezone.utc),
+ end=datetime.now(tz=timezone.utc),
+ feedback_type=None,
+ limit=None,
+ )
+
+ # Create an in-memory text stream
+ stream = io.StringIO()
+ writer = csv.DictWriter(stream, fieldnames=list(QuerySnapshot.__fields__.keys()))
+ writer.writeheader()
+ for row in complete_query_history:
+ writer.writerow(row.to_json())
+
+ # Reset the stream's position to the start
+ stream.seek(0)
+
+ return StreamingResponse(
+ iter([stream.getvalue()]),
+ media_type="text/csv",
+ headers={
+            "Content-Disposition": "attachment; filename=danswer_query_history.csv"
+ },
+ )
diff --git a/web/src/app/ee/admin/performance/analytics/types.ts b/web/src/app/ee/admin/performance/analytics/types.ts
index 6e50f466e408..e656851a005c 100644
--- a/web/src/app/ee/admin/performance/analytics/types.ts
+++ b/web/src/app/ee/admin/performance/analytics/types.ts
@@ -21,6 +21,7 @@ export interface AbridgedSearchDoc {
export interface QuerySnapshot {
id: number;
query: string;
+ user_email: string | null;
llm_answer: string;
retrieved_documents: AbridgedSearchDoc[];
time_created: string;
diff --git a/web/src/app/ee/admin/performance/query-history/DownloadAsCSV.tsx b/web/src/app/ee/admin/performance/query-history/DownloadAsCSV.tsx
new file mode 100644
index 000000000000..550d9d3ceebe
--- /dev/null
+++ b/web/src/app/ee/admin/performance/query-history/DownloadAsCSV.tsx
@@ -0,0 +1,13 @@
+import { FiDownload } from "react-icons/fi";
+
+export function DownloadAsCSV() {
+ return (
+
+