Improvements to Query History (#17)

* Add option to download query-history as a CSV

* Add user email + more complete timestamp
This commit is contained in:
Chris Weaver
2023-11-18 17:47:04 -08:00
parent f799d9aa11
commit 3984350ff9
7 changed files with 236 additions and 35 deletions

View File

@@ -4,27 +4,29 @@ from typing import cast
from typing import Literal
from sqlalchemy import or_
from sqlalchemy import Select
from sqlalchemy import select
from sqlalchemy.orm import aliased
from sqlalchemy.orm import Session
from sqlalchemy.orm.attributes import InstrumentedAttribute
from danswer.configs.constants import QAFeedbackType
from danswer.db.models import QueryEvent
from danswer.db.models import User
SortByOptions = Literal["time_created", "feedback"]
def fetch_query_history(
db_session: Session,
def build_query_history_query(
start: datetime.datetime,
end: datetime.datetime,
query: str | None = None,
feedback_type: QAFeedbackType | None = None,
sort_by_field: SortByOptions = "time_created",
sort_by_direction: Literal["asc", "desc"] = "desc",
offset: int = 0,
limit: int = 500,
) -> Sequence[QueryEvent]:
query: str | None,
feedback_type: QAFeedbackType | None,
sort_by_field: SortByOptions,
sort_by_direction: Literal["asc", "desc"],
offset: int,
limit: int | None,
) -> Select[tuple[QueryEvent]]:
stmt = (
select(QueryEvent)
.where(
@@ -41,7 +43,10 @@ def fetch_query_history(
else:
stmt = stmt.order_by(order_by_field.desc())
stmt = stmt.offset(offset).limit(limit)
if offset:
stmt = stmt.offset(offset)
if limit:
stmt = stmt.limit(limit)
if query:
stmt = stmt.where(
@@ -54,4 +59,59 @@ def fetch_query_history(
if feedback_type:
stmt = stmt.where(QueryEvent.feedback == feedback_type)
return stmt
def fetch_query_history(
    db_session: Session,
    start: datetime.datetime,
    end: datetime.datetime,
    query: str | None = None,
    feedback_type: QAFeedbackType | None = None,
    sort_by_field: SortByOptions = "time_created",
    sort_by_direction: Literal["asc", "desc"] = "desc",
    offset: int = 0,
    limit: int | None = 500,
) -> Sequence[QueryEvent]:
    """Execute the query-history statement and return the matching events.

    Every filtering, sorting and paging argument is forwarded unchanged to
    `build_query_history_query`; this function only runs the resulting
    statement on `db_session`.

    Args:
        db_session: Session the statement is executed on.
        start: Forwarded to the statement builder.
        end: Forwarded to the statement builder.
        query: Forwarded to the statement builder; `None` by default.
        feedback_type: Forwarded to the statement builder; `None` by default.
        sort_by_field: Forwarded to the statement builder.
        sort_by_direction: Forwarded to the statement builder.
        offset: Forwarded to the statement builder.
        limit: Forwarded to the statement builder; defaults to 500.

    Returns:
        The `QueryEvent` objects yielded by the statement.
    """
    history_stmt = build_query_history_query(
        start=start,
        end=end,
        query=query,
        feedback_type=feedback_type,
        sort_by_field=sort_by_field,
        sort_by_direction=sort_by_direction,
        offset=offset,
        limit=limit,
    )
    matching_events = db_session.scalars(history_stmt)
    return matching_events.all()
def fetch_query_history_with_user_email(
    db_session: Session,
    start: datetime.datetime,
    end: datetime.datetime,
    query: str | None = None,
    feedback_type: QAFeedbackType | None = None,
    sort_by_field: SortByOptions = "time_created",
    sort_by_direction: Literal["asc", "desc"] = "desc",
    offset: int = 0,
    limit: int | None = 500,
) -> Sequence[tuple[QueryEvent, str | None]]:
    """Fetch query events paired with the email of the user who issued them.

    The filtered / sorted / paged statement from `build_query_history_query`
    is wrapped in a subquery and LEFT OUTER JOINed against `User`, so events
    without a matching user are still returned with `None` as the email.

    Args:
        db_session: Session the statement is executed on.
        start: Forwarded to the statement builder.
        end: Forwarded to the statement builder.
        query: Forwarded to the statement builder; `None` by default.
        feedback_type: Forwarded to the statement builder; `None` by default.
        sort_by_field: Attribute name used for sorting; forwarded to the
            builder and re-applied on the outer statement.
        sort_by_direction: "asc" or "desc"; forwarded to the builder and
            re-applied on the outer statement.
        offset: Forwarded to the statement builder.
        limit: Forwarded to the statement builder; defaults to 500.

    Returns:
        One `(QueryEvent, email-or-None)` row per selected event, in the
        requested sort order.
    """
    subquery = build_query_history_query(
        start=start,
        end=end,
        query=query,
        feedback_type=feedback_type,
        sort_by_field=sort_by_field,
        sort_by_direction=sort_by_direction,
        offset=offset,
        limit=limit,
    ).subquery()
    subquery_alias = aliased(QueryEvent, subquery)

    # The ORDER BY inside the subquery only decides which rows survive
    # OFFSET / LIMIT. Once the subquery is joined, SQL leaves the order of the
    # outer result unspecified, so the sort has to be repeated here.
    # NOTE(review): assumes the builder sorts on the QueryEvent attribute named
    # by `sort_by_field` (ascending only for "asc") - confirm against
    # build_query_history_query.
    order_by_field = getattr(subquery_alias, sort_by_field)
    if sort_by_direction == "asc":
        ordering = order_by_field.asc()
    else:
        ordering = order_by_field.desc()

    stmt_with_user_email = select(subquery_alias, User.email).join(  # type: ignore
        User, subquery_alias.user_id == User.id, isouter=True
    )
    stmt_with_user_email = stmt_with_user_email.order_by(ordering)

    return db_session.execute(stmt_with_user_email).all()  # type: ignore

View File

@@ -1,10 +1,14 @@
import csv
import io
from collections.abc import Iterable
from datetime import datetime
from datetime import timedelta
from datetime import timezone
from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException
from fastapi.responses import StreamingResponse
from pydantic import BaseModel
from sqlalchemy.orm import Session
@@ -15,7 +19,9 @@ from danswer.db.engine import get_session
from danswer.db.feedback import fetch_query_event_by_id
from danswer.db.models import Document
from ee.danswer.db.document import fetch_documents_from_ids
from ee.danswer.db.query_history import fetch_query_history
from ee.danswer.db.query_history import (
fetch_query_history_with_user_email,
)
router = APIRouter()
@@ -31,6 +37,7 @@ class AbridgedSearchDoc(BaseModel):
class QuerySnapshot(BaseModel):
id: int
user_email: str | None
query: str
llm_answer: str | None
retrieved_documents: list[AbridgedSearchDoc]
@@ -39,10 +46,14 @@ class QuerySnapshot(BaseModel):
@classmethod
def build(
cls, query_event: db_models.QueryEvent, documents: Iterable[Document]
cls,
query_event: db_models.QueryEvent,
user_email: str | None,
documents: Iterable[Document],
) -> "QuerySnapshot":
return cls(
id=query_event.id,
user_email=user_email,
query=query_event.query,
llm_answer=query_event.llm_answer,
retrieved_documents=[
@@ -57,25 +68,43 @@ class QuerySnapshot(BaseModel):
time_created=query_event.time_created,
)
def to_json(self) -> dict[str, str]:
    """Flatten this snapshot into a dict whose values are all strings.

    Missing optional values (`user_email`, `llm_answer`, `feedback`) become
    empty strings. Retrieved documents are collapsed into a single
    "|"-separated string, using each document's link when it has one and its
    semantic identifier otherwise.
    """
    document_refs = "|".join(
        doc.link or doc.semantic_identifier for doc in self.retrieved_documents
    )
    feedback_text = self.feedback.value if self.feedback else ""

    flattened: dict[str, str] = {
        "id": str(self.id),
        "query": self.query,
        "user_email": self.user_email or "",
        "llm_answer": self.llm_answer or "",
        "retrieved_documents": document_refs,
        "feedback": feedback_text,
        "time_created": str(self.time_created),
    }
    return flattened
@router.get("/admin/query-history")
def get_query_history(
feedback_type: QAFeedbackType | None = None,
start: datetime | None = None,
end: datetime | None = None,
_: db_models.User | None = Depends(current_admin_user),
db_session: Session = Depends(get_session),
def fetch_and_process_query_history(
db_session: Session,
start: datetime | None,
end: datetime | None,
feedback_type: QAFeedbackType | None,
limit: int | None = 500,
) -> list[QuerySnapshot]:
query_history = fetch_query_history(
query_history_with_user_email = fetch_query_history_with_user_email(
db_session=db_session,
start=start
or (datetime.utcnow() - timedelta(days=30)), # default is 30d lookback
end=end or datetime.utcnow(),
or (
datetime.now(tz=timezone.utc) - timedelta(days=30)
), # default is 30d lookback
end=end or datetime.now(tz=timezone.utc),
feedback_type=feedback_type,
limit=limit,
)
all_relevant_document_ids: set[str] = set()
for query_event in query_history:
for query_event, _ in query_history_with_user_email:
all_relevant_document_ids = all_relevant_document_ids.union(
query_event.retrieved_document_ids or []
)
@@ -87,7 +116,7 @@ def get_query_history(
}
query_snapshots: list[QuerySnapshot] = []
for query_event in query_history:
for query_event, user_email in query_history_with_user_email:
unique_document_ids = set(query_event.retrieved_document_ids or [])
documents = [
document_id_to_document[doc_id]
@@ -95,11 +124,29 @@ def get_query_history(
if doc_id in document_id_to_document
]
query_snapshots.append(
QuerySnapshot.build(query_event=query_event, documents=documents)
QuerySnapshot.build(
query_event=query_event, user_email=user_email, documents=documents
)
)
return query_snapshots
@router.get("/admin/query-history")
def get_query_history(
    feedback_type: QAFeedbackType | None = None,
    start: datetime | None = None,
    end: datetime | None = None,
    _: db_models.User | None = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> list[QuerySnapshot]:
    # Returns query-history snapshots for the optional feedback / time filters.
    # `limit` is not passed, so fetch_and_process_query_history applies its own
    # default. `_` is unused; it is declared only so the `current_admin_user`
    # dependency runs for this route.
    # (Documented with comments instead of a docstring because FastAPI
    # publishes handler docstrings as the endpoint description.)
    snapshots = fetch_and_process_query_history(
        db_session=db_session,
        feedback_type=feedback_type,
        start=start,
        end=end,
    )
    return snapshots
@router.get("/admin/query-history/{query_id}")
def get_query(
query_id: int,
@@ -113,4 +160,40 @@ def get_query(
documents = fetch_documents_from_ids(
db_session, query_event.retrieved_document_ids or []
)
return QuerySnapshot.build(query_event=query_event, documents=documents)
return QuerySnapshot.build(
query_event=query_event,
user_email=query_event.user.email if query_event.user else None,
documents=documents,
)
@router.get("/admin/query-history-csv")
def get_query_history_as_csv(
    _: db_models.User | None = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> StreamingResponse:
    # Exports the query history, from the Unix epoch up to now and with no row
    # limit, as a CSV attachment. `_` is unused; it is declared only so the
    # `current_admin_user` dependency runs for this route.
    # (Documented with comments instead of a docstring because FastAPI
    # publishes handler docstrings as the endpoint description.)
    epoch_start = datetime.fromtimestamp(0, tz=timezone.utc)
    export_end = datetime.now(tz=timezone.utc)
    complete_query_history = fetch_and_process_query_history(
        db_session=db_session,
        start=epoch_start,
        end=export_end,
        feedback_type=None,
        limit=None,
    )

    # The CSV is assembled fully in memory; the columns are the model's fields.
    csv_buffer = io.StringIO()
    column_names = list(QuerySnapshot.__fields__.keys())
    csv_writer = csv.DictWriter(csv_buffer, fieldnames=column_names)
    csv_writer.writeheader()
    csv_writer.writerows(snapshot.to_json() for snapshot in complete_query_history)

    # Rewind the buffer before its contents are handed to the response.
    csv_buffer.seek(0)

    download_headers = {
        "Content-Disposition": "attachment;filename=danswer_query_history.csv"
    }
    return StreamingResponse(
        iter([csv_buffer.getvalue()]),
        media_type="text/csv",
        headers=download_headers,
    )

View File

@@ -21,6 +21,7 @@ export interface AbridgedSearchDoc {
export interface QuerySnapshot {
id: number;
query: string;
user_email: string | null;
llm_answer: string;
retrieved_documents: AbridgedSearchDoc[];
time_created: string;

View File

@@ -0,0 +1,13 @@
import { FiDownload } from "react-icons/fi";
/**
 * Link styled as a button that points the browser at the query-history CSV
 * endpoint.
 */
export function DownloadAsCSV() {
  const csvEndpoint = "/api/admin/query-history-csv";
  const linkClasses =
    "text-gray-300 flex ml-auto py-2 px-4 border border-gray-800 h-fit cursor-pointer hover:bg-gray-800 text-sm";

  return (
    <a href={csvEndpoint} className={linkClasses}>
      <FiDownload className="my-auto mr-2" />
      Download as CSV
    </a>
  );
}

View File

@@ -14,7 +14,10 @@ import { Divider } from "@tremor/react";
import { Select, SelectItem } from "@tremor/react";
import { ThreeDotsLoader } from "@/components/Loading";
import { QuerySnapshot } from "../analytics/types";
import { timestampToDateString } from "@/lib/dateUtils";
import {
timestampToDateString,
timestampToReadableDate,
} from "@/lib/dateUtils";
import { FiBook, FiFrown, FiMinus, FiSmile } from "react-icons/fi";
import { useState } from "react";
import { Feedback } from "@/lib/types";
@@ -22,6 +25,7 @@ import { DateRangeSelector } from "../DateRangeSelector";
import { PageSelector } from "@/components/PageSelector";
import Link from "next/link";
import { FeedbackBadge } from "./FeedbackBadge";
import { DownloadAsCSV } from "./DownloadAsCSV";
const NUM_IN_PAGE = 20;
@@ -54,7 +58,10 @@ function QueryHistoryTableRow({
<TableCell>
<FeedbackBadge feedback={querySnapshot.feedback} />
</TableCell>
<TableCell>{timestampToDateString(querySnapshot.time_created)}</TableCell>
<TableCell>{querySnapshot.user_email || "-"}</TableCell>
<TableCell>
{timestampToReadableDate(querySnapshot.time_created)}
</TableCell>
{/* Wrapping in <td> to avoid console warnings */}
<td className="w-0 p-0">
<Link
@@ -114,13 +121,20 @@ export function QueryHistoryTable() {
<Card className="mt-8">
{queryHistoryData ? (
<>
<div className="gap-y-3 flex flex-col">
<SelectFeedbackType
value={selectedFeedbackType || "all"}
onValueChange={setSelectedFeedbackType}
/>
<div className="flex">
<div className="gap-y-3 flex flex-col">
<SelectFeedbackType
value={selectedFeedbackType || "all"}
onValueChange={setSelectedFeedbackType}
/>
<DateRangeSelector value={timeRange} onValueChange={setTimeRange} />
<DateRangeSelector
value={timeRange}
onValueChange={setTimeRange}
/>
</div>
<DownloadAsCSV />
</div>
<Divider />
<Table className="mt-5">
@@ -130,6 +144,7 @@ export function QueryHistoryTable() {
<TableHeaderCell>LLM Answer</TableHeaderCell>
<TableHeaderCell>Retrieved Documents</TableHeaderCell>
<TableHeaderCell>Feedback</TableHeaderCell>
<TableHeaderCell>User</TableHeaderCell>
<TableHeaderCell>Date</TableHeaderCell>
</TableRow>
</TableHead>

View File

@@ -1,10 +1,11 @@
import { Bold, Text, Card, Title, Divider } from "@tremor/react";
import { Bold, Text, Card, Title, Divider, Italic } from "@tremor/react";
import { QuerySnapshot } from "../../analytics/types";
import { buildUrl } from "@/lib/utilsSS";
import { BackButton } from "./BackButton";
import { FiBook } from "react-icons/fi";
import { processCookies } from "@/lib/userSS";
import { cookies } from "next/headers";
import { timestampToReadableDate } from "@/lib/dateUtils";
export default async function QueryPage({
params,
@@ -26,6 +27,11 @@ export default async function QueryPage({
<Card className="mt-4">
<Title>Query Details</Title>
<Text className="flex flex-wrap whitespace-normal mt-1 text-xs">
{queryEvent.user_email || "-"},{" "}
{timestampToReadableDate(queryEvent.time_created)}
</Text>
<Divider />
<div className="flex flex-col gap-y-3">

View File

@@ -16,3 +16,26 @@ export const timestampToDateString = (timestamp: string) => {
.padStart(2, "0")}`;
return formattedDate;
};
// Date portion: numeric year with zero-padded month and day.
const dateOptions: Intl.DateTimeFormatOptions = {
  day: "2-digit",
  month: "2-digit",
  year: "numeric",
};

// Time portion: hour and zero-padded minute on a 12-hour (AM/PM) clock.
const timeOptions: Intl.DateTimeFormatOptions = {
  hour12: true,
  hour: "numeric",
  minute: "2-digit",
};

/**
 * Formats a timestamp string as "<date>, <time>".
 *
 * The string is parsed with `new Date(timestamp)` and both parts are rendered
 * with the runtime's default locale (no explicit locale is passed).
 *
 * @param timestamp - Any string accepted by the `Date` constructor.
 * @returns The formatted date and time joined by ", ".
 */
export const timestampToReadableDate = (timestamp: string) => {
  const parsed = new Date(timestamp);
  const datePart = parsed.toLocaleDateString(undefined, dateOptions);
  const timePart = parsed.toLocaleTimeString(undefined, timeOptions);
  return `${datePart}, ${timePart}`;
};