mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-04-03 09:28:25 +02:00
Usage reports (#118)
--------- Co-authored-by: amohamdy99 <a.mohamdy99@gmail.com>
This commit is contained in:
parent
5c12a3e872
commit
f0b2b57d81
@ -145,4 +145,5 @@ class FileOrigin(str, Enum):
|
||||
CHAT_UPLOAD = "chat_upload"
|
||||
CHAT_IMAGE_GEN = "chat_image_gen"
|
||||
CONNECTOR = "connector"
|
||||
GENERATED_REPORT = "generated_report"
|
||||
OTHER = "other"
|
||||
|
@ -1368,3 +1368,30 @@ class EmailToExternalUserCache(Base):
|
||||
)
|
||||
|
||||
user = relationship("User")
|
||||
|
||||
|
||||
class UsageReport(Base):
|
||||
"""This stores metadata about usage reports generated by admin including user who generated
|
||||
them as well as the period they cover. The actual zip file of the report is stored as a large object
|
||||
using the PGFileStore
|
||||
"""
|
||||
|
||||
__tablename__ = "usage_reports"
|
||||
|
||||
id: Mapped[int] = mapped_column(primary_key=True)
|
||||
report_name: Mapped[str] = mapped_column(ForeignKey("file_store.file_name"))
|
||||
|
||||
# if None, report was auto-generated
|
||||
requestor_user_id: Mapped[UUID | None] = mapped_column(
|
||||
ForeignKey("user.id"), nullable=True
|
||||
)
|
||||
time_created: Mapped[datetime.datetime] = mapped_column(
|
||||
DateTime(timezone=True), server_default=func.now()
|
||||
)
|
||||
period_from: Mapped[datetime.datetime | None] = mapped_column(
|
||||
DateTime(timezone=True)
|
||||
)
|
||||
period_to: Mapped[datetime.datetime | None] = mapped_column(DateTime(timezone=True))
|
||||
|
||||
requestor = relationship("User")
|
||||
file = relationship("PGFileStore")
|
||||
|
@ -1,3 +1,4 @@
|
||||
import tempfile
|
||||
from io import BytesIO
|
||||
from typing import IO
|
||||
|
||||
@ -6,6 +7,8 @@ from sqlalchemy.orm import Session
|
||||
|
||||
from danswer.configs.constants import FileOrigin
|
||||
from danswer.db.models import PGFileStore
|
||||
from danswer.file_store.constants import MAX_IN_MEMORY_SIZE
|
||||
from danswer.file_store.constants import STANDARD_CHUNK_SIZE
|
||||
from danswer.utils.logger import setup_logger
|
||||
|
||||
logger = setup_logger()
|
||||
@ -26,18 +29,40 @@ def create_populate_lobj(
|
||||
pg_conn = get_pg_conn_from_session(db_session)
|
||||
large_object = pg_conn.lobject()
|
||||
|
||||
large_object.write(content.read())
|
||||
# write in multiple chunks to avoid loading the whole file into memory
|
||||
while True:
|
||||
chunk = content.read(STANDARD_CHUNK_SIZE)
|
||||
if not chunk:
|
||||
break
|
||||
large_object.write(chunk)
|
||||
|
||||
large_object.close()
|
||||
|
||||
return large_object.oid
|
||||
|
||||
|
||||
def read_lobj(lobj_oid: int, db_session: Session, mode: str | None = None) -> IO:
|
||||
def read_lobj(
|
||||
lobj_oid: int,
|
||||
db_session: Session,
|
||||
mode: str | None = None,
|
||||
use_tempfile: bool = False,
|
||||
) -> IO:
|
||||
pg_conn = get_pg_conn_from_session(db_session)
|
||||
large_object = (
|
||||
pg_conn.lobject(lobj_oid, mode=mode) if mode else pg_conn.lobject(lobj_oid)
|
||||
)
|
||||
return BytesIO(large_object.read())
|
||||
|
||||
if use_tempfile:
|
||||
temp_file = tempfile.SpooledTemporaryFile(max_size=MAX_IN_MEMORY_SIZE)
|
||||
while True:
|
||||
chunk = large_object.read(STANDARD_CHUNK_SIZE)
|
||||
if not chunk:
|
||||
break
|
||||
temp_file.write(chunk)
|
||||
temp_file.seek(0)
|
||||
return temp_file
|
||||
else:
|
||||
return BytesIO(large_object.read())
|
||||
|
||||
|
||||
def delete_lobj_by_id(
|
||||
|
2
backend/danswer/file_store/constants.py
Normal file
2
backend/danswer/file_store/constants.py
Normal file
@ -0,0 +1,2 @@
|
||||
MAX_IN_MEMORY_SIZE = 30 * 1024 * 1024 # 30MB
|
||||
STANDARD_CHUNK_SIZE = 10 * 1024 * 1024 # 10MB chunks
|
@ -5,6 +5,7 @@ from typing import IO
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from danswer.configs.constants import FileOrigin
|
||||
from danswer.db.models import PGFileStore
|
||||
from danswer.db.pg_file_store import create_populate_lobj
|
||||
from danswer.db.pg_file_store import delete_lobj_by_id
|
||||
from danswer.db.pg_file_store import delete_pgfilestore_by_file_name
|
||||
@ -26,6 +27,7 @@ class FileStore(ABC):
|
||||
display_name: str | None,
|
||||
file_origin: FileOrigin,
|
||||
file_type: str,
|
||||
file_metadata: dict | None = None,
|
||||
) -> None:
|
||||
"""
|
||||
Save a file to the blob store
|
||||
@ -41,12 +43,17 @@ class FileStore(ABC):
|
||||
raise NotImplementedError
|
||||
|
||||
@abstractmethod
|
||||
def read_file(self, file_name: str, mode: str | None) -> IO:
|
||||
def read_file(
|
||||
self, file_name: str, mode: str | None, use_tempfile: bool = False
|
||||
) -> IO:
|
||||
"""
|
||||
Read the content of a given file by the name
|
||||
|
||||
Parameters:
|
||||
- file_name: Name of file to read
|
||||
- mode: Mode to open the file (e.g. 'b' for binary)
|
||||
- use_tempfile: Whether to use a temporary file to store the contents
|
||||
in order to avoid loading the entire file into memory
|
||||
|
||||
Returns:
|
||||
Contents of the file and metadata dict
|
||||
@ -73,6 +80,7 @@ class PostgresBackedFileStore(FileStore):
|
||||
display_name: str | None,
|
||||
file_origin: FileOrigin,
|
||||
file_type: str,
|
||||
file_metadata: dict | None = None,
|
||||
) -> None:
|
||||
try:
|
||||
# The large objects in postgres are saved as special objects can be listed with
|
||||
@ -85,20 +93,33 @@ class PostgresBackedFileStore(FileStore):
|
||||
file_type=file_type,
|
||||
lobj_oid=obj_id,
|
||||
db_session=self.db_session,
|
||||
file_metadata=file_metadata,
|
||||
)
|
||||
self.db_session.commit()
|
||||
except Exception:
|
||||
self.db_session.rollback()
|
||||
raise
|
||||
|
||||
def read_file(self, file_name: str, mode: str | None = None) -> IO:
|
||||
def read_file(
|
||||
self, file_name: str, mode: str | None = None, use_tempfile: bool = False
|
||||
) -> IO:
|
||||
file_record = get_pgfilestore_by_file_name(
|
||||
file_name=file_name, db_session=self.db_session
|
||||
)
|
||||
return read_lobj(
|
||||
lobj_oid=file_record.lobj_oid, db_session=self.db_session, mode=mode
|
||||
lobj_oid=file_record.lobj_oid,
|
||||
db_session=self.db_session,
|
||||
mode=mode,
|
||||
use_tempfile=use_tempfile,
|
||||
)
|
||||
|
||||
def read_file_record(self, file_name: str) -> PGFileStore:
|
||||
file_record = get_pgfilestore_by_file_name(
|
||||
file_name=file_name, db_session=self.db_session
|
||||
)
|
||||
|
||||
return file_record
|
||||
|
||||
def delete_file(self, file_name: str) -> None:
|
||||
try:
|
||||
file_record = get_pgfilestore_by_file_name(
|
||||
|
@ -14,6 +14,7 @@ from danswer.utils.logger import setup_logger
|
||||
from danswer.utils.variable_functionality import global_version
|
||||
from ee.danswer.background.user_group_sync import name_user_group_sync_task
|
||||
from ee.danswer.db.user_group import fetch_user_groups
|
||||
from ee.danswer.server.reporting.usage_export_generation import create_new_usage_report
|
||||
from ee.danswer.user_groups.sync import sync_user_groups
|
||||
|
||||
logger = setup_logger()
|
||||
@ -76,6 +77,20 @@ def check_for_user_groups_sync_task() -> None:
|
||||
register_task(task.id, task_name, db_session)
|
||||
|
||||
|
||||
@celery_app.task(
    name="autogenerate_usage_report_task",
    soft_time_limit=JOB_TIMEOUT,
)
def autogenerate_usage_report_task() -> None:
    """Celery task that generates a usage report without a requesting user.

    The report is created through `create_new_usage_report` with no user
    and no period, i.e. it is recorded as auto-generated and covers all time.
    """
    engine = get_sqlalchemy_engine()
    with Session(engine) as db_session:
        # user_id=None -> auto-generated report, period=None -> all time
        create_new_usage_report(db_session=db_session, user_id=None, period=None)
|
||||
|
||||
|
||||
#####
|
||||
# Celery Beat (Periodic Tasks) Settings
|
||||
#####
|
||||
@ -84,5 +99,9 @@ celery_app.conf.beat_schedule = {
|
||||
"task": "check_for_user_groups_sync_task",
|
||||
"schedule": timedelta(seconds=5),
|
||||
},
|
||||
"autogenerate_usage_report": {
|
||||
"task": "autogenerate_usage_report_task",
|
||||
"schedule": timedelta(days=30), # TODO: change this to config flag
|
||||
},
|
||||
**(celery_app.conf.beat_schedule or {}),
|
||||
}
|
||||
|
@ -2,6 +2,8 @@ import datetime
|
||||
from typing import Literal
|
||||
|
||||
from sqlalchemy import asc
|
||||
from sqlalchemy import BinaryExpression
|
||||
from sqlalchemy import ColumnElement
|
||||
from sqlalchemy import desc
|
||||
from sqlalchemy.orm import contains_eager
|
||||
from sqlalchemy.orm import joinedload
|
||||
@ -17,16 +19,22 @@ def fetch_chat_sessions_eagerly_by_time(
|
||||
start: datetime.datetime,
|
||||
end: datetime.datetime,
|
||||
db_session: Session,
|
||||
ascending: bool = False,
|
||||
limit: int | None = 500,
|
||||
initial_id: int | None = None,
|
||||
) -> list[ChatSession]:
|
||||
time_order = asc(ChatSession.time_created) if ascending else desc(ChatSession.time_created) # type: ignore
|
||||
id_order = desc(ChatSession.id) # type: ignore
|
||||
time_order = desc(ChatSession.time_created) # type: ignore
|
||||
message_order = asc(ChatMessage.id) # type: ignore
|
||||
|
||||
filters: list[ColumnElement | BinaryExpression] = [
|
||||
ChatSession.time_created.between(start, end)
|
||||
]
|
||||
if initial_id:
|
||||
filters.append(ChatSession.id < initial_id)
|
||||
subquery = (
|
||||
db_session.query(ChatSession.id, ChatSession.time_created)
|
||||
.filter(ChatSession.time_created.between(start, end))
|
||||
.order_by(desc(ChatSession.id), time_order)
|
||||
.filter(*filters)
|
||||
.order_by(id_order, time_order)
|
||||
.distinct(ChatSession.id)
|
||||
.limit(limit)
|
||||
.subquery()
|
||||
@ -34,7 +42,7 @@ def fetch_chat_sessions_eagerly_by_time(
|
||||
|
||||
query = (
|
||||
db_session.query(ChatSession)
|
||||
.join(subquery, ChatSession.id == subquery.c.id)
|
||||
.join(subquery, ChatSession.id == subquery.c.id) # type: ignore
|
||||
.outerjoin(ChatMessage, ChatSession.id == ChatMessage.chat_session_id)
|
||||
.options(
|
||||
joinedload(ChatSession.user),
|
||||
|
108
backend/ee/danswer/db/usage_export.py
Normal file
108
backend/ee/danswer/db/usage_export.py
Normal file
@ -0,0 +1,108 @@
|
||||
import uuid
|
||||
from collections.abc import Generator
|
||||
from datetime import datetime
|
||||
from typing import IO
|
||||
|
||||
from fastapi_users_db_sqlalchemy import UUID_ID
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from danswer.configs.constants import MessageType
|
||||
from danswer.db.models import UsageReport
|
||||
from danswer.file_store.file_store import get_default_file_store
|
||||
from ee.danswer.db.query_history import fetch_chat_sessions_eagerly_by_time
|
||||
from ee.danswer.server.reporting.usage_export_models import ChatMessageSkeleton
|
||||
from ee.danswer.server.reporting.usage_export_models import FlowType
|
||||
from ee.danswer.server.reporting.usage_export_models import UsageReportMetadata
|
||||
|
||||
|
||||
# Gets skeletons of all message
|
||||
def get_empty_chat_messages_entries__paginated(
    db_session: Session,
    period: tuple[datetime, datetime],
    limit: int | None = 1,
    initial_id: int | None = None,
) -> list[ChatMessageSkeleton]:
    """Return skeletons for the user messages of one page of chat sessions.

    Parameters:
    - db_session: Session used to query the chat sessions
    - period: (start, end) bounds passed to the chat session query
    - limit: Maximum number of chat sessions to load for this page
    - initial_id: When set, only sessions with an id below this value are loaded

    Returns:
        One skeleton per user message found in the page (may be empty even
        though sessions were found, if none of them has a user message)
    """
    message_skeletons, _ = _fetch_message_skeleton_page(
        db_session, period, limit=limit, initial_id=initial_id
    )
    return message_skeletons


def _fetch_message_skeleton_page(
    db_session: Session,
    period: tuple[datetime, datetime],
    limit: int | None,
    initial_id: int | None,
) -> tuple[list[ChatMessageSkeleton], int | None]:
    """Load one page of chat sessions; return its user-message skeletons and the next cursor.

    The cursor is the smallest chat session id in the page, or None when the
    query returned no session at all (nothing left to paginate over).
    """
    chat_sessions = fetch_chat_sessions_eagerly_by_time(
        period[0], period[1], db_session, limit=limit, initial_id=initial_id
    )

    message_skeletons: list[ChatMessageSkeleton] = []
    for chat_session in chat_sessions:
        if chat_session.one_shot:
            flow_type = FlowType.SEARCH
        elif chat_session.danswerbot_flow:
            flow_type = FlowType.SLACK
        else:
            flow_type = FlowType.CHAT

        for message in chat_session.messages:
            # only count user messages
            if message.message_type != MessageType.USER:
                continue

            message_skeletons.append(
                ChatMessageSkeleton(
                    # the message's own id (previously the session id was stored here)
                    message_id=message.id,
                    chat_session_id=chat_session.id,
                    user_id=str(chat_session.user_id) if chat_session.user_id else None,
                    flow_type=flow_type,
                    time_sent=message.time_sent,
                )
            )

    # Derive the cursor from the sessions, not from the skeletons: a page whose
    # sessions contain no user message must still advance the pagination.
    next_initial_id = min(
        (chat_session.id for chat_session in chat_sessions), default=None
    )
    return message_skeletons, next_initial_id


def get_all_empty_chat_message_entries(
    db_session: Session,
    period: tuple[datetime, datetime],
) -> Generator[list[ChatMessageSkeleton], None, None]:
    """Yield batches of user-message skeletons for every chat session in the period.

    Sessions are walked page by page, each page being restricted to session ids
    below the smallest id seen so far. Iteration stops only when a page contains
    no chat session; pages without any user message are skipped instead of
    ending the iteration early.

    Parameters:
    - db_session: Session used to query the chat sessions
    - period: (start, end) bounds passed to the chat session query

    Returns:
        Generator of non-empty skeleton lists, one list per page
    """
    initial_id: int | None = None
    while True:
        # page size 1 matches the default of get_empty_chat_messages_entries__paginated
        message_skeletons, next_initial_id = _fetch_message_skeleton_page(
            db_session, period, limit=1, initial_id=initial_id
        )
        if next_initial_id is None:
            return

        if message_skeletons:
            yield message_skeletons
        initial_id = next_initial_id
|
||||
|
||||
|
||||
def get_all_usage_reports(db_session: Session) -> list[UsageReportMetadata]:
    """Return the metadata of every stored usage report.

    Parameters:
    - db_session: Session used to query the usage_reports table

    Returns:
        One UsageReportMetadata per UsageReport row; `requestor` is the
        stringified user id, or None when the row has no requestor
    """
    all_metadata: list[UsageReportMetadata] = []
    for report in db_session.query(UsageReport).all():
        requestor = (
            str(report.requestor_user_id) if report.requestor_user_id else None
        )
        all_metadata.append(
            UsageReportMetadata(
                report_name=report.report_name,
                requestor=requestor,
                time_created=report.time_created,
                period_from=report.period_from,
                period_to=report.period_to,
            )
        )
    return all_metadata
|
||||
|
||||
|
||||
def get_usage_report_data(db_session: Session, report_name: str) -> IO:
    """Open the stored usage report with the given name for reading.

    Parameters:
    - db_session: Session used to build the default file store
    - report_name: File name of the report inside the file store

    Returns:
        Binary file-like object with the report contents
    """
    # usage report may be very large, so don't load it all into memory
    return get_default_file_store(db_session).read_file(
        file_name=report_name, mode="b", use_tempfile=True
    )
|
||||
|
||||
|
||||
def write_usage_report(
    db_session: Session,
    report_name: str,
    user_id: uuid.UUID | UUID_ID | None,
    period: tuple[datetime, datetime] | None,
) -> UsageReport:
    """Persist a UsageReport row describing an already stored report file.

    Parameters:
    - db_session: Session the row is added to and committed with
    - report_name: File name of the report in the file store
    - user_id: Id of the requesting user, or None
    - period: (from, to) covered by the report, or None

    Returns:
        The newly committed UsageReport
    """
    period_from: datetime | None
    period_to: datetime | None
    if period:
        period_from, period_to = period[0], period[1]
    else:
        period_from = period_to = None

    report_row = UsageReport(
        report_name=report_name,
        requestor_user_id=user_id,
        period_from=period_from,
        period_to=period_to,
    )
    db_session.add(report_row)
    db_session.commit()
    return report_row
|
@ -33,6 +33,7 @@ from ee.danswer.server.query_and_chat.query_backend import (
|
||||
basic_router as query_router,
|
||||
)
|
||||
from ee.danswer.server.query_history.api import router as query_history_router
|
||||
from ee.danswer.server.reporting.usage_export_api import router as usage_export_router
|
||||
from ee.danswer.server.saml import router as saml_router
|
||||
from ee.danswer.server.seeding import seed_db
|
||||
from ee.danswer.server.token_rate_limits.api import (
|
||||
@ -97,6 +98,7 @@ def get_ee_application() -> FastAPI:
|
||||
application, token_rate_limit_settings_router
|
||||
)
|
||||
include_router_with_global_prefix_prepended(application, enterprise_settings_router)
|
||||
include_router_with_global_prefix_prepended(application, usage_export_router)
|
||||
|
||||
# Ensure all routes have auth enabled or are explicitly marked as public
|
||||
check_ee_router_auth(application)
|
||||
|
82
backend/ee/danswer/server/reporting/usage_export_api.py
Normal file
82
backend/ee/danswer/server/reporting/usage_export_api.py
Normal file
@ -0,0 +1,82 @@
|
||||
from collections.abc import Generator
|
||||
from datetime import datetime
|
||||
|
||||
from fastapi import APIRouter
|
||||
from fastapi import Depends
|
||||
from fastapi import HTTPException
|
||||
from fastapi import Response
|
||||
from fastapi.responses import StreamingResponse
|
||||
from pydantic import BaseModel
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from danswer.auth.users import current_admin_user
|
||||
from danswer.db.engine import get_session
|
||||
from danswer.db.models import User
|
||||
from danswer.file_store.constants import STANDARD_CHUNK_SIZE
|
||||
from ee.danswer.db.usage_export import get_all_usage_reports
|
||||
from ee.danswer.db.usage_export import get_usage_report_data
|
||||
from ee.danswer.db.usage_export import UsageReportMetadata
|
||||
from ee.danswer.server.reporting.usage_export_generation import create_new_usage_report
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
class GenerateUsageReportParams(BaseModel):
    """Request body of the generate-usage-report endpoint."""

    # ISO 8601 datetime strings (parsed with datetime.fromisoformat by the endpoint).
    # Both bounds must be supplied to request a custom period; leaving both
    # unset requests a report without a period restriction.
    period_from: str | None = None
    period_to: str | None = None
|
||||
|
||||
|
||||
@router.post("/admin/generate-usage-report")
def generate_report(
    params: GenerateUsageReportParams,
    user: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> UsageReportMetadata:
    """Generate a new usage report and return its metadata.

    Parameters:
    - params: Optional period bounds as ISO 8601 strings; both or neither
      must be given
    - user: The requesting user (recorded as requestor when present)
    - db_session: Session used to generate and store the report

    Returns:
        Metadata of the newly created report

    Raises:
        HTTPException (400) if only one period bound is given or a bound is
        not a valid ISO 8601 datetime
    """
    period = None
    if params.period_from or params.period_to:
        # a half-specified period used to be silently ignored, producing an
        # all-time report the caller did not ask for
        if not (params.period_from and params.period_to):
            raise HTTPException(
                status_code=400,
                detail="period_from and period_to must be provided together",
            )
        try:
            period = (
                _parse_iso_datetime(params.period_from),
                _parse_iso_datetime(params.period_to),
            )
        except ValueError as e:
            raise HTTPException(status_code=400, detail=str(e)) from e

    new_report = create_new_usage_report(db_session, user.id if user else None, period)
    return new_report


def _parse_iso_datetime(value: str) -> datetime:
    """Parse an ISO 8601 datetime string, accepting a trailing "Z" as UTC."""
    # datetime.fromisoformat only understands the "Z" suffix from Python 3.11 on;
    # normalising it keeps timestamps such as JavaScript's toISOString() parseable
    if value.endswith("Z"):
        value = value[:-1] + "+00:00"
    return datetime.fromisoformat(value)
|
||||
|
||||
|
||||
@router.get("/admin/usage-report/{report_name}")
def read_usage_report(
    report_name: str,
    _: User | None = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> Response:
    """Stream a stored usage report zip back to the client.

    Parameters:
    - report_name: File name of the report in the file store
    - db_session: Session used to read the report

    Returns:
        StreamingResponse serving the zip as an attachment

    Raises:
        HTTPException (404) if reading the report raises a ValueError
    """
    try:
        file = get_usage_report_data(db_session, report_name)
    except ValueError as e:
        raise HTTPException(status_code=404, detail=str(e)) from e

    def iterfile() -> Generator[bytes, None, None]:
        # The report is handed over as an open file object; close it once the
        # stream is exhausted or the generator is closed early so the
        # underlying temp file is not leaked.
        try:
            while chunk := file.read(STANDARD_CHUNK_SIZE):
                yield chunk
        finally:
            file.close()

    return StreamingResponse(
        content=iterfile(),
        media_type="application/zip",
        headers={"Content-Disposition": f"attachment; filename={report_name}"},
    )
|
||||
|
||||
|
||||
@router.get("/admin/usage-report")
def fetch_usage_reports(
    _: User | None = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> list[UsageReportMetadata]:
    """List the metadata of all stored usage reports.

    Raises:
        HTTPException (404) if fetching the reports raises a ValueError
    """
    try:
        reports = get_all_usage_reports(db_session)
    except ValueError as err:
        raise HTTPException(status_code=404, detail=str(err))
    return reports
|
165
backend/ee/danswer/server/reporting/usage_export_generation.py
Normal file
165
backend/ee/danswer/server/reporting/usage_export_generation.py
Normal file
@ -0,0 +1,165 @@
|
||||
import csv
|
||||
import tempfile
|
||||
import uuid
|
||||
import zipfile
|
||||
from datetime import datetime
|
||||
from datetime import timedelta
|
||||
from datetime import timezone
|
||||
|
||||
from fastapi_users_db_sqlalchemy import UUID_ID
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from danswer.auth.schemas import UserStatus
|
||||
from danswer.configs.constants import FileOrigin
|
||||
from danswer.db.users import list_users
|
||||
from danswer.file_store.constants import MAX_IN_MEMORY_SIZE
|
||||
from danswer.file_store.file_store import FileStore
|
||||
from danswer.file_store.file_store import get_default_file_store
|
||||
from ee.danswer.db.usage_export import get_all_empty_chat_message_entries
|
||||
from ee.danswer.db.usage_export import write_usage_report
|
||||
from ee.danswer.server.reporting.usage_export_models import UsageReportMetadata
|
||||
from ee.danswer.server.reporting.usage_export_models import UserSkeleton
|
||||
|
||||
|
||||
def generate_chat_messages_report(
    db_session: Session,
    file_store: FileStore,
    report_id: str,
    period: tuple[datetime, datetime] | None,
) -> str:
    """Write the chat message CSV of a report to the file store.

    Parameters:
    - db_session: Session used to fetch the chat message skeletons
    - file_store: Store the CSV is saved to
    - report_id: Identifier used as prefix of the stored file name
    - period: (from, to) to cover, or None to cover everything up to now

    Returns:
        Name of the file the CSV was saved under
    """
    if period is None:
        # no period requested: go from the Unix epoch up to the current time
        range_start = datetime.fromtimestamp(0, tz=timezone.utc)
        range_end = datetime.now(tz=timezone.utc)
    else:
        # time-picker sends a time which is at the beginning of the day
        # so we need to add one day to the end time to make it inclusive
        range_start = period[0]
        range_end = period[1] + timedelta(days=1)
    effective_period = (range_start, range_end)

    file_name = f"{report_id}_chat_sessions"

    with tempfile.SpooledTemporaryFile(
        max_size=MAX_IN_MEMORY_SIZE, mode="w+"
    ) as csv_buffer:
        writer = csv.writer(csv_buffer, delimiter=",")
        writer.writerow(["session_id", "user_id", "flow_type", "time_sent"])

        batches = get_all_empty_chat_message_entries(db_session, effective_period)
        for skeleton_batch in batches:
            writer.writerows(
                [
                    skeleton.chat_session_id,
                    skeleton.user_id,
                    skeleton.flow_type,
                    skeleton.time_sent.isoformat(),
                ]
                for skeleton in skeleton_batch
            )

        # rewind so the file store reads the CSV from its first byte
        csv_buffer.seek(0)
        file_store.save_file(
            file_name=file_name,
            content=csv_buffer,
            display_name=file_name,
            file_origin=FileOrigin.OTHER,
            file_type="text/csv",
        )

    return file_name
|
||||
|
||||
|
||||
def generate_user_report(
    db_session: Session,
    file_store: FileStore,
    report_id: str,
) -> str:
    """Write the user CSV (user id and status) of a report to the file store.

    Parameters:
    - db_session: Session used to list the users
    - file_store: Store the CSV is saved to
    - report_id: Identifier used as prefix of the stored file name

    Returns:
        Name of the file the CSV was saved under
    """
    file_name = f"{report_id}_users"

    with tempfile.SpooledTemporaryFile(
        max_size=MAX_IN_MEMORY_SIZE, mode="w+"
    ) as csv_buffer:
        writer = csv.writer(csv_buffer, delimiter=",")
        writer.writerow(["user_id", "status"])

        for user in list_users(db_session):
            if user.is_active:
                status = UserStatus.LIVE
            else:
                status = UserStatus.DEACTIVATED
            skeleton = UserSkeleton(user_id=str(user.id), status=status)
            writer.writerow([skeleton.user_id, skeleton.status])

        # rewind so the file store reads the CSV from its first byte
        csv_buffer.seek(0)
        file_store.save_file(
            file_name=file_name,
            content=csv_buffer,
            display_name=file_name,
            file_origin=FileOrigin.OTHER,
            file_type="text/csv",
        )

    return file_name
|
||||
|
||||
|
||||
def create_new_usage_report(
    db_session: Session,
    user_id: UUID_ID | None,  # None = auto-generated
    period: tuple[datetime, datetime] | None,
) -> UsageReportMetadata:
    """Generate a usage report zip, store it and record its metadata.

    The chat message and user CSVs are generated into the file store, bundled
    as `chat_messages.csv` and `users.csv` into a zip, the zip is saved to the
    file store and a UsageReport row pointing at it is written.

    Parameters:
    - db_session: Session used for all queries and for the file store
    - user_id: Id of the requesting user; None for auto-generated reports
    - period: (from, to) to cover, or None for all time

    Returns:
        Metadata of the newly stored report
    """
    report_id = str(uuid.uuid4())
    file_store = get_default_file_store(db_session)

    messages_filename = generate_chat_messages_report(
        db_session, file_store, report_id, period
    )
    users_filename = generate_user_report(db_session, file_store, report_id)

    # (name inside the zip, name of the CSV in the file store)
    zip_members = (
        ("chat_messages.csv", messages_filename),
        ("users.csv", users_filename),
    )

    with tempfile.SpooledTemporaryFile(max_size=MAX_IN_MEMORY_SIZE) as zip_buffer:
        with zipfile.ZipFile(zip_buffer, "a", zipfile.ZIP_DEFLATED) as zip_file:
            for archive_name, stored_name in zip_members:
                # close each file handed back by the store once it has been
                # copied into the archive instead of leaking it
                with file_store.read_file(
                    stored_name, mode="b", use_tempfile=True
                ) as csv_file:
                    zip_file.writestr(archive_name, csv_file.read())

        zip_buffer.seek(0)

        # store zip blob to file_store
        report_name = (
            f"{datetime.now(tz=timezone.utc).strftime('%Y-%m-%d')}"
            f"_{report_id}_usage_report.zip"
        )
        file_store.save_file(
            file_name=report_name,
            content=zip_buffer,
            display_name=report_name,
            file_origin=FileOrigin.GENERATED_REPORT,
            file_type="application/zip",
        )

    # add report after zip file is written
    new_report = write_usage_report(db_session, report_name, user_id, period)

    return UsageReportMetadata(
        report_name=new_report.report_name,
        requestor=(
            str(new_report.requestor_user_id) if new_report.requestor_user_id else None
        ),
        time_created=new_report.time_created,
        period_from=new_report.period_from,
        period_to=new_report.period_to,
    )
|
33
backend/ee/danswer/server/reporting/usage_export_models.py
Normal file
33
backend/ee/danswer/server/reporting/usage_export_models.py
Normal file
@ -0,0 +1,33 @@
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from danswer.auth.schemas import UserStatus
|
||||
|
||||
|
||||
class FlowType(str, Enum):
    """Kind of flow a chat session belongs to, as written to usage reports."""

    CHAT = "chat"
    SEARCH = "search"
    SLACK = "slack"
|
||||
|
||||
|
||||
class ChatMessageSkeleton(BaseModel):
    """Metadata-only record of a chat message (no message text) for usage reports."""

    message_id: int
    chat_session_id: int
    # stringified user id; None when no user id is available
    user_id: str | None
    flow_type: FlowType
    time_sent: datetime
|
||||
|
||||
|
||||
class UserSkeleton(BaseModel):
    """Minimal record of a user (id and status) for usage reports."""

    user_id: str
    status: UserStatus
|
||||
|
||||
|
||||
class UsageReportMetadata(BaseModel):
    """Metadata describing a generated usage report."""

    report_name: str
    # stringified id of the requesting user; None when there is no requestor
    requestor: str | None
    time_created: datetime
    period_from: datetime | None  # None = All time
    period_to: datetime | None
|
263
web/src/app/ee/admin/performance/usage/UsageReports.tsx
Normal file
263
web/src/app/ee/admin/performance/usage/UsageReports.tsx
Normal file
@ -0,0 +1,263 @@
|
||||
"use client";
|
||||
|
||||
import { errorHandlingFetcher } from "@/lib/fetcher";
|
||||
|
||||
import { FiDownload, FiDownloadCloud } from "react-icons/fi";
|
||||
import {
|
||||
DateRangePicker,
|
||||
DateRangePickerItem,
|
||||
DateRangePickerValue,
|
||||
Divider,
|
||||
Table,
|
||||
TableBody,
|
||||
TableCell,
|
||||
TableHead,
|
||||
TableHeaderCell,
|
||||
TableRow,
|
||||
Text,
|
||||
Title,
|
||||
} from "@tremor/react";
|
||||
import useSWR from "swr";
|
||||
import { Button } from "@tremor/react";
|
||||
import { useState } from "react";
|
||||
import { UsageReport } from "./types";
|
||||
import { ThreeDotsLoader } from "@/components/Loading";
|
||||
import Link from "next/link";
|
||||
import { humanReadableFormat, humanReadableFormatWithTime } from "@/lib/time";
|
||||
import { ErrorCallout } from "@/components/ErrorCallout";
|
||||
import { PageSelector } from "@/components/PageSelector";
|
||||
|
||||
// Form that lets an admin pick a date range, generate a usage report for it
// and download the resulting zip.
function GenerateReportInput() {
  const [dateRange, setDateRange] = useState<DateRangePickerValue | undefined>(
    undefined
  );
  const [isLoading, setIsLoading] = useState(false);

  const [errorOccurred, setErrorOccurred] = useState<Error | null>(null);

  // Triggers a browser download of the given blob through a temporary link.
  const download = (bytes: Blob) => {
    const objectUrl = URL.createObjectURL(bytes);
    const elm = document.createElement("a");
    elm.href = objectUrl;
    elm.setAttribute("download", "usage_reports.zip");
    elm.click();
    // Release the blob again; deferred so the download triggered by the click
    // has had time to start before the URL is revoked.
    setTimeout(() => URL.revokeObjectURL(objectUrl), 60 * 1000);
  };

  // Asks the backend to generate a report, then fetches and downloads it.
  const requestReport = async () => {
    setIsLoading(true);
    setErrorOccurred(null);
    try {
      let period_from: string | null = null;
      let period_to: string | null = null;

      // "allTime" (or no selection) is sent as a null period
      if (dateRange?.selectValue !== "allTime" && dateRange?.from) {
        period_from = dateRange?.from?.toISOString();
        period_to = dateRange?.to?.toISOString() ?? new Date().toISOString();
      }

      const res = await fetch("/api/admin/generate-usage-report", {
        method: "POST",
        credentials: "include",
        headers: {
          "Content-Type": "application/json",
        },
        body: JSON.stringify({
          period_from: period_from,
          period_to: period_to,
        }),
      });

      if (!res.ok) {
        throw Error(`Received an error: ${res.statusText}`);
      }

      const report = await res.json();
      const transfer = await fetch(
        `/api/admin/usage-report/${report.report_name}`
      );

      // without this check an error response body would be saved as the zip
      if (!transfer.ok) {
        throw Error(`Received an error: ${transfer.statusText}`);
      }

      const bytes = await transfer.blob();
      download(bytes);
    } catch (e) {
      setErrorOccurred(e as Error);
    } finally {
      setIsLoading(false);
    }
  };

  // start dates of the preset ranges offered by the picker
  const today = new Date();

  const lastWeek = new Date();
  lastWeek.setDate(today.getDate() - 7);

  const lastMonth = new Date();
  lastMonth.setMonth(today.getMonth() - 1);

  const lastYear = new Date();
  lastYear.setFullYear(today.getFullYear() - 1);

  return (
    <div className="mb-8">
      <Title className="mb-2">Generate Usage Reports</Title>
      <Text className="mb-8">
        Generate usage statistics for users in the workspace.
      </Text>
      <DateRangePicker
        maxDate={new Date()}
        defaultValue={{
          from: undefined,
          to: undefined,
          selectValue: "allTime",
        }}
        className="mb-3"
        enableClear={false}
        selectPlaceholder="Range"
        value={dateRange}
        onValueChange={setDateRange}
      >
        <DateRangePickerItem key="lastWeek" value="lastWeek" from={lastWeek}>
          Last 7 days
        </DateRangePickerItem>
        <DateRangePickerItem key="lastMonth" value="lastMonth" from={lastMonth}>
          Last 30 days
        </DateRangePickerItem>
        <DateRangePickerItem key="lastYear" value="lastYear" from={lastYear}>
          Last year
        </DateRangePickerItem>
        <DateRangePickerItem
          key="allTime"
          value="allTime"
          from={new Date(1970, 0, 1)}
        >
          All time
        </DateRangePickerItem>
      </DateRangePicker>
      <Button
        color={"blue"}
        icon={FiDownloadCloud}
        size="xs"
        loading={isLoading}
        disabled={isLoading}
        onClick={() => requestReport()}
      >
        Generate Report
      </Button>
      <p className="mt-1 text-xs">This can take a few minutes.</p>
      {errorOccurred && (
        <ErrorCallout
          errorTitle="Something went wrong."
          errorMsg={errorOccurred?.toString()}
        />
      )}
    </div>
  );
}
|
||||
|
||||
const USAGE_REPORT_URL = "/api/admin/usage-report";
|
||||
|
||||
// Paginated table of previously generated usage reports with download links.
function UsageReportsTable() {
  const NUM_IN_PAGE = 10;
  const [page, setPage] = useState(1);

  const {
    data: usageReportsMetadata,
    error: usageReportsError,
    isLoading: usageReportsIsLoading,
  } = useSWR<UsageReport[]>(USAGE_REPORT_URL, errorHandlingFetcher);

  // reports are shown in the reverse of the order the API returned them
  const allReports = usageReportsMetadata ?? [];
  const pageStart = NUM_IN_PAGE * (page - 1);
  const paginatedReports = [...allReports]
    .reverse()
    .slice(pageStart, pageStart + NUM_IN_PAGE);

  const totalPages = Math.ceil(allReports.length / NUM_IN_PAGE);

  // short id shown in the first column: second "_"-separated part of the
  // report name, falling back to the start of the name
  const shortName = (report: UsageReport) =>
    report.report_name.split("_")[1]?.substring(0, 8) ||
    report.report_name.substring(0, 8);

  const periodLabel = (report: UsageReport) =>
    report.period_from
      ? `${humanReadableFormat(
          report.period_from
        )} - ${humanReadableFormat(report.period_to!)}`
      : "All time";

  let content;
  if (usageReportsIsLoading) {
    content = (
      <div className="flex justify-center w-full">
        <ThreeDotsLoader />
      </div>
    );
  } else if (usageReportsError) {
    content = (
      <ErrorCallout
        errorTitle="Something went wrong."
        errorMsg={(usageReportsError as Error).toString()}
      />
    );
  } else {
    content = (
      <>
        <Table>
          <TableHead>
            <TableRow>
              <TableHeaderCell>Report</TableHeaderCell>
              <TableHeaderCell>Period</TableHeaderCell>
              <TableHeaderCell>Generated By</TableHeaderCell>
              <TableHeaderCell>Time Generated</TableHeaderCell>
              <TableHeaderCell>Download</TableHeaderCell>
            </TableRow>
          </TableHead>

          <TableBody>
            {paginatedReports.map((report) => (
              <TableRow key={report.report_name}>
                <TableCell>{shortName(report)}</TableCell>
                <TableCell>{periodLabel(report)}</TableCell>
                <TableCell>{report.requestor ?? "Auto generated"}</TableCell>
                <TableCell>
                  {humanReadableFormatWithTime(report.time_created)}
                </TableCell>
                <TableCell>
                  <Link
                    href={`/api/admin/usage-report/${report.report_name}`}
                    className="flex justify-center"
                  >
                    <FiDownload color="primary" />
                  </Link>
                </TableCell>
              </TableRow>
            ))}
          </TableBody>
        </Table>
        <div className="mt-3 flex">
          <div className="mx-auto">
            <PageSelector
              totalPages={totalPages}
              currentPage={page}
              onPageChange={(newPage) => {
                setPage(newPage);
                window.scrollTo({
                  top: 0,
                  left: 0,
                  behavior: "smooth",
                });
              }}
            />
          </div>
        </div>
      </>
    );
  }

  return (
    <div>
      <Title className="mb-2 mt-6 mx-auto"> Previous Reports </Title>
      {content}
    </div>
  );
}
|
||||
|
||||
// Usage report section: the report generation form followed by the table of
// previously generated reports.
export default function UsageReports() {
  return (
    <div className="mx-auto container">
      <GenerateReportInput />
      <Divider />
      <UsageReportsTable />
    </div>
  );
}
|
@ -8,6 +8,8 @@ import { BarChartIcon } from "@/components/icons/icons";
|
||||
import { useTimeRange } from "../lib";
|
||||
import { AdminPageTitle } from "@/components/admin/Title";
|
||||
import { FiActivity } from "react-icons/fi";
|
||||
import UsageReports from "./UsageReports";
|
||||
import { Divider } from "@tremor/react";
|
||||
|
||||
export default function AnalyticsPage() {
|
||||
const [timeRange, setTimeRange] = useTimeRange();
|
||||
@ -25,6 +27,8 @@ export default function AnalyticsPage() {
|
||||
<QueryPerformanceChart timeRange={timeRange} />
|
||||
<FeedbackChart timeRange={timeRange} />
|
||||
<DanswerBotChart timeRange={timeRange} />
|
||||
<Divider />
|
||||
<UsageReports />
|
||||
</main>
|
||||
);
|
||||
}
|
||||
|
@ -52,3 +52,11 @@ export interface ChatSessionMinimal {
|
||||
time_created: string;
|
||||
feedback_type: Feedback | "mixed" | null;
|
||||
}
|
||||
|
||||
// Metadata of a generated usage report as returned by /api/admin/usage-report.
export interface UsageReport {
  report_name: string;
  // null is displayed as "Auto generated"
  requestor: string | null;
  time_created: string;
  // null is displayed as "All time"
  period_from: string | null;
  period_to: string | null;
}
|
||||
|
@ -75,3 +75,21 @@ export function humanReadableFormat(dateString: string): string {
|
||||
// Format the date and return it
|
||||
return formatter.format(date);
|
||||
}
|
||||
|
||||
export function humanReadableFormatWithTime(datetimeString: string): string {
  // Format with the 'en-US' locale: full month name, numeric day and year,
  // plus hour and minute
  const options: Intl.DateTimeFormatOptions = {
    month: "long",
    day: "numeric",
    year: "numeric",
    hour: "numeric",
    minute: "numeric",
  };

  const parsed = new Date(datetimeString);
  return new Intl.DateTimeFormat("en-US", options).format(parsed);
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user