mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-03-17 13:22:42 +01:00
Bugfix/query history notes (#4204)
* early work in progress
* rename utility script
* move actual data seeding to a shareable function
* add test
* make the test pass with the fix
* fix comment
* slight improvements and notes to query history and seeding
* update test

---------

Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
This commit is contained in:
parent
61ccba82a9
commit
9217243e3e
@ -27,6 +27,8 @@ def get_empty_chat_messages_entries__paginated(
|
||||
first element is the most recent timestamp out of the sessions iterated
|
||||
- this timestamp can be used to paginate forward in time
|
||||
second element is a list of messages belonging to all the sessions iterated
|
||||
|
||||
Only messages of type USER are returned
|
||||
"""
|
||||
chat_sessions = fetch_chat_sessions_eagerly_by_time(
|
||||
start=period[0],
|
||||
|
@ -48,10 +48,15 @@ def fetch_and_process_chat_session_history(
|
||||
feedback_type: QAFeedbackType | None,
|
||||
limit: int | None = 500,
|
||||
) -> list[ChatSessionSnapshot]:
|
||||
# observed to be slow at a scale of 8192 sessions and 4 messages per session
|
||||
|
||||
# this is a little slow (5 seconds)
|
||||
chat_sessions = fetch_chat_sessions_eagerly_by_time(
|
||||
start=start, end=end, db_session=db_session, limit=limit
|
||||
)
|
||||
|
||||
# this is VERY slow (80 seconds) due to create_chat_chain being called
|
||||
# for each session. Needs optimizing.
|
||||
chat_session_snapshots = [
|
||||
snapshot_from_chat_session(chat_session=chat_session, db_session=db_session)
|
||||
for chat_session in chat_sessions
|
||||
@ -246,6 +251,8 @@ def get_query_history_as_csv(
|
||||
detail="Query history has been disabled by the administrator.",
|
||||
)
|
||||
|
||||
# this call is very expensive and is timing out via endpoint
|
||||
# TODO: optimize call and/or generate via background task
|
||||
complete_chat_session_history = fetch_and_process_chat_session_history(
|
||||
db_session=db_session,
|
||||
start=start or datetime.fromtimestamp(0, tz=timezone.utc),
|
||||
|
@ -1,6 +1,7 @@
|
||||
import random
|
||||
from datetime import datetime
|
||||
from datetime import timedelta
|
||||
from logging import getLogger
|
||||
|
||||
from onyx.configs.constants import MessageType
|
||||
from onyx.db.chat import create_chat_session
|
||||
@ -9,6 +10,8 @@ from onyx.db.chat import get_or_create_root_message
|
||||
from onyx.db.engine import get_session_with_current_tenant
|
||||
from onyx.db.models import ChatSession
|
||||
|
||||
logger = getLogger(__name__)
|
||||
|
||||
|
||||
def seed_chat_history(num_sessions: int, num_messages: int, days: int) -> None:
|
||||
"""Utility function to seed chat history for testing.
|
||||
@ -19,12 +22,18 @@ def seed_chat_history(num_sessions: int, num_messages: int, days: int) -> None:
|
||||
the times.
|
||||
"""
|
||||
with get_session_with_current_tenant() as db_session:
|
||||
logger.info(f"Seeding {num_sessions} sessions.")
|
||||
for y in range(0, num_sessions):
|
||||
create_chat_session(db_session, f"pytest_session_{y}", None, None)
|
||||
|
||||
# randomize all session times
|
||||
logger.info(f"Seeding {num_messages} messages per session.")
|
||||
rows = db_session.query(ChatSession).all()
|
||||
for row in rows:
|
||||
for x in range(0, len(rows)):
|
||||
if x % 1024 == 0:
|
||||
logger.info(f"Seeded messages for {x} sessions so far.")
|
||||
|
||||
row = rows[x]
|
||||
row.time_created = datetime.utcnow() - timedelta(
|
||||
days=random.randint(0, days)
|
||||
)
|
||||
@ -34,20 +43,37 @@ def seed_chat_history(num_sessions: int, num_messages: int, days: int) -> None:
|
||||
|
||||
root_message = get_or_create_root_message(row.id, db_session)
|
||||
|
||||
current_message_type = MessageType.USER
|
||||
parent_message = root_message
|
||||
for x in range(0, num_messages):
|
||||
if current_message_type == MessageType.USER:
|
||||
msg = f"pytest_message_user_{x}"
|
||||
else:
|
||||
msg = f"pytest_message_assistant_{x}"
|
||||
|
||||
chat_message = create_new_chat_message(
|
||||
row.id,
|
||||
root_message,
|
||||
f"pytest_message_{x}",
|
||||
parent_message,
|
||||
msg,
|
||||
None,
|
||||
0,
|
||||
MessageType.USER,
|
||||
current_message_type,
|
||||
db_session,
|
||||
)
|
||||
|
||||
chat_message.time_sent = row.time_created + timedelta(
|
||||
minutes=random.randint(0, 10)
|
||||
)
|
||||
db_session.commit()
|
||||
|
||||
db_session.commit()
|
||||
|
||||
current_message_type = (
|
||||
MessageType.ASSISTANT
|
||||
if current_message_type == MessageType.USER
|
||||
else MessageType.USER
|
||||
)
|
||||
parent_message = chat_message
|
||||
|
||||
db_session.commit()
|
||||
|
||||
logger.info(f"Seeded messages for {len(rows)} sessions. Finished.")
|
||||
|
@ -10,7 +10,9 @@ from onyx.db.seeding.chat_history_seeding import seed_chat_history
|
||||
def test_usage_reports(reset: None) -> None:
|
||||
EXPECTED_SESSIONS = 2048
|
||||
MESSAGES_PER_SESSION = 4
|
||||
EXPECTED_MESSAGES = EXPECTED_SESSIONS * MESSAGES_PER_SESSION
|
||||
|
||||
# divide by 2 because only messages of type USER are returned
|
||||
EXPECTED_MESSAGES = EXPECTED_SESSIONS * MESSAGES_PER_SESSION / 2
|
||||
|
||||
seed_chat_history(EXPECTED_SESSIONS, MESSAGES_PER_SESSION, 90)
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user