mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-04-02 08:58:11 +02:00
Reorder Tables for cleaner extending (#800)
This commit is contained in:
parent
8020db9e9a
commit
8954a04602
@ -65,6 +65,11 @@ class Base(DeclarativeBase):
|
||||
pass
|
||||
|
||||
|
||||
"""
|
||||
Auth/Authz (users, permissions, access) Tables
|
||||
"""
|
||||
|
||||
|
||||
class OAuthAccount(SQLAlchemyBaseOAuthAccountTableUUID, Base):
|
||||
# even an almost empty token from keycloak will not fit the default 1024 bytes
|
||||
access_token: Mapped[str] = mapped_column(Text, nullable=False) # type: ignore
|
||||
@ -93,7 +98,7 @@ class AccessToken(SQLAlchemyBaseAccessTokenTableUUID, Base):
|
||||
|
||||
|
||||
"""
|
||||
Association tables
|
||||
Association Tables
|
||||
NOTE: must be at the top since they are referenced by other tables
|
||||
"""
|
||||
|
||||
@ -186,6 +191,49 @@ class ConnectorCredentialPair(Base):
|
||||
)
|
||||
|
||||
|
||||
"""
|
||||
Documents/Indexing Tables
|
||||
"""
|
||||
|
||||
|
||||
class Document(Base):
|
||||
__tablename__ = "document"
|
||||
|
||||
# this should correspond to the ID of the document
|
||||
# (as is passed around in Danswer)
|
||||
id: Mapped[str] = mapped_column(String, primary_key=True)
|
||||
from_ingestion_api: Mapped[bool] = mapped_column(
|
||||
Boolean, default=False, nullable=True
|
||||
)
|
||||
# 0 for neutral, positive for mostly endorse, negative for mostly reject
|
||||
boost: Mapped[int] = mapped_column(Integer, default=DEFAULT_BOOST)
|
||||
hidden: Mapped[bool] = mapped_column(Boolean, default=False)
|
||||
semantic_id: Mapped[str] = mapped_column(String)
|
||||
# First Section's link
|
||||
link: Mapped[str | None] = mapped_column(String, nullable=True)
|
||||
# The updated time is also used as a measure of the last successful state of the doc
|
||||
# pulled from the source (to help skip reindexing already updated docs in case of
|
||||
# connector retries)
|
||||
doc_updated_at: Mapped[datetime.datetime | None] = mapped_column(
|
||||
DateTime(timezone=True), nullable=True
|
||||
)
|
||||
# The following are not attached to User because the account/email may not be known
|
||||
# within Danswer
|
||||
# Something like the document creator
|
||||
primary_owners: Mapped[list[str] | None] = mapped_column(
|
||||
postgresql.ARRAY(String), nullable=True
|
||||
)
|
||||
# Something like assignee or space owner
|
||||
secondary_owners: Mapped[list[str] | None] = mapped_column(
|
||||
postgresql.ARRAY(String), nullable=True
|
||||
)
|
||||
# TODO if more sensitive data is added here for display, make sure to add user/group permission
|
||||
|
||||
retrieval_feedbacks: Mapped[List["DocumentRetrievalFeedback"]] = relationship(
|
||||
"DocumentRetrievalFeedback", back_populates="document"
|
||||
)
|
||||
|
||||
|
||||
class Connector(Base):
|
||||
__tablename__ = "connector"
|
||||
|
||||
@ -316,8 +364,7 @@ class IndexAttempt(Base):
|
||||
|
||||
|
||||
class DocumentByConnectorCredentialPair(Base):
|
||||
"""Represents an indexing of a document by a specific connector / credential
|
||||
pair"""
|
||||
"""Represents an indexing of a document by a specific connector / credential pair"""
|
||||
|
||||
__tablename__ = "document_by_connector_credential_pair"
|
||||
|
||||
@ -338,6 +385,11 @@ class DocumentByConnectorCredentialPair(Base):
|
||||
)
|
||||
|
||||
|
||||
"""
|
||||
Messages Tables
|
||||
"""
|
||||
|
||||
|
||||
class QueryEvent(Base):
|
||||
__tablename__ = "query_event"
|
||||
|
||||
@ -374,92 +426,6 @@ class QueryEvent(Base):
|
||||
)
|
||||
|
||||
|
||||
class DocumentRetrievalFeedback(Base):
|
||||
__tablename__ = "document_retrieval_feedback"
|
||||
|
||||
id: Mapped[int] = mapped_column(primary_key=True)
|
||||
qa_event_id: Mapped[int] = mapped_column(
|
||||
ForeignKey("query_event.id"),
|
||||
)
|
||||
document_id: Mapped[str] = mapped_column(
|
||||
ForeignKey("document.id"),
|
||||
)
|
||||
# How high up this document is in the results, 1 for first
|
||||
document_rank: Mapped[int] = mapped_column(Integer)
|
||||
clicked: Mapped[bool] = mapped_column(Boolean, default=False)
|
||||
feedback: Mapped[SearchFeedbackType | None] = mapped_column(
|
||||
Enum(SearchFeedbackType), nullable=True
|
||||
)
|
||||
|
||||
qa_event: Mapped[QueryEvent] = relationship(
|
||||
"QueryEvent", back_populates="document_feedbacks"
|
||||
)
|
||||
document: Mapped["Document"] = relationship(
|
||||
"Document", back_populates="retrieval_feedbacks"
|
||||
)
|
||||
|
||||
|
||||
class Document(Base):
|
||||
__tablename__ = "document"
|
||||
|
||||
# this should correspond to the ID of the document
|
||||
# (as is passed around in Danswer)
|
||||
id: Mapped[str] = mapped_column(String, primary_key=True)
|
||||
from_ingestion_api: Mapped[bool] = mapped_column(
|
||||
Boolean, default=False, nullable=True
|
||||
)
|
||||
# 0 for neutral, positive for mostly endorse, negative for mostly reject
|
||||
boost: Mapped[int] = mapped_column(Integer, default=DEFAULT_BOOST)
|
||||
hidden: Mapped[bool] = mapped_column(Boolean, default=False)
|
||||
semantic_id: Mapped[str] = mapped_column(String)
|
||||
# First Section's link
|
||||
link: Mapped[str | None] = mapped_column(String, nullable=True)
|
||||
# The updated time is also used as a measure of the last successful state of the doc
|
||||
# pulled from the source (to help skip reindexing already updated docs in case of
|
||||
# connector retries)
|
||||
doc_updated_at: Mapped[datetime.datetime | None] = mapped_column(
|
||||
DateTime(timezone=True), nullable=True
|
||||
)
|
||||
# The following are not attached to User because the account/email may not be known
|
||||
# within Danswer
|
||||
# Something like the document creator
|
||||
primary_owners: Mapped[list[str] | None] = mapped_column(
|
||||
postgresql.ARRAY(String), nullable=True
|
||||
)
|
||||
# Something like assignee or space owner
|
||||
secondary_owners: Mapped[list[str] | None] = mapped_column(
|
||||
postgresql.ARRAY(String), nullable=True
|
||||
)
|
||||
# TODO if more sensitive data is added here for display, make sure to add user/group permission
|
||||
|
||||
retrieval_feedbacks: Mapped[List[DocumentRetrievalFeedback]] = relationship(
|
||||
"DocumentRetrievalFeedback", back_populates="document"
|
||||
)
|
||||
|
||||
|
||||
class DocumentSet(Base):
|
||||
__tablename__ = "document_set"
|
||||
|
||||
id: Mapped[int] = mapped_column(Integer, primary_key=True)
|
||||
name: Mapped[str] = mapped_column(String, unique=True)
|
||||
description: Mapped[str] = mapped_column(String)
|
||||
user_id: Mapped[UUID | None] = mapped_column(ForeignKey("user.id"), nullable=True)
|
||||
# whether or not changes to the document set have been propagated
|
||||
is_up_to_date: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)
|
||||
|
||||
connector_credential_pairs: Mapped[list[ConnectorCredentialPair]] = relationship(
|
||||
"ConnectorCredentialPair",
|
||||
secondary=DocumentSet__ConnectorCredentialPair.__table__,
|
||||
back_populates="document_sets",
|
||||
overlaps="document_set",
|
||||
)
|
||||
personas: Mapped[list["Persona"]] = relationship(
|
||||
"Persona",
|
||||
secondary=Persona__DocumentSet.__table__,
|
||||
back_populates="document_sets",
|
||||
)
|
||||
|
||||
|
||||
class ChatSession(Base):
|
||||
__tablename__ = "chat_session"
|
||||
|
||||
@ -487,6 +453,129 @@ class ChatSession(Base):
|
||||
persona: Mapped[Optional["Persona"]] = relationship("Persona")
|
||||
|
||||
|
||||
class ChatMessage(Base):
|
||||
__tablename__ = "chat_message"
|
||||
|
||||
chat_session_id: Mapped[int] = mapped_column(
|
||||
ForeignKey("chat_session.id"), primary_key=True
|
||||
)
|
||||
message_number: Mapped[int] = mapped_column(Integer, primary_key=True)
|
||||
edit_number: Mapped[int] = mapped_column(Integer, default=0, primary_key=True)
|
||||
parent_edit_number: Mapped[int | None] = mapped_column(
|
||||
Integer, nullable=True
|
||||
) # null if first message
|
||||
latest: Mapped[bool] = mapped_column(Boolean, default=True)
|
||||
message: Mapped[str] = mapped_column(Text)
|
||||
token_count: Mapped[int] = mapped_column(Integer)
|
||||
message_type: Mapped[MessageType] = mapped_column(Enum(MessageType))
|
||||
reference_docs: Mapped[dict[str, Any] | None] = mapped_column(
|
||||
postgresql.JSONB(), nullable=True
|
||||
)
|
||||
persona_id: Mapped[int | None] = mapped_column(
|
||||
ForeignKey("persona.id"), nullable=True
|
||||
)
|
||||
time_sent: Mapped[datetime.datetime] = mapped_column(
|
||||
DateTime(timezone=True), server_default=func.now()
|
||||
)
|
||||
|
||||
chat_session: Mapped[ChatSession] = relationship("ChatSession")
|
||||
persona: Mapped[Optional["Persona"]] = relationship("Persona")
|
||||
|
||||
|
||||
"""
|
||||
Feedback, Logging, Metrics Tables
|
||||
"""
|
||||
|
||||
|
||||
class DocumentRetrievalFeedback(Base):
|
||||
__tablename__ = "document_retrieval_feedback"
|
||||
|
||||
id: Mapped[int] = mapped_column(primary_key=True)
|
||||
qa_event_id: Mapped[int] = mapped_column(
|
||||
ForeignKey("query_event.id"),
|
||||
)
|
||||
document_id: Mapped[str] = mapped_column(
|
||||
ForeignKey("document.id"),
|
||||
)
|
||||
# How high up this document is in the results, 1 for first
|
||||
document_rank: Mapped[int] = mapped_column(Integer)
|
||||
clicked: Mapped[bool] = mapped_column(Boolean, default=False)
|
||||
feedback: Mapped[SearchFeedbackType | None] = mapped_column(
|
||||
Enum(SearchFeedbackType), nullable=True
|
||||
)
|
||||
|
||||
qa_event: Mapped[QueryEvent] = relationship(
|
||||
"QueryEvent", back_populates="document_feedbacks"
|
||||
)
|
||||
document: Mapped[Document] = relationship(
|
||||
"Document", back_populates="retrieval_feedbacks"
|
||||
)
|
||||
|
||||
|
||||
class ChatMessageFeedback(Base):
|
||||
__tablename__ = "chat_feedback"
|
||||
|
||||
id: Mapped[int] = mapped_column(Integer, primary_key=True)
|
||||
chat_message_chat_session_id: Mapped[int] = mapped_column(Integer)
|
||||
chat_message_message_number: Mapped[int] = mapped_column(Integer)
|
||||
chat_message_edit_number: Mapped[int] = mapped_column(Integer)
|
||||
is_positive: Mapped[bool | None] = mapped_column(Boolean, nullable=True)
|
||||
feedback_text: Mapped[str | None] = mapped_column(Text, nullable=True)
|
||||
|
||||
__table_args__ = (
|
||||
ForeignKeyConstraint(
|
||||
[
|
||||
"chat_message_chat_session_id",
|
||||
"chat_message_message_number",
|
||||
"chat_message_edit_number",
|
||||
],
|
||||
[
|
||||
"chat_message.chat_session_id",
|
||||
"chat_message.message_number",
|
||||
"chat_message.edit_number",
|
||||
],
|
||||
),
|
||||
)
|
||||
|
||||
chat_message: Mapped[ChatMessage] = relationship(
|
||||
"ChatMessage",
|
||||
foreign_keys=[
|
||||
chat_message_chat_session_id,
|
||||
chat_message_message_number,
|
||||
chat_message_edit_number,
|
||||
],
|
||||
backref="feedbacks",
|
||||
)
|
||||
|
||||
|
||||
"""
|
||||
Structures, Organizational, Configurations Tables
|
||||
"""
|
||||
|
||||
|
||||
class DocumentSet(Base):
|
||||
__tablename__ = "document_set"
|
||||
|
||||
id: Mapped[int] = mapped_column(Integer, primary_key=True)
|
||||
name: Mapped[str] = mapped_column(String, unique=True)
|
||||
description: Mapped[str] = mapped_column(String)
|
||||
user_id: Mapped[UUID | None] = mapped_column(ForeignKey("user.id"), nullable=True)
|
||||
# whether or not changes to the document set have been propagated
|
||||
is_up_to_date: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)
|
||||
|
||||
connector_credential_pairs: Mapped[list[ConnectorCredentialPair]] = relationship(
|
||||
"ConnectorCredentialPair",
|
||||
secondary=DocumentSet__ConnectorCredentialPair.__table__,
|
||||
back_populates="document_sets",
|
||||
overlaps="document_set",
|
||||
)
|
||||
personas: Mapped[list["Persona"]] = relationship(
|
||||
"Persona",
|
||||
secondary=Persona__DocumentSet.__table__,
|
||||
back_populates="document_sets",
|
||||
)
|
||||
|
||||
|
||||
class ToolInfo(TypedDict):
|
||||
name: str
|
||||
description: str
|
||||
@ -537,71 +626,6 @@ class Persona(Base):
|
||||
)
|
||||
|
||||
|
||||
class ChatMessage(Base):
|
||||
__tablename__ = "chat_message"
|
||||
|
||||
chat_session_id: Mapped[int] = mapped_column(
|
||||
ForeignKey("chat_session.id"), primary_key=True
|
||||
)
|
||||
message_number: Mapped[int] = mapped_column(Integer, primary_key=True)
|
||||
edit_number: Mapped[int] = mapped_column(Integer, default=0, primary_key=True)
|
||||
parent_edit_number: Mapped[int | None] = mapped_column(
|
||||
Integer, nullable=True
|
||||
) # null if first message
|
||||
latest: Mapped[bool] = mapped_column(Boolean, default=True)
|
||||
message: Mapped[str] = mapped_column(Text)
|
||||
token_count: Mapped[int] = mapped_column(Integer)
|
||||
message_type: Mapped[MessageType] = mapped_column(Enum(MessageType))
|
||||
reference_docs: Mapped[dict[str, Any] | None] = mapped_column(
|
||||
postgresql.JSONB(), nullable=True
|
||||
)
|
||||
persona_id: Mapped[int | None] = mapped_column(
|
||||
ForeignKey("persona.id"), nullable=True
|
||||
)
|
||||
time_sent: Mapped[datetime.datetime] = mapped_column(
|
||||
DateTime(timezone=True), server_default=func.now()
|
||||
)
|
||||
|
||||
chat_session: Mapped[ChatSession] = relationship("ChatSession")
|
||||
persona: Mapped[Persona | None] = relationship("Persona")
|
||||
|
||||
|
||||
class ChatMessageFeedback(Base):
|
||||
__tablename__ = "chat_feedback"
|
||||
|
||||
id: Mapped[int] = mapped_column(Integer, primary_key=True)
|
||||
chat_message_chat_session_id: Mapped[int] = mapped_column(Integer)
|
||||
chat_message_message_number: Mapped[int] = mapped_column(Integer)
|
||||
chat_message_edit_number: Mapped[int] = mapped_column(Integer)
|
||||
is_positive: Mapped[bool | None] = mapped_column(Boolean, nullable=True)
|
||||
feedback_text: Mapped[str | None] = mapped_column(Text, nullable=True)
|
||||
|
||||
__table_args__ = (
|
||||
ForeignKeyConstraint(
|
||||
[
|
||||
"chat_message_chat_session_id",
|
||||
"chat_message_message_number",
|
||||
"chat_message_edit_number",
|
||||
],
|
||||
[
|
||||
"chat_message.chat_session_id",
|
||||
"chat_message.message_number",
|
||||
"chat_message.edit_number",
|
||||
],
|
||||
),
|
||||
)
|
||||
|
||||
chat_message: Mapped[ChatMessage] = relationship(
|
||||
"ChatMessage",
|
||||
foreign_keys=[
|
||||
chat_message_chat_session_id,
|
||||
chat_message_message_number,
|
||||
chat_message_edit_number,
|
||||
],
|
||||
backref="feedbacks",
|
||||
)
|
||||
|
||||
|
||||
AllowedAnswerFilters = (
|
||||
Literal["well_answered_postfilter"] | Literal["questionmark_prefilter"]
|
||||
)
|
||||
|
Loading…
x
Reference in New Issue
Block a user