mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-06-20 04:50:57 +02:00
Improve slack flow
This commit is contained in:
parent
0b0665044f
commit
7869f23e12
@ -0,0 +1,39 @@
|
|||||||
|
"""Add slack bot display type
|
||||||
|
|
||||||
|
Revision ID: fcd135795f21
|
||||||
|
Revises: 0a2b51deb0b8
|
||||||
|
Create Date: 2024-03-04 17:03:27.116284
|
||||||
|
|
||||||
|
"""
|
||||||
|
from alembic import op
|
||||||
|
import sqlalchemy as sa
|
||||||
|
|
||||||
|
# revision identifiers, used by Alembic.
|
||||||
|
revision = "fcd135795f21"
|
||||||
|
down_revision = "0a2b51deb0b8"
|
||||||
|
branch_labels = None
|
||||||
|
depends_on = None
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade() -> None:
    """Add a required ``response_type`` column to ``slack_bot_config``.

    The column is first added as nullable, existing rows are then set to
    ``'QUOTES'``, and finally the column is made non-nullable.
    """
    # native_enum=False: the values are not backed by a database-native ENUM type.
    response_type_enum = sa.Enum(
        "QUOTES",
        "CITATIONS",
        name="slackbotresponsetype",
        native_enum=False,
    )
    response_type_column = sa.Column(
        "response_type",
        response_type_enum,
        nullable=True,
    )
    op.add_column("slack_bot_config", response_type_column)

    # Backfill rows that have no value yet so the NOT NULL change below can apply.
    op.execute(
        "UPDATE slack_bot_config SET response_type = 'QUOTES' WHERE response_type IS NULL"
    )

    op.alter_column("slack_bot_config", "response_type", nullable=False)
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade() -> None:
    """Drop the ``response_type`` column from ``slack_bot_config``."""
    op.drop_column(table_name="slack_bot_config", column_name="response_type")
|
@ -1,7 +1,7 @@
|
|||||||
import re
|
import re
|
||||||
from collections.abc import Callable
|
from collections.abc import Callable
|
||||||
from collections.abc import Iterator
|
from collections.abc import Iterator
|
||||||
from datetime import datetime
|
from collections.abc import Sequence
|
||||||
from functools import lru_cache
|
from functools import lru_cache
|
||||||
from typing import cast
|
from typing import cast
|
||||||
|
|
||||||
@ -16,7 +16,6 @@ from danswer.chat.models import DanswerAnswerPiece
|
|||||||
from danswer.chat.models import LlmDoc
|
from danswer.chat.models import LlmDoc
|
||||||
from danswer.configs.chat_configs import MULTILINGUAL_QUERY_EXPANSION
|
from danswer.configs.chat_configs import MULTILINGUAL_QUERY_EXPANSION
|
||||||
from danswer.configs.chat_configs import STOP_STREAM_PAT
|
from danswer.configs.chat_configs import STOP_STREAM_PAT
|
||||||
from danswer.configs.constants import DocumentSource
|
|
||||||
from danswer.configs.constants import IGNORE_FOR_QA
|
from danswer.configs.constants import IGNORE_FOR_QA
|
||||||
from danswer.configs.model_configs import DOC_EMBEDDING_CONTEXT_SIZE
|
from danswer.configs.model_configs import DOC_EMBEDDING_CONTEXT_SIZE
|
||||||
from danswer.configs.model_configs import GEN_AI_SINGLE_USER_MESSAGE_EXPECTED_MAX_TOKENS
|
from danswer.configs.model_configs import GEN_AI_SINGLE_USER_MESSAGE_EXPECTED_MAX_TOKENS
|
||||||
@ -34,13 +33,12 @@ from danswer.llm.utils import tokenizer_trim_content
|
|||||||
from danswer.prompts.chat_prompts import ADDITIONAL_INFO
|
from danswer.prompts.chat_prompts import ADDITIONAL_INFO
|
||||||
from danswer.prompts.chat_prompts import CHAT_USER_CONTEXT_FREE_PROMPT
|
from danswer.prompts.chat_prompts import CHAT_USER_CONTEXT_FREE_PROMPT
|
||||||
from danswer.prompts.chat_prompts import CHAT_USER_PROMPT
|
from danswer.prompts.chat_prompts import CHAT_USER_PROMPT
|
||||||
from danswer.prompts.chat_prompts import CITATION_REMINDER
|
|
||||||
from danswer.prompts.chat_prompts import DEFAULT_IGNORE_STATEMENT
|
|
||||||
from danswer.prompts.chat_prompts import NO_CITATION_STATEMENT
|
from danswer.prompts.chat_prompts import NO_CITATION_STATEMENT
|
||||||
from danswer.prompts.chat_prompts import REQUIRE_CITATION_STATEMENT
|
from danswer.prompts.chat_prompts import REQUIRE_CITATION_STATEMENT
|
||||||
from danswer.prompts.constants import CODE_BLOCK_PAT
|
from danswer.prompts.constants import DEFAULT_IGNORE_STATEMENT
|
||||||
from danswer.prompts.constants import TRIPLE_BACKTICK
|
from danswer.prompts.constants import TRIPLE_BACKTICK
|
||||||
from danswer.prompts.direct_qa_prompts import LANGUAGE_HINT
|
from danswer.prompts.prompt_utils import build_complete_context_str
|
||||||
|
from danswer.prompts.prompt_utils import build_task_prompt_reminders
|
||||||
from danswer.prompts.prompt_utils import get_current_llm_day_time
|
from danswer.prompts.prompt_utils import get_current_llm_day_time
|
||||||
from danswer.prompts.token_counts import ADDITIONAL_INFO_TOKEN_CNT
|
from danswer.prompts.token_counts import ADDITIONAL_INFO_TOKEN_CNT
|
||||||
from danswer.prompts.token_counts import (
|
from danswer.prompts.token_counts import (
|
||||||
@ -53,68 +51,6 @@ from danswer.utils.logger import setup_logger
|
|||||||
|
|
||||||
logger = setup_logger()
|
logger = setup_logger()
|
||||||
|
|
||||||
# Maps connector enum string to a more natural language representation for the LLM
|
|
||||||
# If not on the list, uses the original but slightly cleaned up, see below
|
|
||||||
CONNECTOR_NAME_MAP = {
    "web": "Website",
    "requesttracker": "Request Tracker",
    "github": "GitHub",
    "file": "File Upload",
}


def clean_up_source(source_str: str) -> str:
    """Return a more natural-language name for a connector source string.

    Sources listed in ``CONNECTOR_NAME_MAP`` use their mapped name. Any other
    source has its underscores replaced by spaces and is title-cased.
    """
    try:
        return CONNECTOR_NAME_MAP[source_str]
    except KeyError:
        words = source_str.replace("_", " ")
        return words.title()
|
|
||||||
|
|
||||||
|
|
||||||
def build_doc_context_str(
    semantic_identifier: str,
    source_type: DocumentSource,
    content: str,
    metadata_dict: dict[str, str | list[str]],
    updated_at: datetime | None,
    ind: int,
    include_metadata: bool = True,
) -> str:
    """Render a single document as one text section of the LLM context.

    The stripped ``content`` is always emitted, formatted through
    ``CODE_BLOCK_PAT`` and followed by three newlines. When
    ``include_metadata`` is true it is preceded by a ``DOCUMENT {ind}`` title
    line, a ``Source`` line, one line per metadata entry (list values joined
    with ", ") and, if ``updated_at`` is set, an ``Updated`` line.
    """
    # NOTE(review): the original indentation was not visible here; the metadata
    # and "Updated" lines are assumed to sit under `include_metadata` — confirm.
    pieces: list[str] = []
    if include_metadata:
        pieces.append(f"DOCUMENT {ind}: {semantic_identifier}\n")
        pieces.append(f"Source: {clean_up_source(source_type)}\n")

        for key, value in metadata_dict.items():
            rendered_value = ", ".join(value) if isinstance(value, list) else value
            pieces.append(f"{key.capitalize()}: {rendered_value}\n")

        if updated_at:
            timestamp = updated_at.strftime("%B %d, %Y %H:%M")
            pieces.append(f"Updated: {timestamp}\n")

    pieces.append(f"{CODE_BLOCK_PAT.format(content.strip())}\n\n\n")
    return "".join(pieces)
|
|
||||||
|
|
||||||
|
|
||||||
def build_complete_context_str(
    context_docs: list[LlmDoc | InferenceChunk],
    include_metadata: bool = True,
) -> str:
    """Concatenate the context sections of all documents into one string.

    Documents are numbered from 1 in the order given. Each one is rendered
    with ``build_doc_context_str``, and leading/trailing whitespace is
    stripped from the combined result.
    """
    sections = [
        build_doc_context_str(
            semantic_identifier=doc.semantic_identifier,
            source_type=doc.source_type,
            content=doc.content,
            metadata_dict=doc.metadata,
            updated_at=doc.updated_at,
            ind=position,
            include_metadata=include_metadata,
        )
        for position, doc in enumerate(context_docs, start=1)
    ]
    return "".join(sections).strip()
|
|
||||||
|
|
||||||
|
|
||||||
@lru_cache()
|
@lru_cache()
|
||||||
def build_chat_system_message(
|
def build_chat_system_message(
|
||||||
@ -147,18 +83,6 @@ def build_chat_system_message(
|
|||||||
return system_msg, token_count
|
return system_msg, token_count
|
||||||
|
|
||||||
|
|
||||||
def build_task_prompt_reminders(
    prompt: Prompt,
    use_language_hint: bool = bool(MULTILINGUAL_QUERY_EXPANSION),
    citation_str: str = CITATION_REMINDER,
    language_hint_str: str = LANGUAGE_HINT,
) -> str:
    """Return the prompt's task text followed by its optional reminders.

    ``citation_str`` is appended only when ``prompt.include_citations`` is
    truthy. ``language_hint_str``, with leading whitespace removed, is
    appended only when ``use_language_hint`` is true.
    """
    reminder_parts = [prompt.task_prompt]
    if prompt.include_citations:
        reminder_parts.append(citation_str)
    if use_language_hint:
        reminder_parts.append(language_hint_str.lstrip())
    return "".join(reminder_parts)
|
|
||||||
|
|
||||||
|
|
||||||
def llm_doc_from_inference_chunk(inf_chunk: InferenceChunk) -> LlmDoc:
|
def llm_doc_from_inference_chunk(inf_chunk: InferenceChunk) -> LlmDoc:
|
||||||
return LlmDoc(
|
return LlmDoc(
|
||||||
document_id=inf_chunk.document_id,
|
document_id=inf_chunk.document_id,
|
||||||
@ -172,7 +96,7 @@ def llm_doc_from_inference_chunk(inf_chunk: InferenceChunk) -> LlmDoc:
|
|||||||
|
|
||||||
|
|
||||||
def map_document_id_order(
|
def map_document_id_order(
|
||||||
chunks: list[InferenceChunk | LlmDoc], one_indexed: bool = True
|
chunks: Sequence[InferenceChunk | LlmDoc], one_indexed: bool = True
|
||||||
) -> dict[str, int]:
|
) -> dict[str, int]:
|
||||||
order_mapping = {}
|
order_mapping = {}
|
||||||
current = 1 if one_indexed else 0
|
current = 1 if one_indexed else 0
|
||||||
@ -568,6 +492,63 @@ def extract_citations_from_stream(
|
|||||||
yield DanswerAnswerPiece(answer_piece=curr_segment)
|
yield DanswerAnswerPiece(answer_piece=curr_segment)
|
||||||
|
|
||||||
|
|
||||||
|
def reorganize_citations(
    answer: str, citations: list[CitationInfo]
) -> tuple[str, list[CitationInfo]]:
    """Renumber the citations of a complete, citation-aware response.

    Citations are renumbered 1, 2, 3, ... in the order in which they first
    appear in the answer. This just looks nicer / avoids confusion ("Why is
    there [7] when only 2 documents are cited?").

    Args:
        answer: Answer text whose citations appear as ``[[x]](LINK)``.
        citations: Citations for this answer, numbered as they are in ``answer``.

    Returns:
        A tuple of the answer with its citation markers renumbered, and the
        citation list: the renumbered citations first (in order of first
        appearance), followed by every entry of ``citations`` that was not
        renumbered, unchanged.
    """
    # Matches every instance of [[x]](LINK); group 1 is x, group 2 is LINK.
    citation_link_pattern = re.compile(r"\[\[(.*?)\]\]\((.*?)\)")

    # Index once instead of rescanning the list for every match. The first
    # entry wins for a repeated number, as a first-match scan would give.
    citations_by_num: dict[int, CitationInfo] = {}
    for citation in citations:
        citations_by_num.setdefault(citation.citation_num, citation)

    # Original citation number -> citation carrying its new sequential number.
    new_citation_info: dict[int, CitationInfo] = {}
    for citation_match in citation_link_pattern.finditer(answer):
        try:
            citation_num = int(citation_match.group(1))
        except ValueError:
            # The link text is not a number, so it is not a citation marker.
            continue

        if citation_num in new_citation_info:
            continue

        matching_citation = citations_by_num.get(citation_num)
        if matching_citation is None:
            continue

        new_citation_info[citation_num] = CitationInfo(
            citation_num=len(new_citation_info) + 1,
            document_id=matching_citation.document_id,
        )

    def renumber_link(match: re.Match) -> str:
        """Re-emit one ``[[x]](LINK)`` match, swapping in the new number if any."""
        link_text = match.group(1)
        link_url = match.group(2)
        try:
            renumbered = new_citation_info.get(int(link_text))
        except ValueError:
            # Non-numeric link text is left exactly as it was.
            renumbered = None
        if renumbered is not None:
            link_text = str(renumbered.citation_num)
        return f"[[{link_text}]]({link_url})"

    # Substitute all matches in the input text
    new_answer = citation_link_pattern.sub(renumber_link, answer)

    # if any citations weren't parsable, just add them back to be safe
    # NOTE(review): these keep their original numbers, which can coincide with
    # a number newly assigned above — confirm that callers tolerate this.
    for citation in citations:
        if citation.citation_num not in new_citation_info:
            new_citation_info[citation.citation_num] = citation

    return new_answer, list(new_citation_info.values())
|
||||||
|
|
||||||
|
|
||||||
def get_prompt_tokens(prompt: Prompt) -> int:
|
def get_prompt_tokens(prompt: Prompt) -> int:
|
||||||
# Note: currently custom prompts do not allow datetime aware, only default prompts
|
# Note: currently custom prompts do not allow datetime aware, only default prompts
|
||||||
return (
|
return (
|
||||||
|
@ -7,7 +7,6 @@ from sqlalchemy.orm import Session
|
|||||||
|
|
||||||
from danswer.chat.chat_utils import build_chat_system_message
|
from danswer.chat.chat_utils import build_chat_system_message
|
||||||
from danswer.chat.chat_utils import build_chat_user_message
|
from danswer.chat.chat_utils import build_chat_user_message
|
||||||
from danswer.chat.chat_utils import build_doc_context_str
|
|
||||||
from danswer.chat.chat_utils import compute_max_document_tokens
|
from danswer.chat.chat_utils import compute_max_document_tokens
|
||||||
from danswer.chat.chat_utils import compute_max_llm_input_tokens
|
from danswer.chat.chat_utils import compute_max_llm_input_tokens
|
||||||
from danswer.chat.chat_utils import create_chat_chain
|
from danswer.chat.chat_utils import create_chat_chain
|
||||||
@ -51,6 +50,7 @@ from danswer.llm.utils import get_default_llm_version
|
|||||||
from danswer.llm.utils import get_max_input_tokens
|
from danswer.llm.utils import get_max_input_tokens
|
||||||
from danswer.llm.utils import tokenizer_trim_content
|
from danswer.llm.utils import tokenizer_trim_content
|
||||||
from danswer.llm.utils import translate_history_to_basemessages
|
from danswer.llm.utils import translate_history_to_basemessages
|
||||||
|
from danswer.prompts.prompt_utils import build_doc_context_str
|
||||||
from danswer.search.models import OptionalSearchSetting
|
from danswer.search.models import OptionalSearchSetting
|
||||||
from danswer.search.models import RetrievalDetails
|
from danswer.search.models import RetrievalDetails
|
||||||
from danswer.search.request_preprocessing import retrieval_preprocessing
|
from danswer.search.request_preprocessing import retrieval_preprocessing
|
||||||
|
@ -52,6 +52,8 @@ ENABLE_DANSWERBOT_REFLEXION = (
|
|||||||
)
|
)
|
||||||
# Chain of thought is currently not supported; it will probably be added back later
|
# Chain of thought is currently not supported; it will probably be added back later
|
||||||
DANSWER_BOT_DISABLE_COT = True
|
DANSWER_BOT_DISABLE_COT = True
|
||||||
|
# if set, will default DanswerBot to use quotes and reference documents
|
||||||
|
DANSWER_BOT_USE_QUOTES = os.environ.get("DANSWER_BOT_USE_QUOTES", "").lower() == "true"
|
||||||
|
|
||||||
# Maximum Questions Per Minute, Default Uncapped
|
# Maximum Questions Per Minute, Default Uncapped
|
||||||
DANSWER_BOT_MAX_QPM = int(os.environ.get("DANSWER_BOT_MAX_QPM") or 0) or None
|
DANSWER_BOT_MAX_QPM = int(os.environ.get("DANSWER_BOT_MAX_QPM") or 0) or None
|
||||||
|
@ -1,15 +1,20 @@
|
|||||||
|
import re
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
from re import Match
|
||||||
|
|
||||||
import pytz
|
import pytz
|
||||||
import timeago # type: ignore
|
import timeago # type: ignore
|
||||||
from slack_sdk.models.blocks import ActionsBlock
|
from slack_sdk.models.blocks import ActionsBlock
|
||||||
from slack_sdk.models.blocks import Block
|
from slack_sdk.models.blocks import Block
|
||||||
from slack_sdk.models.blocks import ButtonElement
|
from slack_sdk.models.blocks import ButtonElement
|
||||||
|
from slack_sdk.models.blocks import ContextBlock
|
||||||
from slack_sdk.models.blocks import DividerBlock
|
from slack_sdk.models.blocks import DividerBlock
|
||||||
from slack_sdk.models.blocks import HeaderBlock
|
from slack_sdk.models.blocks import HeaderBlock
|
||||||
from slack_sdk.models.blocks import Option
|
from slack_sdk.models.blocks import Option
|
||||||
from slack_sdk.models.blocks import RadioButtonsElement
|
from slack_sdk.models.blocks import RadioButtonsElement
|
||||||
from slack_sdk.models.blocks import SectionBlock
|
from slack_sdk.models.blocks import SectionBlock
|
||||||
|
from slack_sdk.models.blocks.basic_components import MarkdownTextObject
|
||||||
|
from slack_sdk.models.blocks.block_elements import ImageElement
|
||||||
|
|
||||||
from danswer.chat.models import DanswerQuote
|
from danswer.chat.models import DanswerQuote
|
||||||
from danswer.configs.app_configs import DISABLE_GENERATIVE_AI
|
from danswer.configs.app_configs import DISABLE_GENERATIVE_AI
|
||||||
@ -22,6 +27,7 @@ from danswer.danswerbot.slack.constants import FOLLOWUP_BUTTON_ACTION_ID
|
|||||||
from danswer.danswerbot.slack.constants import FOLLOWUP_BUTTON_RESOLVED_ACTION_ID
|
from danswer.danswerbot.slack.constants import FOLLOWUP_BUTTON_RESOLVED_ACTION_ID
|
||||||
from danswer.danswerbot.slack.constants import IMMEDIATE_RESOLVED_BUTTON_ACTION_ID
|
from danswer.danswerbot.slack.constants import IMMEDIATE_RESOLVED_BUTTON_ACTION_ID
|
||||||
from danswer.danswerbot.slack.constants import LIKE_BLOCK_ACTION_ID
|
from danswer.danswerbot.slack.constants import LIKE_BLOCK_ACTION_ID
|
||||||
|
from danswer.danswerbot.slack.icons import source_to_github_img_link
|
||||||
from danswer.danswerbot.slack.utils import build_feedback_id
|
from danswer.danswerbot.slack.utils import build_feedback_id
|
||||||
from danswer.danswerbot.slack.utils import remove_slack_text_interactions
|
from danswer.danswerbot.slack.utils import remove_slack_text_interactions
|
||||||
from danswer.danswerbot.slack.utils import translate_vespa_highlight_to_slack
|
from danswer.danswerbot.slack.utils import translate_vespa_highlight_to_slack
|
||||||
@ -29,7 +35,35 @@ from danswer.search.models import SavedSearchDoc
|
|||||||
from danswer.utils.text_processing import decode_escapes
|
from danswer.utils.text_processing import decode_escapes
|
||||||
from danswer.utils.text_processing import replace_whitespaces_w_space
|
from danswer.utils.text_processing import replace_whitespaces_w_space
|
||||||
|
|
||||||
_MAX_BLURB_LEN = 75
|
_MAX_BLURB_LEN = 45
|
||||||
|
|
||||||
|
|
||||||
|
def _process_citations_for_slack(text: str) -> str:
    """Rewrite markdown citation links in Slack's link format.

    Every ``[[x]](LINK)`` found in ``text`` becomes ``<LINK|[x]>``; all other
    text is returned unchanged.

    Args:
        text: The input string containing markdown citation links.

    Returns:
        The string with those links converted to Slack format.
    """
    # Group 1 captures the link text, group 2 the link target.
    citation_link = re.compile(r"\[\[(.*?)\]\]\((.*?)\)")

    def to_slack_link(found: Match) -> str:
        label, url = found.group(1), found.group(2)
        return f"<{url}|[{label}]>"

    return citation_link.sub(to_slack_link, text)
|
||||||
|
|
||||||
|
|
||||||
|
def clean_markdown_link_text(text: str) -> str:
    """Put link text on a single line.

    Each newline is replaced by a space, then leading and trailing whitespace
    is trimmed.
    """
    single_line = " ".join(text.split("\n"))
    return single_line.strip()
|
||||||
|
|
||||||
|
|
||||||
def build_qa_feedback_block(message_id: int) -> Block:
|
def build_qa_feedback_block(message_id: int) -> Block:
|
||||||
@ -38,13 +72,12 @@ def build_qa_feedback_block(message_id: int) -> Block:
|
|||||||
elements=[
|
elements=[
|
||||||
ButtonElement(
|
ButtonElement(
|
||||||
action_id=LIKE_BLOCK_ACTION_ID,
|
action_id=LIKE_BLOCK_ACTION_ID,
|
||||||
text="👍",
|
text="👍 Helpful",
|
||||||
style="primary",
|
style="primary",
|
||||||
),
|
),
|
||||||
ButtonElement(
|
ButtonElement(
|
||||||
action_id=DISLIKE_BLOCK_ACTION_ID,
|
action_id=DISLIKE_BLOCK_ACTION_ID,
|
||||||
text="👎",
|
text="👎 Not helpful",
|
||||||
style="danger",
|
|
||||||
),
|
),
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
@ -164,6 +197,80 @@ def build_documents_blocks(
|
|||||||
return section_blocks
|
return section_blocks
|
||||||
|
|
||||||
|
|
||||||
|
def build_sources_blocks(
    cited_documents: list[tuple[int, SavedSearchDoc]],
    num_docs_to_display: int = DANSWER_BOT_NUM_DOCS_TO_DISPLAY,
) -> list[Block]:
    """Build the Slack blocks that list the documents cited by an answer.

    Args:
        cited_documents: ``(citation_num, document)`` pairs. A document whose
            ``document_id`` was already listed is skipped.
        num_docs_to_display: Number of distinct documents after which listing
            stops. The limit is checked after each document is added, so at
            least one document is always listed.

    Returns:
        A "Sources" header block followed by one context block per listed
        document, or a single warning block when ``cited_documents`` is empty.
    """
    if not cited_documents:
        return [
            SectionBlock(
                text="*Warning*: no sources were cited for this answer, so it may be unreliable 😔"
            )
        ]

    seen_docs_identifiers = set()
    section_blocks: list[Block] = [SectionBlock(text="*Sources:*")]
    included_docs = 0
    for citation_num, d in cited_documents:
        if d.document_id in seen_docs_identifiers:
            continue
        seen_docs_identifiers.add(d.document_id)

        doc_sem_id = d.semantic_identifier
        if d.source_type == DocumentSource.SLACK.value:
            # for legacy reasons, before the switch to how Slack semantic identifiers are constructed
            if "#" not in doc_sem_id:
                doc_sem_id = "#" + doc_sem_id

        # this is needed to try and prevent the line from overflowing
        # if it does overflow, the image gets placed above the title and it
        # looks bad
        doc_sem_id = (
            doc_sem_id[:_MAX_BLURB_LEN] + "..."
            if len(doc_sem_id) > _MAX_BLURB_LEN
            else doc_sem_id
        )

        owner_str = f"By {d.primary_owners[0]}" if d.primary_owners else None
        days_ago_str = (
            timeago.format(d.updated_at, datetime.now(pytz.utc))
            if d.updated_at
            else None
        )
        # Only the pieces that are actually available are joined.
        final_metadata_str = " | ".join(
            ([owner_str] if owner_str else [])
            + ([days_ago_str] if days_ago_str else [])
        )

        document_title = clean_markdown_link_text(doc_sem_id)
        img_link = source_to_github_img_link(d.source_type)

        # The source logo is optional; the linked title line is always present.
        elements = (
            [
                ImageElement(
                    image_url=img_link,
                    alt_text=f"{d.source_type.value} logo",
                )
            ]
            if img_link
            else []
        ) + [
            MarkdownTextObject(
                text=f"*<{d.link}|[{citation_num}] {document_title}>*\n{final_metadata_str}"
            ),
        ]
        section_blocks.append(ContextBlock(elements=elements))

        # Count the listed document; without this the limit check below could
        # never trigger for a positive `num_docs_to_display`.
        included_docs += 1
        if included_docs >= num_docs_to_display:
            break

    return section_blocks
|
||||||
|
|
||||||
|
|
||||||
def build_quotes_block(
|
def build_quotes_block(
|
||||||
quotes: list[DanswerQuote],
|
quotes: list[DanswerQuote],
|
||||||
) -> list[Block]:
|
) -> list[Block]:
|
||||||
@ -214,6 +321,7 @@ def build_qa_response_blocks(
|
|||||||
time_cutoff: datetime | None,
|
time_cutoff: datetime | None,
|
||||||
favor_recent: bool,
|
favor_recent: bool,
|
||||||
skip_quotes: bool = False,
|
skip_quotes: bool = False,
|
||||||
|
process_message_for_citations: bool = False,
|
||||||
skip_ai_feedback: bool = False,
|
skip_ai_feedback: bool = False,
|
||||||
) -> list[Block]:
|
) -> list[Block]:
|
||||||
if DISABLE_GENERATIVE_AI:
|
if DISABLE_GENERATIVE_AI:
|
||||||
@ -221,8 +329,6 @@ def build_qa_response_blocks(
|
|||||||
|
|
||||||
quotes_blocks: list[Block] = []
|
quotes_blocks: list[Block] = []
|
||||||
|
|
||||||
ai_answer_header = HeaderBlock(text="AI Answer")
|
|
||||||
|
|
||||||
filter_block: Block | None = None
|
filter_block: Block | None = None
|
||||||
if time_cutoff or favor_recent or source_filters:
|
if time_cutoff or favor_recent or source_filters:
|
||||||
filter_text = "Filters: "
|
filter_text = "Filters: "
|
||||||
@ -247,6 +353,8 @@ def build_qa_response_blocks(
|
|||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
answer_processed = decode_escapes(remove_slack_text_interactions(answer))
|
answer_processed = decode_escapes(remove_slack_text_interactions(answer))
|
||||||
|
if process_message_for_citations:
|
||||||
|
answer_processed = _process_citations_for_slack(answer_processed)
|
||||||
answer_block = SectionBlock(text=answer_processed)
|
answer_block = SectionBlock(text=answer_processed)
|
||||||
if quotes:
|
if quotes:
|
||||||
quotes_blocks = build_quotes_block(quotes)
|
quotes_blocks = build_quotes_block(quotes)
|
||||||
@ -259,7 +367,7 @@ def build_qa_response_blocks(
|
|||||||
)
|
)
|
||||||
]
|
]
|
||||||
|
|
||||||
response_blocks: list[Block] = [ai_answer_header]
|
response_blocks: list[Block] = []
|
||||||
|
|
||||||
if filter_block is not None:
|
if filter_block is not None:
|
||||||
response_blocks.append(filter_block)
|
response_blocks.append(filter_block)
|
||||||
@ -271,7 +379,6 @@ def build_qa_response_blocks(
|
|||||||
|
|
||||||
if not skip_quotes:
|
if not skip_quotes:
|
||||||
response_blocks.extend(quotes_blocks)
|
response_blocks.extend(quotes_blocks)
|
||||||
response_blocks.append(DividerBlock())
|
|
||||||
|
|
||||||
return response_blocks
|
return response_blocks
|
||||||
|
|
||||||
|
@ -9,6 +9,7 @@ from typing import TypeVar
|
|||||||
from retry import retry
|
from retry import retry
|
||||||
from slack_sdk import WebClient
|
from slack_sdk import WebClient
|
||||||
from slack_sdk.errors import SlackApiError
|
from slack_sdk.errors import SlackApiError
|
||||||
|
from slack_sdk.models.blocks import DividerBlock
|
||||||
from sqlalchemy.orm import Session
|
from sqlalchemy.orm import Session
|
||||||
|
|
||||||
from danswer.chat.chat_utils import compute_max_document_tokens
|
from danswer.chat.chat_utils import compute_max_document_tokens
|
||||||
@ -18,12 +19,14 @@ from danswer.configs.danswerbot_configs import DANSWER_BOT_DISABLE_DOCS_ONLY_ANS
|
|||||||
from danswer.configs.danswerbot_configs import DANSWER_BOT_DISPLAY_ERROR_MSGS
|
from danswer.configs.danswerbot_configs import DANSWER_BOT_DISPLAY_ERROR_MSGS
|
||||||
from danswer.configs.danswerbot_configs import DANSWER_BOT_NUM_RETRIES
|
from danswer.configs.danswerbot_configs import DANSWER_BOT_NUM_RETRIES
|
||||||
from danswer.configs.danswerbot_configs import DANSWER_BOT_TARGET_CHUNK_PERCENTAGE
|
from danswer.configs.danswerbot_configs import DANSWER_BOT_TARGET_CHUNK_PERCENTAGE
|
||||||
|
from danswer.configs.danswerbot_configs import DANSWER_BOT_USE_QUOTES
|
||||||
from danswer.configs.danswerbot_configs import DANSWER_REACT_EMOJI
|
from danswer.configs.danswerbot_configs import DANSWER_REACT_EMOJI
|
||||||
from danswer.configs.danswerbot_configs import DISABLE_DANSWER_BOT_FILTER_DETECT
|
from danswer.configs.danswerbot_configs import DISABLE_DANSWER_BOT_FILTER_DETECT
|
||||||
from danswer.configs.danswerbot_configs import ENABLE_DANSWERBOT_REFLEXION
|
from danswer.configs.danswerbot_configs import ENABLE_DANSWERBOT_REFLEXION
|
||||||
from danswer.danswerbot.slack.blocks import build_documents_blocks
|
from danswer.danswerbot.slack.blocks import build_documents_blocks
|
||||||
from danswer.danswerbot.slack.blocks import build_follow_up_block
|
from danswer.danswerbot.slack.blocks import build_follow_up_block
|
||||||
from danswer.danswerbot.slack.blocks import build_qa_response_blocks
|
from danswer.danswerbot.slack.blocks import build_qa_response_blocks
|
||||||
|
from danswer.danswerbot.slack.blocks import build_sources_blocks
|
||||||
from danswer.danswerbot.slack.blocks import get_restate_blocks
|
from danswer.danswerbot.slack.blocks import get_restate_blocks
|
||||||
from danswer.danswerbot.slack.constants import SLACK_CHANNEL_ID
|
from danswer.danswerbot.slack.constants import SLACK_CHANNEL_ID
|
||||||
from danswer.danswerbot.slack.models import SlackMessageInfo
|
from danswer.danswerbot.slack.models import SlackMessageInfo
|
||||||
@ -35,6 +38,7 @@ from danswer.danswerbot.slack.utils import SlackRateLimiter
|
|||||||
from danswer.danswerbot.slack.utils import update_emote_react
|
from danswer.danswerbot.slack.utils import update_emote_react
|
||||||
from danswer.db.engine import get_sqlalchemy_engine
|
from danswer.db.engine import get_sqlalchemy_engine
|
||||||
from danswer.db.models import SlackBotConfig
|
from danswer.db.models import SlackBotConfig
|
||||||
|
from danswer.db.models import SlackBotResponseType
|
||||||
from danswer.llm.utils import check_number_of_tokens
|
from danswer.llm.utils import check_number_of_tokens
|
||||||
from danswer.llm.utils import get_default_llm_version
|
from danswer.llm.utils import get_default_llm_version
|
||||||
from danswer.llm.utils import get_max_input_tokens
|
from danswer.llm.utils import get_max_input_tokens
|
||||||
@ -137,6 +141,13 @@ def handle_message(
|
|||||||
|
|
||||||
should_respond_even_with_no_docs = persona.num_chunks == 0 if persona else False
|
should_respond_even_with_no_docs = persona.num_chunks == 0 if persona else False
|
||||||
|
|
||||||
|
# figure out if we want to use citations or quotes
|
||||||
|
use_citations = (
|
||||||
|
not DANSWER_BOT_USE_QUOTES
|
||||||
|
if channel_config is None
|
||||||
|
else channel_config.response_type == SlackBotResponseType.CITATIONS
|
||||||
|
)
|
||||||
|
|
||||||
# List of user id to send message to, if None, send to everyone in channel
|
# List of user id to send message to, if None, send to everyone in channel
|
||||||
send_to: list[str] | None = None
|
send_to: list[str] | None = None
|
||||||
respond_tag_only = False
|
respond_tag_only = False
|
||||||
@ -259,6 +270,7 @@ def handle_message(
|
|||||||
answer_generation_timeout=answer_generation_timeout,
|
answer_generation_timeout=answer_generation_timeout,
|
||||||
enable_reflexion=reflexion,
|
enable_reflexion=reflexion,
|
||||||
bypass_acl=bypass_acl,
|
bypass_acl=bypass_acl,
|
||||||
|
use_citations=use_citations,
|
||||||
)
|
)
|
||||||
if not answer.error_msg:
|
if not answer.error_msg:
|
||||||
return answer
|
return answer
|
||||||
@ -387,7 +399,10 @@ def handle_message(
|
|||||||
source_filters=retrieval_info.applied_source_filters,
|
source_filters=retrieval_info.applied_source_filters,
|
||||||
time_cutoff=retrieval_info.applied_time_cutoff,
|
time_cutoff=retrieval_info.applied_time_cutoff,
|
||||||
favor_recent=retrieval_info.recency_bias_multiplier > 1,
|
favor_recent=retrieval_info.recency_bias_multiplier > 1,
|
||||||
skip_quotes=persona is not None, # currently Personas don't support quotes
|
# currently Personas don't support quotes
|
||||||
|
# if citations are enabled, also don't use quotes
|
||||||
|
skip_quotes=persona is not None or use_citations,
|
||||||
|
process_message_for_citations=use_citations,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Get the chunks fed to the LLM only, then fill with other docs
|
# Get the chunks fed to the LLM only, then fill with other docs
|
||||||
@ -397,16 +412,33 @@ def handle_message(
|
|||||||
doc for idx, doc in enumerate(top_docs) if idx not in llm_doc_inds
|
doc for idx, doc in enumerate(top_docs) if idx not in llm_doc_inds
|
||||||
]
|
]
|
||||||
priority_ordered_docs = llm_docs + remaining_docs
|
priority_ordered_docs = llm_docs + remaining_docs
|
||||||
document_blocks = (
|
|
||||||
build_documents_blocks(
|
document_blocks = []
|
||||||
|
citations_block = []
|
||||||
|
# if citations are enabled, only show cited documents
|
||||||
|
if use_citations:
|
||||||
|
citations = answer.citations or []
|
||||||
|
cited_docs = []
|
||||||
|
for citation in citations:
|
||||||
|
matching_doc = next(
|
||||||
|
(d for d in top_docs if d.document_id == citation.document_id),
|
||||||
|
None,
|
||||||
|
)
|
||||||
|
if matching_doc:
|
||||||
|
cited_docs.append((citation.citation_num, matching_doc))
|
||||||
|
|
||||||
|
cited_docs.sort()
|
||||||
|
citations_block = build_sources_blocks(cited_documents=cited_docs)
|
||||||
|
elif priority_ordered_docs:
|
||||||
|
document_blocks = build_documents_blocks(
|
||||||
documents=priority_ordered_docs,
|
documents=priority_ordered_docs,
|
||||||
message_id=answer.chat_message_id,
|
message_id=answer.chat_message_id,
|
||||||
)
|
)
|
||||||
if priority_ordered_docs
|
document_blocks = [DividerBlock()] + document_blocks
|
||||||
else []
|
|
||||||
)
|
|
||||||
|
|
||||||
all_blocks = restate_question_block + answer_blocks + document_blocks
|
all_blocks = (
|
||||||
|
restate_question_block + answer_blocks + citations_block + document_blocks
|
||||||
|
)
|
||||||
|
|
||||||
if channel_conf and channel_conf.get("follow_up_tags") is not None:
|
if channel_conf and channel_conf.get("follow_up_tags") is not None:
|
||||||
all_blocks.append(build_follow_up_block(message_id=answer.chat_message_id))
|
all_blocks.append(build_follow_up_block(message_id=answer.chat_message_id))
|
||||||
|
58
backend/danswer/danswerbot/slack/icons.py
Normal file
58
backend/danswer/danswerbot/slack/icons.py
Normal file
@ -0,0 +1,58 @@
|
|||||||
|
from danswer.configs.constants import DocumentSource
|
||||||
|
|
||||||
|
|
||||||
|
def source_to_github_img_link(source: DocumentSource) -> str | None:
    """Return the URL of a GitHub-hosted icon image for the given document source.

    The lookup walks an ordered table of (source, url) pairs and returns the
    URL of the first entry whose enum value equals `source`. Any source that
    has no entry falls back to the generic file icon, so a URL is always
    returned.
    """
    # TODO: store these images somewhere better
    # NOTE(review): several URLs point at the `improve-slack-flow` branch rather
    # than `main`; presumably they stop resolving if that branch is removed.
    generic_file_icon = "https://raw.githubusercontent.com/danswer-ai/danswer/improve-slack-flow/backend/slackbot_images/File.png"

    icon_table = (
        (
            DocumentSource.WEB,
            "https://raw.githubusercontent.com/danswer-ai/danswer/improve-slack-flow/backend/slackbot_images/Web.png",
        ),
        (DocumentSource.FILE, generic_file_icon),
        (
            DocumentSource.GOOGLE_SITES,
            "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/GoogleSites.png",
        ),
        (
            DocumentSource.SLACK,
            "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Slack.png",
        ),
        (
            DocumentSource.GMAIL,
            "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Gmail.png",
        ),
        (
            DocumentSource.GOOGLE_DRIVE,
            "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/GoogleDrive.png",
        ),
        (
            DocumentSource.GITHUB,
            "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Github.png",
        ),
        (
            DocumentSource.GITLAB,
            "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Gitlab.png",
        ),
        (
            DocumentSource.CONFLUENCE,
            "https://raw.githubusercontent.com/danswer-ai/danswer/improve-slack-flow/backend/slackbot_images/Confluence.png",
        ),
        (
            DocumentSource.JIRA,
            "https://raw.githubusercontent.com/danswer-ai/danswer/improve-slack-flow/backend/slackbot_images/Jira.png",
        ),
        (
            DocumentSource.NOTION,
            "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Notion.png",
        ),
        (
            DocumentSource.ZENDESK,
            "https://raw.githubusercontent.com/danswer-ai/danswer/improve-slack-flow/backend/slackbot_images/Zendesk.png",
        ),
        (
            DocumentSource.GONG,
            "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Gong.png",
        ),
        (
            DocumentSource.LINEAR,
            "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Linear.png",
        ),
        (
            DocumentSource.PRODUCTBOARD,
            "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Productboard.webp",
        ),
        (
            DocumentSource.SLAB,
            "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/SlabLogo.png",
        ),
        (
            DocumentSource.ZULIP,
            "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Zulip.png",
        ),
        (
            DocumentSource.GURU,
            "https://raw.githubusercontent.com/danswer-ai/danswer/improve-slack-flow/backend/slackbot_images/Guru.png",
        ),
        (
            DocumentSource.HUBSPOT,
            "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/HubSpot.png",
        ),
        (
            DocumentSource.DOCUMENT360,
            "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Document360.png",
        ),
        (
            DocumentSource.BOOKSTACK,
            "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Bookstack.png",
        ),
        (
            DocumentSource.LOOPIO,
            "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Loopio.png",
        ),
        (
            DocumentSource.SHAREPOINT,
            "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Sharepoint.png",
        ),
        # just use file icon for now
        (DocumentSource.REQUESTTRACKER, generic_file_icon),
        (DocumentSource.INGESTION_API, generic_file_icon),
    )

    for known_source, icon_url in icon_table:
        # compare against the enum's value, exactly as each branch used to do
        if source == known_source.value:
            return icon_url

    # unknown / unmapped sources share the generic file icon
    return generic_file_icon
|
@ -346,8 +346,12 @@ def read_slack_thread(
|
|||||||
if len(blocks) <= 1:
|
if len(blocks) <= 1:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# The useful block is the second one after the header block that says AI Answer
|
# For the old flow, the useful block is the second one after the header block that says AI Answer
|
||||||
message = reply["blocks"][1]["text"]["text"]
|
if reply["blocks"][0]["text"]["text"] == "AI Answer":
|
||||||
|
message = reply["blocks"][1]["text"]["text"]
|
||||||
|
else:
|
||||||
|
# for the new flow, the answer is the first block
|
||||||
|
message = reply["blocks"][0]["text"]["text"]
|
||||||
|
|
||||||
if message.startswith("_Filters"):
|
if message.startswith("_Filters"):
|
||||||
if len(blocks) <= 2:
|
if len(blocks) <= 2:
|
||||||
|
@ -811,6 +811,11 @@ class ChannelConfig(TypedDict):
|
|||||||
follow_up_tags: NotRequired[list[str]]
|
follow_up_tags: NotRequired[list[str]]
|
||||||
|
|
||||||
|
|
||||||
|
class SlackBotResponseType(str, PyEnum):
    """Response format choices for a Slack bot config.

    Used as the type of the `SlackBotConfig.response_type` column.
    """

    QUOTES = "quotes"
    CITATIONS = "citations"
|
||||||
|
|
||||||
|
|
||||||
class SlackBotConfig(Base):
|
class SlackBotConfig(Base):
|
||||||
__tablename__ = "slack_bot_config"
|
__tablename__ = "slack_bot_config"
|
||||||
|
|
||||||
@ -822,6 +827,9 @@ class SlackBotConfig(Base):
|
|||||||
channel_config: Mapped[ChannelConfig] = mapped_column(
|
channel_config: Mapped[ChannelConfig] = mapped_column(
|
||||||
postgresql.JSONB(), nullable=False
|
postgresql.JSONB(), nullable=False
|
||||||
)
|
)
|
||||||
|
response_type: Mapped[SlackBotResponseType] = mapped_column(
|
||||||
|
Enum(SlackBotResponseType, native_enum=False), nullable=False
|
||||||
|
)
|
||||||
|
|
||||||
persona: Mapped[Persona | None] = relationship("Persona")
|
persona: Mapped[Persona | None] = relationship("Persona")
|
||||||
|
|
||||||
|
@ -11,6 +11,7 @@ from danswer.db.models import ChannelConfig
|
|||||||
from danswer.db.models import Persona
|
from danswer.db.models import Persona
|
||||||
from danswer.db.models import Persona__DocumentSet
|
from danswer.db.models import Persona__DocumentSet
|
||||||
from danswer.db.models import SlackBotConfig
|
from danswer.db.models import SlackBotConfig
|
||||||
|
from danswer.db.models import SlackBotResponseType
|
||||||
from danswer.search.models import RecencyBiasSetting
|
from danswer.search.models import RecencyBiasSetting
|
||||||
|
|
||||||
|
|
||||||
@ -72,11 +73,13 @@ def create_slack_bot_persona(
|
|||||||
def insert_slack_bot_config(
|
def insert_slack_bot_config(
|
||||||
persona_id: int | None,
|
persona_id: int | None,
|
||||||
channel_config: ChannelConfig,
|
channel_config: ChannelConfig,
|
||||||
|
response_type: SlackBotResponseType,
|
||||||
db_session: Session,
|
db_session: Session,
|
||||||
) -> SlackBotConfig:
|
) -> SlackBotConfig:
|
||||||
slack_bot_config = SlackBotConfig(
|
slack_bot_config = SlackBotConfig(
|
||||||
persona_id=persona_id,
|
persona_id=persona_id,
|
||||||
channel_config=channel_config,
|
channel_config=channel_config,
|
||||||
|
response_type=response_type,
|
||||||
)
|
)
|
||||||
db_session.add(slack_bot_config)
|
db_session.add(slack_bot_config)
|
||||||
db_session.commit()
|
db_session.commit()
|
||||||
@ -88,6 +91,7 @@ def update_slack_bot_config(
|
|||||||
slack_bot_config_id: int,
|
slack_bot_config_id: int,
|
||||||
persona_id: int | None,
|
persona_id: int | None,
|
||||||
channel_config: ChannelConfig,
|
channel_config: ChannelConfig,
|
||||||
|
response_type: SlackBotResponseType,
|
||||||
db_session: Session,
|
db_session: Session,
|
||||||
) -> SlackBotConfig:
|
) -> SlackBotConfig:
|
||||||
slack_bot_config = db_session.scalar(
|
slack_bot_config = db_session.scalar(
|
||||||
@ -105,6 +109,7 @@ def update_slack_bot_config(
|
|||||||
# will encounter `violates foreign key constraint` errors
|
# will encounter `violates foreign key constraint` errors
|
||||||
slack_bot_config.persona_id = persona_id
|
slack_bot_config.persona_id = persona_id
|
||||||
slack_bot_config.channel_config = channel_config
|
slack_bot_config.channel_config = channel_config
|
||||||
|
slack_bot_config.response_type = response_type
|
||||||
|
|
||||||
# if the persona has changed, then clean up the old persona
|
# if the persona has changed, then clean up the old persona
|
||||||
if persona_id != existing_persona_id and existing_persona_id:
|
if persona_id != existing_persona_id and existing_persona_id:
|
||||||
|
@ -3,10 +3,18 @@ from collections.abc import Callable
|
|||||||
from collections.abc import Iterator
|
from collections.abc import Iterator
|
||||||
from typing import cast
|
from typing import cast
|
||||||
|
|
||||||
|
from langchain.schema.messages import BaseMessage
|
||||||
|
from langchain.schema.messages import HumanMessage
|
||||||
from sqlalchemy.orm import Session
|
from sqlalchemy.orm import Session
|
||||||
|
|
||||||
|
from danswer.chat.chat_utils import build_chat_system_message
|
||||||
from danswer.chat.chat_utils import compute_max_document_tokens
|
from danswer.chat.chat_utils import compute_max_document_tokens
|
||||||
|
from danswer.chat.chat_utils import extract_citations_from_stream
|
||||||
from danswer.chat.chat_utils import get_chunks_for_qa
|
from danswer.chat.chat_utils import get_chunks_for_qa
|
||||||
|
from danswer.chat.chat_utils import llm_doc_from_inference_chunk
|
||||||
|
from danswer.chat.chat_utils import map_document_id_order
|
||||||
|
from danswer.chat.chat_utils import reorganize_citations
|
||||||
|
from danswer.chat.models import CitationInfo
|
||||||
from danswer.chat.models import DanswerAnswerPiece
|
from danswer.chat.models import DanswerAnswerPiece
|
||||||
from danswer.chat.models import DanswerContext
|
from danswer.chat.models import DanswerContext
|
||||||
from danswer.chat.models import DanswerContexts
|
from danswer.chat.models import DanswerContexts
|
||||||
@ -26,16 +34,23 @@ from danswer.db.chat import get_persona_by_id
|
|||||||
from danswer.db.chat import get_prompt_by_id
|
from danswer.db.chat import get_prompt_by_id
|
||||||
from danswer.db.chat import translate_db_message_to_chat_message_detail
|
from danswer.db.chat import translate_db_message_to_chat_message_detail
|
||||||
from danswer.db.embedding_model import get_current_db_embedding_model
|
from danswer.db.embedding_model import get_current_db_embedding_model
|
||||||
|
from danswer.db.models import Prompt
|
||||||
from danswer.db.models import User
|
from danswer.db.models import User
|
||||||
from danswer.document_index.factory import get_default_document_index
|
from danswer.document_index.factory import get_default_document_index
|
||||||
from danswer.indexing.models import InferenceChunk
|
from danswer.indexing.models import InferenceChunk
|
||||||
|
from danswer.llm.factory import get_default_llm
|
||||||
from danswer.llm.utils import get_default_llm_token_encode
|
from danswer.llm.utils import get_default_llm_token_encode
|
||||||
|
from danswer.llm.utils import get_default_llm_tokenizer
|
||||||
from danswer.one_shot_answer.factory import get_question_answer_model
|
from danswer.one_shot_answer.factory import get_question_answer_model
|
||||||
from danswer.one_shot_answer.models import DirectQARequest
|
from danswer.one_shot_answer.models import DirectQARequest
|
||||||
from danswer.one_shot_answer.models import OneShotQAResponse
|
from danswer.one_shot_answer.models import OneShotQAResponse
|
||||||
from danswer.one_shot_answer.models import QueryRephrase
|
from danswer.one_shot_answer.models import QueryRephrase
|
||||||
|
from danswer.one_shot_answer.models import ThreadMessage
|
||||||
from danswer.one_shot_answer.qa_block import no_gen_ai_response
|
from danswer.one_shot_answer.qa_block import no_gen_ai_response
|
||||||
from danswer.one_shot_answer.qa_utils import combine_message_thread
|
from danswer.one_shot_answer.qa_utils import combine_message_thread
|
||||||
|
from danswer.prompts.direct_qa_prompts import CITATIONS_PROMPT
|
||||||
|
from danswer.prompts.prompt_utils import build_complete_context_str
|
||||||
|
from danswer.prompts.prompt_utils import build_task_prompt_reminders
|
||||||
from danswer.search.models import RerankMetricsContainer
|
from danswer.search.models import RerankMetricsContainer
|
||||||
from danswer.search.models import RetrievalMetricsContainer
|
from danswer.search.models import RetrievalMetricsContainer
|
||||||
from danswer.search.models import SavedSearchDoc
|
from danswer.search.models import SavedSearchDoc
|
||||||
@ -51,6 +66,118 @@ from danswer.utils.timing import log_generator_function_time
|
|||||||
|
|
||||||
logger = setup_logger()
|
logger = setup_logger()
|
||||||
|
|
||||||
|
# Iterator over every packet type the one-shot QA generators may yield.
# Shared as the return annotation of `quote_based_qa`, `citation_based_qa`
# and `stream_answer_objects`.
AnswerObjectIterator = Iterator[
    QueryRephrase
    | QADocsResponse
    | LLMRelevanceFilterResponse
    | DanswerAnswerPiece
    | DanswerQuotes
    | DanswerContexts
    | StreamingError
    | ChatMessageDetail
    | CitationInfo
]
|
||||||
|
|
||||||
|
|
||||||
|
def quote_based_qa(
    prompt: Prompt,
    query_message: ThreadMessage,
    history_str: str,
    context_chunks: list[InferenceChunk],
    llm_override: str | None,
    timeout: int,
    use_chain_of_thought: bool,
    return_contexts: bool,
    llm_metrics_callback: Callable[[LLMMetricsContainer], None] | None = None,
) -> AnswerObjectIterator:
    """Stream answer packets from the quote-based question answering model.

    Args:
        prompt: Prompt used to configure the QA model.
        query_message: The user query; only its `message` text is used here.
        history_str: Conversation history, passed to the QA prompt builder.
        context_chunks: Chunks given to the model as context.
        llm_override: Optional model version, forwarded as `llm_version`.
        timeout: Timeout forwarded to the QA model factory.
        use_chain_of_thought: Forwarded as `chain_of_thought` to the factory.
        return_contexts: If True, a `DanswerContexts` packet describing
            `context_chunks` is yielded after the model's packets.
        llm_metrics_callback: Optional callback forwarded to the QA model.

    Yields:
        The packets produced by the QA model, followed by the contexts packet
        when requested. If the factory returns no model, the packets from
        `no_gen_ai_response()` are yielded instead and no contexts are added.
    """
    qa_model = get_question_answer_model(
        prompt=prompt,
        timeout=timeout,
        chain_of_thought=use_chain_of_thought,
        llm_version=llm_override,
    )

    # No model available: emit the canned "no generative AI" response and stop.
    if qa_model is None:
        yield from no_gen_ai_response()
        return

    full_prompt_str = qa_model.build_prompt(
        query=query_message.message,
        history_str=history_str,
        context_chunks=context_chunks,
    )

    response_packets = qa_model.answer_question_stream(
        prompt=full_prompt_str,
        llm_context_docs=context_chunks,
        metrics_callback=llm_metrics_callback,
    )

    # Build the contexts packet up front so any failure surfaces before
    # streaming starts, matching the previous ordering.
    contexts: DanswerContexts | None = None
    if return_contexts:
        contexts = DanswerContexts(
            contexts=[
                DanswerContext(
                    content=context_chunk.content,
                    document_id=context_chunk.document_id,
                    semantic_identifier=context_chunk.semantic_identifier,
                    # Fix: the blurb used to be filled with the semantic
                    # identifier, duplicating the field above.
                    # NOTE(review): assumes InferenceChunk exposes `blurb` - confirm.
                    blurb=context_chunk.blurb,
                )
                for context_chunk in context_chunks
            ]
        )

    yield from response_packets

    if contexts is not None:
        yield contexts
|
||||||
|
|
||||||
|
|
||||||
|
def citation_based_qa(
    prompt: Prompt,
    query_message: ThreadMessage,
    history_str: str,
    context_chunks: list[InferenceChunk],
    llm_override: str | None,
    timeout: int,
) -> AnswerObjectIterator:
    """Stream an answer to the query using the citations prompt.

    Builds an optional system message plus a single human message formatted
    from `CITATIONS_PROMPT`, streams the default LLM over them, and yields
    whatever `extract_citations_from_stream` produces from the token stream.

    Args:
        prompt: Prompt used for the system message and task reminders.
        query_message: The user query; only its `message` text is used here.
        history_str: Text inserted as the prompt's history block.
        context_chunks: Chunks rendered into the prompt's context section and
            converted to LLM docs for citation extraction.
        llm_override: Optional model version override for the default LLM.
        timeout: Timeout forwarded to the default LLM factory.
    """
    tokenizer = get_default_llm_tokenizer()

    system_message, _ = build_chat_system_message(
        prompt=prompt,
        context_exists=True,
        llm_tokenizer_encode_func=tokenizer.encode,
    )

    human_message = HumanMessage(
        content=CITATIONS_PROMPT.format(
            task_prompt=build_task_prompt_reminders(prompt),
            user_query=query_message.message,
            history_block=history_str,
            context_docs_str=build_complete_context_str(context_chunks),
        )
    )

    llm = get_default_llm(
        timeout=timeout,
        gen_ai_model_version_override=llm_override,
    )

    # The system message is optional; when present it goes first.
    messages: list[BaseMessage] = (
        [human_message]
        if system_message is None
        else [system_message, human_message]
    )

    docs_for_llm = [llm_doc_from_inference_chunk(chunk) for chunk in context_chunks]
    rank_by_doc_id = map_document_id_order(docs_for_llm)

    token_stream = llm.stream(messages)
    yield from extract_citations_from_stream(token_stream, docs_for_llm, rank_by_doc_id)
|
||||||
|
|
||||||
|
|
||||||
def stream_answer_objects(
|
def stream_answer_objects(
|
||||||
query_req: DirectQARequest,
|
query_req: DirectQARequest,
|
||||||
@ -66,20 +193,12 @@ def stream_answer_objects(
|
|||||||
default_chunk_size: int = DOC_EMBEDDING_CONTEXT_SIZE,
|
default_chunk_size: int = DOC_EMBEDDING_CONTEXT_SIZE,
|
||||||
timeout: int = QA_TIMEOUT,
|
timeout: int = QA_TIMEOUT,
|
||||||
bypass_acl: bool = False,
|
bypass_acl: bool = False,
|
||||||
|
use_citations: bool = False,
|
||||||
retrieval_metrics_callback: Callable[[RetrievalMetricsContainer], None]
|
retrieval_metrics_callback: Callable[[RetrievalMetricsContainer], None]
|
||||||
| None = None,
|
| None = None,
|
||||||
rerank_metrics_callback: Callable[[RerankMetricsContainer], None] | None = None,
|
rerank_metrics_callback: Callable[[RerankMetricsContainer], None] | None = None,
|
||||||
llm_metrics_callback: Callable[[LLMMetricsContainer], None] | None = None,
|
llm_metrics_callback: Callable[[LLMMetricsContainer], None] | None = None,
|
||||||
) -> Iterator[
|
) -> AnswerObjectIterator:
|
||||||
QueryRephrase
|
|
||||||
| QADocsResponse
|
|
||||||
| LLMRelevanceFilterResponse
|
|
||||||
| DanswerAnswerPiece
|
|
||||||
| DanswerQuotes
|
|
||||||
| DanswerContexts
|
|
||||||
| StreamingError
|
|
||||||
| ChatMessageDetail
|
|
||||||
]:
|
|
||||||
"""Streams in order:
|
"""Streams in order:
|
||||||
1. [always] Retrieved documents, stops flow if nothing is found
|
1. [always] Retrieved documents, stops flow if nothing is found
|
||||||
2. [conditional] LLM selected chunk indices if LLM chunk filtering is turned on
|
2. [conditional] LLM selected chunk indices if LLM chunk filtering is turned on
|
||||||
@ -216,63 +335,51 @@ def stream_answer_objects(
|
|||||||
persona_id=query_req.persona_id, user_id=user_id, db_session=db_session
|
persona_id=query_req.persona_id, user_id=user_id, db_session=db_session
|
||||||
)
|
)
|
||||||
llm_override = persona.llm_model_version_override
|
llm_override = persona.llm_model_version_override
|
||||||
|
if prompt is None:
|
||||||
qa_model = get_question_answer_model(
|
if not chat_session.persona.prompts:
|
||||||
prompt=prompt,
|
raise RuntimeError(
|
||||||
timeout=timeout,
|
"Persona does not have any prompts - this should never happen"
|
||||||
chain_of_thought=query_req.chain_of_thought,
|
)
|
||||||
llm_version=llm_override,
|
prompt = chat_session.persona.prompts[0]
|
||||||
)
|
|
||||||
|
|
||||||
full_prompt_str = (
|
|
||||||
qa_model.build_prompt(
|
|
||||||
query=query_msg.message, history_str=history_str, context_chunks=llm_chunks
|
|
||||||
)
|
|
||||||
if qa_model is not None
|
|
||||||
else "Gen AI Disabled"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Create the first User query message
|
# Create the first User query message
|
||||||
new_user_message = create_new_chat_message(
|
new_user_message = create_new_chat_message(
|
||||||
chat_session_id=chat_session.id,
|
chat_session_id=chat_session.id,
|
||||||
parent_message=root_message,
|
parent_message=root_message,
|
||||||
prompt_id=query_req.prompt_id,
|
prompt_id=query_req.prompt_id,
|
||||||
message=full_prompt_str,
|
message=query_msg.message,
|
||||||
token_count=len(llm_tokenizer(full_prompt_str)),
|
token_count=len(llm_tokenizer(query_msg.message)),
|
||||||
message_type=MessageType.USER,
|
message_type=MessageType.USER,
|
||||||
db_session=db_session,
|
db_session=db_session,
|
||||||
commit=True,
|
commit=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
response_packets = (
|
if use_citations:
|
||||||
qa_model.answer_question_stream(
|
qa_stream = citation_based_qa(
|
||||||
prompt=full_prompt_str,
|
prompt=prompt,
|
||||||
llm_context_docs=llm_chunks,
|
query_message=query_msg,
|
||||||
metrics_callback=llm_metrics_callback,
|
history_str=history_str,
|
||||||
|
context_chunks=llm_chunks,
|
||||||
|
llm_override=llm_override,
|
||||||
|
timeout=timeout,
|
||||||
)
|
)
|
||||||
if qa_model is not None
|
else:
|
||||||
else no_gen_ai_response()
|
qa_stream = quote_based_qa(
|
||||||
)
|
prompt=prompt,
|
||||||
|
query_message=query_msg,
|
||||||
if qa_model is not None and query_req.return_contexts:
|
history_str=history_str,
|
||||||
contexts = DanswerContexts(
|
context_chunks=llm_chunks,
|
||||||
contexts=[
|
llm_override=llm_override,
|
||||||
DanswerContext(
|
timeout=timeout,
|
||||||
content=context_doc.content,
|
use_chain_of_thought=False,
|
||||||
document_id=context_doc.document_id,
|
return_contexts=False,
|
||||||
semantic_identifier=context_doc.semantic_identifier,
|
llm_metrics_callback=llm_metrics_callback,
|
||||||
blurb=context_doc.semantic_identifier,
|
|
||||||
)
|
|
||||||
for context_doc in llm_chunks
|
|
||||||
]
|
|
||||||
)
|
)
|
||||||
|
|
||||||
response_packets = itertools.chain(response_packets, [contexts])
|
|
||||||
|
|
||||||
# Capture outputs and errors
|
# Capture outputs and errors
|
||||||
llm_output = ""
|
llm_output = ""
|
||||||
error: str | None = None
|
error: str | None = None
|
||||||
for packet in response_packets:
|
for packet in qa_stream:
|
||||||
logger.debug(packet)
|
logger.debug(packet)
|
||||||
|
|
||||||
if isinstance(packet, DanswerAnswerPiece):
|
if isinstance(packet, DanswerAnswerPiece):
|
||||||
@ -333,6 +440,7 @@ def get_search_answer(
|
|||||||
answer_generation_timeout: int = QA_TIMEOUT,
|
answer_generation_timeout: int = QA_TIMEOUT,
|
||||||
enable_reflexion: bool = False,
|
enable_reflexion: bool = False,
|
||||||
bypass_acl: bool = False,
|
bypass_acl: bool = False,
|
||||||
|
use_citations: bool = False,
|
||||||
retrieval_metrics_callback: Callable[[RetrievalMetricsContainer], None]
|
retrieval_metrics_callback: Callable[[RetrievalMetricsContainer], None]
|
||||||
| None = None,
|
| None = None,
|
||||||
rerank_metrics_callback: Callable[[RerankMetricsContainer], None] | None = None,
|
rerank_metrics_callback: Callable[[RerankMetricsContainer], None] | None = None,
|
||||||
@ -348,6 +456,7 @@ def get_search_answer(
|
|||||||
max_history_tokens=max_history_tokens,
|
max_history_tokens=max_history_tokens,
|
||||||
db_session=db_session,
|
db_session=db_session,
|
||||||
bypass_acl=bypass_acl,
|
bypass_acl=bypass_acl,
|
||||||
|
use_citations=use_citations,
|
||||||
timeout=answer_generation_timeout,
|
timeout=answer_generation_timeout,
|
||||||
retrieval_metrics_callback=retrieval_metrics_callback,
|
retrieval_metrics_callback=retrieval_metrics_callback,
|
||||||
rerank_metrics_callback=rerank_metrics_callback,
|
rerank_metrics_callback=rerank_metrics_callback,
|
||||||
@ -366,6 +475,11 @@ def get_search_answer(
|
|||||||
qa_response.llm_chunks_indices = packet.relevant_chunk_indices
|
qa_response.llm_chunks_indices = packet.relevant_chunk_indices
|
||||||
elif isinstance(packet, DanswerQuotes):
|
elif isinstance(packet, DanswerQuotes):
|
||||||
qa_response.quotes = packet
|
qa_response.quotes = packet
|
||||||
|
elif isinstance(packet, CitationInfo):
|
||||||
|
if qa_response.citations:
|
||||||
|
qa_response.citations.append(packet)
|
||||||
|
else:
|
||||||
|
qa_response.citations = [packet]
|
||||||
elif isinstance(packet, DanswerContexts):
|
elif isinstance(packet, DanswerContexts):
|
||||||
qa_response.contexts = packet
|
qa_response.contexts = packet
|
||||||
elif isinstance(packet, StreamingError):
|
elif isinstance(packet, StreamingError):
|
||||||
@ -384,4 +498,10 @@ def get_search_answer(
|
|||||||
else:
|
else:
|
||||||
qa_response.answer_valid = True
|
qa_response.answer_valid = True
|
||||||
|
|
||||||
|
if use_citations and qa_response.answer and qa_response.citations:
|
||||||
|
# Reorganize citation nums to be in the same order as the answer
|
||||||
|
qa_response.answer, qa_response.citations = reorganize_citations(
|
||||||
|
qa_response.answer, qa_response.citations
|
||||||
|
)
|
||||||
|
|
||||||
return qa_response
|
return qa_response
|
||||||
|
@ -4,6 +4,7 @@ from pydantic import BaseModel
|
|||||||
from pydantic import Field
|
from pydantic import Field
|
||||||
from pydantic import root_validator
|
from pydantic import root_validator
|
||||||
|
|
||||||
|
from danswer.chat.models import CitationInfo
|
||||||
from danswer.chat.models import DanswerContexts
|
from danswer.chat.models import DanswerContexts
|
||||||
from danswer.chat.models import DanswerQuotes
|
from danswer.chat.models import DanswerQuotes
|
||||||
from danswer.chat.models import QADocsResponse
|
from danswer.chat.models import QADocsResponse
|
||||||
@ -51,6 +52,7 @@ class OneShotQAResponse(BaseModel):
|
|||||||
answer: str | None = None
|
answer: str | None = None
|
||||||
rephrase: str | None = None
|
rephrase: str | None = None
|
||||||
quotes: DanswerQuotes | None = None
|
quotes: DanswerQuotes | None = None
|
||||||
|
citations: list[CitationInfo] | None = None
|
||||||
docs: QADocsResponse | None = None
|
docs: QADocsResponse | None = None
|
||||||
llm_chunks_indices: list[int] | None = None
|
llm_chunks_indices: list[int] | None = None
|
||||||
error_msg: str | None = None
|
error_msg: str | None = None
|
||||||
|
@ -4,7 +4,6 @@ from collections.abc import Callable
|
|||||||
from collections.abc import Iterator
|
from collections.abc import Iterator
|
||||||
from typing import cast
|
from typing import cast
|
||||||
|
|
||||||
from danswer.chat.chat_utils import build_complete_context_str
|
|
||||||
from danswer.chat.models import AnswerQuestionStreamReturn
|
from danswer.chat.models import AnswerQuestionStreamReturn
|
||||||
from danswer.chat.models import DanswerAnswer
|
from danswer.chat.models import DanswerAnswer
|
||||||
from danswer.chat.models import DanswerAnswerPiece
|
from danswer.chat.models import DanswerAnswerPiece
|
||||||
@ -33,6 +32,7 @@ from danswer.prompts.direct_qa_prompts import PARAMATERIZED_PROMPT_WITHOUT_CONTE
|
|||||||
from danswer.prompts.direct_qa_prompts import WEAK_LLM_PROMPT
|
from danswer.prompts.direct_qa_prompts import WEAK_LLM_PROMPT
|
||||||
from danswer.prompts.direct_qa_prompts import WEAK_MODEL_SYSTEM_PROMPT
|
from danswer.prompts.direct_qa_prompts import WEAK_MODEL_SYSTEM_PROMPT
|
||||||
from danswer.prompts.direct_qa_prompts import WEAK_MODEL_TASK_PROMPT
|
from danswer.prompts.direct_qa_prompts import WEAK_MODEL_TASK_PROMPT
|
||||||
|
from danswer.prompts.prompt_utils import build_complete_context_str
|
||||||
from danswer.utils.logger import setup_logger
|
from danswer.utils.logger import setup_logger
|
||||||
from danswer.utils.text_processing import clean_up_code_blocks
|
from danswer.utils.text_processing import clean_up_code_blocks
|
||||||
from danswer.utils.text_processing import escape_newlines
|
from danswer.utils.text_processing import escape_newlines
|
||||||
|
@ -17,8 +17,6 @@ Remember to provide inline citations in the format [1], [2], [3], etc.
|
|||||||
ADDITIONAL_INFO = "\n\nAdditional Information:\n\t- {datetime_info}."
|
ADDITIONAL_INFO = "\n\nAdditional Information:\n\t- {datetime_info}."
|
||||||
|
|
||||||
|
|
||||||
DEFAULT_IGNORE_STATEMENT = " Ignore any context documents that are not relevant."
|
|
||||||
|
|
||||||
CHAT_USER_PROMPT = f"""
|
CHAT_USER_PROMPT = f"""
|
||||||
Refer to the following context documents when responding to me.{{optional_ignore_statement}}
|
Refer to the following context documents when responding to me.{{optional_ignore_statement}}
|
||||||
CONTEXT:
|
CONTEXT:
|
||||||
|
@ -12,3 +12,18 @@ QUOTE_PAT = "Quote:"
|
|||||||
QUOTES_PAT_PLURAL = "Quotes:"
|
QUOTES_PAT_PLURAL = "Quotes:"
|
||||||
INVALID_PAT = "Invalid:"
|
INVALID_PAT = "Invalid:"
|
||||||
SOURCES_KEY = "sources"
|
SOURCES_KEY = "sources"
|
||||||
|
|
||||||
|
DEFAULT_IGNORE_STATEMENT = " Ignore any context documents that are not relevant."
|
||||||
|
|
||||||
|
REQUIRE_CITATION_STATEMENT = """
|
||||||
|
Cite relevant statements INLINE using the format [1], [2], [3], etc to reference the document number, \
|
||||||
|
DO NOT provide a reference section at the end and DO NOT provide any links following the citations.
|
||||||
|
""".rstrip()
|
||||||
|
|
||||||
|
NO_CITATION_STATEMENT = """
|
||||||
|
Do not provide any citations even if there are examples in the chat history.
|
||||||
|
""".rstrip()
|
||||||
|
|
||||||
|
CITATION_REMINDER = """
|
||||||
|
Remember to provide inline citations in the format [1], [2], [3], etc.
|
||||||
|
"""
|
||||||
|
@ -2,6 +2,7 @@
|
|||||||
# It is used also for the one shot direct QA flow
|
# It is used also for the one shot direct QA flow
|
||||||
import json
|
import json
|
||||||
|
|
||||||
|
from danswer.prompts.constants import DEFAULT_IGNORE_STATEMENT
|
||||||
from danswer.prompts.constants import FINAL_QUERY_PAT
|
from danswer.prompts.constants import FINAL_QUERY_PAT
|
||||||
from danswer.prompts.constants import GENERAL_SEP_PAT
|
from danswer.prompts.constants import GENERAL_SEP_PAT
|
||||||
from danswer.prompts.constants import QUESTION_PAT
|
from danswer.prompts.constants import QUESTION_PAT
|
||||||
@ -96,6 +97,22 @@ SAMPLE RESPONSE:
|
|||||||
""".strip()
|
""".strip()
|
||||||
|
|
||||||
|
|
||||||
|
# similar to the chat flow, but with the option of including a
|
||||||
|
# "conversation history" block
|
||||||
|
CITATIONS_PROMPT = f"""
|
||||||
|
Refer to the following context documents when responding to me.{DEFAULT_IGNORE_STATEMENT}
|
||||||
|
CONTEXT:
|
||||||
|
{GENERAL_SEP_PAT}
|
||||||
|
{{context_docs_str}}
|
||||||
|
{GENERAL_SEP_PAT}
|
||||||
|
|
||||||
|
{{history_block}}{{task_prompt}}
|
||||||
|
|
||||||
|
{QUESTION_PAT.upper()}
|
||||||
|
{{user_query}}
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
# For weak LLM which only takes one chunk and cannot output json
|
# For weak LLM which only takes one chunk and cannot output json
|
||||||
# Also not requiring quotes as it tends to not work
|
# Also not requiring quotes as it tends to not work
|
||||||
WEAK_LLM_PROMPT = f"""
|
WEAK_LLM_PROMPT = f"""
|
||||||
|
@ -1,5 +1,15 @@
|
|||||||
|
from collections.abc import Sequence
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
|
from danswer.chat.models import LlmDoc
|
||||||
|
from danswer.configs.chat_configs import MULTILINGUAL_QUERY_EXPANSION
|
||||||
|
from danswer.configs.constants import DocumentSource
|
||||||
|
from danswer.db.models import Prompt
|
||||||
|
from danswer.indexing.models import InferenceChunk
|
||||||
|
from danswer.prompts.chat_prompts import CITATION_REMINDER
|
||||||
|
from danswer.prompts.constants import CODE_BLOCK_PAT
|
||||||
|
from danswer.prompts.direct_qa_prompts import LANGUAGE_HINT
|
||||||
|
|
||||||
|
|
||||||
def get_current_llm_day_time() -> str:
|
def get_current_llm_day_time() -> str:
|
||||||
current_datetime = datetime.now()
|
current_datetime = datetime.now()
|
||||||
@ -7,3 +17,78 @@ def get_current_llm_day_time() -> str:
|
|||||||
formatted_datetime = current_datetime.strftime("%B %d, %Y %H:%M")
|
formatted_datetime = current_datetime.strftime("%B %d, %Y %H:%M")
|
||||||
day_of_week = current_datetime.strftime("%A")
|
day_of_week = current_datetime.strftime("%A")
|
||||||
return f"The current day and time is {day_of_week} {formatted_datetime}"
|
return f"The current day and time is {day_of_week} {formatted_datetime}"
|
||||||
|
|
||||||
|
|
||||||
|
def build_task_prompt_reminders(
    prompt: Prompt,
    use_language_hint: bool = bool(MULTILINGUAL_QUERY_EXPANSION),
    citation_str: str = CITATION_REMINDER,
    language_hint_str: str = LANGUAGE_HINT,
) -> str:
    """Return the prompt's task text followed by any applicable reminders.

    Args:
        prompt: Object providing ``task_prompt`` and ``include_citations``.
        use_language_hint: When true, the language hint is appended.
        citation_str: Reminder appended when ``prompt.include_citations``
            is truthy.
        language_hint_str: Hint appended (with leading whitespace removed)
            when ``use_language_hint`` is true.

    Returns:
        ``task_prompt`` + citation reminder (or nothing) + language hint
        (or nothing), concatenated in that order with no separator added.
    """
    task_text = prompt.task_prompt

    citation_part = ""
    if prompt.include_citations:
        citation_part = citation_str

    hint_part = ""
    if use_language_hint:
        # Only leading whitespace is dropped from the hint.
        hint_part = language_hint_str.lstrip()

    return task_text + citation_part + hint_part
|
||||||
|
|
||||||
|
|
||||||
|
# Natural-language display names for connector enum strings, as shown to the
# LLM. Sources that are not listed here fall back to a lightly cleaned-up form
# of the raw string (see clean_up_source below).
CONNECTOR_NAME_MAP = {
    "web": "Website",
    "requesttracker": "Request Tracker",
    "github": "GitHub",
    "file": "File Upload",
}


def clean_up_source(source_str: str) -> str:
    """Return a human-readable name for a connector source string.

    Sources present in ``CONNECTOR_NAME_MAP`` use their mapped name; any
    other value has underscores replaced by spaces and is title-cased.
    """
    try:
        return CONNECTOR_NAME_MAP[source_str]
    except KeyError:
        spaced = source_str.replace("_", " ")
        return spaced.title()
|
||||||
|
|
||||||
|
|
||||||
|
def build_doc_context_str(
    semantic_identifier: str,
    source_type: DocumentSource,
    content: str,
    metadata_dict: dict[str, str | list[str]],
    updated_at: datetime | None,
    ind: int,
    include_metadata: bool = True,
) -> str:
    """Render one document as a text section for an LLM prompt.

    Args:
        semantic_identifier: Title shown on the ``DOCUMENT`` header line.
        source_type: Connector source, passed through ``clean_up_source``.
        content: Document text; stripped and wrapped with ``CODE_BLOCK_PAT``.
        metadata_dict: Extra key/value lines; list values are comma-joined.
        updated_at: If set, emitted as an ``Updated:`` line.
        ind: Document number shown on the header line.
        include_metadata: When false, only the wrapped content is emitted.

    Returns:
        The assembled section, always ending with three newlines.
    """
    # NOTE(review): nesting was reconstructed from a diff view that had lost
    # its indentation; the metadata loop and the "Updated" line are presumed
    # to sit under `include_metadata` -- confirm against the real file.
    pieces: list[str] = []

    if include_metadata:
        pieces.append(f"DOCUMENT {ind}: {semantic_identifier}\n")
        pieces.append(f"Source: {clean_up_source(source_type)}\n")

        for key, value in metadata_dict.items():
            rendered = ", ".join(value) if isinstance(value, list) else value
            pieces.append(f"{key.capitalize()}: {rendered}\n")

        if updated_at:
            stamp = updated_at.strftime("%B %d, %Y %H:%M")
            pieces.append(f"Updated: {stamp}\n")

    pieces.append(f"{CODE_BLOCK_PAT.format(content.strip())}\n\n\n")
    return "".join(pieces)
|
||||||
|
|
||||||
|
|
||||||
|
def build_complete_context_str(
    context_docs: Sequence[LlmDoc | InferenceChunk],
    include_metadata: bool = True,
) -> str:
    """Concatenate the rendered sections of all context documents.

    Each document is rendered with ``build_doc_context_str`` and numbered
    from 1 in the order given. Surrounding whitespace is stripped from the
    combined result.
    """
    sections = [
        build_doc_context_str(
            semantic_identifier=doc.semantic_identifier,
            source_type=doc.source_type,
            content=doc.content,
            metadata_dict=doc.metadata,
            updated_at=doc.updated_at,
            ind=position,
            include_metadata=include_metadata,
        )
        for position, doc in enumerate(context_docs, start=1)
    ]
    return "".join(sections).strip()
|
||||||
|
@ -2,8 +2,8 @@ from danswer.llm.utils import check_number_of_tokens
|
|||||||
from danswer.prompts.chat_prompts import ADDITIONAL_INFO
|
from danswer.prompts.chat_prompts import ADDITIONAL_INFO
|
||||||
from danswer.prompts.chat_prompts import CHAT_USER_PROMPT
|
from danswer.prompts.chat_prompts import CHAT_USER_PROMPT
|
||||||
from danswer.prompts.chat_prompts import CITATION_REMINDER
|
from danswer.prompts.chat_prompts import CITATION_REMINDER
|
||||||
from danswer.prompts.chat_prompts import DEFAULT_IGNORE_STATEMENT
|
|
||||||
from danswer.prompts.chat_prompts import REQUIRE_CITATION_STATEMENT
|
from danswer.prompts.chat_prompts import REQUIRE_CITATION_STATEMENT
|
||||||
|
from danswer.prompts.constants import DEFAULT_IGNORE_STATEMENT
|
||||||
from danswer.prompts.direct_qa_prompts import LANGUAGE_HINT
|
from danswer.prompts.direct_qa_prompts import LANGUAGE_HINT
|
||||||
from danswer.prompts.prompt_utils import get_current_llm_day_time
|
from danswer.prompts.prompt_utils import get_current_llm_day_time
|
||||||
|
|
||||||
|
@ -5,12 +5,12 @@ from fastapi import Query
|
|||||||
from sqlalchemy.orm import Session
|
from sqlalchemy.orm import Session
|
||||||
|
|
||||||
from danswer.auth.users import current_user
|
from danswer.auth.users import current_user
|
||||||
from danswer.chat.chat_utils import build_doc_context_str
|
|
||||||
from danswer.db.embedding_model import get_current_db_embedding_model
|
from danswer.db.embedding_model import get_current_db_embedding_model
|
||||||
from danswer.db.engine import get_session
|
from danswer.db.engine import get_session
|
||||||
from danswer.db.models import User
|
from danswer.db.models import User
|
||||||
from danswer.document_index.factory import get_default_document_index
|
from danswer.document_index.factory import get_default_document_index
|
||||||
from danswer.llm.utils import get_default_llm_token_encode
|
from danswer.llm.utils import get_default_llm_token_encode
|
||||||
|
from danswer.prompts.prompt_utils import build_doc_context_str
|
||||||
from danswer.search.access_filters import build_access_filters_for_user
|
from danswer.search.access_filters import build_access_filters_for_user
|
||||||
from danswer.search.models import IndexFilters
|
from danswer.search.models import IndexFilters
|
||||||
from danswer.server.documents.models import ChunkInfo
|
from danswer.server.documents.models import ChunkInfo
|
||||||
|
@ -9,6 +9,8 @@ from danswer.configs.constants import AuthType
|
|||||||
from danswer.danswerbot.slack.config import VALID_SLACK_FILTERS
|
from danswer.danswerbot.slack.config import VALID_SLACK_FILTERS
|
||||||
from danswer.db.models import AllowedAnswerFilters
|
from danswer.db.models import AllowedAnswerFilters
|
||||||
from danswer.db.models import ChannelConfig
|
from danswer.db.models import ChannelConfig
|
||||||
|
from danswer.db.models import SlackBotConfig as SlackBotConfigModel
|
||||||
|
from danswer.db.models import SlackBotResponseType
|
||||||
from danswer.server.features.persona.models import PersonaSnapshot
|
from danswer.server.features.persona.models import PersonaSnapshot
|
||||||
|
|
||||||
|
|
||||||
@ -81,6 +83,7 @@ class SlackBotConfigCreationRequest(BaseModel):
|
|||||||
answer_filters: list[AllowedAnswerFilters] = []
|
answer_filters: list[AllowedAnswerFilters] = []
|
||||||
# list of user emails
|
# list of user emails
|
||||||
follow_up_tags: list[str] | None = None
|
follow_up_tags: list[str] | None = None
|
||||||
|
response_type: SlackBotResponseType
|
||||||
|
|
||||||
@validator("answer_filters", pre=True)
|
@validator("answer_filters", pre=True)
|
||||||
def validate_filters(cls, value: list[str]) -> list[str]:
|
def validate_filters(cls, value: list[str]) -> list[str]:
|
||||||
@ -104,6 +107,22 @@ class SlackBotConfig(BaseModel):
|
|||||||
id: int
|
id: int
|
||||||
persona: PersonaSnapshot | None
|
persona: PersonaSnapshot | None
|
||||||
channel_config: ChannelConfig
|
channel_config: ChannelConfig
|
||||||
|
response_type: SlackBotResponseType
|
||||||
|
|
||||||
|
@classmethod
def from_model(
    cls, slack_bot_config_model: SlackBotConfigModel
) -> "SlackBotConfig":
    """Build the API-facing config from its database model.

    The persona is converted with ``PersonaSnapshot.from_model`` when the
    model has one and is ``None`` otherwise; ``id``, ``channel_config`` and
    ``response_type`` are copied across unchanged.
    """
    config_id = slack_bot_config_model.id
    db_persona = slack_bot_config_model.persona

    if db_persona:
        persona_snapshot = PersonaSnapshot.from_model(db_persona)
    else:
        persona_snapshot = None

    return cls(
        id=config_id,
        persona=persona_snapshot,
        channel_config=slack_bot_config_model.channel_config,
        response_type=slack_bot_config_model.response_type,
    )
|
||||||
|
|
||||||
|
|
||||||
class ModelVersionResponse(BaseModel):
|
class ModelVersionResponse(BaseModel):
|
||||||
|
@ -19,7 +19,6 @@ from danswer.db.slack_bot_config import insert_slack_bot_config
|
|||||||
from danswer.db.slack_bot_config import remove_slack_bot_config
|
from danswer.db.slack_bot_config import remove_slack_bot_config
|
||||||
from danswer.db.slack_bot_config import update_slack_bot_config
|
from danswer.db.slack_bot_config import update_slack_bot_config
|
||||||
from danswer.dynamic_configs.interface import ConfigNotFoundError
|
from danswer.dynamic_configs.interface import ConfigNotFoundError
|
||||||
from danswer.server.features.persona.models import PersonaSnapshot
|
|
||||||
from danswer.server.manage.models import SlackBotConfig
|
from danswer.server.manage.models import SlackBotConfig
|
||||||
from danswer.server.manage.models import SlackBotConfigCreationRequest
|
from danswer.server.manage.models import SlackBotConfigCreationRequest
|
||||||
from danswer.server.manage.models import SlackBotTokens
|
from danswer.server.manage.models import SlackBotTokens
|
||||||
@ -108,17 +107,10 @@ def create_slack_bot_config(
|
|||||||
slack_bot_config_model = insert_slack_bot_config(
|
slack_bot_config_model = insert_slack_bot_config(
|
||||||
persona_id=persona_id,
|
persona_id=persona_id,
|
||||||
channel_config=channel_config,
|
channel_config=channel_config,
|
||||||
|
response_type=slack_bot_config_creation_request.response_type,
|
||||||
db_session=db_session,
|
db_session=db_session,
|
||||||
)
|
)
|
||||||
return SlackBotConfig(
|
return SlackBotConfig.from_model(slack_bot_config_model)
|
||||||
id=slack_bot_config_model.id,
|
|
||||||
persona=(
|
|
||||||
PersonaSnapshot.from_model(slack_bot_config_model.persona)
|
|
||||||
if slack_bot_config_model.persona
|
|
||||||
else None
|
|
||||||
),
|
|
||||||
channel_config=slack_bot_config_model.channel_config,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
@router.patch("/admin/slack-bot/config/{slack_bot_config_id}")
|
@router.patch("/admin/slack-bot/config/{slack_bot_config_id}")
|
||||||
@ -170,17 +162,10 @@ def patch_slack_bot_config(
|
|||||||
slack_bot_config_id=slack_bot_config_id,
|
slack_bot_config_id=slack_bot_config_id,
|
||||||
persona_id=persona_id,
|
persona_id=persona_id,
|
||||||
channel_config=channel_config,
|
channel_config=channel_config,
|
||||||
|
response_type=slack_bot_config_creation_request.response_type,
|
||||||
db_session=db_session,
|
db_session=db_session,
|
||||||
)
|
)
|
||||||
return SlackBotConfig(
|
return SlackBotConfig.from_model(slack_bot_config_model)
|
||||||
id=slack_bot_config_model.id,
|
|
||||||
persona=(
|
|
||||||
PersonaSnapshot.from_model(slack_bot_config_model.persona)
|
|
||||||
if slack_bot_config_model.persona
|
|
||||||
else None
|
|
||||||
),
|
|
||||||
channel_config=slack_bot_config_model.channel_config,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
@router.delete("/admin/slack-bot/config/{slack_bot_config_id}")
|
@router.delete("/admin/slack-bot/config/{slack_bot_config_id}")
|
||||||
@ -201,15 +186,7 @@ def list_slack_bot_configs(
|
|||||||
) -> list[SlackBotConfig]:
|
) -> list[SlackBotConfig]:
|
||||||
slack_bot_config_models = fetch_slack_bot_configs(db_session=db_session)
|
slack_bot_config_models = fetch_slack_bot_configs(db_session=db_session)
|
||||||
return [
|
return [
|
||||||
SlackBotConfig(
|
SlackBotConfig.from_model(slack_bot_config_model)
|
||||||
id=slack_bot_config_model.id,
|
|
||||||
persona=(
|
|
||||||
PersonaSnapshot.from_model(slack_bot_config_model.persona)
|
|
||||||
if slack_bot_config_model.persona
|
|
||||||
else None
|
|
||||||
),
|
|
||||||
channel_config=slack_bot_config_model.channel_config,
|
|
||||||
)
|
|
||||||
for slack_bot_config_model in slack_bot_config_models
|
for slack_bot_config_model in slack_bot_config_models
|
||||||
]
|
]
|
||||||
|
|
||||||
|
BIN
backend/slackbot_images/Confluence.png
Normal file
BIN
backend/slackbot_images/Confluence.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 1013 B |
BIN
backend/slackbot_images/File.png
Normal file
BIN
backend/slackbot_images/File.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 1.7 KiB |
BIN
backend/slackbot_images/Guru.png
Normal file
BIN
backend/slackbot_images/Guru.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 5.0 KiB |
BIN
backend/slackbot_images/Jira.png
Normal file
BIN
backend/slackbot_images/Jira.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 829 B |
3
backend/slackbot_images/README.md
Normal file
3
backend/slackbot_images/README.md
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
|
||||||
|
This folder contains images needed by the Danswer Slack Bot. When possible, we use the images
|
||||||
|
within `web/public`, but sometimes those images do not work for the Slack Bot.
|
BIN
backend/slackbot_images/Web.png
Normal file
BIN
backend/slackbot_images/Web.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 2.8 KiB |
BIN
backend/slackbot_images/Zendesk.png
Normal file
BIN
backend/slackbot_images/Zendesk.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 18 KiB |
@ -90,9 +90,13 @@ export const SlackBotCreationForm = ({
|
|||||||
!isPersonaASlackBotPersona(existingSlackBotConfig.persona)
|
!isPersonaASlackBotPersona(existingSlackBotConfig.persona)
|
||||||
? existingSlackBotConfig.persona.id
|
? existingSlackBotConfig.persona.id
|
||||||
: null,
|
: null,
|
||||||
|
response_type: existingSlackBotConfig?.response_type || "citations",
|
||||||
}}
|
}}
|
||||||
validationSchema={Yup.object().shape({
|
validationSchema={Yup.object().shape({
|
||||||
channel_names: Yup.array().of(Yup.string()),
|
channel_names: Yup.array().of(Yup.string()),
|
||||||
|
response_type: Yup.string()
|
||||||
|
.oneOf(["quotes", "citations"])
|
||||||
|
.required(),
|
||||||
answer_validity_check_enabled: Yup.boolean().required(),
|
answer_validity_check_enabled: Yup.boolean().required(),
|
||||||
questionmark_prefilter_enabled: Yup.boolean().required(),
|
questionmark_prefilter_enabled: Yup.boolean().required(),
|
||||||
respond_tag_only: Yup.boolean().required(),
|
respond_tag_only: Yup.boolean().required(),
|
||||||
@ -171,6 +175,33 @@ export const SlackBotCreationForm = ({
|
|||||||
</div>
|
</div>
|
||||||
}
|
}
|
||||||
/>
|
/>
|
||||||
|
|
||||||
|
<SelectorFormField
|
||||||
|
name="response_type"
|
||||||
|
label="Response Format"
|
||||||
|
subtext={
|
||||||
|
<>
|
||||||
|
If set to Citations, DanswerBot will respond with a direct
|
||||||
|
answer with inline citations. It will also provide links
|
||||||
|
to these cited documents below the answer. When in doubt,
|
||||||
|
choose this option.
|
||||||
|
<br />
|
||||||
|
<br />
|
||||||
|
If set to Quotes, DanswerBot will respond with a direct
|
||||||
|
answer as well as with quotes pulled from the context
|
||||||
|
documents to support that answer. DanswerBot will also
|
||||||
|
give a list of relevant documents. Choose this option if
|
||||||
|
you want a very detailed response AND/OR a list of
|
||||||
|
relevant documents would be useful just in case the LLM
|
||||||
|
missed anything.
|
||||||
|
</>
|
||||||
|
}
|
||||||
|
options={[
|
||||||
|
{ name: "Citations", value: "citations" },
|
||||||
|
{ name: "Quotes", value: "quotes" },
|
||||||
|
]}
|
||||||
|
/>
|
||||||
|
|
||||||
<Divider />
|
<Divider />
|
||||||
|
|
||||||
<SectionHeader>When should DanswerBot respond?</SectionHeader>
|
<SectionHeader>When should DanswerBot respond?</SectionHeader>
|
||||||
|
@ -1,4 +1,8 @@
|
|||||||
import { ChannelConfig, SlackBotTokens } from "@/lib/types";
|
import {
|
||||||
|
ChannelConfig,
|
||||||
|
SlackBotResponseType,
|
||||||
|
SlackBotTokens,
|
||||||
|
} from "@/lib/types";
|
||||||
import { Persona } from "../personas/interfaces";
|
import { Persona } from "../personas/interfaces";
|
||||||
|
|
||||||
interface SlackBotConfigCreationRequest {
|
interface SlackBotConfigCreationRequest {
|
||||||
@ -12,6 +16,7 @@ interface SlackBotConfigCreationRequest {
|
|||||||
respond_team_member_list: string[];
|
respond_team_member_list: string[];
|
||||||
follow_up_tags?: string[];
|
follow_up_tags?: string[];
|
||||||
usePersona: boolean;
|
usePersona: boolean;
|
||||||
|
response_type: SlackBotResponseType;
|
||||||
}
|
}
|
||||||
|
|
||||||
const buildFiltersFromCreationRequest = (
|
const buildFiltersFromCreationRequest = (
|
||||||
@ -40,6 +45,7 @@ const buildRequestBodyFromCreationRequest = (
|
|||||||
...(creationRequest.usePersona
|
...(creationRequest.usePersona
|
||||||
? { persona_id: creationRequest.persona_id }
|
? { persona_id: creationRequest.persona_id }
|
||||||
: { document_sets: creationRequest.document_sets }),
|
: { document_sets: creationRequest.document_sets }),
|
||||||
|
response_type: creationRequest.response_type,
|
||||||
});
|
});
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -231,7 +231,7 @@ interface SelectorFormFieldProps {
|
|||||||
name: string;
|
name: string;
|
||||||
label?: string;
|
label?: string;
|
||||||
options: StringOrNumberOption[];
|
options: StringOrNumberOption[];
|
||||||
subtext?: string;
|
subtext?: string | JSX.Element;
|
||||||
includeDefault?: boolean;
|
includeDefault?: boolean;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -370,10 +370,13 @@ export interface ChannelConfig {
|
|||||||
follow_up_tags?: string[];
|
follow_up_tags?: string[];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export type SlackBotResponseType = "quotes" | "citations";
|
||||||
|
|
||||||
export interface SlackBotConfig {
|
export interface SlackBotConfig {
|
||||||
id: number;
|
id: number;
|
||||||
persona: Persona | null;
|
persona: Persona | null;
|
||||||
channel_config: ChannelConfig;
|
channel_config: ChannelConfig;
|
||||||
|
response_type: SlackBotResponseType;
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface SlackBotTokens {
|
export interface SlackBotTokens {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user