Improve slack flow

This commit is contained in:
Weves 2024-02-22 07:06:26 -08:00 committed by Chris Weaver
parent 0b0665044f
commit 7869f23e12
32 changed files with 696 additions and 184 deletions

View File

@ -0,0 +1,39 @@
"""Add slack bot display type
Revision ID: fcd135795f21
Revises: 0a2b51deb0b8
Create Date: 2024-03-04 17:03:27.116284
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "fcd135795f21"
down_revision = "0a2b51deb0b8"
branch_labels = None
depends_on = None
def upgrade() -> None:
    # Add the column as nullable first so existing rows don't violate the
    # constraint, backfill a value for them, then tighten to NOT NULL.
    op.add_column(
        "slack_bot_config",
        sa.Column(
            "response_type",
            sa.Enum(
                "QUOTES",
                "CITATIONS",
                name="slackbotresponsetype",
                # native_enum=False -> stored as VARCHAR (+ CHECK constraint),
                # not a database-native ENUM type
                native_enum=False,
            ),
            nullable=True,
        ),
    )
    # Pre-existing configs predate citation support, so default them to QUOTES
    op.execute(
        "UPDATE slack_bot_config SET response_type = 'QUOTES' WHERE response_type IS NULL"
    )
    op.alter_column("slack_bot_config", "response_type", nullable=False)
def downgrade() -> None:
    # The enum is non-native (plain VARCHAR), so there is no separate DB type
    # to drop -- removing the column is sufficient.
    op.drop_column("slack_bot_config", "response_type")

View File

@ -1,7 +1,7 @@
import re import re
from collections.abc import Callable from collections.abc import Callable
from collections.abc import Iterator from collections.abc import Iterator
from datetime import datetime from collections.abc import Sequence
from functools import lru_cache from functools import lru_cache
from typing import cast from typing import cast
@ -16,7 +16,6 @@ from danswer.chat.models import DanswerAnswerPiece
from danswer.chat.models import LlmDoc from danswer.chat.models import LlmDoc
from danswer.configs.chat_configs import MULTILINGUAL_QUERY_EXPANSION from danswer.configs.chat_configs import MULTILINGUAL_QUERY_EXPANSION
from danswer.configs.chat_configs import STOP_STREAM_PAT from danswer.configs.chat_configs import STOP_STREAM_PAT
from danswer.configs.constants import DocumentSource
from danswer.configs.constants import IGNORE_FOR_QA from danswer.configs.constants import IGNORE_FOR_QA
from danswer.configs.model_configs import DOC_EMBEDDING_CONTEXT_SIZE from danswer.configs.model_configs import DOC_EMBEDDING_CONTEXT_SIZE
from danswer.configs.model_configs import GEN_AI_SINGLE_USER_MESSAGE_EXPECTED_MAX_TOKENS from danswer.configs.model_configs import GEN_AI_SINGLE_USER_MESSAGE_EXPECTED_MAX_TOKENS
@ -34,13 +33,12 @@ from danswer.llm.utils import tokenizer_trim_content
from danswer.prompts.chat_prompts import ADDITIONAL_INFO from danswer.prompts.chat_prompts import ADDITIONAL_INFO
from danswer.prompts.chat_prompts import CHAT_USER_CONTEXT_FREE_PROMPT from danswer.prompts.chat_prompts import CHAT_USER_CONTEXT_FREE_PROMPT
from danswer.prompts.chat_prompts import CHAT_USER_PROMPT from danswer.prompts.chat_prompts import CHAT_USER_PROMPT
from danswer.prompts.chat_prompts import CITATION_REMINDER
from danswer.prompts.chat_prompts import DEFAULT_IGNORE_STATEMENT
from danswer.prompts.chat_prompts import NO_CITATION_STATEMENT from danswer.prompts.chat_prompts import NO_CITATION_STATEMENT
from danswer.prompts.chat_prompts import REQUIRE_CITATION_STATEMENT from danswer.prompts.chat_prompts import REQUIRE_CITATION_STATEMENT
from danswer.prompts.constants import CODE_BLOCK_PAT from danswer.prompts.constants import DEFAULT_IGNORE_STATEMENT
from danswer.prompts.constants import TRIPLE_BACKTICK from danswer.prompts.constants import TRIPLE_BACKTICK
from danswer.prompts.direct_qa_prompts import LANGUAGE_HINT from danswer.prompts.prompt_utils import build_complete_context_str
from danswer.prompts.prompt_utils import build_task_prompt_reminders
from danswer.prompts.prompt_utils import get_current_llm_day_time from danswer.prompts.prompt_utils import get_current_llm_day_time
from danswer.prompts.token_counts import ADDITIONAL_INFO_TOKEN_CNT from danswer.prompts.token_counts import ADDITIONAL_INFO_TOKEN_CNT
from danswer.prompts.token_counts import ( from danswer.prompts.token_counts import (
@ -53,68 +51,6 @@ from danswer.utils.logger import setup_logger
logger = setup_logger() logger = setup_logger()
# Maps connector enum string to a more natural language representation for the LLM
# If not on the list, uses the original but slightly cleaned up, see below
CONNECTOR_NAME_MAP = {
    "web": "Website",
    "requesttracker": "Request Tracker",
    "github": "GitHub",
    "file": "File Upload",
}


def clean_up_source(source_str: str) -> str:
    """Return a human-friendly connector name for the given source string."""
    # Curated names win; otherwise title-case the underscore-separated words
    return CONNECTOR_NAME_MAP.get(source_str, source_str.replace("_", " ").title())
def build_doc_context_str(
    semantic_identifier: str,
    source_type: DocumentSource,
    content: str,
    metadata_dict: dict[str, str | list[str]],
    updated_at: datetime | None,
    ind: int,
    include_metadata: bool = True,
) -> str:
    """Render a single document (optionally with a metadata header) for an LLM prompt."""
    parts: list[str] = []
    if include_metadata:
        parts.append(f"DOCUMENT {ind}: {semantic_identifier}\n")
        parts.append(f"Source: {clean_up_source(source_type)}\n")
        for key, value in metadata_dict.items():
            # list-valued metadata is rendered comma-separated
            rendered = ", ".join(value) if isinstance(value, list) else value
            parts.append(f"{key.capitalize()}: {rendered}\n")
        if updated_at:
            parts.append(f"Updated: {updated_at.strftime('%B %d, %Y %H:%M')}\n")
    parts.append(f"{CODE_BLOCK_PAT.format(content.strip())}\n\n\n")
    return "".join(parts)
def build_complete_context_str(
    context_docs: list[LlmDoc | InferenceChunk],
    include_metadata: bool = True,
) -> str:
    """Concatenate the per-document context strings for all docs (1-indexed)."""
    return "".join(
        build_doc_context_str(
            semantic_identifier=doc.semantic_identifier,
            source_type=doc.source_type,
            content=doc.content,
            metadata_dict=doc.metadata,
            updated_at=doc.updated_at,
            ind=doc_ind,
            include_metadata=include_metadata,
        )
        for doc_ind, doc in enumerate(context_docs, start=1)
    ).strip()
@lru_cache() @lru_cache()
def build_chat_system_message( def build_chat_system_message(
@ -147,18 +83,6 @@ def build_chat_system_message(
return system_msg, token_count return system_msg, token_count
def build_task_prompt_reminders(
    prompt: Prompt,
    use_language_hint: bool = bool(MULTILINGUAL_QUERY_EXPANSION),
    citation_str: str = CITATION_REMINDER,
    language_hint_str: str = LANGUAGE_HINT,
) -> str:
    """Append citation and language-hint reminders (when enabled) to the task prompt."""
    pieces = [prompt.task_prompt]
    if prompt.include_citations:
        pieces.append(citation_str)
    if use_language_hint:
        pieces.append(language_hint_str.lstrip())
    return "".join(pieces)
def llm_doc_from_inference_chunk(inf_chunk: InferenceChunk) -> LlmDoc: def llm_doc_from_inference_chunk(inf_chunk: InferenceChunk) -> LlmDoc:
return LlmDoc( return LlmDoc(
document_id=inf_chunk.document_id, document_id=inf_chunk.document_id,
@ -172,7 +96,7 @@ def llm_doc_from_inference_chunk(inf_chunk: InferenceChunk) -> LlmDoc:
def map_document_id_order( def map_document_id_order(
chunks: list[InferenceChunk | LlmDoc], one_indexed: bool = True chunks: Sequence[InferenceChunk | LlmDoc], one_indexed: bool = True
) -> dict[str, int]: ) -> dict[str, int]:
order_mapping = {} order_mapping = {}
current = 1 if one_indexed else 0 current = 1 if one_indexed else 0
@ -568,6 +492,63 @@ def extract_citations_from_stream(
yield DanswerAnswerPiece(answer_piece=curr_segment) yield DanswerAnswerPiece(answer_piece=curr_segment)
def reorganize_citations(
    answer: str, citations: list[CitationInfo]
) -> tuple[str, list[CitationInfo]]:
    """For a complete, citation-aware response, we want to reorganize the citations so that
    they are in the order of the documents that were used in the response. This just looks nicer / avoids
    confusion ("Why is there [7] when only 2 documents are cited?").

    Returns the rewritten answer plus the renumbered CitationInfo list.
    """
    # Regular expression to find all instances of [[x]](LINK)
    pattern = r"\[\[(.*?)\]\]\((.*?)\)"

    # Map old citation number -> renumbered CitationInfo, numbered in order
    # of first appearance within the answer
    new_citation_info: dict[int, CitationInfo] = {}
    for link_text, _ in re.findall(pattern, answer):
        try:
            citation_num = int(link_text)
        except ValueError:
            # non-numeric citation label; leave it untouched
            continue
        if citation_num in new_citation_info:
            continue

        matching_citation = next(
            (c for c in citations if c.citation_num == citation_num), None
        )
        if matching_citation is None:
            continue

        new_citation_info[citation_num] = CitationInfo(
            citation_num=len(new_citation_info) + 1,
            document_id=matching_citation.document_id,
        )

    # Function to replace citations with their new number
    def slack_link_format(match: re.Match) -> str:
        link_text = match.group(1)
        try:
            citation_num = int(link_text)
            if citation_num in new_citation_info:
                link_text = new_citation_info[citation_num].citation_num
        except ValueError:
            pass
        link_url = match.group(2)
        return f"[[{link_text}]]({link_url})"

    # Substitute all matches in the input text
    new_answer = re.sub(pattern, slack_link_format, answer)

    # if any citations weren't parsable, just add them back to be safe
    for citation in citations:
        if citation.citation_num not in new_citation_info:
            new_citation_info[citation.citation_num] = citation

    return new_answer, list(new_citation_info.values())
def get_prompt_tokens(prompt: Prompt) -> int: def get_prompt_tokens(prompt: Prompt) -> int:
# Note: currently custom prompts do not allow datetime aware, only default prompts # Note: currently custom prompts do not allow datetime aware, only default prompts
return ( return (

View File

@ -7,7 +7,6 @@ from sqlalchemy.orm import Session
from danswer.chat.chat_utils import build_chat_system_message from danswer.chat.chat_utils import build_chat_system_message
from danswer.chat.chat_utils import build_chat_user_message from danswer.chat.chat_utils import build_chat_user_message
from danswer.chat.chat_utils import build_doc_context_str
from danswer.chat.chat_utils import compute_max_document_tokens from danswer.chat.chat_utils import compute_max_document_tokens
from danswer.chat.chat_utils import compute_max_llm_input_tokens from danswer.chat.chat_utils import compute_max_llm_input_tokens
from danswer.chat.chat_utils import create_chat_chain from danswer.chat.chat_utils import create_chat_chain
@ -51,6 +50,7 @@ from danswer.llm.utils import get_default_llm_version
from danswer.llm.utils import get_max_input_tokens from danswer.llm.utils import get_max_input_tokens
from danswer.llm.utils import tokenizer_trim_content from danswer.llm.utils import tokenizer_trim_content
from danswer.llm.utils import translate_history_to_basemessages from danswer.llm.utils import translate_history_to_basemessages
from danswer.prompts.prompt_utils import build_doc_context_str
from danswer.search.models import OptionalSearchSetting from danswer.search.models import OptionalSearchSetting
from danswer.search.models import RetrievalDetails from danswer.search.models import RetrievalDetails
from danswer.search.request_preprocessing import retrieval_preprocessing from danswer.search.request_preprocessing import retrieval_preprocessing

View File

@ -52,6 +52,8 @@ ENABLE_DANSWERBOT_REFLEXION = (
) )
# Currently not support chain of thought, probably will add back later # Currently not support chain of thought, probably will add back later
DANSWER_BOT_DISABLE_COT = True DANSWER_BOT_DISABLE_COT = True
# if set, will default DanswerBot to use quotes and reference documents
DANSWER_BOT_USE_QUOTES = os.environ.get("DANSWER_BOT_USE_QUOTES", "").lower() == "true"
# Maximum Questions Per Minute, Default Uncapped # Maximum Questions Per Minute, Default Uncapped
DANSWER_BOT_MAX_QPM = int(os.environ.get("DANSWER_BOT_MAX_QPM") or 0) or None DANSWER_BOT_MAX_QPM = int(os.environ.get("DANSWER_BOT_MAX_QPM") or 0) or None

View File

@ -1,15 +1,20 @@
import re
from datetime import datetime from datetime import datetime
from re import Match
import pytz import pytz
import timeago # type: ignore import timeago # type: ignore
from slack_sdk.models.blocks import ActionsBlock from slack_sdk.models.blocks import ActionsBlock
from slack_sdk.models.blocks import Block from slack_sdk.models.blocks import Block
from slack_sdk.models.blocks import ButtonElement from slack_sdk.models.blocks import ButtonElement
from slack_sdk.models.blocks import ContextBlock
from slack_sdk.models.blocks import DividerBlock from slack_sdk.models.blocks import DividerBlock
from slack_sdk.models.blocks import HeaderBlock from slack_sdk.models.blocks import HeaderBlock
from slack_sdk.models.blocks import Option from slack_sdk.models.blocks import Option
from slack_sdk.models.blocks import RadioButtonsElement from slack_sdk.models.blocks import RadioButtonsElement
from slack_sdk.models.blocks import SectionBlock from slack_sdk.models.blocks import SectionBlock
from slack_sdk.models.blocks.basic_components import MarkdownTextObject
from slack_sdk.models.blocks.block_elements import ImageElement
from danswer.chat.models import DanswerQuote from danswer.chat.models import DanswerQuote
from danswer.configs.app_configs import DISABLE_GENERATIVE_AI from danswer.configs.app_configs import DISABLE_GENERATIVE_AI
@ -22,6 +27,7 @@ from danswer.danswerbot.slack.constants import FOLLOWUP_BUTTON_ACTION_ID
from danswer.danswerbot.slack.constants import FOLLOWUP_BUTTON_RESOLVED_ACTION_ID from danswer.danswerbot.slack.constants import FOLLOWUP_BUTTON_RESOLVED_ACTION_ID
from danswer.danswerbot.slack.constants import IMMEDIATE_RESOLVED_BUTTON_ACTION_ID from danswer.danswerbot.slack.constants import IMMEDIATE_RESOLVED_BUTTON_ACTION_ID
from danswer.danswerbot.slack.constants import LIKE_BLOCK_ACTION_ID from danswer.danswerbot.slack.constants import LIKE_BLOCK_ACTION_ID
from danswer.danswerbot.slack.icons import source_to_github_img_link
from danswer.danswerbot.slack.utils import build_feedback_id from danswer.danswerbot.slack.utils import build_feedback_id
from danswer.danswerbot.slack.utils import remove_slack_text_interactions from danswer.danswerbot.slack.utils import remove_slack_text_interactions
from danswer.danswerbot.slack.utils import translate_vespa_highlight_to_slack from danswer.danswerbot.slack.utils import translate_vespa_highlight_to_slack
@ -29,7 +35,35 @@ from danswer.search.models import SavedSearchDoc
from danswer.utils.text_processing import decode_escapes from danswer.utils.text_processing import decode_escapes
from danswer.utils.text_processing import replace_whitespaces_w_space from danswer.utils.text_processing import replace_whitespaces_w_space
_MAX_BLURB_LEN = 75 _MAX_BLURB_LEN = 45
def _process_citations_for_slack(text: str) -> str:
"""
Converts instances of [[x]](LINK) in the input text to Slack's link format <LINK|[x]>.
Args:
- text (str): The input string containing markdown links.
Returns:
- str: The string with markdown links converted to Slack format.
"""
# Regular expression to find all instances of [[x]](LINK)
pattern = r"\[\[(.*?)\]\]\((.*?)\)"
# Function to replace each found instance with Slack's format
def slack_link_format(match: Match) -> str:
link_text = match.group(1)
link_url = match.group(2)
return f"<{link_url}|[{link_text}]>"
# Substitute all matches in the input text
return re.sub(pattern, slack_link_format, text)
def clean_markdown_link_text(text: str) -> str:
    """Flatten link text onto one line: newlines become spaces, ends trimmed."""
    return " ".join(text.split("\n")).strip()
def build_qa_feedback_block(message_id: int) -> Block: def build_qa_feedback_block(message_id: int) -> Block:
@ -38,13 +72,12 @@ def build_qa_feedback_block(message_id: int) -> Block:
elements=[ elements=[
ButtonElement( ButtonElement(
action_id=LIKE_BLOCK_ACTION_ID, action_id=LIKE_BLOCK_ACTION_ID,
text="👍", text="👍 Helpful",
style="primary", style="primary",
), ),
ButtonElement( ButtonElement(
action_id=DISLIKE_BLOCK_ACTION_ID, action_id=DISLIKE_BLOCK_ACTION_ID,
text="👎", text="👎 Not helpful",
style="danger",
), ),
], ],
) )
@ -164,6 +197,80 @@ def build_documents_blocks(
return section_blocks return section_blocks
def build_sources_blocks(
    cited_documents: list[tuple[int, SavedSearchDoc]],
    num_docs_to_display: int = DANSWER_BOT_NUM_DOCS_TO_DISPLAY,
) -> list[Block]:
    """Build the "Sources:" section shown under a citations-style answer.

    Args:
        cited_documents: (citation_num, doc) pairs; duplicates by document_id
            are collapsed to a single entry.
        num_docs_to_display: maximum number of source rows to render.

    Returns:
        A warning block if nothing was cited, otherwise a header block
        followed by one ContextBlock (icon + linked title + metadata) per
        unique cited document.
    """
    if not cited_documents:
        return [
            SectionBlock(
                text="*Warning*: no sources were cited for this answer, so it may be unreliable 😔"
            )
        ]

    seen_docs_identifiers = set()
    section_blocks: list[Block] = [SectionBlock(text="*Sources:*")]
    included_docs = 0
    for citation_num, d in cited_documents:
        # the same document can back multiple citations; show it only once
        if d.document_id in seen_docs_identifiers:
            continue
        seen_docs_identifiers.add(d.document_id)

        doc_sem_id = d.semantic_identifier
        if d.source_type == DocumentSource.SLACK.value:
            # for legacy reasons, before the switch to how Slack semantic identifiers are constructed
            if "#" not in doc_sem_id:
                doc_sem_id = "#" + doc_sem_id

        # this is needed to try and prevent the line from overflowing
        # if it does overflow, the image gets placed above the title and it
        # looks bad
        doc_sem_id = (
            doc_sem_id[:_MAX_BLURB_LEN] + "..."
            if len(doc_sem_id) > _MAX_BLURB_LEN
            else doc_sem_id
        )

        owner_str = f"By {d.primary_owners[0]}" if d.primary_owners else None
        days_ago_str = (
            timeago.format(d.updated_at, datetime.now(pytz.utc))
            if d.updated_at
            else None
        )
        final_metadata_str = " | ".join(
            ([owner_str] if owner_str else [])
            + ([days_ago_str] if days_ago_str else [])
        )

        document_title = clean_markdown_link_text(doc_sem_id)
        img_link = source_to_github_img_link(d.source_type)

        section_blocks.append(
            ContextBlock(
                elements=(
                    [
                        ImageElement(
                            image_url=img_link,
                            alt_text=f"{d.source_type.value} logo",
                        )
                    ]
                    if img_link
                    else []
                )
                + [
                    MarkdownTextObject(
                        text=f"*<{d.link}|[{citation_num}] {document_title}>*\n{final_metadata_str}"
                    ),
                ]
            )
        )

        # BUG FIX: `included_docs` was never incremented before, so the
        # num_docs_to_display cap was silently never enforced
        included_docs += 1
        if included_docs >= num_docs_to_display:
            break

    return section_blocks
def build_quotes_block( def build_quotes_block(
quotes: list[DanswerQuote], quotes: list[DanswerQuote],
) -> list[Block]: ) -> list[Block]:
@ -214,6 +321,7 @@ def build_qa_response_blocks(
time_cutoff: datetime | None, time_cutoff: datetime | None,
favor_recent: bool, favor_recent: bool,
skip_quotes: bool = False, skip_quotes: bool = False,
process_message_for_citations: bool = False,
skip_ai_feedback: bool = False, skip_ai_feedback: bool = False,
) -> list[Block]: ) -> list[Block]:
if DISABLE_GENERATIVE_AI: if DISABLE_GENERATIVE_AI:
@ -221,8 +329,6 @@ def build_qa_response_blocks(
quotes_blocks: list[Block] = [] quotes_blocks: list[Block] = []
ai_answer_header = HeaderBlock(text="AI Answer")
filter_block: Block | None = None filter_block: Block | None = None
if time_cutoff or favor_recent or source_filters: if time_cutoff or favor_recent or source_filters:
filter_text = "Filters: " filter_text = "Filters: "
@ -247,6 +353,8 @@ def build_qa_response_blocks(
) )
else: else:
answer_processed = decode_escapes(remove_slack_text_interactions(answer)) answer_processed = decode_escapes(remove_slack_text_interactions(answer))
if process_message_for_citations:
answer_processed = _process_citations_for_slack(answer_processed)
answer_block = SectionBlock(text=answer_processed) answer_block = SectionBlock(text=answer_processed)
if quotes: if quotes:
quotes_blocks = build_quotes_block(quotes) quotes_blocks = build_quotes_block(quotes)
@ -259,7 +367,7 @@ def build_qa_response_blocks(
) )
] ]
response_blocks: list[Block] = [ai_answer_header] response_blocks: list[Block] = []
if filter_block is not None: if filter_block is not None:
response_blocks.append(filter_block) response_blocks.append(filter_block)
@ -271,7 +379,6 @@ def build_qa_response_blocks(
if not skip_quotes: if not skip_quotes:
response_blocks.extend(quotes_blocks) response_blocks.extend(quotes_blocks)
response_blocks.append(DividerBlock())
return response_blocks return response_blocks

View File

@ -9,6 +9,7 @@ from typing import TypeVar
from retry import retry from retry import retry
from slack_sdk import WebClient from slack_sdk import WebClient
from slack_sdk.errors import SlackApiError from slack_sdk.errors import SlackApiError
from slack_sdk.models.blocks import DividerBlock
from sqlalchemy.orm import Session from sqlalchemy.orm import Session
from danswer.chat.chat_utils import compute_max_document_tokens from danswer.chat.chat_utils import compute_max_document_tokens
@ -18,12 +19,14 @@ from danswer.configs.danswerbot_configs import DANSWER_BOT_DISABLE_DOCS_ONLY_ANS
from danswer.configs.danswerbot_configs import DANSWER_BOT_DISPLAY_ERROR_MSGS from danswer.configs.danswerbot_configs import DANSWER_BOT_DISPLAY_ERROR_MSGS
from danswer.configs.danswerbot_configs import DANSWER_BOT_NUM_RETRIES from danswer.configs.danswerbot_configs import DANSWER_BOT_NUM_RETRIES
from danswer.configs.danswerbot_configs import DANSWER_BOT_TARGET_CHUNK_PERCENTAGE from danswer.configs.danswerbot_configs import DANSWER_BOT_TARGET_CHUNK_PERCENTAGE
from danswer.configs.danswerbot_configs import DANSWER_BOT_USE_QUOTES
from danswer.configs.danswerbot_configs import DANSWER_REACT_EMOJI from danswer.configs.danswerbot_configs import DANSWER_REACT_EMOJI
from danswer.configs.danswerbot_configs import DISABLE_DANSWER_BOT_FILTER_DETECT from danswer.configs.danswerbot_configs import DISABLE_DANSWER_BOT_FILTER_DETECT
from danswer.configs.danswerbot_configs import ENABLE_DANSWERBOT_REFLEXION from danswer.configs.danswerbot_configs import ENABLE_DANSWERBOT_REFLEXION
from danswer.danswerbot.slack.blocks import build_documents_blocks from danswer.danswerbot.slack.blocks import build_documents_blocks
from danswer.danswerbot.slack.blocks import build_follow_up_block from danswer.danswerbot.slack.blocks import build_follow_up_block
from danswer.danswerbot.slack.blocks import build_qa_response_blocks from danswer.danswerbot.slack.blocks import build_qa_response_blocks
from danswer.danswerbot.slack.blocks import build_sources_blocks
from danswer.danswerbot.slack.blocks import get_restate_blocks from danswer.danswerbot.slack.blocks import get_restate_blocks
from danswer.danswerbot.slack.constants import SLACK_CHANNEL_ID from danswer.danswerbot.slack.constants import SLACK_CHANNEL_ID
from danswer.danswerbot.slack.models import SlackMessageInfo from danswer.danswerbot.slack.models import SlackMessageInfo
@ -35,6 +38,7 @@ from danswer.danswerbot.slack.utils import SlackRateLimiter
from danswer.danswerbot.slack.utils import update_emote_react from danswer.danswerbot.slack.utils import update_emote_react
from danswer.db.engine import get_sqlalchemy_engine from danswer.db.engine import get_sqlalchemy_engine
from danswer.db.models import SlackBotConfig from danswer.db.models import SlackBotConfig
from danswer.db.models import SlackBotResponseType
from danswer.llm.utils import check_number_of_tokens from danswer.llm.utils import check_number_of_tokens
from danswer.llm.utils import get_default_llm_version from danswer.llm.utils import get_default_llm_version
from danswer.llm.utils import get_max_input_tokens from danswer.llm.utils import get_max_input_tokens
@ -137,6 +141,13 @@ def handle_message(
should_respond_even_with_no_docs = persona.num_chunks == 0 if persona else False should_respond_even_with_no_docs = persona.num_chunks == 0 if persona else False
# figure out if we want to use citations or quotes
use_citations = (
not DANSWER_BOT_USE_QUOTES
if channel_config is None
else channel_config.response_type == SlackBotResponseType.CITATIONS
)
# List of user id to send message to, if None, send to everyone in channel # List of user id to send message to, if None, send to everyone in channel
send_to: list[str] | None = None send_to: list[str] | None = None
respond_tag_only = False respond_tag_only = False
@ -259,6 +270,7 @@ def handle_message(
answer_generation_timeout=answer_generation_timeout, answer_generation_timeout=answer_generation_timeout,
enable_reflexion=reflexion, enable_reflexion=reflexion,
bypass_acl=bypass_acl, bypass_acl=bypass_acl,
use_citations=use_citations,
) )
if not answer.error_msg: if not answer.error_msg:
return answer return answer
@ -387,7 +399,10 @@ def handle_message(
source_filters=retrieval_info.applied_source_filters, source_filters=retrieval_info.applied_source_filters,
time_cutoff=retrieval_info.applied_time_cutoff, time_cutoff=retrieval_info.applied_time_cutoff,
favor_recent=retrieval_info.recency_bias_multiplier > 1, favor_recent=retrieval_info.recency_bias_multiplier > 1,
skip_quotes=persona is not None, # currently Personas don't support quotes # currently Personas don't support quotes
# if citations are enabled, also don't use quotes
skip_quotes=persona is not None or use_citations,
process_message_for_citations=use_citations,
) )
# Get the chunks fed to the LLM only, then fill with other docs # Get the chunks fed to the LLM only, then fill with other docs
@ -397,16 +412,33 @@ def handle_message(
doc for idx, doc in enumerate(top_docs) if idx not in llm_doc_inds doc for idx, doc in enumerate(top_docs) if idx not in llm_doc_inds
] ]
priority_ordered_docs = llm_docs + remaining_docs priority_ordered_docs = llm_docs + remaining_docs
document_blocks = (
build_documents_blocks( document_blocks = []
citations_block = []
# if citations are enabled, only show cited documents
if use_citations:
citations = answer.citations or []
cited_docs = []
for citation in citations:
matching_doc = next(
(d for d in top_docs if d.document_id == citation.document_id),
None,
)
if matching_doc:
cited_docs.append((citation.citation_num, matching_doc))
cited_docs.sort()
citations_block = build_sources_blocks(cited_documents=cited_docs)
elif priority_ordered_docs:
document_blocks = build_documents_blocks(
documents=priority_ordered_docs, documents=priority_ordered_docs,
message_id=answer.chat_message_id, message_id=answer.chat_message_id,
) )
if priority_ordered_docs document_blocks = [DividerBlock()] + document_blocks
else []
)
all_blocks = restate_question_block + answer_blocks + document_blocks all_blocks = (
restate_question_block + answer_blocks + citations_block + document_blocks
)
if channel_conf and channel_conf.get("follow_up_tags") is not None: if channel_conf and channel_conf.get("follow_up_tags") is not None:
all_blocks.append(build_follow_up_block(message_id=answer.chat_message_id)) all_blocks.append(build_follow_up_block(message_id=answer.chat_message_id))

View File

@ -0,0 +1,58 @@
from danswer.configs.constants import DocumentSource
# TODO: store these images somewhere better
# Icons added on this branch live under backend/slackbot_images; the rest are
# reused from the web app's public assets.
_SOURCE_TO_IMG_LINK: dict[str, str] = {
    DocumentSource.WEB.value: "https://raw.githubusercontent.com/danswer-ai/danswer/improve-slack-flow/backend/slackbot_images/Web.png",
    DocumentSource.FILE.value: "https://raw.githubusercontent.com/danswer-ai/danswer/improve-slack-flow/backend/slackbot_images/File.png",
    DocumentSource.GOOGLE_SITES.value: "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/GoogleSites.png",
    DocumentSource.SLACK.value: "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Slack.png",
    DocumentSource.GMAIL.value: "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Gmail.png",
    DocumentSource.GOOGLE_DRIVE.value: "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/GoogleDrive.png",
    DocumentSource.GITHUB.value: "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Github.png",
    DocumentSource.GITLAB.value: "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Gitlab.png",
    DocumentSource.CONFLUENCE.value: "https://raw.githubusercontent.com/danswer-ai/danswer/improve-slack-flow/backend/slackbot_images/Confluence.png",
    DocumentSource.JIRA.value: "https://raw.githubusercontent.com/danswer-ai/danswer/improve-slack-flow/backend/slackbot_images/Jira.png",
    DocumentSource.NOTION.value: "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Notion.png",
    DocumentSource.ZENDESK.value: "https://raw.githubusercontent.com/danswer-ai/danswer/improve-slack-flow/backend/slackbot_images/Zendesk.png",
    DocumentSource.GONG.value: "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Gong.png",
    DocumentSource.LINEAR.value: "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Linear.png",
    DocumentSource.PRODUCTBOARD.value: "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Productboard.webp",
    DocumentSource.SLAB.value: "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/SlabLogo.png",
    DocumentSource.ZULIP.value: "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Zulip.png",
    DocumentSource.GURU.value: "https://raw.githubusercontent.com/danswer-ai/danswer/improve-slack-flow/backend/slackbot_images/Guru.png",
    DocumentSource.HUBSPOT.value: "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/HubSpot.png",
    DocumentSource.DOCUMENT360.value: "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Document360.png",
    DocumentSource.BOOKSTACK.value: "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Bookstack.png",
    DocumentSource.LOOPIO.value: "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Loopio.png",
    DocumentSource.SHAREPOINT.value: "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Sharepoint.png",
}

# just use file icon for sources with no dedicated image yet
# (e.g. REQUESTTRACKER, INGESTION_API) and for anything unrecognized
_DEFAULT_IMG_LINK = "https://raw.githubusercontent.com/danswer-ai/danswer/improve-slack-flow/backend/slackbot_images/File.png"


def source_to_github_img_link(source: DocumentSource) -> str | None:
    """Return a hosted icon URL for the given document source.

    Sources without a dedicated icon fall back to a generic file icon, so in
    practice this never returns None (the annotation is kept for
    backward-compatibility with callers that check for None).
    """
    # dict lookup replaces the previous 26-branch if-chain; behavior is
    # identical, including the File.png fallback for unknown sources
    return _SOURCE_TO_IMG_LINK.get(source, _DEFAULT_IMG_LINK)

View File

@ -346,8 +346,12 @@ def read_slack_thread(
if len(blocks) <= 1: if len(blocks) <= 1:
continue continue
# The useful block is the second one after the header block that says AI Answer # For the old flow, the useful block is the second one after the header block that says AI Answer
message = reply["blocks"][1]["text"]["text"] if reply["blocks"][0]["text"]["text"] == "AI Answer":
message = reply["blocks"][1]["text"]["text"]
else:
# for the new flow, the answer is the first block
message = reply["blocks"][0]["text"]["text"]
if message.startswith("_Filters"): if message.startswith("_Filters"):
if len(blocks) <= 2: if len(blocks) <= 2:

View File

@ -811,6 +811,11 @@ class ChannelConfig(TypedDict):
follow_up_tags: NotRequired[list[str]] follow_up_tags: NotRequired[list[str]]
class SlackBotResponseType(str, PyEnum):
    # How DanswerBot formats its Slack answers: QUOTES is the legacy
    # quotes-and-reference-documents flow, CITATIONS uses inline
    # [[n]](link) citations instead.
    QUOTES = "quotes"
    CITATIONS = "citations"
class SlackBotConfig(Base): class SlackBotConfig(Base):
__tablename__ = "slack_bot_config" __tablename__ = "slack_bot_config"
@ -822,6 +827,9 @@ class SlackBotConfig(Base):
channel_config: Mapped[ChannelConfig] = mapped_column( channel_config: Mapped[ChannelConfig] = mapped_column(
postgresql.JSONB(), nullable=False postgresql.JSONB(), nullable=False
) )
response_type: Mapped[SlackBotResponseType] = mapped_column(
Enum(SlackBotResponseType, native_enum=False), nullable=False
)
persona: Mapped[Persona | None] = relationship("Persona") persona: Mapped[Persona | None] = relationship("Persona")

View File

@ -11,6 +11,7 @@ from danswer.db.models import ChannelConfig
from danswer.db.models import Persona from danswer.db.models import Persona
from danswer.db.models import Persona__DocumentSet from danswer.db.models import Persona__DocumentSet
from danswer.db.models import SlackBotConfig from danswer.db.models import SlackBotConfig
from danswer.db.models import SlackBotResponseType
from danswer.search.models import RecencyBiasSetting from danswer.search.models import RecencyBiasSetting
@ -72,11 +73,13 @@ def create_slack_bot_persona(
def insert_slack_bot_config( def insert_slack_bot_config(
persona_id: int | None, persona_id: int | None,
channel_config: ChannelConfig, channel_config: ChannelConfig,
response_type: SlackBotResponseType,
db_session: Session, db_session: Session,
) -> SlackBotConfig: ) -> SlackBotConfig:
slack_bot_config = SlackBotConfig( slack_bot_config = SlackBotConfig(
persona_id=persona_id, persona_id=persona_id,
channel_config=channel_config, channel_config=channel_config,
response_type=response_type,
) )
db_session.add(slack_bot_config) db_session.add(slack_bot_config)
db_session.commit() db_session.commit()
@ -88,6 +91,7 @@ def update_slack_bot_config(
slack_bot_config_id: int, slack_bot_config_id: int,
persona_id: int | None, persona_id: int | None,
channel_config: ChannelConfig, channel_config: ChannelConfig,
response_type: SlackBotResponseType,
db_session: Session, db_session: Session,
) -> SlackBotConfig: ) -> SlackBotConfig:
slack_bot_config = db_session.scalar( slack_bot_config = db_session.scalar(
@ -105,6 +109,7 @@ def update_slack_bot_config(
# will encounter `violates foreign key constraint` errors # will encounter `violates foreign key constraint` errors
slack_bot_config.persona_id = persona_id slack_bot_config.persona_id = persona_id
slack_bot_config.channel_config = channel_config slack_bot_config.channel_config = channel_config
slack_bot_config.response_type = response_type
# if the persona has changed, then clean up the old persona # if the persona has changed, then clean up the old persona
if persona_id != existing_persona_id and existing_persona_id: if persona_id != existing_persona_id and existing_persona_id:

View File

@ -3,10 +3,18 @@ from collections.abc import Callable
from collections.abc import Iterator from collections.abc import Iterator
from typing import cast from typing import cast
from langchain.schema.messages import BaseMessage
from langchain.schema.messages import HumanMessage
from sqlalchemy.orm import Session from sqlalchemy.orm import Session
from danswer.chat.chat_utils import build_chat_system_message
from danswer.chat.chat_utils import compute_max_document_tokens from danswer.chat.chat_utils import compute_max_document_tokens
from danswer.chat.chat_utils import extract_citations_from_stream
from danswer.chat.chat_utils import get_chunks_for_qa from danswer.chat.chat_utils import get_chunks_for_qa
from danswer.chat.chat_utils import llm_doc_from_inference_chunk
from danswer.chat.chat_utils import map_document_id_order
from danswer.chat.chat_utils import reorganize_citations
from danswer.chat.models import CitationInfo
from danswer.chat.models import DanswerAnswerPiece from danswer.chat.models import DanswerAnswerPiece
from danswer.chat.models import DanswerContext from danswer.chat.models import DanswerContext
from danswer.chat.models import DanswerContexts from danswer.chat.models import DanswerContexts
@ -26,16 +34,23 @@ from danswer.db.chat import get_persona_by_id
from danswer.db.chat import get_prompt_by_id from danswer.db.chat import get_prompt_by_id
from danswer.db.chat import translate_db_message_to_chat_message_detail from danswer.db.chat import translate_db_message_to_chat_message_detail
from danswer.db.embedding_model import get_current_db_embedding_model from danswer.db.embedding_model import get_current_db_embedding_model
from danswer.db.models import Prompt
from danswer.db.models import User from danswer.db.models import User
from danswer.document_index.factory import get_default_document_index from danswer.document_index.factory import get_default_document_index
from danswer.indexing.models import InferenceChunk from danswer.indexing.models import InferenceChunk
from danswer.llm.factory import get_default_llm
from danswer.llm.utils import get_default_llm_token_encode from danswer.llm.utils import get_default_llm_token_encode
from danswer.llm.utils import get_default_llm_tokenizer
from danswer.one_shot_answer.factory import get_question_answer_model from danswer.one_shot_answer.factory import get_question_answer_model
from danswer.one_shot_answer.models import DirectQARequest from danswer.one_shot_answer.models import DirectQARequest
from danswer.one_shot_answer.models import OneShotQAResponse from danswer.one_shot_answer.models import OneShotQAResponse
from danswer.one_shot_answer.models import QueryRephrase from danswer.one_shot_answer.models import QueryRephrase
from danswer.one_shot_answer.models import ThreadMessage
from danswer.one_shot_answer.qa_block import no_gen_ai_response from danswer.one_shot_answer.qa_block import no_gen_ai_response
from danswer.one_shot_answer.qa_utils import combine_message_thread from danswer.one_shot_answer.qa_utils import combine_message_thread
from danswer.prompts.direct_qa_prompts import CITATIONS_PROMPT
from danswer.prompts.prompt_utils import build_complete_context_str
from danswer.prompts.prompt_utils import build_task_prompt_reminders
from danswer.search.models import RerankMetricsContainer from danswer.search.models import RerankMetricsContainer
from danswer.search.models import RetrievalMetricsContainer from danswer.search.models import RetrievalMetricsContainer
from danswer.search.models import SavedSearchDoc from danswer.search.models import SavedSearchDoc
@ -51,6 +66,118 @@ from danswer.utils.timing import log_generator_function_time
logger = setup_logger() logger = setup_logger()
# Union of every packet type the one-shot QA streams may yield; used as the
# return type of both the quotes-based and citations-based flows below.
AnswerObjectIterator = Iterator[
    QueryRephrase
    | QADocsResponse
    | LLMRelevanceFilterResponse
    | DanswerAnswerPiece
    | DanswerQuotes
    | DanswerContexts
    | StreamingError
    | ChatMessageDetail
    | CitationInfo
]
def quote_based_qa(
    prompt: Prompt,
    query_message: ThreadMessage,
    history_str: str,
    context_chunks: list[InferenceChunk],
    llm_override: str | None,
    timeout: int,
    use_chain_of_thought: bool,
    return_contexts: bool,
    llm_metrics_callback: Callable[[LLMMetricsContainer], None] | None = None,
) -> AnswerObjectIterator:
    """Stream a quotes-style answer for *query_message*.

    Builds the QA model's prompt from the query, chat history, and retrieved
    chunks, streams the model's answer/quote packets, and — when
    ``return_contexts`` is set — yields a final ``DanswerContexts`` packet
    describing the chunks that were fed to the model.
    """
    qa_model = get_question_answer_model(
        prompt=prompt,
        timeout=timeout,
        chain_of_thought=use_chain_of_thought,
        llm_version=llm_override,
    )

    # Gen AI disabled: emit the canned "no response" packets and stop.
    if qa_model is None:
        yield from no_gen_ai_response()
        return

    full_prompt_str = qa_model.build_prompt(
        query=query_message.message,
        history_str=history_str,
        context_chunks=context_chunks,
    )
    yield from qa_model.answer_question_stream(
        prompt=full_prompt_str,
        llm_context_docs=context_chunks,
        metrics_callback=llm_metrics_callback,
    )

    if return_contexts:
        yield DanswerContexts(
            contexts=[
                DanswerContext(
                    content=chunk.content,
                    document_id=chunk.document_id,
                    semantic_identifier=chunk.semantic_identifier,
                    # NOTE(review): blurb mirrors the semantic identifier here,
                    # matching the original behavior — confirm this is intended.
                    blurb=chunk.semantic_identifier,
                )
                for chunk in context_chunks
            ]
        )
def citation_based_qa(
    prompt: Prompt,
    query_message: ThreadMessage,
    history_str: str,
    context_chunks: list[InferenceChunk],
    llm_override: str | None,
    timeout: int,
) -> AnswerObjectIterator:
    """Stream a citations-style answer for *query_message*.

    Formats the retrieved chunks into the CITATIONS_PROMPT, sends the prompt
    (plus an optional system message derived from the persona's prompt) to the
    default LLM, and yields answer pieces with inline [n] citations extracted
    from the token stream.
    """
    tokenizer = get_default_llm_tokenizer()
    system_message, _ = build_chat_system_message(
        prompt=prompt,
        context_exists=True,
        llm_tokenizer_encode_func=tokenizer.encode,
    )

    human_message = HumanMessage(
        content=CITATIONS_PROMPT.format(
            task_prompt=build_task_prompt_reminders(prompt),
            user_query=query_message.message,
            history_block=history_str,
            context_docs_str=build_complete_context_str(context_chunks),
        )
    )

    # Prepend the system message only when the persona actually provides one.
    messages: list[BaseMessage] = (
        [human_message] if system_message is None else [system_message, human_message]
    )

    llm_docs = [llm_doc_from_inference_chunk(chunk) for chunk in context_chunks]

    llm = get_default_llm(
        timeout=timeout,
        gen_ai_model_version_override=llm_override,
    )
    yield from extract_citations_from_stream(
        llm.stream(messages),
        llm_docs,
        map_document_id_order(llm_docs),
    )
def stream_answer_objects( def stream_answer_objects(
query_req: DirectQARequest, query_req: DirectQARequest,
@ -66,20 +193,12 @@ def stream_answer_objects(
default_chunk_size: int = DOC_EMBEDDING_CONTEXT_SIZE, default_chunk_size: int = DOC_EMBEDDING_CONTEXT_SIZE,
timeout: int = QA_TIMEOUT, timeout: int = QA_TIMEOUT,
bypass_acl: bool = False, bypass_acl: bool = False,
use_citations: bool = False,
retrieval_metrics_callback: Callable[[RetrievalMetricsContainer], None] retrieval_metrics_callback: Callable[[RetrievalMetricsContainer], None]
| None = None, | None = None,
rerank_metrics_callback: Callable[[RerankMetricsContainer], None] | None = None, rerank_metrics_callback: Callable[[RerankMetricsContainer], None] | None = None,
llm_metrics_callback: Callable[[LLMMetricsContainer], None] | None = None, llm_metrics_callback: Callable[[LLMMetricsContainer], None] | None = None,
) -> Iterator[ ) -> AnswerObjectIterator:
QueryRephrase
| QADocsResponse
| LLMRelevanceFilterResponse
| DanswerAnswerPiece
| DanswerQuotes
| DanswerContexts
| StreamingError
| ChatMessageDetail
]:
"""Streams in order: """Streams in order:
1. [always] Retrieved documents, stops flow if nothing is found 1. [always] Retrieved documents, stops flow if nothing is found
2. [conditional] LLM selected chunk indices if LLM chunk filtering is turned on 2. [conditional] LLM selected chunk indices if LLM chunk filtering is turned on
@ -216,63 +335,51 @@ def stream_answer_objects(
persona_id=query_req.persona_id, user_id=user_id, db_session=db_session persona_id=query_req.persona_id, user_id=user_id, db_session=db_session
) )
llm_override = persona.llm_model_version_override llm_override = persona.llm_model_version_override
if prompt is None:
qa_model = get_question_answer_model( if not chat_session.persona.prompts:
prompt=prompt, raise RuntimeError(
timeout=timeout, "Persona does not have any prompts - this should never happen"
chain_of_thought=query_req.chain_of_thought, )
llm_version=llm_override, prompt = chat_session.persona.prompts[0]
)
full_prompt_str = (
qa_model.build_prompt(
query=query_msg.message, history_str=history_str, context_chunks=llm_chunks
)
if qa_model is not None
else "Gen AI Disabled"
)
# Create the first User query message # Create the first User query message
new_user_message = create_new_chat_message( new_user_message = create_new_chat_message(
chat_session_id=chat_session.id, chat_session_id=chat_session.id,
parent_message=root_message, parent_message=root_message,
prompt_id=query_req.prompt_id, prompt_id=query_req.prompt_id,
message=full_prompt_str, message=query_msg.message,
token_count=len(llm_tokenizer(full_prompt_str)), token_count=len(llm_tokenizer(query_msg.message)),
message_type=MessageType.USER, message_type=MessageType.USER,
db_session=db_session, db_session=db_session,
commit=True, commit=True,
) )
response_packets = ( if use_citations:
qa_model.answer_question_stream( qa_stream = citation_based_qa(
prompt=full_prompt_str, prompt=prompt,
llm_context_docs=llm_chunks, query_message=query_msg,
metrics_callback=llm_metrics_callback, history_str=history_str,
context_chunks=llm_chunks,
llm_override=llm_override,
timeout=timeout,
) )
if qa_model is not None else:
else no_gen_ai_response() qa_stream = quote_based_qa(
) prompt=prompt,
query_message=query_msg,
if qa_model is not None and query_req.return_contexts: history_str=history_str,
contexts = DanswerContexts( context_chunks=llm_chunks,
contexts=[ llm_override=llm_override,
DanswerContext( timeout=timeout,
content=context_doc.content, use_chain_of_thought=False,
document_id=context_doc.document_id, return_contexts=False,
semantic_identifier=context_doc.semantic_identifier, llm_metrics_callback=llm_metrics_callback,
blurb=context_doc.semantic_identifier,
)
for context_doc in llm_chunks
]
) )
response_packets = itertools.chain(response_packets, [contexts])
# Capture outputs and errors # Capture outputs and errors
llm_output = "" llm_output = ""
error: str | None = None error: str | None = None
for packet in response_packets: for packet in qa_stream:
logger.debug(packet) logger.debug(packet)
if isinstance(packet, DanswerAnswerPiece): if isinstance(packet, DanswerAnswerPiece):
@ -333,6 +440,7 @@ def get_search_answer(
answer_generation_timeout: int = QA_TIMEOUT, answer_generation_timeout: int = QA_TIMEOUT,
enable_reflexion: bool = False, enable_reflexion: bool = False,
bypass_acl: bool = False, bypass_acl: bool = False,
use_citations: bool = False,
retrieval_metrics_callback: Callable[[RetrievalMetricsContainer], None] retrieval_metrics_callback: Callable[[RetrievalMetricsContainer], None]
| None = None, | None = None,
rerank_metrics_callback: Callable[[RerankMetricsContainer], None] | None = None, rerank_metrics_callback: Callable[[RerankMetricsContainer], None] | None = None,
@ -348,6 +456,7 @@ def get_search_answer(
max_history_tokens=max_history_tokens, max_history_tokens=max_history_tokens,
db_session=db_session, db_session=db_session,
bypass_acl=bypass_acl, bypass_acl=bypass_acl,
use_citations=use_citations,
timeout=answer_generation_timeout, timeout=answer_generation_timeout,
retrieval_metrics_callback=retrieval_metrics_callback, retrieval_metrics_callback=retrieval_metrics_callback,
rerank_metrics_callback=rerank_metrics_callback, rerank_metrics_callback=rerank_metrics_callback,
@ -366,6 +475,11 @@ def get_search_answer(
qa_response.llm_chunks_indices = packet.relevant_chunk_indices qa_response.llm_chunks_indices = packet.relevant_chunk_indices
elif isinstance(packet, DanswerQuotes): elif isinstance(packet, DanswerQuotes):
qa_response.quotes = packet qa_response.quotes = packet
elif isinstance(packet, CitationInfo):
if qa_response.citations:
qa_response.citations.append(packet)
else:
qa_response.citations = [packet]
elif isinstance(packet, DanswerContexts): elif isinstance(packet, DanswerContexts):
qa_response.contexts = packet qa_response.contexts = packet
elif isinstance(packet, StreamingError): elif isinstance(packet, StreamingError):
@ -384,4 +498,10 @@ def get_search_answer(
else: else:
qa_response.answer_valid = True qa_response.answer_valid = True
if use_citations and qa_response.answer and qa_response.citations:
# Reorganize citation nums to be in the same order as the answer
qa_response.answer, qa_response.citations = reorganize_citations(
qa_response.answer, qa_response.citations
)
return qa_response return qa_response

View File

@ -4,6 +4,7 @@ from pydantic import BaseModel
from pydantic import Field from pydantic import Field
from pydantic import root_validator from pydantic import root_validator
from danswer.chat.models import CitationInfo
from danswer.chat.models import DanswerContexts from danswer.chat.models import DanswerContexts
from danswer.chat.models import DanswerQuotes from danswer.chat.models import DanswerQuotes
from danswer.chat.models import QADocsResponse from danswer.chat.models import QADocsResponse
@ -51,6 +52,7 @@ class OneShotQAResponse(BaseModel):
answer: str | None = None answer: str | None = None
rephrase: str | None = None rephrase: str | None = None
quotes: DanswerQuotes | None = None quotes: DanswerQuotes | None = None
citations: list[CitationInfo] | None = None
docs: QADocsResponse | None = None docs: QADocsResponse | None = None
llm_chunks_indices: list[int] | None = None llm_chunks_indices: list[int] | None = None
error_msg: str | None = None error_msg: str | None = None

View File

@ -4,7 +4,6 @@ from collections.abc import Callable
from collections.abc import Iterator from collections.abc import Iterator
from typing import cast from typing import cast
from danswer.chat.chat_utils import build_complete_context_str
from danswer.chat.models import AnswerQuestionStreamReturn from danswer.chat.models import AnswerQuestionStreamReturn
from danswer.chat.models import DanswerAnswer from danswer.chat.models import DanswerAnswer
from danswer.chat.models import DanswerAnswerPiece from danswer.chat.models import DanswerAnswerPiece
@ -33,6 +32,7 @@ from danswer.prompts.direct_qa_prompts import PARAMATERIZED_PROMPT_WITHOUT_CONTE
from danswer.prompts.direct_qa_prompts import WEAK_LLM_PROMPT from danswer.prompts.direct_qa_prompts import WEAK_LLM_PROMPT
from danswer.prompts.direct_qa_prompts import WEAK_MODEL_SYSTEM_PROMPT from danswer.prompts.direct_qa_prompts import WEAK_MODEL_SYSTEM_PROMPT
from danswer.prompts.direct_qa_prompts import WEAK_MODEL_TASK_PROMPT from danswer.prompts.direct_qa_prompts import WEAK_MODEL_TASK_PROMPT
from danswer.prompts.prompt_utils import build_complete_context_str
from danswer.utils.logger import setup_logger from danswer.utils.logger import setup_logger
from danswer.utils.text_processing import clean_up_code_blocks from danswer.utils.text_processing import clean_up_code_blocks
from danswer.utils.text_processing import escape_newlines from danswer.utils.text_processing import escape_newlines

View File

@ -17,8 +17,6 @@ Remember to provide inline citations in the format [1], [2], [3], etc.
ADDITIONAL_INFO = "\n\nAdditional Information:\n\t- {datetime_info}." ADDITIONAL_INFO = "\n\nAdditional Information:\n\t- {datetime_info}."
DEFAULT_IGNORE_STATEMENT = " Ignore any context documents that are not relevant."
CHAT_USER_PROMPT = f""" CHAT_USER_PROMPT = f"""
Refer to the following context documents when responding to me.{{optional_ignore_statement}} Refer to the following context documents when responding to me.{{optional_ignore_statement}}
CONTEXT: CONTEXT:

View File

@ -12,3 +12,18 @@ QUOTE_PAT = "Quote:"
QUOTES_PAT_PLURAL = "Quotes:" QUOTES_PAT_PLURAL = "Quotes:"
INVALID_PAT = "Invalid:" INVALID_PAT = "Invalid:"
SOURCES_KEY = "sources" SOURCES_KEY = "sources"
# Appended to prompts that include retrieved context documents.
DEFAULT_IGNORE_STATEMENT = " Ignore any context documents that are not relevant."

# Instruction block requiring inline [n] citations (no reference section).
REQUIRE_CITATION_STATEMENT = """
Cite relevant statements INLINE using the format [1], [2], [3], etc to reference the document number, \
DO NOT provide a reference section at the end and DO NOT provide any links following the citations.
""".rstrip()

# Instruction block explicitly suppressing citations.
NO_CITATION_STATEMENT = """
Do not provide any citations even if there are examples in the chat history.
""".rstrip()

# Short reminder appended to task prompts when citations are enabled.
CITATION_REMINDER = """
Remember to provide inline citations in the format [1], [2], [3], etc.
"""

View File

@ -2,6 +2,7 @@
# It is used also for the one shot direct QA flow # It is used also for the one shot direct QA flow
import json import json
from danswer.prompts.constants import DEFAULT_IGNORE_STATEMENT
from danswer.prompts.constants import FINAL_QUERY_PAT from danswer.prompts.constants import FINAL_QUERY_PAT
from danswer.prompts.constants import GENERAL_SEP_PAT from danswer.prompts.constants import GENERAL_SEP_PAT
from danswer.prompts.constants import QUESTION_PAT from danswer.prompts.constants import QUESTION_PAT
@ -96,6 +97,22 @@ SAMPLE RESPONSE:
""".strip() """.strip()
# similar to the chat flow, but with the option of including a
# "conversation history" block
CITATIONS_PROMPT = f"""
Refer to the following context documents when responding to me.{DEFAULT_IGNORE_STATEMENT}
CONTEXT:
{GENERAL_SEP_PAT}
{{context_docs_str}}
{GENERAL_SEP_PAT}
{{history_block}}{{task_prompt}}
{QUESTION_PAT.upper()}
{{user_query}}
"""
# For weak LLM which only takes one chunk and cannot output json # For weak LLM which only takes one chunk and cannot output json
# Also not requiring quotes as it tends to not work # Also not requiring quotes as it tends to not work
WEAK_LLM_PROMPT = f""" WEAK_LLM_PROMPT = f"""

View File

@ -1,5 +1,15 @@
from collections.abc import Sequence
from datetime import datetime from datetime import datetime
from danswer.chat.models import LlmDoc
from danswer.configs.chat_configs import MULTILINGUAL_QUERY_EXPANSION
from danswer.configs.constants import DocumentSource
from danswer.db.models import Prompt
from danswer.indexing.models import InferenceChunk
from danswer.prompts.chat_prompts import CITATION_REMINDER
from danswer.prompts.constants import CODE_BLOCK_PAT
from danswer.prompts.direct_qa_prompts import LANGUAGE_HINT
def get_current_llm_day_time() -> str: def get_current_llm_day_time() -> str:
current_datetime = datetime.now() current_datetime = datetime.now()
@ -7,3 +17,78 @@ def get_current_llm_day_time() -> str:
formatted_datetime = current_datetime.strftime("%B %d, %Y %H:%M") formatted_datetime = current_datetime.strftime("%B %d, %Y %H:%M")
day_of_week = current_datetime.strftime("%A") day_of_week = current_datetime.strftime("%A")
return f"The current day and time is {day_of_week} {formatted_datetime}" return f"The current day and time is {day_of_week} {formatted_datetime}"
def build_task_prompt_reminders(
    prompt: Prompt,
    use_language_hint: bool = bool(MULTILINGUAL_QUERY_EXPANSION),
    citation_str: str = CITATION_REMINDER,
    language_hint_str: str = LANGUAGE_HINT,
) -> str:
    """Append optional citation and language reminders to the persona's task prompt.

    The citation reminder is added only when the prompt opts into citations;
    the language hint only when multilingual query expansion is configured.
    """
    parts = [prompt.task_prompt]
    if prompt.include_citations:
        parts.append(citation_str)
    if use_language_hint:
        parts.append(language_hint_str.lstrip())
    return "".join(parts)
# Maps connector enum string to a more natural language representation for the LLM
# If not on the list, uses the original but slightly cleaned up, see below
CONNECTOR_NAME_MAP: dict[str, str] = {
    "web": "Website",
    "requesttracker": "Request Tracker",
    "github": "GitHub",
    "file": "File Upload",
}
def clean_up_source(source_str: str) -> str:
    """Return a human-friendly name for a connector source string.

    Known connectors are looked up in CONNECTOR_NAME_MAP; anything else is
    title-cased with underscores replaced by spaces (e.g. "google_drive" ->
    "Google Drive").
    """
    # Single .get() lookup instead of the `in` test followed by indexing.
    mapped = CONNECTOR_NAME_MAP.get(source_str)
    if mapped is not None:
        return mapped
    return source_str.replace("_", " ").title()
def build_doc_context_str(
    semantic_identifier: str,
    source_type: DocumentSource,
    content: str,
    metadata_dict: dict[str, str | list[str]],
    updated_at: datetime | None,
    ind: int,
    include_metadata: bool = True,
) -> str:
    """Render one retrieved document as a prompt-ready context section.

    When ``include_metadata`` is set, a header with the document number,
    source, metadata key/value pairs, and last-updated timestamp precedes the
    content; the content itself is wrapped in a code block and followed by
    blank lines to separate it from the next document.
    """
    lines: list[str] = []
    if include_metadata:
        lines.append(f"DOCUMENT {ind}: {semantic_identifier}")
        lines.append(f"Source: {clean_up_source(source_type)}")
        for key, value in metadata_dict.items():
            # list-valued metadata is flattened into a comma-separated string
            rendered = ", ".join(value) if isinstance(value, list) else value
            lines.append(f"{key.capitalize()}: {rendered}")
        if updated_at:
            lines.append(f"Updated: {updated_at.strftime('%B %d, %Y %H:%M')}")
    # Two extra trailing newlines inside the segment plus the join/terminator
    # newline reproduce the original "\n\n\n" document separator.
    lines.append(f"{CODE_BLOCK_PAT.format(content.strip())}\n\n")
    return "\n".join(lines) + "\n"
def build_complete_context_str(
    context_docs: Sequence[LlmDoc | InferenceChunk],
    include_metadata: bool = True,
) -> str:
    """Concatenate all retrieved documents into one prompt context string.

    Documents are numbered starting at 1 (matching the inline-citation
    convention) and rendered via build_doc_context_str; outer whitespace is
    stripped from the combined result.
    """
    sections = [
        build_doc_context_str(
            semantic_identifier=doc.semantic_identifier,
            source_type=doc.source_type,
            content=doc.content,
            metadata_dict=doc.metadata,
            updated_at=doc.updated_at,
            ind=doc_num,
            include_metadata=include_metadata,
        )
        for doc_num, doc in enumerate(context_docs, start=1)
    ]
    return "".join(sections).strip()

View File

@ -2,8 +2,8 @@ from danswer.llm.utils import check_number_of_tokens
from danswer.prompts.chat_prompts import ADDITIONAL_INFO from danswer.prompts.chat_prompts import ADDITIONAL_INFO
from danswer.prompts.chat_prompts import CHAT_USER_PROMPT from danswer.prompts.chat_prompts import CHAT_USER_PROMPT
from danswer.prompts.chat_prompts import CITATION_REMINDER from danswer.prompts.chat_prompts import CITATION_REMINDER
from danswer.prompts.chat_prompts import DEFAULT_IGNORE_STATEMENT
from danswer.prompts.chat_prompts import REQUIRE_CITATION_STATEMENT from danswer.prompts.chat_prompts import REQUIRE_CITATION_STATEMENT
from danswer.prompts.constants import DEFAULT_IGNORE_STATEMENT
from danswer.prompts.direct_qa_prompts import LANGUAGE_HINT from danswer.prompts.direct_qa_prompts import LANGUAGE_HINT
from danswer.prompts.prompt_utils import get_current_llm_day_time from danswer.prompts.prompt_utils import get_current_llm_day_time

View File

@ -5,12 +5,12 @@ from fastapi import Query
from sqlalchemy.orm import Session from sqlalchemy.orm import Session
from danswer.auth.users import current_user from danswer.auth.users import current_user
from danswer.chat.chat_utils import build_doc_context_str
from danswer.db.embedding_model import get_current_db_embedding_model from danswer.db.embedding_model import get_current_db_embedding_model
from danswer.db.engine import get_session from danswer.db.engine import get_session
from danswer.db.models import User from danswer.db.models import User
from danswer.document_index.factory import get_default_document_index from danswer.document_index.factory import get_default_document_index
from danswer.llm.utils import get_default_llm_token_encode from danswer.llm.utils import get_default_llm_token_encode
from danswer.prompts.prompt_utils import build_doc_context_str
from danswer.search.access_filters import build_access_filters_for_user from danswer.search.access_filters import build_access_filters_for_user
from danswer.search.models import IndexFilters from danswer.search.models import IndexFilters
from danswer.server.documents.models import ChunkInfo from danswer.server.documents.models import ChunkInfo

View File

@ -9,6 +9,8 @@ from danswer.configs.constants import AuthType
from danswer.danswerbot.slack.config import VALID_SLACK_FILTERS from danswer.danswerbot.slack.config import VALID_SLACK_FILTERS
from danswer.db.models import AllowedAnswerFilters from danswer.db.models import AllowedAnswerFilters
from danswer.db.models import ChannelConfig from danswer.db.models import ChannelConfig
from danswer.db.models import SlackBotConfig as SlackBotConfigModel
from danswer.db.models import SlackBotResponseType
from danswer.server.features.persona.models import PersonaSnapshot from danswer.server.features.persona.models import PersonaSnapshot
@ -81,6 +83,7 @@ class SlackBotConfigCreationRequest(BaseModel):
answer_filters: list[AllowedAnswerFilters] = [] answer_filters: list[AllowedAnswerFilters] = []
# list of user emails # list of user emails
follow_up_tags: list[str] | None = None follow_up_tags: list[str] | None = None
response_type: SlackBotResponseType
@validator("answer_filters", pre=True) @validator("answer_filters", pre=True)
def validate_filters(cls, value: list[str]) -> list[str]: def validate_filters(cls, value: list[str]) -> list[str]:
@ -104,6 +107,22 @@ class SlackBotConfig(BaseModel):
id: int id: int
persona: PersonaSnapshot | None persona: PersonaSnapshot | None
channel_config: ChannelConfig channel_config: ChannelConfig
response_type: SlackBotResponseType
    @classmethod
    def from_model(
        cls, slack_bot_config_model: SlackBotConfigModel
    ) -> "SlackBotConfig":
        """Build the API-facing SlackBotConfig from its database model.

        Converts the attached persona (if any) to a PersonaSnapshot and copies
        the channel config and response type through unchanged.
        """
        return cls(
            id=slack_bot_config_model.id,
            persona=(
                PersonaSnapshot.from_model(slack_bot_config_model.persona)
                if slack_bot_config_model.persona
                else None
            ),
            channel_config=slack_bot_config_model.channel_config,
            response_type=slack_bot_config_model.response_type,
        )
class ModelVersionResponse(BaseModel): class ModelVersionResponse(BaseModel):

View File

@ -19,7 +19,6 @@ from danswer.db.slack_bot_config import insert_slack_bot_config
from danswer.db.slack_bot_config import remove_slack_bot_config from danswer.db.slack_bot_config import remove_slack_bot_config
from danswer.db.slack_bot_config import update_slack_bot_config from danswer.db.slack_bot_config import update_slack_bot_config
from danswer.dynamic_configs.interface import ConfigNotFoundError from danswer.dynamic_configs.interface import ConfigNotFoundError
from danswer.server.features.persona.models import PersonaSnapshot
from danswer.server.manage.models import SlackBotConfig from danswer.server.manage.models import SlackBotConfig
from danswer.server.manage.models import SlackBotConfigCreationRequest from danswer.server.manage.models import SlackBotConfigCreationRequest
from danswer.server.manage.models import SlackBotTokens from danswer.server.manage.models import SlackBotTokens
@ -108,17 +107,10 @@ def create_slack_bot_config(
slack_bot_config_model = insert_slack_bot_config( slack_bot_config_model = insert_slack_bot_config(
persona_id=persona_id, persona_id=persona_id,
channel_config=channel_config, channel_config=channel_config,
response_type=slack_bot_config_creation_request.response_type,
db_session=db_session, db_session=db_session,
) )
return SlackBotConfig( return SlackBotConfig.from_model(slack_bot_config_model)
id=slack_bot_config_model.id,
persona=(
PersonaSnapshot.from_model(slack_bot_config_model.persona)
if slack_bot_config_model.persona
else None
),
channel_config=slack_bot_config_model.channel_config,
)
@router.patch("/admin/slack-bot/config/{slack_bot_config_id}") @router.patch("/admin/slack-bot/config/{slack_bot_config_id}")
@ -170,17 +162,10 @@ def patch_slack_bot_config(
slack_bot_config_id=slack_bot_config_id, slack_bot_config_id=slack_bot_config_id,
persona_id=persona_id, persona_id=persona_id,
channel_config=channel_config, channel_config=channel_config,
response_type=slack_bot_config_creation_request.response_type,
db_session=db_session, db_session=db_session,
) )
return SlackBotConfig( return SlackBotConfig.from_model(slack_bot_config_model)
id=slack_bot_config_model.id,
persona=(
PersonaSnapshot.from_model(slack_bot_config_model.persona)
if slack_bot_config_model.persona
else None
),
channel_config=slack_bot_config_model.channel_config,
)
@router.delete("/admin/slack-bot/config/{slack_bot_config_id}") @router.delete("/admin/slack-bot/config/{slack_bot_config_id}")
@ -201,15 +186,7 @@ def list_slack_bot_configs(
) -> list[SlackBotConfig]: ) -> list[SlackBotConfig]:
slack_bot_config_models = fetch_slack_bot_configs(db_session=db_session) slack_bot_config_models = fetch_slack_bot_configs(db_session=db_session)
return [ return [
SlackBotConfig( SlackBotConfig.from_model(slack_bot_config_model)
id=slack_bot_config_model.id,
persona=(
PersonaSnapshot.from_model(slack_bot_config_model.persona)
if slack_bot_config_model.persona
else None
),
channel_config=slack_bot_config_model.channel_config,
)
for slack_bot_config_model in slack_bot_config_models for slack_bot_config_model in slack_bot_config_models
] ]

Binary file not shown.

After

Width:  |  Height:  |  Size: 1013 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.7 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.0 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 829 B

View File

@ -0,0 +1,3 @@
This folder contains images needed by the Danswer Slack Bot. When possible, we use the images
within `web/public`, but sometimes those images do not work for the Slack Bot.

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.8 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 18 KiB

View File

@ -90,9 +90,13 @@ export const SlackBotCreationForm = ({
!isPersonaASlackBotPersona(existingSlackBotConfig.persona) !isPersonaASlackBotPersona(existingSlackBotConfig.persona)
? existingSlackBotConfig.persona.id ? existingSlackBotConfig.persona.id
: null, : null,
response_type: existingSlackBotConfig?.response_type || "citations",
}} }}
validationSchema={Yup.object().shape({ validationSchema={Yup.object().shape({
channel_names: Yup.array().of(Yup.string()), channel_names: Yup.array().of(Yup.string()),
response_type: Yup.string()
.oneOf(["quotes", "citations"])
.required(),
answer_validity_check_enabled: Yup.boolean().required(), answer_validity_check_enabled: Yup.boolean().required(),
questionmark_prefilter_enabled: Yup.boolean().required(), questionmark_prefilter_enabled: Yup.boolean().required(),
respond_tag_only: Yup.boolean().required(), respond_tag_only: Yup.boolean().required(),
@ -171,6 +175,33 @@ export const SlackBotCreationForm = ({
</div> </div>
} }
/> />
<SelectorFormField
name="response_type"
label="Response Format"
subtext={
<>
If set to Citations, DanswerBot will respond with a direct
answer with inline citations. It will also provide links
to these cited documents below the answer. When in doubt,
choose this option.
<br />
<br />
If set to Quotes, DanswerBot will respond with a direct
answer as well as with quotes pulled from the context
documents to support that answer. DanswerBot will also
give a list of relevant documents. Choose this option if
you want a very detailed response AND/OR a list of
relevant documents would be useful just in case the LLM
missed anything.
</>
}
options={[
{ name: "Citations", value: "citations" },
{ name: "Quotes", value: "quotes" },
]}
/>
<Divider /> <Divider />
<SectionHeader>When should DanswerBot respond?</SectionHeader> <SectionHeader>When should DanswerBot respond?</SectionHeader>

View File

@ -1,4 +1,8 @@
import { ChannelConfig, SlackBotTokens } from "@/lib/types"; import {
ChannelConfig,
SlackBotResponseType,
SlackBotTokens,
} from "@/lib/types";
import { Persona } from "../personas/interfaces"; import { Persona } from "../personas/interfaces";
interface SlackBotConfigCreationRequest { interface SlackBotConfigCreationRequest {
@ -12,6 +16,7 @@ interface SlackBotConfigCreationRequest {
respond_team_member_list: string[]; respond_team_member_list: string[];
follow_up_tags?: string[]; follow_up_tags?: string[];
usePersona: boolean; usePersona: boolean;
response_type: SlackBotResponseType;
} }
const buildFiltersFromCreationRequest = ( const buildFiltersFromCreationRequest = (
@ -40,6 +45,7 @@ const buildRequestBodyFromCreationRequest = (
...(creationRequest.usePersona ...(creationRequest.usePersona
? { persona_id: creationRequest.persona_id } ? { persona_id: creationRequest.persona_id }
: { document_sets: creationRequest.document_sets }), : { document_sets: creationRequest.document_sets }),
response_type: creationRequest.response_type,
}); });
}; };

View File

@ -231,7 +231,7 @@ interface SelectorFormFieldProps {
name: string; name: string;
label?: string; label?: string;
options: StringOrNumberOption[]; options: StringOrNumberOption[];
subtext?: string; subtext?: string | JSX.Element;
includeDefault?: boolean; includeDefault?: boolean;
} }

View File

@ -370,10 +370,13 @@ export interface ChannelConfig {
follow_up_tags?: string[]; follow_up_tags?: string[];
} }
// Mirrors the backend SlackBotResponseType enum values.
export type SlackBotResponseType = "quotes" | "citations";
export interface SlackBotConfig { export interface SlackBotConfig {
id: number; id: number;
persona: Persona | null; persona: Persona | null;
channel_config: ChannelConfig; channel_config: ChannelConfig;
response_type: SlackBotResponseType;
} }
export interface SlackBotTokens { export interface SlackBotTokens {