diff --git a/backend/alembic/versions/fcd135795f21_add_slack_bot_display_type.py b/backend/alembic/versions/fcd135795f21_add_slack_bot_display_type.py new file mode 100644 index 000000000..6c2dad18f --- /dev/null +++ b/backend/alembic/versions/fcd135795f21_add_slack_bot_display_type.py @@ -0,0 +1,39 @@ +"""Add slack bot display type + +Revision ID: fcd135795f21 +Revises: 0a2b51deb0b8 +Create Date: 2024-03-04 17:03:27.116284 + +""" +from alembic import op +import sqlalchemy as sa + +# revision identifiers, used by Alembic. +revision = "fcd135795f21" +down_revision = "0a2b51deb0b8" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + op.add_column( + "slack_bot_config", + sa.Column( + "response_type", + sa.Enum( + "QUOTES", + "CITATIONS", + name="slackbotresponsetype", + native_enum=False, + ), + nullable=True, + ), + ) + op.execute( + "UPDATE slack_bot_config SET response_type = 'QUOTES' WHERE response_type IS NULL" + ) + op.alter_column("slack_bot_config", "response_type", nullable=False) + + +def downgrade() -> None: + op.drop_column("slack_bot_config", "response_type") diff --git a/backend/danswer/chat/chat_utils.py b/backend/danswer/chat/chat_utils.py index b839ea3bf..fe97b0b39 100644 --- a/backend/danswer/chat/chat_utils.py +++ b/backend/danswer/chat/chat_utils.py @@ -1,7 +1,7 @@ import re from collections.abc import Callable from collections.abc import Iterator -from datetime import datetime +from collections.abc import Sequence from functools import lru_cache from typing import cast @@ -16,7 +16,6 @@ from danswer.chat.models import DanswerAnswerPiece from danswer.chat.models import LlmDoc from danswer.configs.chat_configs import MULTILINGUAL_QUERY_EXPANSION from danswer.configs.chat_configs import STOP_STREAM_PAT -from danswer.configs.constants import DocumentSource from danswer.configs.constants import IGNORE_FOR_QA from danswer.configs.model_configs import DOC_EMBEDDING_CONTEXT_SIZE from danswer.configs.model_configs import 
GEN_AI_SINGLE_USER_MESSAGE_EXPECTED_MAX_TOKENS @@ -34,13 +33,12 @@ from danswer.llm.utils import tokenizer_trim_content from danswer.prompts.chat_prompts import ADDITIONAL_INFO from danswer.prompts.chat_prompts import CHAT_USER_CONTEXT_FREE_PROMPT from danswer.prompts.chat_prompts import CHAT_USER_PROMPT -from danswer.prompts.chat_prompts import CITATION_REMINDER -from danswer.prompts.chat_prompts import DEFAULT_IGNORE_STATEMENT from danswer.prompts.chat_prompts import NO_CITATION_STATEMENT from danswer.prompts.chat_prompts import REQUIRE_CITATION_STATEMENT -from danswer.prompts.constants import CODE_BLOCK_PAT +from danswer.prompts.constants import DEFAULT_IGNORE_STATEMENT from danswer.prompts.constants import TRIPLE_BACKTICK -from danswer.prompts.direct_qa_prompts import LANGUAGE_HINT +from danswer.prompts.prompt_utils import build_complete_context_str +from danswer.prompts.prompt_utils import build_task_prompt_reminders from danswer.prompts.prompt_utils import get_current_llm_day_time from danswer.prompts.token_counts import ADDITIONAL_INFO_TOKEN_CNT from danswer.prompts.token_counts import ( @@ -53,68 +51,6 @@ from danswer.utils.logger import setup_logger logger = setup_logger() -# Maps connector enum string to a more natural language representation for the LLM -# If not on the list, uses the original but slightly cleaned up, see below -CONNECTOR_NAME_MAP = { - "web": "Website", - "requesttracker": "Request Tracker", - "github": "GitHub", - "file": "File Upload", -} - - -def clean_up_source(source_str: str) -> str: - if source_str in CONNECTOR_NAME_MAP: - return CONNECTOR_NAME_MAP[source_str] - return source_str.replace("_", " ").title() - - -def build_doc_context_str( - semantic_identifier: str, - source_type: DocumentSource, - content: str, - metadata_dict: dict[str, str | list[str]], - updated_at: datetime | None, - ind: int, - include_metadata: bool = True, -) -> str: - context_str = "" - if include_metadata: - context_str += f"DOCUMENT {ind}: 
{semantic_identifier}\n" - context_str += f"Source: {clean_up_source(source_type)}\n" - - for k, v in metadata_dict.items(): - if isinstance(v, list): - v_str = ", ".join(v) - context_str += f"{k.capitalize()}: {v_str}\n" - else: - context_str += f"{k.capitalize()}: {v}\n" - - if updated_at: - update_str = updated_at.strftime("%B %d, %Y %H:%M") - context_str += f"Updated: {update_str}\n" - context_str += f"{CODE_BLOCK_PAT.format(content.strip())}\n\n\n" - return context_str - - -def build_complete_context_str( - context_docs: list[LlmDoc | InferenceChunk], - include_metadata: bool = True, -) -> str: - context_str = "" - for ind, doc in enumerate(context_docs, start=1): - context_str += build_doc_context_str( - semantic_identifier=doc.semantic_identifier, - source_type=doc.source_type, - content=doc.content, - metadata_dict=doc.metadata, - updated_at=doc.updated_at, - ind=ind, - include_metadata=include_metadata, - ) - - return context_str.strip() - @lru_cache() def build_chat_system_message( @@ -147,18 +83,6 @@ def build_chat_system_message( return system_msg, token_count -def build_task_prompt_reminders( - prompt: Prompt, - use_language_hint: bool = bool(MULTILINGUAL_QUERY_EXPANSION), - citation_str: str = CITATION_REMINDER, - language_hint_str: str = LANGUAGE_HINT, -) -> str: - base_task = prompt.task_prompt - citation_or_nothing = citation_str if prompt.include_citations else "" - language_hint_or_nothing = language_hint_str.lstrip() if use_language_hint else "" - return base_task + citation_or_nothing + language_hint_or_nothing - - def llm_doc_from_inference_chunk(inf_chunk: InferenceChunk) -> LlmDoc: return LlmDoc( document_id=inf_chunk.document_id, @@ -172,7 +96,7 @@ def llm_doc_from_inference_chunk(inf_chunk: InferenceChunk) -> LlmDoc: def map_document_id_order( - chunks: list[InferenceChunk | LlmDoc], one_indexed: bool = True + chunks: Sequence[InferenceChunk | LlmDoc], one_indexed: bool = True ) -> dict[str, int]: order_mapping = {} current = 1 if 
one_indexed else 0 @@ -568,6 +492,63 @@ def extract_citations_from_stream( yield DanswerAnswerPiece(answer_piece=curr_segment) +def reorganize_citations( + answer: str, citations: list[CitationInfo] +) -> tuple[str, list[CitationInfo]]: + """For a complete, citation-aware response, we want to reorganize the citations so that + they are in the order of the documents that were used in the response. This just looks nicer / avoids + confusion ("Why is there [7] when only 2 documents are cited?").""" + + # Regular expression to find all instances of [[x]](LINK) + pattern = r"\[\[(.*?)\]\]\((.*?)\)" + + all_citation_matches = re.findall(pattern, answer) + + new_citation_info: dict[int, CitationInfo] = {} + for citation_match in all_citation_matches: + try: + citation_num = int(citation_match[0]) + if citation_num in new_citation_info: + continue + + matching_citation = next( + iter([c for c in citations if c.citation_num == int(citation_num)]), + None, + ) + if matching_citation is None: + continue + + new_citation_info[citation_num] = CitationInfo( + citation_num=len(new_citation_info) + 1, + document_id=matching_citation.document_id, + ) + except Exception: + pass + + # Function to replace citations with their new number + def slack_link_format(match: re.Match) -> str: + link_text = match.group(1) + try: + citation_num = int(link_text) + if citation_num in new_citation_info: + link_text = new_citation_info[citation_num].citation_num + except Exception: + pass + + link_url = match.group(2) + return f"[[{link_text}]]({link_url})" + + # Substitute all matches in the input text + new_answer = re.sub(pattern, slack_link_format, answer) + + # if any citations weren't parsable, just add them back to be safe + for citation in citations: + if citation.citation_num not in new_citation_info: + new_citation_info[citation.citation_num] = citation + + return new_answer, list(new_citation_info.values()) + + def get_prompt_tokens(prompt: Prompt) -> int: # Note: currently custom 
prompts do not allow datetime aware, only default prompts return ( diff --git a/backend/danswer/chat/process_message.py b/backend/danswer/chat/process_message.py index 479feb257..5ebf8ab15 100644 --- a/backend/danswer/chat/process_message.py +++ b/backend/danswer/chat/process_message.py @@ -7,7 +7,6 @@ from sqlalchemy.orm import Session from danswer.chat.chat_utils import build_chat_system_message from danswer.chat.chat_utils import build_chat_user_message -from danswer.chat.chat_utils import build_doc_context_str from danswer.chat.chat_utils import compute_max_document_tokens from danswer.chat.chat_utils import compute_max_llm_input_tokens from danswer.chat.chat_utils import create_chat_chain @@ -51,6 +50,7 @@ from danswer.llm.utils import get_default_llm_version from danswer.llm.utils import get_max_input_tokens from danswer.llm.utils import tokenizer_trim_content from danswer.llm.utils import translate_history_to_basemessages +from danswer.prompts.prompt_utils import build_doc_context_str from danswer.search.models import OptionalSearchSetting from danswer.search.models import RetrievalDetails from danswer.search.request_preprocessing import retrieval_preprocessing diff --git a/backend/danswer/configs/danswerbot_configs.py b/backend/danswer/configs/danswerbot_configs.py index 484ba144b..5935c9b99 100644 --- a/backend/danswer/configs/danswerbot_configs.py +++ b/backend/danswer/configs/danswerbot_configs.py @@ -52,6 +52,8 @@ ENABLE_DANSWERBOT_REFLEXION = ( ) # Currently not support chain of thought, probably will add back later DANSWER_BOT_DISABLE_COT = True +# if set, will default DanswerBot to use quotes and reference documents +DANSWER_BOT_USE_QUOTES = os.environ.get("DANSWER_BOT_USE_QUOTES", "").lower() == "true" # Maximum Questions Per Minute, Default Uncapped DANSWER_BOT_MAX_QPM = int(os.environ.get("DANSWER_BOT_MAX_QPM") or 0) or None diff --git a/backend/danswer/danswerbot/slack/blocks.py b/backend/danswer/danswerbot/slack/blocks.py index 
8851ecf45..0d26d78f3 100644 --- a/backend/danswer/danswerbot/slack/blocks.py +++ b/backend/danswer/danswerbot/slack/blocks.py @@ -1,15 +1,20 @@ +import re from datetime import datetime +from re import Match import pytz import timeago # type: ignore from slack_sdk.models.blocks import ActionsBlock from slack_sdk.models.blocks import Block from slack_sdk.models.blocks import ButtonElement +from slack_sdk.models.blocks import ContextBlock from slack_sdk.models.blocks import DividerBlock from slack_sdk.models.blocks import HeaderBlock from slack_sdk.models.blocks import Option from slack_sdk.models.blocks import RadioButtonsElement from slack_sdk.models.blocks import SectionBlock +from slack_sdk.models.blocks.basic_components import MarkdownTextObject +from slack_sdk.models.blocks.block_elements import ImageElement from danswer.chat.models import DanswerQuote from danswer.configs.app_configs import DISABLE_GENERATIVE_AI @@ -22,6 +27,7 @@ from danswer.danswerbot.slack.constants import FOLLOWUP_BUTTON_ACTION_ID from danswer.danswerbot.slack.constants import FOLLOWUP_BUTTON_RESOLVED_ACTION_ID from danswer.danswerbot.slack.constants import IMMEDIATE_RESOLVED_BUTTON_ACTION_ID from danswer.danswerbot.slack.constants import LIKE_BLOCK_ACTION_ID +from danswer.danswerbot.slack.icons import source_to_github_img_link from danswer.danswerbot.slack.utils import build_feedback_id from danswer.danswerbot.slack.utils import remove_slack_text_interactions from danswer.danswerbot.slack.utils import translate_vespa_highlight_to_slack @@ -29,7 +35,35 @@ from danswer.search.models import SavedSearchDoc from danswer.utils.text_processing import decode_escapes from danswer.utils.text_processing import replace_whitespaces_w_space -_MAX_BLURB_LEN = 75 +_MAX_BLURB_LEN = 45 + + +def _process_citations_for_slack(text: str) -> str: + """ + Converts instances of [[x]](LINK) in the input text to Slack's link format . + + Args: + - text (str): The input string containing markdown links. 
+ + Returns: + - str: The string with markdown links converted to Slack format. + """ + # Regular expression to find all instances of [[x]](LINK) + pattern = r"\[\[(.*?)\]\]\((.*?)\)" + + # Function to replace each found instance with Slack's format + def slack_link_format(match: Match) -> str: + link_text = match.group(1) + link_url = match.group(2) + return f"<{link_url}|[{link_text}]>" + + # Substitute all matches in the input text + return re.sub(pattern, slack_link_format, text) + + +def clean_markdown_link_text(text: str) -> str: + # Remove any newlines within the text + return text.replace("\n", " ").strip() def build_qa_feedback_block(message_id: int) -> Block: @@ -38,13 +72,12 @@ def build_qa_feedback_block(message_id: int) -> Block: elements=[ ButtonElement( action_id=LIKE_BLOCK_ACTION_ID, - text="👍", + text="👍 Helpful", style="primary", ), ButtonElement( action_id=DISLIKE_BLOCK_ACTION_ID, - text="👎", - style="danger", + text="👎 Not helpful", ), ], ) @@ -164,6 +197,80 @@ def build_documents_blocks( return section_blocks +def build_sources_blocks( + cited_documents: list[tuple[int, SavedSearchDoc]], + num_docs_to_display: int = DANSWER_BOT_NUM_DOCS_TO_DISPLAY, +) -> list[Block]: + if not cited_documents: + return [ + SectionBlock( + text="*Warning*: no sources were cited for this answer, so it may be unreliable 😔" + ) + ] + + seen_docs_identifiers = set() + section_blocks: list[Block] = [SectionBlock(text="*Sources:*")] + included_docs = 0 + for citation_num, d in cited_documents: + if d.document_id in seen_docs_identifiers: + continue + seen_docs_identifiers.add(d.document_id) + + doc_sem_id = d.semantic_identifier + if d.source_type == DocumentSource.SLACK.value: + # for legacy reasons, before the switch to how Slack semantic identifiers are constructed + if "#" not in doc_sem_id: + doc_sem_id = "#" + doc_sem_id + + # this is needed to try and prevent the line from overflowing + # if it does overflow, the image gets placed above the title and it + # 
looks bad + doc_sem_id = ( + doc_sem_id[:_MAX_BLURB_LEN] + "..." + if len(doc_sem_id) > _MAX_BLURB_LEN + else doc_sem_id + ) + + owner_str = f"By {d.primary_owners[0]}" if d.primary_owners else None + days_ago_str = ( + timeago.format(d.updated_at, datetime.now(pytz.utc)) + if d.updated_at + else None + ) + final_metadata_str = " | ".join( + ([owner_str] if owner_str else []) + + ([days_ago_str] if days_ago_str else []) + ) + + document_title = clean_markdown_link_text(doc_sem_id) + img_link = source_to_github_img_link(d.source_type) + + section_blocks.append( + ContextBlock( + elements=( + [ + ImageElement( + image_url=img_link, + alt_text=f"{d.source_type.value} logo", + ) + ] + if img_link + else [] + ) + + [ + MarkdownTextObject( + text=f"*<{d.link}|[{citation_num}] {document_title}>*\n{final_metadata_str}" + ), + ] + ) + ) + + if included_docs >= num_docs_to_display: + break + + return section_blocks + + def build_quotes_block( quotes: list[DanswerQuote], ) -> list[Block]: @@ -214,6 +321,7 @@ def build_qa_response_blocks( time_cutoff: datetime | None, favor_recent: bool, skip_quotes: bool = False, + process_message_for_citations: bool = False, skip_ai_feedback: bool = False, ) -> list[Block]: if DISABLE_GENERATIVE_AI: @@ -221,8 +329,6 @@ def build_qa_response_blocks( quotes_blocks: list[Block] = [] - ai_answer_header = HeaderBlock(text="AI Answer") - filter_block: Block | None = None if time_cutoff or favor_recent or source_filters: filter_text = "Filters: " @@ -247,6 +353,8 @@ def build_qa_response_blocks( ) else: answer_processed = decode_escapes(remove_slack_text_interactions(answer)) + if process_message_for_citations: + answer_processed = _process_citations_for_slack(answer_processed) answer_block = SectionBlock(text=answer_processed) if quotes: quotes_blocks = build_quotes_block(quotes) @@ -259,7 +367,7 @@ def build_qa_response_blocks( ) ] - response_blocks: list[Block] = [ai_answer_header] + response_blocks: list[Block] = [] if filter_block is not None: 
response_blocks.append(filter_block) @@ -271,7 +379,6 @@ def build_qa_response_blocks( if not skip_quotes: response_blocks.extend(quotes_blocks) - response_blocks.append(DividerBlock()) return response_blocks diff --git a/backend/danswer/danswerbot/slack/handlers/handle_message.py b/backend/danswer/danswerbot/slack/handlers/handle_message.py index 8427a9e18..1e065dd1d 100644 --- a/backend/danswer/danswerbot/slack/handlers/handle_message.py +++ b/backend/danswer/danswerbot/slack/handlers/handle_message.py @@ -9,6 +9,7 @@ from typing import TypeVar from retry import retry from slack_sdk import WebClient from slack_sdk.errors import SlackApiError +from slack_sdk.models.blocks import DividerBlock from sqlalchemy.orm import Session from danswer.chat.chat_utils import compute_max_document_tokens @@ -18,12 +19,14 @@ from danswer.configs.danswerbot_configs import DANSWER_BOT_DISABLE_DOCS_ONLY_ANS from danswer.configs.danswerbot_configs import DANSWER_BOT_DISPLAY_ERROR_MSGS from danswer.configs.danswerbot_configs import DANSWER_BOT_NUM_RETRIES from danswer.configs.danswerbot_configs import DANSWER_BOT_TARGET_CHUNK_PERCENTAGE +from danswer.configs.danswerbot_configs import DANSWER_BOT_USE_QUOTES from danswer.configs.danswerbot_configs import DANSWER_REACT_EMOJI from danswer.configs.danswerbot_configs import DISABLE_DANSWER_BOT_FILTER_DETECT from danswer.configs.danswerbot_configs import ENABLE_DANSWERBOT_REFLEXION from danswer.danswerbot.slack.blocks import build_documents_blocks from danswer.danswerbot.slack.blocks import build_follow_up_block from danswer.danswerbot.slack.blocks import build_qa_response_blocks +from danswer.danswerbot.slack.blocks import build_sources_blocks from danswer.danswerbot.slack.blocks import get_restate_blocks from danswer.danswerbot.slack.constants import SLACK_CHANNEL_ID from danswer.danswerbot.slack.models import SlackMessageInfo @@ -35,6 +38,7 @@ from danswer.danswerbot.slack.utils import SlackRateLimiter from danswer.danswerbot.slack.utils 
import update_emote_react from danswer.db.engine import get_sqlalchemy_engine from danswer.db.models import SlackBotConfig +from danswer.db.models import SlackBotResponseType from danswer.llm.utils import check_number_of_tokens from danswer.llm.utils import get_default_llm_version from danswer.llm.utils import get_max_input_tokens @@ -137,6 +141,13 @@ def handle_message( should_respond_even_with_no_docs = persona.num_chunks == 0 if persona else False + # figure out if we want to use citations or quotes + use_citations = ( + not DANSWER_BOT_USE_QUOTES + if channel_config is None + else channel_config.response_type == SlackBotResponseType.CITATIONS + ) + # List of user id to send message to, if None, send to everyone in channel send_to: list[str] | None = None respond_tag_only = False @@ -259,6 +270,7 @@ def handle_message( answer_generation_timeout=answer_generation_timeout, enable_reflexion=reflexion, bypass_acl=bypass_acl, + use_citations=use_citations, ) if not answer.error_msg: return answer @@ -387,7 +399,10 @@ def handle_message( source_filters=retrieval_info.applied_source_filters, time_cutoff=retrieval_info.applied_time_cutoff, favor_recent=retrieval_info.recency_bias_multiplier > 1, - skip_quotes=persona is not None, # currently Personas don't support quotes + # currently Personas don't support quotes + # if citations are enabled, also don't use quotes + skip_quotes=persona is not None or use_citations, + process_message_for_citations=use_citations, ) # Get the chunks fed to the LLM only, then fill with other docs @@ -397,16 +412,33 @@ def handle_message( doc for idx, doc in enumerate(top_docs) if idx not in llm_doc_inds ] priority_ordered_docs = llm_docs + remaining_docs - document_blocks = ( - build_documents_blocks( + + document_blocks = [] + citations_block = [] + # if citations are enabled, only show cited documents + if use_citations: + citations = answer.citations or [] + cited_docs = [] + for citation in citations: + matching_doc = next( + (d for d 
in top_docs if d.document_id == citation.document_id), + None, + ) + if matching_doc: + cited_docs.append((citation.citation_num, matching_doc)) + + cited_docs.sort() + citations_block = build_sources_blocks(cited_documents=cited_docs) + elif priority_ordered_docs: + document_blocks = build_documents_blocks( documents=priority_ordered_docs, message_id=answer.chat_message_id, ) - if priority_ordered_docs - else [] - ) + document_blocks = [DividerBlock()] + document_blocks - all_blocks = restate_question_block + answer_blocks + document_blocks + all_blocks = ( + restate_question_block + answer_blocks + citations_block + document_blocks + ) if channel_conf and channel_conf.get("follow_up_tags") is not None: all_blocks.append(build_follow_up_block(message_id=answer.chat_message_id)) diff --git a/backend/danswer/danswerbot/slack/icons.py b/backend/danswer/danswerbot/slack/icons.py new file mode 100644 index 000000000..d2e8ea917 --- /dev/null +++ b/backend/danswer/danswerbot/slack/icons.py @@ -0,0 +1,58 @@ +from danswer.configs.constants import DocumentSource + + +def source_to_github_img_link(source: DocumentSource) -> str | None: + # TODO: store these images somewhere better + if source == DocumentSource.WEB.value: + return "https://raw.githubusercontent.com/danswer-ai/danswer/improve-slack-flow/backend/slackbot_images/Web.png" + if source == DocumentSource.FILE.value: + return "https://raw.githubusercontent.com/danswer-ai/danswer/improve-slack-flow/backend/slackbot_images/File.png" + if source == DocumentSource.GOOGLE_SITES.value: + return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/GoogleSites.png" + if source == DocumentSource.SLACK.value: + return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Slack.png" + if source == DocumentSource.GMAIL.value: + return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Gmail.png" + if source == DocumentSource.GOOGLE_DRIVE.value: + return 
"https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/GoogleDrive.png" + if source == DocumentSource.GITHUB.value: + return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Github.png" + if source == DocumentSource.GITLAB.value: + return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Gitlab.png" + if source == DocumentSource.CONFLUENCE.value: + return "https://raw.githubusercontent.com/danswer-ai/danswer/improve-slack-flow/backend/slackbot_images/Confluence.png" + if source == DocumentSource.JIRA.value: + return "https://raw.githubusercontent.com/danswer-ai/danswer/improve-slack-flow/backend/slackbot_images/Jira.png" + if source == DocumentSource.NOTION.value: + return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Notion.png" + if source == DocumentSource.ZENDESK.value: + return "https://raw.githubusercontent.com/danswer-ai/danswer/improve-slack-flow/backend/slackbot_images/Zendesk.png" + if source == DocumentSource.GONG.value: + return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Gong.png" + if source == DocumentSource.LINEAR.value: + return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Linear.png" + if source == DocumentSource.PRODUCTBOARD.value: + return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Productboard.webp" + if source == DocumentSource.SLAB.value: + return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/SlabLogo.png" + if source == DocumentSource.ZULIP.value: + return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Zulip.png" + if source == DocumentSource.GURU.value: + return "https://raw.githubusercontent.com/danswer-ai/danswer/improve-slack-flow/backend/slackbot_images/Guru.png" + if source == DocumentSource.HUBSPOT.value: + return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/HubSpot.png" + if source == 
DocumentSource.DOCUMENT360.value: + return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Document360.png" + if source == DocumentSource.BOOKSTACK.value: + return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Bookstack.png" + if source == DocumentSource.LOOPIO.value: + return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Loopio.png" + if source == DocumentSource.SHAREPOINT.value: + return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Sharepoint.png" + if source == DocumentSource.REQUESTTRACKER.value: + # just use file icon for now + return "https://raw.githubusercontent.com/danswer-ai/danswer/improve-slack-flow/backend/slackbot_images/File.png" + if source == DocumentSource.INGESTION_API.value: + return "https://raw.githubusercontent.com/danswer-ai/danswer/improve-slack-flow/backend/slackbot_images/File.png" + + return "https://raw.githubusercontent.com/danswer-ai/danswer/improve-slack-flow/backend/slackbot_images/File.png" diff --git a/backend/danswer/danswerbot/slack/utils.py b/backend/danswer/danswerbot/slack/utils.py index 753897e10..5d761dec0 100644 --- a/backend/danswer/danswerbot/slack/utils.py +++ b/backend/danswer/danswerbot/slack/utils.py @@ -346,8 +346,12 @@ def read_slack_thread( if len(blocks) <= 1: continue - # The useful block is the second one after the header block that says AI Answer - message = reply["blocks"][1]["text"]["text"] + # For the old flow, the useful block is the second one after the header block that says AI Answer + if reply["blocks"][0]["text"]["text"] == "AI Answer": + message = reply["blocks"][1]["text"]["text"] + else: + # for the new flow, the answer is the first block + message = reply["blocks"][0]["text"]["text"] if message.startswith("_Filters"): if len(blocks) <= 2: diff --git a/backend/danswer/db/models.py b/backend/danswer/db/models.py index 98430fb23..5ca3bdbe9 100644 --- a/backend/danswer/db/models.py +++ 
b/backend/danswer/db/models.py @@ -811,6 +811,11 @@ class ChannelConfig(TypedDict): follow_up_tags: NotRequired[list[str]] +class SlackBotResponseType(str, PyEnum): + QUOTES = "quotes" + CITATIONS = "citations" + + class SlackBotConfig(Base): __tablename__ = "slack_bot_config" @@ -822,6 +827,9 @@ class SlackBotConfig(Base): channel_config: Mapped[ChannelConfig] = mapped_column( postgresql.JSONB(), nullable=False ) + response_type: Mapped[SlackBotResponseType] = mapped_column( + Enum(SlackBotResponseType, native_enum=False), nullable=False + ) persona: Mapped[Persona | None] = relationship("Persona") diff --git a/backend/danswer/db/slack_bot_config.py b/backend/danswer/db/slack_bot_config.py index 82ed77e3f..f2aeae7b3 100644 --- a/backend/danswer/db/slack_bot_config.py +++ b/backend/danswer/db/slack_bot_config.py @@ -11,6 +11,7 @@ from danswer.db.models import ChannelConfig from danswer.db.models import Persona from danswer.db.models import Persona__DocumentSet from danswer.db.models import SlackBotConfig +from danswer.db.models import SlackBotResponseType from danswer.search.models import RecencyBiasSetting @@ -72,11 +73,13 @@ def create_slack_bot_persona( def insert_slack_bot_config( persona_id: int | None, channel_config: ChannelConfig, + response_type: SlackBotResponseType, db_session: Session, ) -> SlackBotConfig: slack_bot_config = SlackBotConfig( persona_id=persona_id, channel_config=channel_config, + response_type=response_type, ) db_session.add(slack_bot_config) db_session.commit() @@ -88,6 +91,7 @@ def update_slack_bot_config( slack_bot_config_id: int, persona_id: int | None, channel_config: ChannelConfig, + response_type: SlackBotResponseType, db_session: Session, ) -> SlackBotConfig: slack_bot_config = db_session.scalar( @@ -105,6 +109,7 @@ def update_slack_bot_config( # will encounter `violates foreign key constraint` errors slack_bot_config.persona_id = persona_id slack_bot_config.channel_config = channel_config + slack_bot_config.response_type = 
response_type # if the persona has changed, then clean up the old persona if persona_id != existing_persona_id and existing_persona_id: diff --git a/backend/danswer/one_shot_answer/answer_question.py b/backend/danswer/one_shot_answer/answer_question.py index 03292eec1..4f4a931ae 100644 --- a/backend/danswer/one_shot_answer/answer_question.py +++ b/backend/danswer/one_shot_answer/answer_question.py @@ -3,10 +3,18 @@ from collections.abc import Callable from collections.abc import Iterator from typing import cast +from langchain.schema.messages import BaseMessage +from langchain.schema.messages import HumanMessage from sqlalchemy.orm import Session +from danswer.chat.chat_utils import build_chat_system_message from danswer.chat.chat_utils import compute_max_document_tokens +from danswer.chat.chat_utils import extract_citations_from_stream from danswer.chat.chat_utils import get_chunks_for_qa +from danswer.chat.chat_utils import llm_doc_from_inference_chunk +from danswer.chat.chat_utils import map_document_id_order +from danswer.chat.chat_utils import reorganize_citations +from danswer.chat.models import CitationInfo from danswer.chat.models import DanswerAnswerPiece from danswer.chat.models import DanswerContext from danswer.chat.models import DanswerContexts @@ -26,16 +34,23 @@ from danswer.db.chat import get_persona_by_id from danswer.db.chat import get_prompt_by_id from danswer.db.chat import translate_db_message_to_chat_message_detail from danswer.db.embedding_model import get_current_db_embedding_model +from danswer.db.models import Prompt from danswer.db.models import User from danswer.document_index.factory import get_default_document_index from danswer.indexing.models import InferenceChunk +from danswer.llm.factory import get_default_llm from danswer.llm.utils import get_default_llm_token_encode +from danswer.llm.utils import get_default_llm_tokenizer from danswer.one_shot_answer.factory import get_question_answer_model from danswer.one_shot_answer.models 
import DirectQARequest from danswer.one_shot_answer.models import OneShotQAResponse from danswer.one_shot_answer.models import QueryRephrase +from danswer.one_shot_answer.models import ThreadMessage from danswer.one_shot_answer.qa_block import no_gen_ai_response from danswer.one_shot_answer.qa_utils import combine_message_thread +from danswer.prompts.direct_qa_prompts import CITATIONS_PROMPT +from danswer.prompts.prompt_utils import build_complete_context_str +from danswer.prompts.prompt_utils import build_task_prompt_reminders from danswer.search.models import RerankMetricsContainer from danswer.search.models import RetrievalMetricsContainer from danswer.search.models import SavedSearchDoc @@ -51,6 +66,118 @@ from danswer.utils.timing import log_generator_function_time logger = setup_logger() +AnswerObjectIterator = Iterator[ + QueryRephrase + | QADocsResponse + | LLMRelevanceFilterResponse + | DanswerAnswerPiece + | DanswerQuotes + | DanswerContexts + | StreamingError + | ChatMessageDetail + | CitationInfo +] + + +def quote_based_qa( + prompt: Prompt, + query_message: ThreadMessage, + history_str: str, + context_chunks: list[InferenceChunk], + llm_override: str | None, + timeout: int, + use_chain_of_thought: bool, + return_contexts: bool, + llm_metrics_callback: Callable[[LLMMetricsContainer], None] | None = None, +) -> AnswerObjectIterator: + qa_model = get_question_answer_model( + prompt=prompt, + timeout=timeout, + chain_of_thought=use_chain_of_thought, + llm_version=llm_override, + ) + + full_prompt_str = ( + qa_model.build_prompt( + query=query_message.message, + history_str=history_str, + context_chunks=context_chunks, + ) + if qa_model is not None + else "Gen AI Disabled" + ) + + response_packets = ( + qa_model.answer_question_stream( + prompt=full_prompt_str, + llm_context_docs=context_chunks, + metrics_callback=llm_metrics_callback, + ) + if qa_model is not None + else no_gen_ai_response() + ) + + if qa_model is not None and return_contexts: + contexts = 
DanswerContexts( + contexts=[ + DanswerContext( + content=context_chunk.content, + document_id=context_chunk.document_id, + semantic_identifier=context_chunk.semantic_identifier, + blurb=context_chunk.semantic_identifier, + ) + for context_chunk in context_chunks + ] + ) + + response_packets = itertools.chain(response_packets, [contexts]) + + yield from response_packets + + +def citation_based_qa( + prompt: Prompt, + query_message: ThreadMessage, + history_str: str, + context_chunks: list[InferenceChunk], + llm_override: str | None, + timeout: int, +) -> AnswerObjectIterator: + llm_tokenizer = get_default_llm_tokenizer() + + system_prompt_or_none, _ = build_chat_system_message( + prompt=prompt, + context_exists=True, + llm_tokenizer_encode_func=llm_tokenizer.encode, + ) + + task_prompt_with_reminder = build_task_prompt_reminders(prompt) + + context_docs_str = build_complete_context_str(context_chunks) + user_message = HumanMessage( + content=CITATIONS_PROMPT.format( + task_prompt=task_prompt_with_reminder, + user_query=query_message.message, + history_block=history_str, + context_docs_str=context_docs_str, + ) + ) + + llm = get_default_llm( + timeout=timeout, + gen_ai_model_version_override=llm_override, + ) + + llm_prompt: list[BaseMessage] = [user_message] + if system_prompt_or_none is not None: + llm_prompt = [system_prompt_or_none] + llm_prompt + + llm_docs = [llm_doc_from_inference_chunk(chunk) for chunk in context_chunks] + doc_id_to_rank_map = map_document_id_order(llm_docs) + + tokens = llm.stream(llm_prompt) + yield from extract_citations_from_stream(tokens, llm_docs, doc_id_to_rank_map) + def stream_answer_objects( query_req: DirectQARequest, @@ -66,20 +193,12 @@ def stream_answer_objects( default_chunk_size: int = DOC_EMBEDDING_CONTEXT_SIZE, timeout: int = QA_TIMEOUT, bypass_acl: bool = False, + use_citations: bool = False, retrieval_metrics_callback: Callable[[RetrievalMetricsContainer], None] | None = None, rerank_metrics_callback: 
Callable[[RerankMetricsContainer], None] | None = None, llm_metrics_callback: Callable[[LLMMetricsContainer], None] | None = None, -) -> Iterator[ - QueryRephrase - | QADocsResponse - | LLMRelevanceFilterResponse - | DanswerAnswerPiece - | DanswerQuotes - | DanswerContexts - | StreamingError - | ChatMessageDetail -]: +) -> AnswerObjectIterator: """Streams in order: 1. [always] Retrieved documents, stops flow if nothing is found 2. [conditional] LLM selected chunk indices if LLM chunk filtering is turned on @@ -216,63 +335,51 @@ def stream_answer_objects( persona_id=query_req.persona_id, user_id=user_id, db_session=db_session ) llm_override = persona.llm_model_version_override - - qa_model = get_question_answer_model( - prompt=prompt, - timeout=timeout, - chain_of_thought=query_req.chain_of_thought, - llm_version=llm_override, - ) - - full_prompt_str = ( - qa_model.build_prompt( - query=query_msg.message, history_str=history_str, context_chunks=llm_chunks - ) - if qa_model is not None - else "Gen AI Disabled" - ) + if prompt is None: + if not chat_session.persona.prompts: + raise RuntimeError( + "Persona does not have any prompts - this should never happen" + ) + prompt = chat_session.persona.prompts[0] # Create the first User query message new_user_message = create_new_chat_message( chat_session_id=chat_session.id, parent_message=root_message, prompt_id=query_req.prompt_id, - message=full_prompt_str, - token_count=len(llm_tokenizer(full_prompt_str)), + message=query_msg.message, + token_count=len(llm_tokenizer(query_msg.message)), message_type=MessageType.USER, db_session=db_session, commit=True, ) - response_packets = ( - qa_model.answer_question_stream( - prompt=full_prompt_str, - llm_context_docs=llm_chunks, - metrics_callback=llm_metrics_callback, + if use_citations: + qa_stream = citation_based_qa( + prompt=prompt, + query_message=query_msg, + history_str=history_str, + context_chunks=llm_chunks, + llm_override=llm_override, + timeout=timeout, ) - if qa_model 
is not None - else no_gen_ai_response() - ) - - if qa_model is not None and query_req.return_contexts: - contexts = DanswerContexts( - contexts=[ - DanswerContext( - content=context_doc.content, - document_id=context_doc.document_id, - semantic_identifier=context_doc.semantic_identifier, - blurb=context_doc.semantic_identifier, - ) - for context_doc in llm_chunks - ] + else: + qa_stream = quote_based_qa( + prompt=prompt, + query_message=query_msg, + history_str=history_str, + context_chunks=llm_chunks, + llm_override=llm_override, + timeout=timeout, + use_chain_of_thought=query_req.chain_of_thought, + return_contexts=query_req.return_contexts, + llm_metrics_callback=llm_metrics_callback, ) - response_packets = itertools.chain(response_packets, [contexts]) - # Capture outputs and errors llm_output = "" error: str | None = None - for packet in response_packets: + for packet in qa_stream: logger.debug(packet) if isinstance(packet, DanswerAnswerPiece): @@ -333,6 +440,7 @@ def get_search_answer( answer_generation_timeout: int = QA_TIMEOUT, enable_reflexion: bool = False, bypass_acl: bool = False, + use_citations: bool = False, retrieval_metrics_callback: Callable[[RetrievalMetricsContainer], None] | None = None, rerank_metrics_callback: Callable[[RerankMetricsContainer], None] | None = None, @@ -348,6 +456,7 @@ def get_search_answer( max_history_tokens=max_history_tokens, db_session=db_session, bypass_acl=bypass_acl, + use_citations=use_citations, timeout=answer_generation_timeout, retrieval_metrics_callback=retrieval_metrics_callback, rerank_metrics_callback=rerank_metrics_callback, @@ -366,6 +475,11 @@ def get_search_answer( qa_response.llm_chunks_indices = packet.relevant_chunk_indices elif isinstance(packet, DanswerQuotes): qa_response.quotes = packet + elif isinstance(packet, CitationInfo): + if qa_response.citations: + qa_response.citations.append(packet) + else: + qa_response.citations = [packet] elif isinstance(packet, DanswerContexts): qa_response.contexts = packet elif isinstance(packet, 
StreamingError): @@ -384,4 +498,10 @@ def get_search_answer( else: qa_response.answer_valid = True + if use_citations and qa_response.answer and qa_response.citations: + # Reorganize citation nums to be in the same order as the answer + qa_response.answer, qa_response.citations = reorganize_citations( + qa_response.answer, qa_response.citations + ) + return qa_response diff --git a/backend/danswer/one_shot_answer/models.py b/backend/danswer/one_shot_answer/models.py index 71f14ff7f..0fefc5a7b 100644 --- a/backend/danswer/one_shot_answer/models.py +++ b/backend/danswer/one_shot_answer/models.py @@ -4,6 +4,7 @@ from pydantic import BaseModel from pydantic import Field from pydantic import root_validator +from danswer.chat.models import CitationInfo from danswer.chat.models import DanswerContexts from danswer.chat.models import DanswerQuotes from danswer.chat.models import QADocsResponse @@ -51,6 +52,7 @@ class OneShotQAResponse(BaseModel): answer: str | None = None rephrase: str | None = None quotes: DanswerQuotes | None = None + citations: list[CitationInfo] | None = None docs: QADocsResponse | None = None llm_chunks_indices: list[int] | None = None error_msg: str | None = None diff --git a/backend/danswer/one_shot_answer/qa_block.py b/backend/danswer/one_shot_answer/qa_block.py index c7b702d26..68cb6e4a8 100644 --- a/backend/danswer/one_shot_answer/qa_block.py +++ b/backend/danswer/one_shot_answer/qa_block.py @@ -4,7 +4,6 @@ from collections.abc import Callable from collections.abc import Iterator from typing import cast -from danswer.chat.chat_utils import build_complete_context_str from danswer.chat.models import AnswerQuestionStreamReturn from danswer.chat.models import DanswerAnswer from danswer.chat.models import DanswerAnswerPiece @@ -33,6 +32,7 @@ from danswer.prompts.direct_qa_prompts import PARAMATERIZED_PROMPT_WITHOUT_CONTE from danswer.prompts.direct_qa_prompts import WEAK_LLM_PROMPT from danswer.prompts.direct_qa_prompts import WEAK_MODEL_SYSTEM_PROMPT 
from danswer.prompts.direct_qa_prompts import WEAK_MODEL_TASK_PROMPT +from danswer.prompts.prompt_utils import build_complete_context_str from danswer.utils.logger import setup_logger from danswer.utils.text_processing import clean_up_code_blocks from danswer.utils.text_processing import escape_newlines diff --git a/backend/danswer/prompts/chat_prompts.py b/backend/danswer/prompts/chat_prompts.py index d83970a37..ec6963948 100644 --- a/backend/danswer/prompts/chat_prompts.py +++ b/backend/danswer/prompts/chat_prompts.py @@ -17,8 +17,6 @@ Remember to provide inline citations in the format [1], [2], [3], etc. ADDITIONAL_INFO = "\n\nAdditional Information:\n\t- {datetime_info}." -DEFAULT_IGNORE_STATEMENT = " Ignore any context documents that are not relevant." - CHAT_USER_PROMPT = f""" Refer to the following context documents when responding to me.{{optional_ignore_statement}} CONTEXT: diff --git a/backend/danswer/prompts/constants.py b/backend/danswer/prompts/constants.py index 5fb9dbf84..d4865b820 100644 --- a/backend/danswer/prompts/constants.py +++ b/backend/danswer/prompts/constants.py @@ -12,3 +12,18 @@ QUOTE_PAT = "Quote:" QUOTES_PAT_PLURAL = "Quotes:" INVALID_PAT = "Invalid:" SOURCES_KEY = "sources" + +DEFAULT_IGNORE_STATEMENT = " Ignore any context documents that are not relevant." + +REQUIRE_CITATION_STATEMENT = """ +Cite relevant statements INLINE using the format [1], [2], [3], etc to reference the document number, \ +DO NOT provide a reference section at the end and DO NOT provide any links following the citations. +""".rstrip() + +NO_CITATION_STATEMENT = """ +Do not provide any citations even if there are examples in the chat history. +""".rstrip() + +CITATION_REMINDER = """ +Remember to provide inline citations in the format [1], [2], [3], etc. 
+""" diff --git a/backend/danswer/prompts/direct_qa_prompts.py b/backend/danswer/prompts/direct_qa_prompts.py index ddfdf2e08..6028ed896 100644 --- a/backend/danswer/prompts/direct_qa_prompts.py +++ b/backend/danswer/prompts/direct_qa_prompts.py @@ -2,6 +2,7 @@ # It is used also for the one shot direct QA flow import json +from danswer.prompts.constants import DEFAULT_IGNORE_STATEMENT from danswer.prompts.constants import FINAL_QUERY_PAT from danswer.prompts.constants import GENERAL_SEP_PAT from danswer.prompts.constants import QUESTION_PAT @@ -96,6 +97,22 @@ SAMPLE RESPONSE: """.strip() +# similar to the chat flow, but with the option of including a +# "conversation history" block +CITATIONS_PROMPT = f""" +Refer to the following context documents when responding to me.{DEFAULT_IGNORE_STATEMENT} +CONTEXT: +{GENERAL_SEP_PAT} +{{context_docs_str}} +{GENERAL_SEP_PAT} + +{{history_block}}{{task_prompt}} + +{QUESTION_PAT.upper()} +{{user_query}} +""" + + # For weak LLM which only takes one chunk and cannot output json # Also not requiring quotes as it tends to not work WEAK_LLM_PROMPT = f""" diff --git a/backend/danswer/prompts/prompt_utils.py b/backend/danswer/prompts/prompt_utils.py index 4c0de783f..dcc7c6f0f 100644 --- a/backend/danswer/prompts/prompt_utils.py +++ b/backend/danswer/prompts/prompt_utils.py @@ -1,5 +1,15 @@ +from collections.abc import Sequence from datetime import datetime +from danswer.chat.models import LlmDoc +from danswer.configs.chat_configs import MULTILINGUAL_QUERY_EXPANSION +from danswer.configs.constants import DocumentSource +from danswer.db.models import Prompt +from danswer.indexing.models import InferenceChunk +from danswer.prompts.chat_prompts import CITATION_REMINDER +from danswer.prompts.constants import CODE_BLOCK_PAT +from danswer.prompts.direct_qa_prompts import LANGUAGE_HINT + def get_current_llm_day_time() -> str: current_datetime = datetime.now() @@ -7,3 +17,78 @@ def get_current_llm_day_time() -> str: formatted_datetime = 
current_datetime.strftime("%B %d, %Y %H:%M") day_of_week = current_datetime.strftime("%A") return f"The current day and time is {day_of_week} {formatted_datetime}" + + +def build_task_prompt_reminders( + prompt: Prompt, + use_language_hint: bool = bool(MULTILINGUAL_QUERY_EXPANSION), + citation_str: str = CITATION_REMINDER, + language_hint_str: str = LANGUAGE_HINT, +) -> str: + base_task = prompt.task_prompt + citation_or_nothing = citation_str if prompt.include_citations else "" + language_hint_or_nothing = language_hint_str.lstrip() if use_language_hint else "" + return base_task + citation_or_nothing + language_hint_or_nothing + + +# Maps connector enum string to a more natural language representation for the LLM +# If not on the list, uses the original but slightly cleaned up, see below +CONNECTOR_NAME_MAP = { + "web": "Website", + "requesttracker": "Request Tracker", + "github": "GitHub", + "file": "File Upload", +} + + +def clean_up_source(source_str: str) -> str: + if source_str in CONNECTOR_NAME_MAP: + return CONNECTOR_NAME_MAP[source_str] + return source_str.replace("_", " ").title() + + +def build_doc_context_str( + semantic_identifier: str, + source_type: DocumentSource, + content: str, + metadata_dict: dict[str, str | list[str]], + updated_at: datetime | None, + ind: int, + include_metadata: bool = True, +) -> str: + context_str = "" + if include_metadata: + context_str += f"DOCUMENT {ind}: {semantic_identifier}\n" + context_str += f"Source: {clean_up_source(source_type)}\n" + + for k, v in metadata_dict.items(): + if isinstance(v, list): + v_str = ", ".join(v) + context_str += f"{k.capitalize()}: {v_str}\n" + else: + context_str += f"{k.capitalize()}: {v}\n" + + if updated_at: + update_str = updated_at.strftime("%B %d, %Y %H:%M") + context_str += f"Updated: {update_str}\n" + context_str += f"{CODE_BLOCK_PAT.format(content.strip())}\n\n\n" + return context_str + + +def build_complete_context_str( + context_docs: Sequence[LlmDoc | InferenceChunk], + 
include_metadata: bool = True, +) -> str: + context_str = "" + for ind, doc in enumerate(context_docs, start=1): + context_str += build_doc_context_str( + semantic_identifier=doc.semantic_identifier, + source_type=doc.source_type, + content=doc.content, + metadata_dict=doc.metadata, + updated_at=doc.updated_at, + ind=ind, + include_metadata=include_metadata, + ) + + return context_str.strip() diff --git a/backend/danswer/prompts/token_counts.py b/backend/danswer/prompts/token_counts.py index 1cf0f80e5..2018ebaa7 100644 --- a/backend/danswer/prompts/token_counts.py +++ b/backend/danswer/prompts/token_counts.py @@ -2,8 +2,8 @@ from danswer.llm.utils import check_number_of_tokens from danswer.prompts.chat_prompts import ADDITIONAL_INFO from danswer.prompts.chat_prompts import CHAT_USER_PROMPT from danswer.prompts.chat_prompts import CITATION_REMINDER -from danswer.prompts.chat_prompts import DEFAULT_IGNORE_STATEMENT from danswer.prompts.chat_prompts import REQUIRE_CITATION_STATEMENT +from danswer.prompts.constants import DEFAULT_IGNORE_STATEMENT from danswer.prompts.direct_qa_prompts import LANGUAGE_HINT from danswer.prompts.prompt_utils import get_current_llm_day_time diff --git a/backend/danswer/server/documents/document.py b/backend/danswer/server/documents/document.py index a0ba40254..ea080b033 100644 --- a/backend/danswer/server/documents/document.py +++ b/backend/danswer/server/documents/document.py @@ -5,12 +5,12 @@ from fastapi import Query from sqlalchemy.orm import Session from danswer.auth.users import current_user -from danswer.chat.chat_utils import build_doc_context_str from danswer.db.embedding_model import get_current_db_embedding_model from danswer.db.engine import get_session from danswer.db.models import User from danswer.document_index.factory import get_default_document_index from danswer.llm.utils import get_default_llm_token_encode +from danswer.prompts.prompt_utils import build_doc_context_str from danswer.search.access_filters import 
build_access_filters_for_user from danswer.search.models import IndexFilters from danswer.server.documents.models import ChunkInfo diff --git a/backend/danswer/server/manage/models.py b/backend/danswer/server/manage/models.py index a22060511..a2ea4c7ab 100644 --- a/backend/danswer/server/manage/models.py +++ b/backend/danswer/server/manage/models.py @@ -9,6 +9,8 @@ from danswer.configs.constants import AuthType from danswer.danswerbot.slack.config import VALID_SLACK_FILTERS from danswer.db.models import AllowedAnswerFilters from danswer.db.models import ChannelConfig +from danswer.db.models import SlackBotConfig as SlackBotConfigModel +from danswer.db.models import SlackBotResponseType from danswer.server.features.persona.models import PersonaSnapshot @@ -81,6 +83,7 @@ class SlackBotConfigCreationRequest(BaseModel): answer_filters: list[AllowedAnswerFilters] = [] # list of user emails follow_up_tags: list[str] | None = None + response_type: SlackBotResponseType @validator("answer_filters", pre=True) def validate_filters(cls, value: list[str]) -> list[str]: @@ -104,6 +107,22 @@ class SlackBotConfig(BaseModel): id: int persona: PersonaSnapshot | None channel_config: ChannelConfig + response_type: SlackBotResponseType + + @classmethod + def from_model( + cls, slack_bot_config_model: SlackBotConfigModel + ) -> "SlackBotConfig": + return cls( + id=slack_bot_config_model.id, + persona=( + PersonaSnapshot.from_model(slack_bot_config_model.persona) + if slack_bot_config_model.persona + else None + ), + channel_config=slack_bot_config_model.channel_config, + response_type=slack_bot_config_model.response_type, + ) class ModelVersionResponse(BaseModel): diff --git a/backend/danswer/server/manage/slack_bot.py b/backend/danswer/server/manage/slack_bot.py index 9720f1f5a..2ea59a631 100644 --- a/backend/danswer/server/manage/slack_bot.py +++ b/backend/danswer/server/manage/slack_bot.py @@ -19,7 +19,6 @@ from danswer.db.slack_bot_config import insert_slack_bot_config from 
danswer.db.slack_bot_config import remove_slack_bot_config from danswer.db.slack_bot_config import update_slack_bot_config from danswer.dynamic_configs.interface import ConfigNotFoundError -from danswer.server.features.persona.models import PersonaSnapshot from danswer.server.manage.models import SlackBotConfig from danswer.server.manage.models import SlackBotConfigCreationRequest from danswer.server.manage.models import SlackBotTokens @@ -108,17 +107,10 @@ def create_slack_bot_config( slack_bot_config_model = insert_slack_bot_config( persona_id=persona_id, channel_config=channel_config, + response_type=slack_bot_config_creation_request.response_type, db_session=db_session, ) - return SlackBotConfig( - id=slack_bot_config_model.id, - persona=( - PersonaSnapshot.from_model(slack_bot_config_model.persona) - if slack_bot_config_model.persona - else None - ), - channel_config=slack_bot_config_model.channel_config, - ) + return SlackBotConfig.from_model(slack_bot_config_model) @router.patch("/admin/slack-bot/config/{slack_bot_config_id}") @@ -170,17 +162,10 @@ def patch_slack_bot_config( slack_bot_config_id=slack_bot_config_id, persona_id=persona_id, channel_config=channel_config, + response_type=slack_bot_config_creation_request.response_type, db_session=db_session, ) - return SlackBotConfig( - id=slack_bot_config_model.id, - persona=( - PersonaSnapshot.from_model(slack_bot_config_model.persona) - if slack_bot_config_model.persona - else None - ), - channel_config=slack_bot_config_model.channel_config, - ) + return SlackBotConfig.from_model(slack_bot_config_model) @router.delete("/admin/slack-bot/config/{slack_bot_config_id}") @@ -201,15 +186,7 @@ def list_slack_bot_configs( ) -> list[SlackBotConfig]: slack_bot_config_models = fetch_slack_bot_configs(db_session=db_session) return [ - SlackBotConfig( - id=slack_bot_config_model.id, - persona=( - PersonaSnapshot.from_model(slack_bot_config_model.persona) - if slack_bot_config_model.persona - else None - ), - 
channel_config=slack_bot_config_model.channel_config, - ) + SlackBotConfig.from_model(slack_bot_config_model) for slack_bot_config_model in slack_bot_config_models ] diff --git a/backend/slackbot_images/Confluence.png b/backend/slackbot_images/Confluence.png new file mode 100644 index 000000000..b201fb616 Binary files /dev/null and b/backend/slackbot_images/Confluence.png differ diff --git a/backend/slackbot_images/File.png b/backend/slackbot_images/File.png new file mode 100644 index 000000000..563d74939 Binary files /dev/null and b/backend/slackbot_images/File.png differ diff --git a/backend/slackbot_images/Guru.png b/backend/slackbot_images/Guru.png new file mode 100644 index 000000000..adfa459d6 Binary files /dev/null and b/backend/slackbot_images/Guru.png differ diff --git a/backend/slackbot_images/Jira.png b/backend/slackbot_images/Jira.png new file mode 100644 index 000000000..2056e6b93 Binary files /dev/null and b/backend/slackbot_images/Jira.png differ diff --git a/backend/slackbot_images/README.md b/backend/slackbot_images/README.md new file mode 100644 index 000000000..bb527d676 --- /dev/null +++ b/backend/slackbot_images/README.md @@ -0,0 +1,3 @@ + +This folder contains images needed by the Danswer Slack Bot. When possible, we use the images +within `web/public`, but sometimes those images do not work for the Slack Bot. 
diff --git a/backend/slackbot_images/Web.png b/backend/slackbot_images/Web.png new file mode 100644 index 000000000..33320416b Binary files /dev/null and b/backend/slackbot_images/Web.png differ diff --git a/backend/slackbot_images/Zendesk.png b/backend/slackbot_images/Zendesk.png new file mode 100644 index 000000000..689c14264 Binary files /dev/null and b/backend/slackbot_images/Zendesk.png differ diff --git a/web/src/app/admin/bot/SlackBotConfigCreationForm.tsx b/web/src/app/admin/bot/SlackBotConfigCreationForm.tsx index 06efc724a..e7ee46151 100644 --- a/web/src/app/admin/bot/SlackBotConfigCreationForm.tsx +++ b/web/src/app/admin/bot/SlackBotConfigCreationForm.tsx @@ -90,9 +90,13 @@ export const SlackBotCreationForm = ({ !isPersonaASlackBotPersona(existingSlackBotConfig.persona) ? existingSlackBotConfig.persona.id : null, + response_type: existingSlackBotConfig?.response_type || "citations", }} validationSchema={Yup.object().shape({ channel_names: Yup.array().of(Yup.string()), + response_type: Yup.string() + .oneOf(["quotes", "citations"]) + .required(), answer_validity_check_enabled: Yup.boolean().required(), questionmark_prefilter_enabled: Yup.boolean().required(), respond_tag_only: Yup.boolean().required(), @@ -171,6 +175,33 @@ export const SlackBotCreationForm = ({ } /> + + + If set to Citations, DanswerBot will respond with a direct + answer with inline citations. It will also provide links + to these cited documents below the answer. When in doubt, + choose this option. +
+
+ If set to Quotes, DanswerBot will respond with a direct + answer as well as with quotes pulled from the context + documents to support that answer. DanswerBot will also + give a list of relevant documents. Choose this option if + you want a very detailed response AND/OR a list of + relevant documents would be useful just in case the LLM + missed anything. + + } + options={[ + { name: "Citations", value: "citations" }, + { name: "Quotes", value: "quotes" }, + ]} + /> + When should DanswerBot respond? diff --git a/web/src/app/admin/bot/lib.ts b/web/src/app/admin/bot/lib.ts index 3c0db3274..2da96c4ef 100644 --- a/web/src/app/admin/bot/lib.ts +++ b/web/src/app/admin/bot/lib.ts @@ -1,4 +1,8 @@ -import { ChannelConfig, SlackBotTokens } from "@/lib/types"; +import { + ChannelConfig, + SlackBotResponseType, + SlackBotTokens, +} from "@/lib/types"; import { Persona } from "../personas/interfaces"; interface SlackBotConfigCreationRequest { @@ -12,6 +16,7 @@ interface SlackBotConfigCreationRequest { respond_team_member_list: string[]; follow_up_tags?: string[]; usePersona: boolean; + response_type: SlackBotResponseType; } const buildFiltersFromCreationRequest = ( @@ -40,6 +45,7 @@ const buildRequestBodyFromCreationRequest = ( ...(creationRequest.usePersona ? 
{ persona_id: creationRequest.persona_id } : { document_sets: creationRequest.document_sets }), + response_type: creationRequest.response_type, }); }; diff --git a/web/src/components/admin/connectors/Field.tsx b/web/src/components/admin/connectors/Field.tsx index 21ddec9fb..10816bd50 100644 --- a/web/src/components/admin/connectors/Field.tsx +++ b/web/src/components/admin/connectors/Field.tsx @@ -231,7 +231,7 @@ interface SelectorFormFieldProps { name: string; label?: string; options: StringOrNumberOption[]; - subtext?: string; + subtext?: string | JSX.Element; includeDefault?: boolean; } diff --git a/web/src/lib/types.ts b/web/src/lib/types.ts index 18b9e336f..44b5a4a95 100644 --- a/web/src/lib/types.ts +++ b/web/src/lib/types.ts @@ -370,10 +370,13 @@ export interface ChannelConfig { follow_up_tags?: string[]; } +export type SlackBotResponseType = "quotes" | "citations"; + export interface SlackBotConfig { id: number; persona: Persona | null; channel_config: ChannelConfig; + response_type: SlackBotResponseType; } export interface SlackBotTokens {