diff --git a/backend/alembic/versions/fcd135795f21_add_slack_bot_display_type.py b/backend/alembic/versions/fcd135795f21_add_slack_bot_display_type.py
new file mode 100644
index 000000000..6c2dad18f
--- /dev/null
+++ b/backend/alembic/versions/fcd135795f21_add_slack_bot_display_type.py
@@ -0,0 +1,39 @@
+"""Add slack bot display type
+
+Revision ID: fcd135795f21
+Revises: 0a2b51deb0b8
+Create Date: 2024-03-04 17:03:27.116284
+
+"""
+from alembic import op
+import sqlalchemy as sa
+
+# revision identifiers, used by Alembic.
+revision = "fcd135795f21"
+down_revision = "0a2b51deb0b8"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+ op.add_column(
+ "slack_bot_config",
+ sa.Column(
+ "response_type",
+ sa.Enum(
+ "QUOTES",
+ "CITATIONS",
+ name="slackbotresponsetype",
+ native_enum=False,
+ ),
+ nullable=True,
+ ),
+ )
+ op.execute(
+ "UPDATE slack_bot_config SET response_type = 'QUOTES' WHERE response_type IS NULL"
+ )
+ op.alter_column("slack_bot_config", "response_type", nullable=False)
+
+
+def downgrade() -> None:
+ op.drop_column("slack_bot_config", "response_type")
diff --git a/backend/danswer/chat/chat_utils.py b/backend/danswer/chat/chat_utils.py
index b839ea3bf..fe97b0b39 100644
--- a/backend/danswer/chat/chat_utils.py
+++ b/backend/danswer/chat/chat_utils.py
@@ -1,7 +1,7 @@
import re
from collections.abc import Callable
from collections.abc import Iterator
-from datetime import datetime
+from collections.abc import Sequence
from functools import lru_cache
from typing import cast
@@ -16,7 +16,6 @@ from danswer.chat.models import DanswerAnswerPiece
from danswer.chat.models import LlmDoc
from danswer.configs.chat_configs import MULTILINGUAL_QUERY_EXPANSION
from danswer.configs.chat_configs import STOP_STREAM_PAT
-from danswer.configs.constants import DocumentSource
from danswer.configs.constants import IGNORE_FOR_QA
from danswer.configs.model_configs import DOC_EMBEDDING_CONTEXT_SIZE
from danswer.configs.model_configs import GEN_AI_SINGLE_USER_MESSAGE_EXPECTED_MAX_TOKENS
@@ -34,13 +33,12 @@ from danswer.llm.utils import tokenizer_trim_content
from danswer.prompts.chat_prompts import ADDITIONAL_INFO
from danswer.prompts.chat_prompts import CHAT_USER_CONTEXT_FREE_PROMPT
from danswer.prompts.chat_prompts import CHAT_USER_PROMPT
-from danswer.prompts.chat_prompts import CITATION_REMINDER
-from danswer.prompts.chat_prompts import DEFAULT_IGNORE_STATEMENT
from danswer.prompts.chat_prompts import NO_CITATION_STATEMENT
from danswer.prompts.chat_prompts import REQUIRE_CITATION_STATEMENT
-from danswer.prompts.constants import CODE_BLOCK_PAT
+from danswer.prompts.constants import DEFAULT_IGNORE_STATEMENT
from danswer.prompts.constants import TRIPLE_BACKTICK
-from danswer.prompts.direct_qa_prompts import LANGUAGE_HINT
+from danswer.prompts.prompt_utils import build_complete_context_str
+from danswer.prompts.prompt_utils import build_task_prompt_reminders
from danswer.prompts.prompt_utils import get_current_llm_day_time
from danswer.prompts.token_counts import ADDITIONAL_INFO_TOKEN_CNT
from danswer.prompts.token_counts import (
@@ -53,68 +51,6 @@ from danswer.utils.logger import setup_logger
logger = setup_logger()
-# Maps connector enum string to a more natural language representation for the LLM
-# If not on the list, uses the original but slightly cleaned up, see below
-CONNECTOR_NAME_MAP = {
- "web": "Website",
- "requesttracker": "Request Tracker",
- "github": "GitHub",
- "file": "File Upload",
-}
-
-
-def clean_up_source(source_str: str) -> str:
- if source_str in CONNECTOR_NAME_MAP:
- return CONNECTOR_NAME_MAP[source_str]
- return source_str.replace("_", " ").title()
-
-
-def build_doc_context_str(
- semantic_identifier: str,
- source_type: DocumentSource,
- content: str,
- metadata_dict: dict[str, str | list[str]],
- updated_at: datetime | None,
- ind: int,
- include_metadata: bool = True,
-) -> str:
- context_str = ""
- if include_metadata:
- context_str += f"DOCUMENT {ind}: {semantic_identifier}\n"
- context_str += f"Source: {clean_up_source(source_type)}\n"
-
- for k, v in metadata_dict.items():
- if isinstance(v, list):
- v_str = ", ".join(v)
- context_str += f"{k.capitalize()}: {v_str}\n"
- else:
- context_str += f"{k.capitalize()}: {v}\n"
-
- if updated_at:
- update_str = updated_at.strftime("%B %d, %Y %H:%M")
- context_str += f"Updated: {update_str}\n"
- context_str += f"{CODE_BLOCK_PAT.format(content.strip())}\n\n\n"
- return context_str
-
-
-def build_complete_context_str(
- context_docs: list[LlmDoc | InferenceChunk],
- include_metadata: bool = True,
-) -> str:
- context_str = ""
- for ind, doc in enumerate(context_docs, start=1):
- context_str += build_doc_context_str(
- semantic_identifier=doc.semantic_identifier,
- source_type=doc.source_type,
- content=doc.content,
- metadata_dict=doc.metadata,
- updated_at=doc.updated_at,
- ind=ind,
- include_metadata=include_metadata,
- )
-
- return context_str.strip()
-
@lru_cache()
def build_chat_system_message(
@@ -147,18 +83,6 @@ def build_chat_system_message(
return system_msg, token_count
-def build_task_prompt_reminders(
- prompt: Prompt,
- use_language_hint: bool = bool(MULTILINGUAL_QUERY_EXPANSION),
- citation_str: str = CITATION_REMINDER,
- language_hint_str: str = LANGUAGE_HINT,
-) -> str:
- base_task = prompt.task_prompt
- citation_or_nothing = citation_str if prompt.include_citations else ""
- language_hint_or_nothing = language_hint_str.lstrip() if use_language_hint else ""
- return base_task + citation_or_nothing + language_hint_or_nothing
-
-
def llm_doc_from_inference_chunk(inf_chunk: InferenceChunk) -> LlmDoc:
return LlmDoc(
document_id=inf_chunk.document_id,
@@ -172,7 +96,7 @@ def llm_doc_from_inference_chunk(inf_chunk: InferenceChunk) -> LlmDoc:
def map_document_id_order(
- chunks: list[InferenceChunk | LlmDoc], one_indexed: bool = True
+ chunks: Sequence[InferenceChunk | LlmDoc], one_indexed: bool = True
) -> dict[str, int]:
order_mapping = {}
current = 1 if one_indexed else 0
@@ -568,6 +492,63 @@ def extract_citations_from_stream(
yield DanswerAnswerPiece(answer_piece=curr_segment)
+def reorganize_citations(
+    answer: str, citations: list[CitationInfo]
+) -> tuple[str, list[CitationInfo]]:
+    """For a complete, citation-aware response, we want to reorganize the citations so that
+    they are in the order of the documents that were used in the response. This just looks nicer / avoids
+    confusion ("Why is there [7] when only 2 documents are cited?")."""
+
+    # Regular expression to find all instances of [[x]](LINK)
+    pattern = r"\[\[(.*?)\]\]\((.*?)\)"
+
+    all_citation_matches = re.findall(pattern, answer)
+
+    new_citation_info: dict[int, CitationInfo] = {}
+    for citation_match in all_citation_matches:
+        try:
+            citation_num = int(citation_match[0])
+            if citation_num in new_citation_info:
+                continue
+
+            matching_citation = next(
+                iter([c for c in citations if c.citation_num == citation_num]),
+                None,
+            )
+            if matching_citation is None:
+                continue
+
+            new_citation_info[citation_num] = CitationInfo(
+                citation_num=len(new_citation_info) + 1,
+                document_id=matching_citation.document_id,
+            )
+        except ValueError:
+            pass
+
+    # Function to replace citations with their new number
+    def slack_link_format(match: re.Match) -> str:
+        link_text = match.group(1)
+        try:
+            citation_num = int(link_text)
+            if citation_num in new_citation_info:
+                link_text = str(new_citation_info[citation_num].citation_num)
+        except ValueError:
+            pass
+
+        link_url = match.group(2)
+        return f"[[{link_text}]]({link_url})"
+
+    # Substitute all matches in the input text
+    new_answer = re.sub(pattern, slack_link_format, answer)
+
+    # if any citations weren't parsable, just add them back to be safe
+    for citation in citations:
+        if citation.citation_num not in new_citation_info:
+            new_citation_info[citation.citation_num] = citation
+
+    return new_answer, list(new_citation_info.values())
+
+
def get_prompt_tokens(prompt: Prompt) -> int:
# Note: currently custom prompts do not allow datetime aware, only default prompts
return (
diff --git a/backend/danswer/chat/process_message.py b/backend/danswer/chat/process_message.py
index 479feb257..5ebf8ab15 100644
--- a/backend/danswer/chat/process_message.py
+++ b/backend/danswer/chat/process_message.py
@@ -7,7 +7,6 @@ from sqlalchemy.orm import Session
from danswer.chat.chat_utils import build_chat_system_message
from danswer.chat.chat_utils import build_chat_user_message
-from danswer.chat.chat_utils import build_doc_context_str
from danswer.chat.chat_utils import compute_max_document_tokens
from danswer.chat.chat_utils import compute_max_llm_input_tokens
from danswer.chat.chat_utils import create_chat_chain
@@ -51,6 +50,7 @@ from danswer.llm.utils import get_default_llm_version
from danswer.llm.utils import get_max_input_tokens
from danswer.llm.utils import tokenizer_trim_content
from danswer.llm.utils import translate_history_to_basemessages
+from danswer.prompts.prompt_utils import build_doc_context_str
from danswer.search.models import OptionalSearchSetting
from danswer.search.models import RetrievalDetails
from danswer.search.request_preprocessing import retrieval_preprocessing
diff --git a/backend/danswer/configs/danswerbot_configs.py b/backend/danswer/configs/danswerbot_configs.py
index 484ba144b..5935c9b99 100644
--- a/backend/danswer/configs/danswerbot_configs.py
+++ b/backend/danswer/configs/danswerbot_configs.py
@@ -52,6 +52,8 @@ ENABLE_DANSWERBOT_REFLEXION = (
)
# Currently not support chain of thought, probably will add back later
DANSWER_BOT_DISABLE_COT = True
+# if set, will default DanswerBot to use quotes and reference documents
+DANSWER_BOT_USE_QUOTES = os.environ.get("DANSWER_BOT_USE_QUOTES", "").lower() == "true"
# Maximum Questions Per Minute, Default Uncapped
DANSWER_BOT_MAX_QPM = int(os.environ.get("DANSWER_BOT_MAX_QPM") or 0) or None
diff --git a/backend/danswer/danswerbot/slack/blocks.py b/backend/danswer/danswerbot/slack/blocks.py
index 8851ecf45..0d26d78f3 100644
--- a/backend/danswer/danswerbot/slack/blocks.py
+++ b/backend/danswer/danswerbot/slack/blocks.py
@@ -1,15 +1,20 @@
+import re
from datetime import datetime
+from re import Match
import pytz
import timeago # type: ignore
from slack_sdk.models.blocks import ActionsBlock
from slack_sdk.models.blocks import Block
from slack_sdk.models.blocks import ButtonElement
+from slack_sdk.models.blocks import ContextBlock
from slack_sdk.models.blocks import DividerBlock
from slack_sdk.models.blocks import HeaderBlock
from slack_sdk.models.blocks import Option
from slack_sdk.models.blocks import RadioButtonsElement
from slack_sdk.models.blocks import SectionBlock
+from slack_sdk.models.blocks.basic_components import MarkdownTextObject
+from slack_sdk.models.blocks.block_elements import ImageElement
from danswer.chat.models import DanswerQuote
from danswer.configs.app_configs import DISABLE_GENERATIVE_AI
@@ -22,6 +27,7 @@ from danswer.danswerbot.slack.constants import FOLLOWUP_BUTTON_ACTION_ID
from danswer.danswerbot.slack.constants import FOLLOWUP_BUTTON_RESOLVED_ACTION_ID
from danswer.danswerbot.slack.constants import IMMEDIATE_RESOLVED_BUTTON_ACTION_ID
from danswer.danswerbot.slack.constants import LIKE_BLOCK_ACTION_ID
+from danswer.danswerbot.slack.icons import source_to_github_img_link
from danswer.danswerbot.slack.utils import build_feedback_id
from danswer.danswerbot.slack.utils import remove_slack_text_interactions
from danswer.danswerbot.slack.utils import translate_vespa_highlight_to_slack
@@ -29,7 +35,35 @@ from danswer.search.models import SavedSearchDoc
from danswer.utils.text_processing import decode_escapes
from danswer.utils.text_processing import replace_whitespaces_w_space
-_MAX_BLURB_LEN = 75
+_MAX_BLURB_LEN = 45
+
+
+def _process_citations_for_slack(text: str) -> str:
+    """
+    Converts instances of [[x]](LINK) in the input text to Slack's link format <LINK|[x]>.
+
+    Args:
+    - text (str): The input string containing markdown links.
+
+    Returns:
+    - str: The string with markdown links converted to Slack format.
+    """
+    # Regular expression to find all instances of [[x]](LINK)
+    pattern = r"\[\[(.*?)\]\]\((.*?)\)"
+
+    # Function to replace each found instance with Slack's format
+    def slack_link_format(match: Match) -> str:
+        link_text = match.group(1)
+        link_url = match.group(2)
+        return f"<{link_url}|[{link_text}]>"
+
+    # Substitute all matches in the input text
+    return re.sub(pattern, slack_link_format, text)
+
+
+def clean_markdown_link_text(text: str) -> str:
+    # Replace embedded newlines with spaces and strip surrounding whitespace
+    return text.replace("\n", " ").strip()
def build_qa_feedback_block(message_id: int) -> Block:
@@ -38,13 +72,12 @@ def build_qa_feedback_block(message_id: int) -> Block:
elements=[
ButtonElement(
action_id=LIKE_BLOCK_ACTION_ID,
- text="👍",
+ text="👍 Helpful",
style="primary",
),
ButtonElement(
action_id=DISLIKE_BLOCK_ACTION_ID,
- text="👎",
- style="danger",
+ text="👎 Not helpful",
),
],
)
@@ -164,6 +197,80 @@ def build_documents_blocks(
return section_blocks
+def build_sources_blocks(
+    cited_documents: list[tuple[int, SavedSearchDoc]],
+    num_docs_to_display: int = DANSWER_BOT_NUM_DOCS_TO_DISPLAY,
+) -> list[Block]:
+    if not cited_documents:
+        return [
+            SectionBlock(
+                text="*Warning*: no sources were cited for this answer, so it may be unreliable 😔"
+            )
+        ]
+
+    seen_docs_identifiers = set()
+    section_blocks: list[Block] = [SectionBlock(text="*Sources:*")]
+    included_docs = 0
+    for citation_num, d in cited_documents:
+        if d.document_id in seen_docs_identifiers:
+            continue
+        seen_docs_identifiers.add(d.document_id)
+
+        doc_sem_id = d.semantic_identifier
+        if d.source_type == DocumentSource.SLACK.value:
+            # for legacy reasons, before the switch to how Slack semantic identifiers are constructed
+            if "#" not in doc_sem_id:
+                doc_sem_id = "#" + doc_sem_id
+
+        # this is needed to try and prevent the line from overflowing
+        # if it does overflow, the image gets placed above the title and it
+        # looks bad
+        doc_sem_id = (
+            doc_sem_id[:_MAX_BLURB_LEN] + "..."
+            if len(doc_sem_id) > _MAX_BLURB_LEN
+            else doc_sem_id
+        )
+
+        owner_str = f"By {d.primary_owners[0]}" if d.primary_owners else None
+        days_ago_str = (
+            timeago.format(d.updated_at, datetime.now(pytz.utc))
+            if d.updated_at
+            else None
+        )
+        final_metadata_str = " | ".join(
+            ([owner_str] if owner_str else [])
+            + ([days_ago_str] if days_ago_str else [])
+        )
+
+        document_title = clean_markdown_link_text(doc_sem_id)
+        img_link = source_to_github_img_link(d.source_type)
+
+        section_blocks.append(
+            ContextBlock(
+                elements=(
+                    [
+                        ImageElement(
+                            image_url=img_link,
+                            alt_text=f"{d.source_type.value} logo",
+                        )
+                    ]
+                    if img_link
+                    else []
+                )
+                + [
+                    MarkdownTextObject(
+                        text=f"*<{d.link}|[{citation_num}] {document_title}>*\n{final_metadata_str}"
+                    ),
+                ]
+            )
+        )
+        included_docs += 1
+        if included_docs >= num_docs_to_display:
+            break
+
+    return section_blocks
+
+
def build_quotes_block(
quotes: list[DanswerQuote],
) -> list[Block]:
@@ -214,6 +321,7 @@ def build_qa_response_blocks(
time_cutoff: datetime | None,
favor_recent: bool,
skip_quotes: bool = False,
+ process_message_for_citations: bool = False,
skip_ai_feedback: bool = False,
) -> list[Block]:
if DISABLE_GENERATIVE_AI:
@@ -221,8 +329,6 @@ def build_qa_response_blocks(
quotes_blocks: list[Block] = []
- ai_answer_header = HeaderBlock(text="AI Answer")
-
filter_block: Block | None = None
if time_cutoff or favor_recent or source_filters:
filter_text = "Filters: "
@@ -247,6 +353,8 @@ def build_qa_response_blocks(
)
else:
answer_processed = decode_escapes(remove_slack_text_interactions(answer))
+ if process_message_for_citations:
+ answer_processed = _process_citations_for_slack(answer_processed)
answer_block = SectionBlock(text=answer_processed)
if quotes:
quotes_blocks = build_quotes_block(quotes)
@@ -259,7 +367,7 @@ def build_qa_response_blocks(
)
]
- response_blocks: list[Block] = [ai_answer_header]
+ response_blocks: list[Block] = []
if filter_block is not None:
response_blocks.append(filter_block)
@@ -271,7 +379,6 @@ def build_qa_response_blocks(
if not skip_quotes:
response_blocks.extend(quotes_blocks)
- response_blocks.append(DividerBlock())
return response_blocks
diff --git a/backend/danswer/danswerbot/slack/handlers/handle_message.py b/backend/danswer/danswerbot/slack/handlers/handle_message.py
index 8427a9e18..1e065dd1d 100644
--- a/backend/danswer/danswerbot/slack/handlers/handle_message.py
+++ b/backend/danswer/danswerbot/slack/handlers/handle_message.py
@@ -9,6 +9,7 @@ from typing import TypeVar
from retry import retry
from slack_sdk import WebClient
from slack_sdk.errors import SlackApiError
+from slack_sdk.models.blocks import DividerBlock
from sqlalchemy.orm import Session
from danswer.chat.chat_utils import compute_max_document_tokens
@@ -18,12 +19,14 @@ from danswer.configs.danswerbot_configs import DANSWER_BOT_DISABLE_DOCS_ONLY_ANS
from danswer.configs.danswerbot_configs import DANSWER_BOT_DISPLAY_ERROR_MSGS
from danswer.configs.danswerbot_configs import DANSWER_BOT_NUM_RETRIES
from danswer.configs.danswerbot_configs import DANSWER_BOT_TARGET_CHUNK_PERCENTAGE
+from danswer.configs.danswerbot_configs import DANSWER_BOT_USE_QUOTES
from danswer.configs.danswerbot_configs import DANSWER_REACT_EMOJI
from danswer.configs.danswerbot_configs import DISABLE_DANSWER_BOT_FILTER_DETECT
from danswer.configs.danswerbot_configs import ENABLE_DANSWERBOT_REFLEXION
from danswer.danswerbot.slack.blocks import build_documents_blocks
from danswer.danswerbot.slack.blocks import build_follow_up_block
from danswer.danswerbot.slack.blocks import build_qa_response_blocks
+from danswer.danswerbot.slack.blocks import build_sources_blocks
from danswer.danswerbot.slack.blocks import get_restate_blocks
from danswer.danswerbot.slack.constants import SLACK_CHANNEL_ID
from danswer.danswerbot.slack.models import SlackMessageInfo
@@ -35,6 +38,7 @@ from danswer.danswerbot.slack.utils import SlackRateLimiter
from danswer.danswerbot.slack.utils import update_emote_react
from danswer.db.engine import get_sqlalchemy_engine
from danswer.db.models import SlackBotConfig
+from danswer.db.models import SlackBotResponseType
from danswer.llm.utils import check_number_of_tokens
from danswer.llm.utils import get_default_llm_version
from danswer.llm.utils import get_max_input_tokens
@@ -137,6 +141,13 @@ def handle_message(
should_respond_even_with_no_docs = persona.num_chunks == 0 if persona else False
+ # figure out if we want to use citations or quotes
+ use_citations = (
+ not DANSWER_BOT_USE_QUOTES
+ if channel_config is None
+ else channel_config.response_type == SlackBotResponseType.CITATIONS
+ )
+
# List of user id to send message to, if None, send to everyone in channel
send_to: list[str] | None = None
respond_tag_only = False
@@ -259,6 +270,7 @@ def handle_message(
answer_generation_timeout=answer_generation_timeout,
enable_reflexion=reflexion,
bypass_acl=bypass_acl,
+ use_citations=use_citations,
)
if not answer.error_msg:
return answer
@@ -387,7 +399,10 @@ def handle_message(
source_filters=retrieval_info.applied_source_filters,
time_cutoff=retrieval_info.applied_time_cutoff,
favor_recent=retrieval_info.recency_bias_multiplier > 1,
- skip_quotes=persona is not None, # currently Personas don't support quotes
+ # currently Personas don't support quotes
+ # if citations are enabled, also don't use quotes
+ skip_quotes=persona is not None or use_citations,
+ process_message_for_citations=use_citations,
)
# Get the chunks fed to the LLM only, then fill with other docs
@@ -397,16 +412,33 @@ def handle_message(
doc for idx, doc in enumerate(top_docs) if idx not in llm_doc_inds
]
priority_ordered_docs = llm_docs + remaining_docs
- document_blocks = (
- build_documents_blocks(
+
+ document_blocks = []
+ citations_block = []
+ # if citations are enabled, only show cited documents
+ if use_citations:
+ citations = answer.citations or []
+ cited_docs = []
+ for citation in citations:
+ matching_doc = next(
+ (d for d in top_docs if d.document_id == citation.document_id),
+ None,
+ )
+ if matching_doc:
+ cited_docs.append((citation.citation_num, matching_doc))
+
+            cited_docs.sort(key=lambda x: x[0])
+ citations_block = build_sources_blocks(cited_documents=cited_docs)
+ elif priority_ordered_docs:
+ document_blocks = build_documents_blocks(
documents=priority_ordered_docs,
message_id=answer.chat_message_id,
)
- if priority_ordered_docs
- else []
- )
+ document_blocks = [DividerBlock()] + document_blocks
- all_blocks = restate_question_block + answer_blocks + document_blocks
+ all_blocks = (
+ restate_question_block + answer_blocks + citations_block + document_blocks
+ )
if channel_conf and channel_conf.get("follow_up_tags") is not None:
all_blocks.append(build_follow_up_block(message_id=answer.chat_message_id))
diff --git a/backend/danswer/danswerbot/slack/icons.py b/backend/danswer/danswerbot/slack/icons.py
new file mode 100644
index 000000000..d2e8ea917
--- /dev/null
+++ b/backend/danswer/danswerbot/slack/icons.py
@@ -0,0 +1,58 @@
+from danswer.configs.constants import DocumentSource
+
+
+def source_to_github_img_link(source: DocumentSource) -> str | None:
+ # TODO: store these images somewhere better
+ if source == DocumentSource.WEB.value:
+ return "https://raw.githubusercontent.com/danswer-ai/danswer/improve-slack-flow/backend/slackbot_images/Web.png"
+ if source == DocumentSource.FILE.value:
+ return "https://raw.githubusercontent.com/danswer-ai/danswer/improve-slack-flow/backend/slackbot_images/File.png"
+ if source == DocumentSource.GOOGLE_SITES.value:
+ return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/GoogleSites.png"
+ if source == DocumentSource.SLACK.value:
+ return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Slack.png"
+ if source == DocumentSource.GMAIL.value:
+ return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Gmail.png"
+ if source == DocumentSource.GOOGLE_DRIVE.value:
+ return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/GoogleDrive.png"
+ if source == DocumentSource.GITHUB.value:
+ return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Github.png"
+ if source == DocumentSource.GITLAB.value:
+ return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Gitlab.png"
+ if source == DocumentSource.CONFLUENCE.value:
+ return "https://raw.githubusercontent.com/danswer-ai/danswer/improve-slack-flow/backend/slackbot_images/Confluence.png"
+ if source == DocumentSource.JIRA.value:
+ return "https://raw.githubusercontent.com/danswer-ai/danswer/improve-slack-flow/backend/slackbot_images/Jira.png"
+ if source == DocumentSource.NOTION.value:
+ return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Notion.png"
+ if source == DocumentSource.ZENDESK.value:
+ return "https://raw.githubusercontent.com/danswer-ai/danswer/improve-slack-flow/backend/slackbot_images/Zendesk.png"
+ if source == DocumentSource.GONG.value:
+ return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Gong.png"
+ if source == DocumentSource.LINEAR.value:
+ return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Linear.png"
+ if source == DocumentSource.PRODUCTBOARD.value:
+ return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Productboard.webp"
+ if source == DocumentSource.SLAB.value:
+ return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/SlabLogo.png"
+ if source == DocumentSource.ZULIP.value:
+ return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Zulip.png"
+ if source == DocumentSource.GURU.value:
+ return "https://raw.githubusercontent.com/danswer-ai/danswer/improve-slack-flow/backend/slackbot_images/Guru.png"
+ if source == DocumentSource.HUBSPOT.value:
+ return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/HubSpot.png"
+ if source == DocumentSource.DOCUMENT360.value:
+ return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Document360.png"
+ if source == DocumentSource.BOOKSTACK.value:
+ return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Bookstack.png"
+ if source == DocumentSource.LOOPIO.value:
+ return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Loopio.png"
+ if source == DocumentSource.SHAREPOINT.value:
+ return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Sharepoint.png"
+ if source == DocumentSource.REQUESTTRACKER.value:
+ # just use file icon for now
+ return "https://raw.githubusercontent.com/danswer-ai/danswer/improve-slack-flow/backend/slackbot_images/File.png"
+ if source == DocumentSource.INGESTION_API.value:
+ return "https://raw.githubusercontent.com/danswer-ai/danswer/improve-slack-flow/backend/slackbot_images/File.png"
+
+ return "https://raw.githubusercontent.com/danswer-ai/danswer/improve-slack-flow/backend/slackbot_images/File.png"
diff --git a/backend/danswer/danswerbot/slack/utils.py b/backend/danswer/danswerbot/slack/utils.py
index 753897e10..5d761dec0 100644
--- a/backend/danswer/danswerbot/slack/utils.py
+++ b/backend/danswer/danswerbot/slack/utils.py
@@ -346,8 +346,12 @@ def read_slack_thread(
if len(blocks) <= 1:
continue
- # The useful block is the second one after the header block that says AI Answer
- message = reply["blocks"][1]["text"]["text"]
+ # For the old flow, the useful block is the second one after the header block that says AI Answer
+ if reply["blocks"][0]["text"]["text"] == "AI Answer":
+ message = reply["blocks"][1]["text"]["text"]
+ else:
+ # for the new flow, the answer is the first block
+ message = reply["blocks"][0]["text"]["text"]
if message.startswith("_Filters"):
if len(blocks) <= 2:
diff --git a/backend/danswer/db/models.py b/backend/danswer/db/models.py
index 98430fb23..5ca3bdbe9 100644
--- a/backend/danswer/db/models.py
+++ b/backend/danswer/db/models.py
@@ -811,6 +811,11 @@ class ChannelConfig(TypedDict):
follow_up_tags: NotRequired[list[str]]
+class SlackBotResponseType(str, PyEnum):
+ QUOTES = "quotes"
+ CITATIONS = "citations"
+
+
class SlackBotConfig(Base):
__tablename__ = "slack_bot_config"
@@ -822,6 +827,9 @@ class SlackBotConfig(Base):
channel_config: Mapped[ChannelConfig] = mapped_column(
postgresql.JSONB(), nullable=False
)
+ response_type: Mapped[SlackBotResponseType] = mapped_column(
+ Enum(SlackBotResponseType, native_enum=False), nullable=False
+ )
persona: Mapped[Persona | None] = relationship("Persona")
diff --git a/backend/danswer/db/slack_bot_config.py b/backend/danswer/db/slack_bot_config.py
index 82ed77e3f..f2aeae7b3 100644
--- a/backend/danswer/db/slack_bot_config.py
+++ b/backend/danswer/db/slack_bot_config.py
@@ -11,6 +11,7 @@ from danswer.db.models import ChannelConfig
from danswer.db.models import Persona
from danswer.db.models import Persona__DocumentSet
from danswer.db.models import SlackBotConfig
+from danswer.db.models import SlackBotResponseType
from danswer.search.models import RecencyBiasSetting
@@ -72,11 +73,13 @@ def create_slack_bot_persona(
def insert_slack_bot_config(
persona_id: int | None,
channel_config: ChannelConfig,
+ response_type: SlackBotResponseType,
db_session: Session,
) -> SlackBotConfig:
slack_bot_config = SlackBotConfig(
persona_id=persona_id,
channel_config=channel_config,
+ response_type=response_type,
)
db_session.add(slack_bot_config)
db_session.commit()
@@ -88,6 +91,7 @@ def update_slack_bot_config(
slack_bot_config_id: int,
persona_id: int | None,
channel_config: ChannelConfig,
+ response_type: SlackBotResponseType,
db_session: Session,
) -> SlackBotConfig:
slack_bot_config = db_session.scalar(
@@ -105,6 +109,7 @@ def update_slack_bot_config(
# will encounter `violates foreign key constraint` errors
slack_bot_config.persona_id = persona_id
slack_bot_config.channel_config = channel_config
+ slack_bot_config.response_type = response_type
# if the persona has changed, then clean up the old persona
if persona_id != existing_persona_id and existing_persona_id:
diff --git a/backend/danswer/one_shot_answer/answer_question.py b/backend/danswer/one_shot_answer/answer_question.py
index 03292eec1..4f4a931ae 100644
--- a/backend/danswer/one_shot_answer/answer_question.py
+++ b/backend/danswer/one_shot_answer/answer_question.py
@@ -3,10 +3,18 @@ from collections.abc import Callable
from collections.abc import Iterator
from typing import cast
+from langchain.schema.messages import BaseMessage
+from langchain.schema.messages import HumanMessage
from sqlalchemy.orm import Session
+from danswer.chat.chat_utils import build_chat_system_message
from danswer.chat.chat_utils import compute_max_document_tokens
+from danswer.chat.chat_utils import extract_citations_from_stream
from danswer.chat.chat_utils import get_chunks_for_qa
+from danswer.chat.chat_utils import llm_doc_from_inference_chunk
+from danswer.chat.chat_utils import map_document_id_order
+from danswer.chat.chat_utils import reorganize_citations
+from danswer.chat.models import CitationInfo
from danswer.chat.models import DanswerAnswerPiece
from danswer.chat.models import DanswerContext
from danswer.chat.models import DanswerContexts
@@ -26,16 +34,23 @@ from danswer.db.chat import get_persona_by_id
from danswer.db.chat import get_prompt_by_id
from danswer.db.chat import translate_db_message_to_chat_message_detail
from danswer.db.embedding_model import get_current_db_embedding_model
+from danswer.db.models import Prompt
from danswer.db.models import User
from danswer.document_index.factory import get_default_document_index
from danswer.indexing.models import InferenceChunk
+from danswer.llm.factory import get_default_llm
from danswer.llm.utils import get_default_llm_token_encode
+from danswer.llm.utils import get_default_llm_tokenizer
from danswer.one_shot_answer.factory import get_question_answer_model
from danswer.one_shot_answer.models import DirectQARequest
from danswer.one_shot_answer.models import OneShotQAResponse
from danswer.one_shot_answer.models import QueryRephrase
+from danswer.one_shot_answer.models import ThreadMessage
from danswer.one_shot_answer.qa_block import no_gen_ai_response
from danswer.one_shot_answer.qa_utils import combine_message_thread
+from danswer.prompts.direct_qa_prompts import CITATIONS_PROMPT
+from danswer.prompts.prompt_utils import build_complete_context_str
+from danswer.prompts.prompt_utils import build_task_prompt_reminders
from danswer.search.models import RerankMetricsContainer
from danswer.search.models import RetrievalMetricsContainer
from danswer.search.models import SavedSearchDoc
@@ -51,6 +66,118 @@ from danswer.utils.timing import log_generator_function_time
logger = setup_logger()
+AnswerObjectIterator = Iterator[
+ QueryRephrase
+ | QADocsResponse
+ | LLMRelevanceFilterResponse
+ | DanswerAnswerPiece
+ | DanswerQuotes
+ | DanswerContexts
+ | StreamingError
+ | ChatMessageDetail
+ | CitationInfo
+]
+
+
+def quote_based_qa(
+ prompt: Prompt,
+ query_message: ThreadMessage,
+ history_str: str,
+ context_chunks: list[InferenceChunk],
+ llm_override: str | None,
+ timeout: int,
+ use_chain_of_thought: bool,
+ return_contexts: bool,
+ llm_metrics_callback: Callable[[LLMMetricsContainer], None] | None = None,
+) -> AnswerObjectIterator:
+ qa_model = get_question_answer_model(
+ prompt=prompt,
+ timeout=timeout,
+ chain_of_thought=use_chain_of_thought,
+ llm_version=llm_override,
+ )
+
+ full_prompt_str = (
+ qa_model.build_prompt(
+ query=query_message.message,
+ history_str=history_str,
+ context_chunks=context_chunks,
+ )
+ if qa_model is not None
+ else "Gen AI Disabled"
+ )
+
+ response_packets = (
+ qa_model.answer_question_stream(
+ prompt=full_prompt_str,
+ llm_context_docs=context_chunks,
+ metrics_callback=llm_metrics_callback,
+ )
+ if qa_model is not None
+ else no_gen_ai_response()
+ )
+
+ if qa_model is not None and return_contexts:
+ contexts = DanswerContexts(
+ contexts=[
+ DanswerContext(
+ content=context_chunk.content,
+ document_id=context_chunk.document_id,
+ semantic_identifier=context_chunk.semantic_identifier,
+ blurb=context_chunk.semantic_identifier,
+ )
+ for context_chunk in context_chunks
+ ]
+ )
+
+ response_packets = itertools.chain(response_packets, [contexts])
+
+ yield from response_packets
+
+
+def citation_based_qa(
+ prompt: Prompt,
+ query_message: ThreadMessage,
+ history_str: str,
+ context_chunks: list[InferenceChunk],
+ llm_override: str | None,
+ timeout: int,
+) -> AnswerObjectIterator:
+ llm_tokenizer = get_default_llm_tokenizer()
+
+ system_prompt_or_none, _ = build_chat_system_message(
+ prompt=prompt,
+ context_exists=True,
+ llm_tokenizer_encode_func=llm_tokenizer.encode,
+ )
+
+ task_prompt_with_reminder = build_task_prompt_reminders(prompt)
+
+ context_docs_str = build_complete_context_str(context_chunks)
+ user_message = HumanMessage(
+ content=CITATIONS_PROMPT.format(
+ task_prompt=task_prompt_with_reminder,
+ user_query=query_message.message,
+ history_block=history_str,
+ context_docs_str=context_docs_str,
+ )
+ )
+
+ llm = get_default_llm(
+ timeout=timeout,
+ gen_ai_model_version_override=llm_override,
+ )
+
+ llm_prompt: list[BaseMessage] = [user_message]
+ if system_prompt_or_none is not None:
+ llm_prompt = [system_prompt_or_none] + llm_prompt
+
+ llm_docs = [llm_doc_from_inference_chunk(chunk) for chunk in context_chunks]
+ doc_id_to_rank_map = map_document_id_order(llm_docs)
+
+ tokens = llm.stream(llm_prompt)
+ yield from extract_citations_from_stream(tokens, llm_docs, doc_id_to_rank_map)
+
def stream_answer_objects(
query_req: DirectQARequest,
@@ -66,20 +193,12 @@ def stream_answer_objects(
default_chunk_size: int = DOC_EMBEDDING_CONTEXT_SIZE,
timeout: int = QA_TIMEOUT,
bypass_acl: bool = False,
+ use_citations: bool = False,
retrieval_metrics_callback: Callable[[RetrievalMetricsContainer], None]
| None = None,
rerank_metrics_callback: Callable[[RerankMetricsContainer], None] | None = None,
llm_metrics_callback: Callable[[LLMMetricsContainer], None] | None = None,
-) -> Iterator[
- QueryRephrase
- | QADocsResponse
- | LLMRelevanceFilterResponse
- | DanswerAnswerPiece
- | DanswerQuotes
- | DanswerContexts
- | StreamingError
- | ChatMessageDetail
-]:
+) -> AnswerObjectIterator:
"""Streams in order:
1. [always] Retrieved documents, stops flow if nothing is found
2. [conditional] LLM selected chunk indices if LLM chunk filtering is turned on
@@ -216,63 +335,51 @@ def stream_answer_objects(
persona_id=query_req.persona_id, user_id=user_id, db_session=db_session
)
llm_override = persona.llm_model_version_override
-
- qa_model = get_question_answer_model(
- prompt=prompt,
- timeout=timeout,
- chain_of_thought=query_req.chain_of_thought,
- llm_version=llm_override,
- )
-
- full_prompt_str = (
- qa_model.build_prompt(
- query=query_msg.message, history_str=history_str, context_chunks=llm_chunks
- )
- if qa_model is not None
- else "Gen AI Disabled"
- )
+ if prompt is None:
+ if not chat_session.persona.prompts:
+ raise RuntimeError(
+ "Persona does not have any prompts - this should never happen"
+ )
+ prompt = chat_session.persona.prompts[0]
# Create the first User query message
new_user_message = create_new_chat_message(
chat_session_id=chat_session.id,
parent_message=root_message,
prompt_id=query_req.prompt_id,
- message=full_prompt_str,
- token_count=len(llm_tokenizer(full_prompt_str)),
+ message=query_msg.message,
+ token_count=len(llm_tokenizer(query_msg.message)),
message_type=MessageType.USER,
db_session=db_session,
commit=True,
)
- response_packets = (
- qa_model.answer_question_stream(
- prompt=full_prompt_str,
- llm_context_docs=llm_chunks,
- metrics_callback=llm_metrics_callback,
+ if use_citations:
+ qa_stream = citation_based_qa(
+ prompt=prompt,
+ query_message=query_msg,
+ history_str=history_str,
+ context_chunks=llm_chunks,
+ llm_override=llm_override,
+ timeout=timeout,
)
- if qa_model is not None
- else no_gen_ai_response()
- )
-
- if qa_model is not None and query_req.return_contexts:
- contexts = DanswerContexts(
- contexts=[
- DanswerContext(
- content=context_doc.content,
- document_id=context_doc.document_id,
- semantic_identifier=context_doc.semantic_identifier,
- blurb=context_doc.semantic_identifier,
- )
- for context_doc in llm_chunks
- ]
+ else:
+ qa_stream = quote_based_qa(
+ prompt=prompt,
+ query_message=query_msg,
+ history_str=history_str,
+ context_chunks=llm_chunks,
+ llm_override=llm_override,
+ timeout=timeout,
+ use_chain_of_thought=False,
+ return_contexts=False,
+ llm_metrics_callback=llm_metrics_callback,
)
- response_packets = itertools.chain(response_packets, [contexts])
-
# Capture outputs and errors
llm_output = ""
error: str | None = None
- for packet in response_packets:
+ for packet in qa_stream:
logger.debug(packet)
if isinstance(packet, DanswerAnswerPiece):
@@ -333,6 +440,7 @@ def get_search_answer(
answer_generation_timeout: int = QA_TIMEOUT,
enable_reflexion: bool = False,
bypass_acl: bool = False,
+ use_citations: bool = False,
retrieval_metrics_callback: Callable[[RetrievalMetricsContainer], None]
| None = None,
rerank_metrics_callback: Callable[[RerankMetricsContainer], None] | None = None,
@@ -348,6 +456,7 @@ def get_search_answer(
max_history_tokens=max_history_tokens,
db_session=db_session,
bypass_acl=bypass_acl,
+ use_citations=use_citations,
timeout=answer_generation_timeout,
retrieval_metrics_callback=retrieval_metrics_callback,
rerank_metrics_callback=rerank_metrics_callback,
@@ -366,6 +475,11 @@ def get_search_answer(
qa_response.llm_chunks_indices = packet.relevant_chunk_indices
elif isinstance(packet, DanswerQuotes):
qa_response.quotes = packet
+ elif isinstance(packet, CitationInfo):
+ if qa_response.citations:
+ qa_response.citations.append(packet)
+ else:
+ qa_response.citations = [packet]
elif isinstance(packet, DanswerContexts):
qa_response.contexts = packet
elif isinstance(packet, StreamingError):
@@ -384,4 +498,10 @@ def get_search_answer(
else:
qa_response.answer_valid = True
+ if use_citations and qa_response.answer and qa_response.citations:
+ # Reorganize citation nums to be in the same order as the answer
+ qa_response.answer, qa_response.citations = reorganize_citations(
+ qa_response.answer, qa_response.citations
+ )
+
return qa_response
diff --git a/backend/danswer/one_shot_answer/models.py b/backend/danswer/one_shot_answer/models.py
index 71f14ff7f..0fefc5a7b 100644
--- a/backend/danswer/one_shot_answer/models.py
+++ b/backend/danswer/one_shot_answer/models.py
@@ -4,6 +4,7 @@ from pydantic import BaseModel
from pydantic import Field
from pydantic import root_validator
+from danswer.chat.models import CitationInfo
from danswer.chat.models import DanswerContexts
from danswer.chat.models import DanswerQuotes
from danswer.chat.models import QADocsResponse
@@ -51,6 +52,7 @@ class OneShotQAResponse(BaseModel):
answer: str | None = None
rephrase: str | None = None
quotes: DanswerQuotes | None = None
+ citations: list[CitationInfo] | None = None
docs: QADocsResponse | None = None
llm_chunks_indices: list[int] | None = None
error_msg: str | None = None
diff --git a/backend/danswer/one_shot_answer/qa_block.py b/backend/danswer/one_shot_answer/qa_block.py
index c7b702d26..68cb6e4a8 100644
--- a/backend/danswer/one_shot_answer/qa_block.py
+++ b/backend/danswer/one_shot_answer/qa_block.py
@@ -4,7 +4,6 @@ from collections.abc import Callable
from collections.abc import Iterator
from typing import cast
-from danswer.chat.chat_utils import build_complete_context_str
from danswer.chat.models import AnswerQuestionStreamReturn
from danswer.chat.models import DanswerAnswer
from danswer.chat.models import DanswerAnswerPiece
@@ -33,6 +32,7 @@ from danswer.prompts.direct_qa_prompts import PARAMATERIZED_PROMPT_WITHOUT_CONTE
from danswer.prompts.direct_qa_prompts import WEAK_LLM_PROMPT
from danswer.prompts.direct_qa_prompts import WEAK_MODEL_SYSTEM_PROMPT
from danswer.prompts.direct_qa_prompts import WEAK_MODEL_TASK_PROMPT
+from danswer.prompts.prompt_utils import build_complete_context_str
from danswer.utils.logger import setup_logger
from danswer.utils.text_processing import clean_up_code_blocks
from danswer.utils.text_processing import escape_newlines
diff --git a/backend/danswer/prompts/chat_prompts.py b/backend/danswer/prompts/chat_prompts.py
index d83970a37..ec6963948 100644
--- a/backend/danswer/prompts/chat_prompts.py
+++ b/backend/danswer/prompts/chat_prompts.py
@@ -17,8 +17,6 @@ Remember to provide inline citations in the format [1], [2], [3], etc.
ADDITIONAL_INFO = "\n\nAdditional Information:\n\t- {datetime_info}."
-DEFAULT_IGNORE_STATEMENT = " Ignore any context documents that are not relevant."
-
CHAT_USER_PROMPT = f"""
Refer to the following context documents when responding to me.{{optional_ignore_statement}}
CONTEXT:
diff --git a/backend/danswer/prompts/constants.py b/backend/danswer/prompts/constants.py
index 5fb9dbf84..d4865b820 100644
--- a/backend/danswer/prompts/constants.py
+++ b/backend/danswer/prompts/constants.py
@@ -12,3 +12,18 @@ QUOTE_PAT = "Quote:"
QUOTES_PAT_PLURAL = "Quotes:"
INVALID_PAT = "Invalid:"
SOURCES_KEY = "sources"
+
+DEFAULT_IGNORE_STATEMENT = " Ignore any context documents that are not relevant."
+
+# Appended to prompts that should produce inline [n]-style citations.
+REQUIRE_CITATION_STATEMENT = """
+Cite relevant statements INLINE using the format [1], [2], [3], etc to reference the document number, \
+DO NOT provide a reference section at the end and DO NOT provide any links following the citations.
+""".rstrip()
+
+# Appended to prompts that must NOT cite (e.g. the quote-based answer flow).
+NO_CITATION_STATEMENT = """
+Do not provide any citations even if there are examples in the chat history.
+""".rstrip()
+
+# NOTE(review): unlike the two statements above this keeps its surrounding
+# newlines (no .rstrip()) — confirm that is intentional. Also note that
+# chat_prompts exports a CITATION_REMINDER as well (see the imports in
+# prompt_utils/token_counts); consider consolidating to one definition.
+CITATION_REMINDER = """
+Remember to provide inline citations in the format [1], [2], [3], etc.
+"""
diff --git a/backend/danswer/prompts/direct_qa_prompts.py b/backend/danswer/prompts/direct_qa_prompts.py
index ddfdf2e08..6028ed896 100644
--- a/backend/danswer/prompts/direct_qa_prompts.py
+++ b/backend/danswer/prompts/direct_qa_prompts.py
@@ -2,6 +2,7 @@
# It is used also for the one shot direct QA flow
import json
+from danswer.prompts.constants import DEFAULT_IGNORE_STATEMENT
from danswer.prompts.constants import FINAL_QUERY_PAT
from danswer.prompts.constants import GENERAL_SEP_PAT
from danswer.prompts.constants import QUESTION_PAT
@@ -96,6 +97,22 @@ SAMPLE RESPONSE:
""".strip()
+# similar to the chat flow, but with the option of including a
+# "conversation history" block
+CITATIONS_PROMPT = f"""
+Refer to the following context documents when responding to me.{DEFAULT_IGNORE_STATEMENT}
+CONTEXT:
+{GENERAL_SEP_PAT}
+{{context_docs_str}}
+{GENERAL_SEP_PAT}
+
+{{history_block}}{{task_prompt}}
+
+{QUESTION_PAT.upper()}
+{{user_query}}
+"""
+
+
# For weak LLM which only takes one chunk and cannot output json
# Also not requiring quotes as it tends to not work
WEAK_LLM_PROMPT = f"""
diff --git a/backend/danswer/prompts/prompt_utils.py b/backend/danswer/prompts/prompt_utils.py
index 4c0de783f..dcc7c6f0f 100644
--- a/backend/danswer/prompts/prompt_utils.py
+++ b/backend/danswer/prompts/prompt_utils.py
@@ -1,5 +1,15 @@
+from collections.abc import Sequence
from datetime import datetime
+from danswer.chat.models import LlmDoc
+from danswer.configs.chat_configs import MULTILINGUAL_QUERY_EXPANSION
+from danswer.configs.constants import DocumentSource
+from danswer.db.models import Prompt
+from danswer.indexing.models import InferenceChunk
+from danswer.prompts.chat_prompts import CITATION_REMINDER
+from danswer.prompts.constants import CODE_BLOCK_PAT
+from danswer.prompts.direct_qa_prompts import LANGUAGE_HINT
+
def get_current_llm_day_time() -> str:
current_datetime = datetime.now()
@@ -7,3 +17,78 @@ def get_current_llm_day_time() -> str:
formatted_datetime = current_datetime.strftime("%B %d, %Y %H:%M")
day_of_week = current_datetime.strftime("%A")
return f"The current day and time is {day_of_week} {formatted_datetime}"
+
+
+def build_task_prompt_reminders(
+    prompt: Prompt,
+    use_language_hint: bool = bool(MULTILINGUAL_QUERY_EXPANSION),
+    citation_str: str = CITATION_REMINDER,
+    language_hint_str: str = LANGUAGE_HINT,
+) -> str:
+    """Append the citation reminder and/or language hint to the task prompt.
+
+    The citation reminder is included only when the Prompt has
+    include_citations set; the language hint only when multilingual query
+    expansion is configured (or use_language_hint is passed explicitly).
+    """
+    base_task = prompt.task_prompt
+    citation_or_nothing = citation_str if prompt.include_citations else ""
+    language_hint_or_nothing = language_hint_str.lstrip() if use_language_hint else ""
+    return base_task + citation_or_nothing + language_hint_or_nothing
+
+
+# Maps connector enum string to a more natural language representation for the LLM
+# If not on the list, uses the original but slightly cleaned up, see below
+CONNECTOR_NAME_MAP = {
+    "web": "Website",
+    "requesttracker": "Request Tracker",
+    "github": "GitHub",
+    "file": "File Upload",
+}
+
+
+def clean_up_source(source_str: str) -> str:
+    """Return a human-readable connector name for a source string."""
+    if source_str in CONNECTOR_NAME_MAP:
+        return CONNECTOR_NAME_MAP[source_str]
+    # Fallback: e.g. "google_drive" -> "Google Drive"
+    return source_str.replace("_", " ").title()
+
+
+def build_doc_context_str(
+    semantic_identifier: str,
+    source_type: DocumentSource,
+    content: str,
+    metadata_dict: dict[str, str | list[str]],
+    updated_at: datetime | None,
+    ind: int,
+    include_metadata: bool = True,
+) -> str:
+    """Format one document (chunk) into the text block fed to the LLM.
+
+    When include_metadata is True the block is prefixed with the document
+    number and title, its source, any metadata key/value pairs, and the last
+    update time; the content itself is wrapped via CODE_BLOCK_PAT.
+    """
+    context_str = ""
+    if include_metadata:
+        context_str += f"DOCUMENT {ind}: {semantic_identifier}\n"
+        # NOTE(review): clean_up_source is annotated to take a str but
+        # receives the DocumentSource enum here — presumably it is a
+        # str-valued enum; confirm before changing.
+        context_str += f"Source: {clean_up_source(source_type)}\n"
+
+        for k, v in metadata_dict.items():
+            if isinstance(v, list):
+                v_str = ", ".join(v)
+                context_str += f"{k.capitalize()}: {v_str}\n"
+            else:
+                context_str += f"{k.capitalize()}: {v}\n"
+
+        if updated_at:
+            update_str = updated_at.strftime("%B %d, %Y %H:%M")
+            context_str += f"Updated: {update_str}\n"
+    context_str += f"{CODE_BLOCK_PAT.format(content.strip())}\n\n\n"
+    return context_str
+
+
+def build_complete_context_str(
+    context_docs: Sequence[LlmDoc | InferenceChunk],
+    include_metadata: bool = True,
+) -> str:
+    """Concatenate all context documents into one LLM context string.
+
+    Documents are numbered starting at 1 so the numbering matches the
+    inline [1], [2], ... citation format.
+    """
+    context_str = ""
+    for ind, doc in enumerate(context_docs, start=1):
+        context_str += build_doc_context_str(
+            semantic_identifier=doc.semantic_identifier,
+            source_type=doc.source_type,
+            content=doc.content,
+            metadata_dict=doc.metadata,
+            updated_at=doc.updated_at,
+            ind=ind,
+            include_metadata=include_metadata,
+        )
+
+    return context_str.strip()
diff --git a/backend/danswer/prompts/token_counts.py b/backend/danswer/prompts/token_counts.py
index 1cf0f80e5..2018ebaa7 100644
--- a/backend/danswer/prompts/token_counts.py
+++ b/backend/danswer/prompts/token_counts.py
@@ -2,8 +2,8 @@ from danswer.llm.utils import check_number_of_tokens
from danswer.prompts.chat_prompts import ADDITIONAL_INFO
from danswer.prompts.chat_prompts import CHAT_USER_PROMPT
from danswer.prompts.chat_prompts import CITATION_REMINDER
-from danswer.prompts.chat_prompts import DEFAULT_IGNORE_STATEMENT
from danswer.prompts.chat_prompts import REQUIRE_CITATION_STATEMENT
+from danswer.prompts.constants import DEFAULT_IGNORE_STATEMENT
from danswer.prompts.direct_qa_prompts import LANGUAGE_HINT
from danswer.prompts.prompt_utils import get_current_llm_day_time
diff --git a/backend/danswer/server/documents/document.py b/backend/danswer/server/documents/document.py
index a0ba40254..ea080b033 100644
--- a/backend/danswer/server/documents/document.py
+++ b/backend/danswer/server/documents/document.py
@@ -5,12 +5,12 @@ from fastapi import Query
from sqlalchemy.orm import Session
from danswer.auth.users import current_user
-from danswer.chat.chat_utils import build_doc_context_str
from danswer.db.embedding_model import get_current_db_embedding_model
from danswer.db.engine import get_session
from danswer.db.models import User
from danswer.document_index.factory import get_default_document_index
from danswer.llm.utils import get_default_llm_token_encode
+from danswer.prompts.prompt_utils import build_doc_context_str
from danswer.search.access_filters import build_access_filters_for_user
from danswer.search.models import IndexFilters
from danswer.server.documents.models import ChunkInfo
diff --git a/backend/danswer/server/manage/models.py b/backend/danswer/server/manage/models.py
index a22060511..a2ea4c7ab 100644
--- a/backend/danswer/server/manage/models.py
+++ b/backend/danswer/server/manage/models.py
@@ -9,6 +9,8 @@ from danswer.configs.constants import AuthType
from danswer.danswerbot.slack.config import VALID_SLACK_FILTERS
from danswer.db.models import AllowedAnswerFilters
from danswer.db.models import ChannelConfig
+from danswer.db.models import SlackBotConfig as SlackBotConfigModel
+from danswer.db.models import SlackBotResponseType
from danswer.server.features.persona.models import PersonaSnapshot
@@ -81,6 +83,7 @@ class SlackBotConfigCreationRequest(BaseModel):
answer_filters: list[AllowedAnswerFilters] = []
# list of user emails
follow_up_tags: list[str] | None = None
+ response_type: SlackBotResponseType
@validator("answer_filters", pre=True)
def validate_filters(cls, value: list[str]) -> list[str]:
@@ -104,6 +107,22 @@ class SlackBotConfig(BaseModel):
id: int
persona: PersonaSnapshot | None
channel_config: ChannelConfig
+ response_type: SlackBotResponseType
+
+    @classmethod
+    def from_model(
+        cls, slack_bot_config_model: SlackBotConfigModel
+    ) -> "SlackBotConfig":
+        """Build the API snapshot from the SlackBotConfig DB model."""
+        return cls(
+            id=slack_bot_config_model.id,
+            # A config may have no attached persona.
+            persona=(
+                PersonaSnapshot.from_model(slack_bot_config_model.persona)
+                if slack_bot_config_model.persona
+                else None
+            ),
+            channel_config=slack_bot_config_model.channel_config,
+            response_type=slack_bot_config_model.response_type,
+        )
class ModelVersionResponse(BaseModel):
diff --git a/backend/danswer/server/manage/slack_bot.py b/backend/danswer/server/manage/slack_bot.py
index 9720f1f5a..2ea59a631 100644
--- a/backend/danswer/server/manage/slack_bot.py
+++ b/backend/danswer/server/manage/slack_bot.py
@@ -19,7 +19,6 @@ from danswer.db.slack_bot_config import insert_slack_bot_config
from danswer.db.slack_bot_config import remove_slack_bot_config
from danswer.db.slack_bot_config import update_slack_bot_config
from danswer.dynamic_configs.interface import ConfigNotFoundError
-from danswer.server.features.persona.models import PersonaSnapshot
from danswer.server.manage.models import SlackBotConfig
from danswer.server.manage.models import SlackBotConfigCreationRequest
from danswer.server.manage.models import SlackBotTokens
@@ -108,17 +107,10 @@ def create_slack_bot_config(
slack_bot_config_model = insert_slack_bot_config(
persona_id=persona_id,
channel_config=channel_config,
+ response_type=slack_bot_config_creation_request.response_type,
db_session=db_session,
)
- return SlackBotConfig(
- id=slack_bot_config_model.id,
- persona=(
- PersonaSnapshot.from_model(slack_bot_config_model.persona)
- if slack_bot_config_model.persona
- else None
- ),
- channel_config=slack_bot_config_model.channel_config,
- )
+ return SlackBotConfig.from_model(slack_bot_config_model)
@router.patch("/admin/slack-bot/config/{slack_bot_config_id}")
@@ -170,17 +162,10 @@ def patch_slack_bot_config(
slack_bot_config_id=slack_bot_config_id,
persona_id=persona_id,
channel_config=channel_config,
+ response_type=slack_bot_config_creation_request.response_type,
db_session=db_session,
)
- return SlackBotConfig(
- id=slack_bot_config_model.id,
- persona=(
- PersonaSnapshot.from_model(slack_bot_config_model.persona)
- if slack_bot_config_model.persona
- else None
- ),
- channel_config=slack_bot_config_model.channel_config,
- )
+ return SlackBotConfig.from_model(slack_bot_config_model)
@router.delete("/admin/slack-bot/config/{slack_bot_config_id}")
@@ -201,15 +186,7 @@ def list_slack_bot_configs(
) -> list[SlackBotConfig]:
slack_bot_config_models = fetch_slack_bot_configs(db_session=db_session)
return [
- SlackBotConfig(
- id=slack_bot_config_model.id,
- persona=(
- PersonaSnapshot.from_model(slack_bot_config_model.persona)
- if slack_bot_config_model.persona
- else None
- ),
- channel_config=slack_bot_config_model.channel_config,
- )
+ SlackBotConfig.from_model(slack_bot_config_model)
for slack_bot_config_model in slack_bot_config_models
]
diff --git a/backend/slackbot_images/Confluence.png b/backend/slackbot_images/Confluence.png
new file mode 100644
index 000000000..b201fb616
Binary files /dev/null and b/backend/slackbot_images/Confluence.png differ
diff --git a/backend/slackbot_images/File.png b/backend/slackbot_images/File.png
new file mode 100644
index 000000000..563d74939
Binary files /dev/null and b/backend/slackbot_images/File.png differ
diff --git a/backend/slackbot_images/Guru.png b/backend/slackbot_images/Guru.png
new file mode 100644
index 000000000..adfa459d6
Binary files /dev/null and b/backend/slackbot_images/Guru.png differ
diff --git a/backend/slackbot_images/Jira.png b/backend/slackbot_images/Jira.png
new file mode 100644
index 000000000..2056e6b93
Binary files /dev/null and b/backend/slackbot_images/Jira.png differ
diff --git a/backend/slackbot_images/README.md b/backend/slackbot_images/README.md
new file mode 100644
index 000000000..bb527d676
--- /dev/null
+++ b/backend/slackbot_images/README.md
@@ -0,0 +1,3 @@
+
+This folder contains images needed by the Danswer Slack Bot. When possible, we use the images
+within `web/public`, but sometimes those images do not work for the Slack Bot.
diff --git a/backend/slackbot_images/Web.png b/backend/slackbot_images/Web.png
new file mode 100644
index 000000000..33320416b
Binary files /dev/null and b/backend/slackbot_images/Web.png differ
diff --git a/backend/slackbot_images/Zendesk.png b/backend/slackbot_images/Zendesk.png
new file mode 100644
index 000000000..689c14264
Binary files /dev/null and b/backend/slackbot_images/Zendesk.png differ
diff --git a/web/src/app/admin/bot/SlackBotConfigCreationForm.tsx b/web/src/app/admin/bot/SlackBotConfigCreationForm.tsx
index 06efc724a..e7ee46151 100644
--- a/web/src/app/admin/bot/SlackBotConfigCreationForm.tsx
+++ b/web/src/app/admin/bot/SlackBotConfigCreationForm.tsx
@@ -90,9 +90,13 @@ export const SlackBotCreationForm = ({
!isPersonaASlackBotPersona(existingSlackBotConfig.persona)
? existingSlackBotConfig.persona.id
: null,
+ response_type: existingSlackBotConfig?.response_type || "citations",
}}
validationSchema={Yup.object().shape({
channel_names: Yup.array().of(Yup.string()),
+ response_type: Yup.string()
+ .oneOf(["quotes", "citations"])
+ .required(),
answer_validity_check_enabled: Yup.boolean().required(),
questionmark_prefilter_enabled: Yup.boolean().required(),
respond_tag_only: Yup.boolean().required(),
@@ -171,6 +175,33 @@ export const SlackBotCreationForm = ({
}
/>
+
+
+ If set to Citations, DanswerBot will respond with a direct
+ answer with inline citations. It will also provide links
+ to these cited documents below the answer. When in doubt,
+ choose this option.
+
+
+ If set to Quotes, DanswerBot will respond with a direct
+ answer as well as with quotes pulled from the context
+ documents to support that answer. DanswerBot will also
+ give a list of relevant documents. Choose this option if
+ you want a very detailed response AND/OR a list of
+ relevant documents would be useful just in case the LLM
+ missed anything.
+ >
+ }
+ options={[
+ { name: "Citations", value: "citations" },
+ { name: "Quotes", value: "quotes" },
+ ]}
+ />
+
When should DanswerBot respond?
diff --git a/web/src/app/admin/bot/lib.ts b/web/src/app/admin/bot/lib.ts
index 3c0db3274..2da96c4ef 100644
--- a/web/src/app/admin/bot/lib.ts
+++ b/web/src/app/admin/bot/lib.ts
@@ -1,4 +1,8 @@
-import { ChannelConfig, SlackBotTokens } from "@/lib/types";
+import {
+ ChannelConfig,
+ SlackBotResponseType,
+ SlackBotTokens,
+} from "@/lib/types";
import { Persona } from "../personas/interfaces";
interface SlackBotConfigCreationRequest {
@@ -12,6 +16,7 @@ interface SlackBotConfigCreationRequest {
respond_team_member_list: string[];
follow_up_tags?: string[];
usePersona: boolean;
+ response_type: SlackBotResponseType;
}
const buildFiltersFromCreationRequest = (
@@ -40,6 +45,7 @@ const buildRequestBodyFromCreationRequest = (
...(creationRequest.usePersona
? { persona_id: creationRequest.persona_id }
: { document_sets: creationRequest.document_sets }),
+ response_type: creationRequest.response_type,
});
};
diff --git a/web/src/components/admin/connectors/Field.tsx b/web/src/components/admin/connectors/Field.tsx
index 21ddec9fb..10816bd50 100644
--- a/web/src/components/admin/connectors/Field.tsx
+++ b/web/src/components/admin/connectors/Field.tsx
@@ -231,7 +231,7 @@ interface SelectorFormFieldProps {
name: string;
label?: string;
options: StringOrNumberOption[];
- subtext?: string;
+ subtext?: string | JSX.Element;
includeDefault?: boolean;
}
diff --git a/web/src/lib/types.ts b/web/src/lib/types.ts
index 18b9e336f..44b5a4a95 100644
--- a/web/src/lib/types.ts
+++ b/web/src/lib/types.ts
@@ -370,10 +370,13 @@ export interface ChannelConfig {
follow_up_tags?: string[];
}
+export type SlackBotResponseType = "quotes" | "citations";
+
export interface SlackBotConfig {
id: number;
persona: Persona | null;
channel_config: ChannelConfig;
+ response_type: SlackBotResponseType;
}
export interface SlackBotTokens {