Mirror of https://github.com/danswer-ai/danswer.git

Improve slack flow
commit 7869f23e12 (parent 0b0665044f)
@@ -0,0 +1,39 @@
"""Add slack bot display type

Revision ID: fcd135795f21
Revises: 0a2b51deb0b8
Create Date: 2024-03-04 17:03:27.116284

"""
from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "fcd135795f21"
down_revision = "0a2b51deb0b8"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.add_column(
        "slack_bot_config",
        sa.Column(
            "response_type",
            sa.Enum(
                "QUOTES",
                "CITATIONS",
                name="slackbotresponsetype",
                native_enum=False,
            ),
            nullable=True,
        ),
    )
    op.execute(
        "UPDATE slack_bot_config SET response_type = 'QUOTES' WHERE response_type IS NULL"
    )
    op.alter_column("slack_bot_config", "response_type", nullable=False)


def downgrade() -> None:
    op.drop_column("slack_bot_config", "response_type")
@@ -1,7 +1,7 @@
import re
from collections.abc import Callable
from collections.abc import Iterator
from datetime import datetime
from collections.abc import Sequence
from functools import lru_cache
from typing import cast

@@ -16,7 +16,6 @@ from danswer.chat.models import DanswerAnswerPiece
from danswer.chat.models import LlmDoc
from danswer.configs.chat_configs import MULTILINGUAL_QUERY_EXPANSION
from danswer.configs.chat_configs import STOP_STREAM_PAT
from danswer.configs.constants import DocumentSource
from danswer.configs.constants import IGNORE_FOR_QA
from danswer.configs.model_configs import DOC_EMBEDDING_CONTEXT_SIZE
from danswer.configs.model_configs import GEN_AI_SINGLE_USER_MESSAGE_EXPECTED_MAX_TOKENS
@@ -34,13 +33,12 @@ from danswer.llm.utils import tokenizer_trim_content
from danswer.prompts.chat_prompts import ADDITIONAL_INFO
from danswer.prompts.chat_prompts import CHAT_USER_CONTEXT_FREE_PROMPT
from danswer.prompts.chat_prompts import CHAT_USER_PROMPT
from danswer.prompts.chat_prompts import CITATION_REMINDER
from danswer.prompts.chat_prompts import DEFAULT_IGNORE_STATEMENT
from danswer.prompts.chat_prompts import NO_CITATION_STATEMENT
from danswer.prompts.chat_prompts import REQUIRE_CITATION_STATEMENT
from danswer.prompts.constants import CODE_BLOCK_PAT
from danswer.prompts.constants import DEFAULT_IGNORE_STATEMENT
from danswer.prompts.constants import TRIPLE_BACKTICK
from danswer.prompts.direct_qa_prompts import LANGUAGE_HINT
from danswer.prompts.prompt_utils import build_complete_context_str
from danswer.prompts.prompt_utils import build_task_prompt_reminders
from danswer.prompts.prompt_utils import get_current_llm_day_time
from danswer.prompts.token_counts import ADDITIONAL_INFO_TOKEN_CNT
from danswer.prompts.token_counts import (
@@ -53,68 +51,6 @@ from danswer.utils.logger import setup_logger

logger = setup_logger()

# Maps connector enum string to a more natural language representation for the LLM
# If not on the list, uses the original but slightly cleaned up, see below
CONNECTOR_NAME_MAP = {
    "web": "Website",
    "requesttracker": "Request Tracker",
    "github": "GitHub",
    "file": "File Upload",
}


def clean_up_source(source_str: str) -> str:
    if source_str in CONNECTOR_NAME_MAP:
        return CONNECTOR_NAME_MAP[source_str]
    return source_str.replace("_", " ").title()


def build_doc_context_str(
    semantic_identifier: str,
    source_type: DocumentSource,
    content: str,
    metadata_dict: dict[str, str | list[str]],
    updated_at: datetime | None,
    ind: int,
    include_metadata: bool = True,
) -> str:
    context_str = ""
    if include_metadata:
        context_str += f"DOCUMENT {ind}: {semantic_identifier}\n"
        context_str += f"Source: {clean_up_source(source_type)}\n"

        for k, v in metadata_dict.items():
            if isinstance(v, list):
                v_str = ", ".join(v)
                context_str += f"{k.capitalize()}: {v_str}\n"
            else:
                context_str += f"{k.capitalize()}: {v}\n"

        if updated_at:
            update_str = updated_at.strftime("%B %d, %Y %H:%M")
            context_str += f"Updated: {update_str}\n"
    context_str += f"{CODE_BLOCK_PAT.format(content.strip())}\n\n\n"
    return context_str


def build_complete_context_str(
    context_docs: list[LlmDoc | InferenceChunk],
    include_metadata: bool = True,
) -> str:
    context_str = ""
    for ind, doc in enumerate(context_docs, start=1):
        context_str += build_doc_context_str(
            semantic_identifier=doc.semantic_identifier,
            source_type=doc.source_type,
            content=doc.content,
            metadata_dict=doc.metadata,
            updated_at=doc.updated_at,
            ind=ind,
            include_metadata=include_metadata,
        )

    return context_str.strip()


@lru_cache()
def build_chat_system_message(
@@ -147,18 +83,6 @@ def build_chat_system_message(
    return system_msg, token_count


def build_task_prompt_reminders(
    prompt: Prompt,
    use_language_hint: bool = bool(MULTILINGUAL_QUERY_EXPANSION),
    citation_str: str = CITATION_REMINDER,
    language_hint_str: str = LANGUAGE_HINT,
) -> str:
    base_task = prompt.task_prompt
    citation_or_nothing = citation_str if prompt.include_citations else ""
    language_hint_or_nothing = language_hint_str.lstrip() if use_language_hint else ""
    return base_task + citation_or_nothing + language_hint_or_nothing


def llm_doc_from_inference_chunk(inf_chunk: InferenceChunk) -> LlmDoc:
    return LlmDoc(
        document_id=inf_chunk.document_id,
@@ -172,7 +96,7 @@ def llm_doc_from_inference_chunk(inf_chunk: InferenceChunk) -> LlmDoc:


def map_document_id_order(
    chunks: list[InferenceChunk | LlmDoc], one_indexed: bool = True
    chunks: Sequence[InferenceChunk | LlmDoc], one_indexed: bool = True
) -> dict[str, int]:
    order_mapping = {}
    current = 1 if one_indexed else 0
@@ -568,6 +492,63 @@ def extract_citations_from_stream(
        yield DanswerAnswerPiece(answer_piece=curr_segment)


def reorganize_citations(
    answer: str, citations: list[CitationInfo]
) -> tuple[str, list[CitationInfo]]:
    """For a complete, citation-aware response, we want to reorganize the citations so that
    they are in the order of the documents that were used in the response. This just looks nicer / avoids
    confusion ("Why is there [7] when only 2 documents are cited?")."""

    # Regular expression to find all instances of [[x]](LINK)
    pattern = r"\[\[(.*?)\]\]\((.*?)\)"

    all_citation_matches = re.findall(pattern, answer)

    new_citation_info: dict[int, CitationInfo] = {}
    for citation_match in all_citation_matches:
        try:
            citation_num = int(citation_match[0])
            if citation_num in new_citation_info:
                continue

            matching_citation = next(
                iter([c for c in citations if c.citation_num == int(citation_num)]),
                None,
            )
            if matching_citation is None:
                continue

            new_citation_info[citation_num] = CitationInfo(
                citation_num=len(new_citation_info) + 1,
                document_id=matching_citation.document_id,
            )
        except Exception:
            pass

    # Function to replace citations with their new number
    def slack_link_format(match: re.Match) -> str:
        link_text = match.group(1)
        try:
            citation_num = int(link_text)
            if citation_num in new_citation_info:
                link_text = new_citation_info[citation_num].citation_num
        except Exception:
            pass

        link_url = match.group(2)
        return f"[[{link_text}]]({link_url})"

    # Substitute all matches in the input text
    new_answer = re.sub(pattern, slack_link_format, answer)

    # if any citations weren't parsable, just add them back to be safe
    for citation in citations:
        if citation.citation_num not in new_citation_info:
            new_citation_info[citation.citation_num] = citation

    return new_answer, list(new_citation_info.values())


def get_prompt_tokens(prompt: Prompt) -> int:
    # Note: currently custom prompts do not allow datetime aware, only default prompts
    return (
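A minimal usage sketch for the new reorganize_citations helper above. The answer text, URLs, and document ids are hypothetical; the imports simply mirror the module paths used elsewhere in this commit:

from danswer.chat.chat_utils import reorganize_citations
from danswer.chat.models import CitationInfo

answer = "See [[3]](https://a.example) and also [[7]](https://b.example)."
citations = [
    CitationInfo(citation_num=3, document_id="doc-a"),
    CitationInfo(citation_num=7, document_id="doc-b"),
]
new_answer, new_citations = reorganize_citations(answer, citations)
# new_answer == "See [[1]](https://a.example) and also [[2]](https://b.example)."
# new_citations keep the original document ids but are renumbered 1 and 2
# in the order they first appear in the answer.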
@@ -7,7 +7,6 @@ from sqlalchemy.orm import Session

from danswer.chat.chat_utils import build_chat_system_message
from danswer.chat.chat_utils import build_chat_user_message
from danswer.chat.chat_utils import build_doc_context_str
from danswer.chat.chat_utils import compute_max_document_tokens
from danswer.chat.chat_utils import compute_max_llm_input_tokens
from danswer.chat.chat_utils import create_chat_chain
@@ -51,6 +50,7 @@ from danswer.llm.utils import get_default_llm_version
from danswer.llm.utils import get_max_input_tokens
from danswer.llm.utils import tokenizer_trim_content
from danswer.llm.utils import translate_history_to_basemessages
from danswer.prompts.prompt_utils import build_doc_context_str
from danswer.search.models import OptionalSearchSetting
from danswer.search.models import RetrievalDetails
from danswer.search.request_preprocessing import retrieval_preprocessing
@@ -52,6 +52,8 @@ ENABLE_DANSWERBOT_REFLEXION = (
)
# Currently does not support chain of thought, will probably be added back later
DANSWER_BOT_DISABLE_COT = True
# if set, will default DanswerBot to use quotes and reference documents
DANSWER_BOT_USE_QUOTES = os.environ.get("DANSWER_BOT_USE_QUOTES", "").lower() == "true"

# Maximum Questions Per Minute, Default Uncapped
DANSWER_BOT_MAX_QPM = int(os.environ.get("DANSWER_BOT_MAX_QPM") or 0) or None
@@ -1,15 +1,20 @@
import re
from datetime import datetime
from re import Match

import pytz
import timeago  # type: ignore
from slack_sdk.models.blocks import ActionsBlock
from slack_sdk.models.blocks import Block
from slack_sdk.models.blocks import ButtonElement
from slack_sdk.models.blocks import ContextBlock
from slack_sdk.models.blocks import DividerBlock
from slack_sdk.models.blocks import HeaderBlock
from slack_sdk.models.blocks import Option
from slack_sdk.models.blocks import RadioButtonsElement
from slack_sdk.models.blocks import SectionBlock
from slack_sdk.models.blocks.basic_components import MarkdownTextObject
from slack_sdk.models.blocks.block_elements import ImageElement

from danswer.chat.models import DanswerQuote
from danswer.configs.app_configs import DISABLE_GENERATIVE_AI
@@ -22,6 +27,7 @@ from danswer.danswerbot.slack.constants import FOLLOWUP_BUTTON_ACTION_ID
from danswer.danswerbot.slack.constants import FOLLOWUP_BUTTON_RESOLVED_ACTION_ID
from danswer.danswerbot.slack.constants import IMMEDIATE_RESOLVED_BUTTON_ACTION_ID
from danswer.danswerbot.slack.constants import LIKE_BLOCK_ACTION_ID
from danswer.danswerbot.slack.icons import source_to_github_img_link
from danswer.danswerbot.slack.utils import build_feedback_id
from danswer.danswerbot.slack.utils import remove_slack_text_interactions
from danswer.danswerbot.slack.utils import translate_vespa_highlight_to_slack
@@ -29,7 +35,35 @@ from danswer.search.models import SavedSearchDoc
from danswer.utils.text_processing import decode_escapes
from danswer.utils.text_processing import replace_whitespaces_w_space

_MAX_BLURB_LEN = 75
_MAX_BLURB_LEN = 45


def _process_citations_for_slack(text: str) -> str:
    """
    Converts instances of [[x]](LINK) in the input text to Slack's link format <LINK|[x]>.

    Args:
    - text (str): The input string containing markdown links.

    Returns:
    - str: The string with markdown links converted to Slack format.
    """
    # Regular expression to find all instances of [[x]](LINK)
    pattern = r"\[\[(.*?)\]\]\((.*?)\)"

    # Function to replace each found instance with Slack's format
    def slack_link_format(match: Match) -> str:
        link_text = match.group(1)
        link_url = match.group(2)
        return f"<{link_url}|[{link_text}]>"

    # Substitute all matches in the input text
    return re.sub(pattern, slack_link_format, text)


def clean_markdown_link_text(text: str) -> str:
    # Remove any newlines within the text
    return text.replace("\n", " ").strip()


def build_qa_feedback_block(message_id: int) -> Block:
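A quick usage sketch for _process_citations_for_slack above (the text and URL are hypothetical; the import assumes the helper stays in danswer.danswerbot.slack.blocks):

from danswer.danswerbot.slack.blocks import _process_citations_for_slack

text = "DanswerBot runs in Slack [[1]](https://docs.example/slackbot)."
print(_process_citations_for_slack(text))
# -> "DanswerBot runs in Slack <https://docs.example/slackbot|[1]>."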
@@ -38,13 +72,12 @@ def build_qa_feedback_block(message_id: int) -> Block:
        elements=[
            ButtonElement(
                action_id=LIKE_BLOCK_ACTION_ID,
                text="👍",
                text="👍 Helpful",
                style="primary",
            ),
            ButtonElement(
                action_id=DISLIKE_BLOCK_ACTION_ID,
                text="👎",
                style="danger",
                text="👎 Not helpful",
            ),
        ],
    )
@@ -164,6 +197,80 @@ def build_documents_blocks(
    return section_blocks


def build_sources_blocks(
    cited_documents: list[tuple[int, SavedSearchDoc]],
    num_docs_to_display: int = DANSWER_BOT_NUM_DOCS_TO_DISPLAY,
) -> list[Block]:
    if not cited_documents:
        return [
            SectionBlock(
                text="*Warning*: no sources were cited for this answer, so it may be unreliable 😔"
            )
        ]

    seen_docs_identifiers = set()
    section_blocks: list[Block] = [SectionBlock(text="*Sources:*")]
    included_docs = 0
    for citation_num, d in cited_documents:
        if d.document_id in seen_docs_identifiers:
            continue
        seen_docs_identifiers.add(d.document_id)

        doc_sem_id = d.semantic_identifier
        if d.source_type == DocumentSource.SLACK.value:
            # for legacy reasons: documents indexed before the switch in how Slack semantic identifiers are constructed
            if "#" not in doc_sem_id:
                doc_sem_id = "#" + doc_sem_id

        # this is needed to try and prevent the line from overflowing
        # if it does overflow, the image gets placed above the title and it
        # looks bad
        doc_sem_id = (
            doc_sem_id[:_MAX_BLURB_LEN] + "..."
            if len(doc_sem_id) > _MAX_BLURB_LEN
            else doc_sem_id
        )

        owner_str = f"By {d.primary_owners[0]}" if d.primary_owners else None
        days_ago_str = (
            timeago.format(d.updated_at, datetime.now(pytz.utc))
            if d.updated_at
            else None
        )
        final_metadata_str = " | ".join(
            ([owner_str] if owner_str else [])
            + ([days_ago_str] if days_ago_str else [])
        )

        document_title = clean_markdown_link_text(doc_sem_id)
        img_link = source_to_github_img_link(d.source_type)

        section_blocks.append(
            ContextBlock(
                elements=(
                    [
                        ImageElement(
                            image_url=img_link,
                            alt_text=f"{d.source_type.value} logo",
                        )
                    ]
                    if img_link
                    else []
                )
                + [
                    MarkdownTextObject(
                        text=f"*<{d.link}|[{citation_num}] {document_title}>*\n{final_metadata_str}"
                    ),
                ]
            )
        )
        included_docs += 1  # count docs actually added so the display cap below is enforced

        if included_docs >= num_docs_to_display:
            break

    return section_blocks


def build_quotes_block(
    quotes: list[DanswerQuote],
) -> list[Block]:
@@ -214,6 +321,7 @@ def build_qa_response_blocks(
    time_cutoff: datetime | None,
    favor_recent: bool,
    skip_quotes: bool = False,
    process_message_for_citations: bool = False,
    skip_ai_feedback: bool = False,
) -> list[Block]:
    if DISABLE_GENERATIVE_AI:
@@ -221,8 +329,6 @@ def build_qa_response_blocks(

    quotes_blocks: list[Block] = []

    ai_answer_header = HeaderBlock(text="AI Answer")

    filter_block: Block | None = None
    if time_cutoff or favor_recent or source_filters:
        filter_text = "Filters: "
@@ -247,6 +353,8 @@ def build_qa_response_blocks(
        )
    else:
        answer_processed = decode_escapes(remove_slack_text_interactions(answer))
        if process_message_for_citations:
            answer_processed = _process_citations_for_slack(answer_processed)
        answer_block = SectionBlock(text=answer_processed)
    if quotes:
        quotes_blocks = build_quotes_block(quotes)
@@ -259,7 +367,7 @@ def build_qa_response_blocks(
            )
        ]

    response_blocks: list[Block] = [ai_answer_header]
    response_blocks: list[Block] = []

    if filter_block is not None:
        response_blocks.append(filter_block)
@@ -271,7 +379,6 @@ def build_qa_response_blocks(

    if not skip_quotes:
        response_blocks.extend(quotes_blocks)
    response_blocks.append(DividerBlock())

    return response_blocks
@@ -9,6 +9,7 @@ from typing import TypeVar
from retry import retry
from slack_sdk import WebClient
from slack_sdk.errors import SlackApiError
from slack_sdk.models.blocks import DividerBlock
from sqlalchemy.orm import Session

from danswer.chat.chat_utils import compute_max_document_tokens
@@ -18,12 +19,14 @@ from danswer.configs.danswerbot_configs import DANSWER_BOT_DISABLE_DOCS_ONLY_ANS
from danswer.configs.danswerbot_configs import DANSWER_BOT_DISPLAY_ERROR_MSGS
from danswer.configs.danswerbot_configs import DANSWER_BOT_NUM_RETRIES
from danswer.configs.danswerbot_configs import DANSWER_BOT_TARGET_CHUNK_PERCENTAGE
from danswer.configs.danswerbot_configs import DANSWER_BOT_USE_QUOTES
from danswer.configs.danswerbot_configs import DANSWER_REACT_EMOJI
from danswer.configs.danswerbot_configs import DISABLE_DANSWER_BOT_FILTER_DETECT
from danswer.configs.danswerbot_configs import ENABLE_DANSWERBOT_REFLEXION
from danswer.danswerbot.slack.blocks import build_documents_blocks
from danswer.danswerbot.slack.blocks import build_follow_up_block
from danswer.danswerbot.slack.blocks import build_qa_response_blocks
from danswer.danswerbot.slack.blocks import build_sources_blocks
from danswer.danswerbot.slack.blocks import get_restate_blocks
from danswer.danswerbot.slack.constants import SLACK_CHANNEL_ID
from danswer.danswerbot.slack.models import SlackMessageInfo
@@ -35,6 +38,7 @@ from danswer.danswerbot.slack.utils import SlackRateLimiter
from danswer.danswerbot.slack.utils import update_emote_react
from danswer.db.engine import get_sqlalchemy_engine
from danswer.db.models import SlackBotConfig
from danswer.db.models import SlackBotResponseType
from danswer.llm.utils import check_number_of_tokens
from danswer.llm.utils import get_default_llm_version
from danswer.llm.utils import get_max_input_tokens
@@ -137,6 +141,13 @@ def handle_message(

    should_respond_even_with_no_docs = persona.num_chunks == 0 if persona else False

    # figure out if we want to use citations or quotes
    use_citations = (
        not DANSWER_BOT_USE_QUOTES
        if channel_config is None
        else channel_config.response_type == SlackBotResponseType.CITATIONS
    )

    # List of user id to send message to, if None, send to everyone in channel
    send_to: list[str] | None = None
    respond_tag_only = False
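The decision above can be restated as a small standalone helper; the names and the plain-string values below are illustrative only and mirror the logic in the diff:

def choose_use_citations(channel_response_type: str | None, env_use_quotes: bool) -> bool:
    # No per-channel config: fall back to the DANSWER_BOT_USE_QUOTES environment default.
    if channel_response_type is None:
        return not env_use_quotes
    # A per-channel config always wins over the environment default.
    return channel_response_type == "citations"

assert choose_use_citations(None, env_use_quotes=False) is True
assert choose_use_citations(None, env_use_quotes=True) is False
assert choose_use_citations("quotes", env_use_quotes=False) is False
assert choose_use_citations("citations", env_use_quotes=True) is True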
@@ -259,6 +270,7 @@ def handle_message(
            answer_generation_timeout=answer_generation_timeout,
            enable_reflexion=reflexion,
            bypass_acl=bypass_acl,
            use_citations=use_citations,
        )
        if not answer.error_msg:
            return answer
@@ -387,7 +399,10 @@ def handle_message(
        source_filters=retrieval_info.applied_source_filters,
        time_cutoff=retrieval_info.applied_time_cutoff,
        favor_recent=retrieval_info.recency_bias_multiplier > 1,
        skip_quotes=persona is not None,  # currently Personas don't support quotes
        # currently Personas don't support quotes
        # if citations are enabled, also don't use quotes
        skip_quotes=persona is not None or use_citations,
        process_message_for_citations=use_citations,
    )

    # Get the chunks fed to the LLM only, then fill with other docs
@@ -397,16 +412,33 @@ def handle_message(
        doc for idx, doc in enumerate(top_docs) if idx not in llm_doc_inds
    ]
    priority_ordered_docs = llm_docs + remaining_docs
    document_blocks = (
        build_documents_blocks(

    document_blocks = []
    citations_block = []
    # if citations are enabled, only show cited documents
    if use_citations:
        citations = answer.citations or []
        cited_docs = []
        for citation in citations:
            matching_doc = next(
                (d for d in top_docs if d.document_id == citation.document_id),
                None,
            )
            if matching_doc:
                cited_docs.append((citation.citation_num, matching_doc))

        cited_docs.sort()
        citations_block = build_sources_blocks(cited_documents=cited_docs)
    elif priority_ordered_docs:
        document_blocks = build_documents_blocks(
            documents=priority_ordered_docs,
            message_id=answer.chat_message_id,
        )
        if priority_ordered_docs
        else []
    )
        document_blocks = [DividerBlock()] + document_blocks

    all_blocks = restate_question_block + answer_blocks + document_blocks
    all_blocks = (
        restate_question_block + answer_blocks + citations_block + document_blocks
    )

    if channel_conf and channel_conf.get("follow_up_tags") is not None:
        all_blocks.append(build_follow_up_block(message_id=answer.chat_message_id))
backend/danswer/danswerbot/slack/icons.py (new file, 58 lines)
@@ -0,0 +1,58 @@
from danswer.configs.constants import DocumentSource


def source_to_github_img_link(source: DocumentSource) -> str | None:
    # TODO: store these images somewhere better
    if source == DocumentSource.WEB.value:
        return "https://raw.githubusercontent.com/danswer-ai/danswer/improve-slack-flow/backend/slackbot_images/Web.png"
    if source == DocumentSource.FILE.value:
        return "https://raw.githubusercontent.com/danswer-ai/danswer/improve-slack-flow/backend/slackbot_images/File.png"
    if source == DocumentSource.GOOGLE_SITES.value:
        return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/GoogleSites.png"
    if source == DocumentSource.SLACK.value:
        return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Slack.png"
    if source == DocumentSource.GMAIL.value:
        return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Gmail.png"
    if source == DocumentSource.GOOGLE_DRIVE.value:
        return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/GoogleDrive.png"
    if source == DocumentSource.GITHUB.value:
        return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Github.png"
    if source == DocumentSource.GITLAB.value:
        return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Gitlab.png"
    if source == DocumentSource.CONFLUENCE.value:
        return "https://raw.githubusercontent.com/danswer-ai/danswer/improve-slack-flow/backend/slackbot_images/Confluence.png"
    if source == DocumentSource.JIRA.value:
        return "https://raw.githubusercontent.com/danswer-ai/danswer/improve-slack-flow/backend/slackbot_images/Jira.png"
    if source == DocumentSource.NOTION.value:
        return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Notion.png"
    if source == DocumentSource.ZENDESK.value:
        return "https://raw.githubusercontent.com/danswer-ai/danswer/improve-slack-flow/backend/slackbot_images/Zendesk.png"
    if source == DocumentSource.GONG.value:
        return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Gong.png"
    if source == DocumentSource.LINEAR.value:
        return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Linear.png"
    if source == DocumentSource.PRODUCTBOARD.value:
        return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Productboard.webp"
    if source == DocumentSource.SLAB.value:
        return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/SlabLogo.png"
    if source == DocumentSource.ZULIP.value:
        return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Zulip.png"
    if source == DocumentSource.GURU.value:
        return "https://raw.githubusercontent.com/danswer-ai/danswer/improve-slack-flow/backend/slackbot_images/Guru.png"
    if source == DocumentSource.HUBSPOT.value:
        return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/HubSpot.png"
    if source == DocumentSource.DOCUMENT360.value:
        return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Document360.png"
    if source == DocumentSource.BOOKSTACK.value:
        return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Bookstack.png"
    if source == DocumentSource.LOOPIO.value:
        return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Loopio.png"
    if source == DocumentSource.SHAREPOINT.value:
        return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Sharepoint.png"
    if source == DocumentSource.REQUESTTRACKER.value:
        # just use file icon for now
        return "https://raw.githubusercontent.com/danswer-ai/danswer/improve-slack-flow/backend/slackbot_images/File.png"
    if source == DocumentSource.INGESTION_API.value:
        return "https://raw.githubusercontent.com/danswer-ai/danswer/improve-slack-flow/backend/slackbot_images/File.png"

    return "https://raw.githubusercontent.com/danswer-ai/danswer/improve-slack-flow/backend/slackbot_images/File.png"
@@ -346,8 +346,12 @@ def read_slack_thread(
        if len(blocks) <= 1:
            continue

        # The useful block is the second one after the header block that says AI Answer
        # For the old flow, the useful block is the second one after the header block that says AI Answer
        if reply["blocks"][0]["text"]["text"] == "AI Answer":
            message = reply["blocks"][1]["text"]["text"]
        else:
            # for the new flow, the answer is the first block
            message = reply["blocks"][0]["text"]["text"]

        if message.startswith("_Filters"):
            if len(blocks) <= 2:
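The branch in read_slack_thread above distinguishes two reply layouts. A simplified, hypothetical pair of payloads, shaped only to match the dictionary access used in that function, illustrates the difference:

old_flow_reply = {
    "blocks": [
        {"text": {"text": "AI Answer"}},            # header block used by the old flow
        {"text": {"text": "The answer text ..."}},  # useful block comes second
    ]
}
new_flow_reply = {
    "blocks": [
        {"text": {"text": "The answer text ..."}},  # answer is now the first block
    ]
}

def extract_answer(reply: dict) -> str:
    if reply["blocks"][0]["text"]["text"] == "AI Answer":
        return reply["blocks"][1]["text"]["text"]
    return reply["blocks"][0]["text"]["text"]

assert extract_answer(old_flow_reply) == extract_answer(new_flow_reply)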
@@ -811,6 +811,11 @@ class ChannelConfig(TypedDict):
    follow_up_tags: NotRequired[list[str]]


class SlackBotResponseType(str, PyEnum):
    QUOTES = "quotes"
    CITATIONS = "citations"


class SlackBotConfig(Base):
    __tablename__ = "slack_bot_config"

@@ -822,6 +827,9 @@ class SlackBotConfig(Base):
    channel_config: Mapped[ChannelConfig] = mapped_column(
        postgresql.JSONB(), nullable=False
    )
    response_type: Mapped[SlackBotResponseType] = mapped_column(
        Enum(SlackBotResponseType, native_enum=False), nullable=False
    )

    persona: Mapped[Persona | None] = relationship("Persona")
@@ -11,6 +11,7 @@ from danswer.db.models import ChannelConfig
from danswer.db.models import Persona
from danswer.db.models import Persona__DocumentSet
from danswer.db.models import SlackBotConfig
from danswer.db.models import SlackBotResponseType
from danswer.search.models import RecencyBiasSetting


@@ -72,11 +73,13 @@ def create_slack_bot_persona(
def insert_slack_bot_config(
    persona_id: int | None,
    channel_config: ChannelConfig,
    response_type: SlackBotResponseType,
    db_session: Session,
) -> SlackBotConfig:
    slack_bot_config = SlackBotConfig(
        persona_id=persona_id,
        channel_config=channel_config,
        response_type=response_type,
    )
    db_session.add(slack_bot_config)
    db_session.commit()
@@ -88,6 +91,7 @@ def update_slack_bot_config(
    slack_bot_config_id: int,
    persona_id: int | None,
    channel_config: ChannelConfig,
    response_type: SlackBotResponseType,
    db_session: Session,
) -> SlackBotConfig:
    slack_bot_config = db_session.scalar(
@@ -105,6 +109,7 @@ def update_slack_bot_config(
    # will encounter `violates foreign key constraint` errors
    slack_bot_config.persona_id = persona_id
    slack_bot_config.channel_config = channel_config
    slack_bot_config.response_type = response_type

    # if the persona has changed, then clean up the old persona
    if persona_id != existing_persona_id and existing_persona_id:
@@ -3,10 +3,18 @@ from collections.abc import Callable
from collections.abc import Iterator
from typing import cast

from langchain.schema.messages import BaseMessage
from langchain.schema.messages import HumanMessage
from sqlalchemy.orm import Session

from danswer.chat.chat_utils import build_chat_system_message
from danswer.chat.chat_utils import compute_max_document_tokens
from danswer.chat.chat_utils import extract_citations_from_stream
from danswer.chat.chat_utils import get_chunks_for_qa
from danswer.chat.chat_utils import llm_doc_from_inference_chunk
from danswer.chat.chat_utils import map_document_id_order
from danswer.chat.chat_utils import reorganize_citations
from danswer.chat.models import CitationInfo
from danswer.chat.models import DanswerAnswerPiece
from danswer.chat.models import DanswerContext
from danswer.chat.models import DanswerContexts
@@ -26,16 +34,23 @@ from danswer.db.chat import get_persona_by_id
from danswer.db.chat import get_prompt_by_id
from danswer.db.chat import translate_db_message_to_chat_message_detail
from danswer.db.embedding_model import get_current_db_embedding_model
from danswer.db.models import Prompt
from danswer.db.models import User
from danswer.document_index.factory import get_default_document_index
from danswer.indexing.models import InferenceChunk
from danswer.llm.factory import get_default_llm
from danswer.llm.utils import get_default_llm_token_encode
from danswer.llm.utils import get_default_llm_tokenizer
from danswer.one_shot_answer.factory import get_question_answer_model
from danswer.one_shot_answer.models import DirectQARequest
from danswer.one_shot_answer.models import OneShotQAResponse
from danswer.one_shot_answer.models import QueryRephrase
from danswer.one_shot_answer.models import ThreadMessage
from danswer.one_shot_answer.qa_block import no_gen_ai_response
from danswer.one_shot_answer.qa_utils import combine_message_thread
from danswer.prompts.direct_qa_prompts import CITATIONS_PROMPT
from danswer.prompts.prompt_utils import build_complete_context_str
from danswer.prompts.prompt_utils import build_task_prompt_reminders
from danswer.search.models import RerankMetricsContainer
from danswer.search.models import RetrievalMetricsContainer
from danswer.search.models import SavedSearchDoc
@@ -51,6 +66,118 @@ from danswer.utils.timing import log_generator_function_time

logger = setup_logger()

AnswerObjectIterator = Iterator[
    QueryRephrase
    | QADocsResponse
    | LLMRelevanceFilterResponse
    | DanswerAnswerPiece
    | DanswerQuotes
    | DanswerContexts
    | StreamingError
    | ChatMessageDetail
    | CitationInfo
]


def quote_based_qa(
    prompt: Prompt,
    query_message: ThreadMessage,
    history_str: str,
    context_chunks: list[InferenceChunk],
    llm_override: str | None,
    timeout: int,
    use_chain_of_thought: bool,
    return_contexts: bool,
    llm_metrics_callback: Callable[[LLMMetricsContainer], None] | None = None,
) -> AnswerObjectIterator:
    qa_model = get_question_answer_model(
        prompt=prompt,
        timeout=timeout,
        chain_of_thought=use_chain_of_thought,
        llm_version=llm_override,
    )

    full_prompt_str = (
        qa_model.build_prompt(
            query=query_message.message,
            history_str=history_str,
            context_chunks=context_chunks,
        )
        if qa_model is not None
        else "Gen AI Disabled"
    )

    response_packets = (
        qa_model.answer_question_stream(
            prompt=full_prompt_str,
            llm_context_docs=context_chunks,
            metrics_callback=llm_metrics_callback,
        )
        if qa_model is not None
        else no_gen_ai_response()
    )

    if qa_model is not None and return_contexts:
        contexts = DanswerContexts(
            contexts=[
                DanswerContext(
                    content=context_chunk.content,
                    document_id=context_chunk.document_id,
                    semantic_identifier=context_chunk.semantic_identifier,
                    blurb=context_chunk.semantic_identifier,
                )
                for context_chunk in context_chunks
            ]
        )

        response_packets = itertools.chain(response_packets, [contexts])

    yield from response_packets


def citation_based_qa(
    prompt: Prompt,
    query_message: ThreadMessage,
    history_str: str,
    context_chunks: list[InferenceChunk],
    llm_override: str | None,
    timeout: int,
) -> AnswerObjectIterator:
    llm_tokenizer = get_default_llm_tokenizer()

    system_prompt_or_none, _ = build_chat_system_message(
        prompt=prompt,
        context_exists=True,
        llm_tokenizer_encode_func=llm_tokenizer.encode,
    )

    task_prompt_with_reminder = build_task_prompt_reminders(prompt)

    context_docs_str = build_complete_context_str(context_chunks)
    user_message = HumanMessage(
        content=CITATIONS_PROMPT.format(
            task_prompt=task_prompt_with_reminder,
            user_query=query_message.message,
            history_block=history_str,
            context_docs_str=context_docs_str,
        )
    )

    llm = get_default_llm(
        timeout=timeout,
        gen_ai_model_version_override=llm_override,
    )

    llm_prompt: list[BaseMessage] = [user_message]
    if system_prompt_or_none is not None:
        llm_prompt = [system_prompt_or_none] + llm_prompt

    llm_docs = [llm_doc_from_inference_chunk(chunk) for chunk in context_chunks]
    doc_id_to_rank_map = map_document_id_order(llm_docs)

    tokens = llm.stream(llm_prompt)
    yield from extract_citations_from_stream(tokens, llm_docs, doc_id_to_rank_map)


def stream_answer_objects(
    query_req: DirectQARequest,
@@ -66,20 +193,12 @@ def stream_answer_objects(
    default_chunk_size: int = DOC_EMBEDDING_CONTEXT_SIZE,
    timeout: int = QA_TIMEOUT,
    bypass_acl: bool = False,
    use_citations: bool = False,
    retrieval_metrics_callback: Callable[[RetrievalMetricsContainer], None]
    | None = None,
    rerank_metrics_callback: Callable[[RerankMetricsContainer], None] | None = None,
    llm_metrics_callback: Callable[[LLMMetricsContainer], None] | None = None,
) -> Iterator[
    QueryRephrase
    | QADocsResponse
    | LLMRelevanceFilterResponse
    | DanswerAnswerPiece
    | DanswerQuotes
    | DanswerContexts
    | StreamingError
    | ChatMessageDetail
]:
) -> AnswerObjectIterator:
    """Streams in order:
    1. [always] Retrieved documents, stops flow if nothing is found
    2. [conditional] LLM selected chunk indices if LLM chunk filtering is turned on
@@ -216,63 +335,51 @@ def stream_answer_objects(
        persona_id=query_req.persona_id, user_id=user_id, db_session=db_session
    )
    llm_override = persona.llm_model_version_override

    qa_model = get_question_answer_model(
        prompt=prompt,
        timeout=timeout,
        chain_of_thought=query_req.chain_of_thought,
        llm_version=llm_override,
    )

    full_prompt_str = (
        qa_model.build_prompt(
            query=query_msg.message, history_str=history_str, context_chunks=llm_chunks
        )
        if qa_model is not None
        else "Gen AI Disabled"
    if prompt is None:
        if not chat_session.persona.prompts:
            raise RuntimeError(
                "Persona does not have any prompts - this should never happen"
            )
        prompt = chat_session.persona.prompts[0]

    # Create the first User query message
    new_user_message = create_new_chat_message(
        chat_session_id=chat_session.id,
        parent_message=root_message,
        prompt_id=query_req.prompt_id,
        message=full_prompt_str,
        token_count=len(llm_tokenizer(full_prompt_str)),
        message=query_msg.message,
        token_count=len(llm_tokenizer(query_msg.message)),
        message_type=MessageType.USER,
        db_session=db_session,
        commit=True,
    )

    response_packets = (
        qa_model.answer_question_stream(
            prompt=full_prompt_str,
            llm_context_docs=llm_chunks,
            metrics_callback=llm_metrics_callback,
    if use_citations:
        qa_stream = citation_based_qa(
            prompt=prompt,
            query_message=query_msg,
            history_str=history_str,
            context_chunks=llm_chunks,
            llm_override=llm_override,
            timeout=timeout,
        )
        if qa_model is not None
        else no_gen_ai_response()
    else:
        qa_stream = quote_based_qa(
            prompt=prompt,
            query_message=query_msg,
            history_str=history_str,
            context_chunks=llm_chunks,
            llm_override=llm_override,
            timeout=timeout,
            use_chain_of_thought=False,
            return_contexts=False,
            llm_metrics_callback=llm_metrics_callback,
        )

    if qa_model is not None and query_req.return_contexts:
        contexts = DanswerContexts(
            contexts=[
                DanswerContext(
                    content=context_doc.content,
                    document_id=context_doc.document_id,
                    semantic_identifier=context_doc.semantic_identifier,
                    blurb=context_doc.semantic_identifier,
                )
                for context_doc in llm_chunks
            ]
        )

        response_packets = itertools.chain(response_packets, [contexts])

    # Capture outputs and errors
    llm_output = ""
    error: str | None = None
    for packet in response_packets:
    for packet in qa_stream:
        logger.debug(packet)

        if isinstance(packet, DanswerAnswerPiece):
@@ -333,6 +440,7 @@ def get_search_answer(
    answer_generation_timeout: int = QA_TIMEOUT,
    enable_reflexion: bool = False,
    bypass_acl: bool = False,
    use_citations: bool = False,
    retrieval_metrics_callback: Callable[[RetrievalMetricsContainer], None]
    | None = None,
    rerank_metrics_callback: Callable[[RerankMetricsContainer], None] | None = None,
@@ -348,6 +456,7 @@ def get_search_answer(
        max_history_tokens=max_history_tokens,
        db_session=db_session,
        bypass_acl=bypass_acl,
        use_citations=use_citations,
        timeout=answer_generation_timeout,
        retrieval_metrics_callback=retrieval_metrics_callback,
        rerank_metrics_callback=rerank_metrics_callback,
@@ -366,6 +475,11 @@ def get_search_answer(
            qa_response.llm_chunks_indices = packet.relevant_chunk_indices
        elif isinstance(packet, DanswerQuotes):
            qa_response.quotes = packet
        elif isinstance(packet, CitationInfo):
            if qa_response.citations:
                qa_response.citations.append(packet)
            else:
                qa_response.citations = [packet]
        elif isinstance(packet, DanswerContexts):
            qa_response.contexts = packet
        elif isinstance(packet, StreamingError):
@@ -384,4 +498,10 @@ def get_search_answer(
    else:
        qa_response.answer_valid = True

    if use_citations and qa_response.answer and qa_response.citations:
        # Reorganize citation nums to be in the same order as the answer
        qa_response.answer, qa_response.citations = reorganize_citations(
            qa_response.answer, qa_response.citations
        )

    return qa_response
@@ -4,6 +4,7 @@ from pydantic import BaseModel
from pydantic import Field
from pydantic import root_validator

from danswer.chat.models import CitationInfo
from danswer.chat.models import DanswerContexts
from danswer.chat.models import DanswerQuotes
from danswer.chat.models import QADocsResponse
@@ -51,6 +52,7 @@ class OneShotQAResponse(BaseModel):
    answer: str | None = None
    rephrase: str | None = None
    quotes: DanswerQuotes | None = None
    citations: list[CitationInfo] | None = None
    docs: QADocsResponse | None = None
    llm_chunks_indices: list[int] | None = None
    error_msg: str | None = None
@@ -4,7 +4,6 @@ from collections.abc import Callable
from collections.abc import Iterator
from typing import cast

from danswer.chat.chat_utils import build_complete_context_str
from danswer.chat.models import AnswerQuestionStreamReturn
from danswer.chat.models import DanswerAnswer
from danswer.chat.models import DanswerAnswerPiece
@@ -33,6 +32,7 @@ from danswer.prompts.direct_qa_prompts import PARAMATERIZED_PROMPT_WITHOUT_CONTE
from danswer.prompts.direct_qa_prompts import WEAK_LLM_PROMPT
from danswer.prompts.direct_qa_prompts import WEAK_MODEL_SYSTEM_PROMPT
from danswer.prompts.direct_qa_prompts import WEAK_MODEL_TASK_PROMPT
from danswer.prompts.prompt_utils import build_complete_context_str
from danswer.utils.logger import setup_logger
from danswer.utils.text_processing import clean_up_code_blocks
from danswer.utils.text_processing import escape_newlines
@@ -17,8 +17,6 @@ Remember to provide inline citations in the format [1], [2], [3], etc.
ADDITIONAL_INFO = "\n\nAdditional Information:\n\t- {datetime_info}."


DEFAULT_IGNORE_STATEMENT = " Ignore any context documents that are not relevant."

CHAT_USER_PROMPT = f"""
Refer to the following context documents when responding to me.{{optional_ignore_statement}}
CONTEXT:
@@ -12,3 +12,18 @@ QUOTE_PAT = "Quote:"
QUOTES_PAT_PLURAL = "Quotes:"
INVALID_PAT = "Invalid:"
SOURCES_KEY = "sources"

DEFAULT_IGNORE_STATEMENT = " Ignore any context documents that are not relevant."

REQUIRE_CITATION_STATEMENT = """
Cite relevant statements INLINE using the format [1], [2], [3], etc to reference the document number, \
DO NOT provide a reference section at the end and DO NOT provide any links following the citations.
""".rstrip()

NO_CITATION_STATEMENT = """
Do not provide any citations even if there are examples in the chat history.
""".rstrip()

CITATION_REMINDER = """
Remember to provide inline citations in the format [1], [2], [3], etc.
"""
@@ -2,6 +2,7 @@
# It is used also for the one shot direct QA flow
import json

from danswer.prompts.constants import DEFAULT_IGNORE_STATEMENT
from danswer.prompts.constants import FINAL_QUERY_PAT
from danswer.prompts.constants import GENERAL_SEP_PAT
from danswer.prompts.constants import QUESTION_PAT
@@ -96,6 +97,22 @@ SAMPLE RESPONSE:
""".strip()


# similar to the chat flow, but with the option of including a
# "conversation history" block
CITATIONS_PROMPT = f"""
Refer to the following context documents when responding to me.{DEFAULT_IGNORE_STATEMENT}
CONTEXT:
{GENERAL_SEP_PAT}
{{context_docs_str}}
{GENERAL_SEP_PAT}

{{history_block}}{{task_prompt}}

{QUESTION_PAT.upper()}
{{user_query}}
"""


# For weak LLM which only takes one chunk and cannot output json
# Also not requiring quotes as it tends to not work
WEAK_LLM_PROMPT = f"""
@@ -1,5 +1,15 @@
from collections.abc import Sequence
from datetime import datetime

from danswer.chat.models import LlmDoc
from danswer.configs.chat_configs import MULTILINGUAL_QUERY_EXPANSION
from danswer.configs.constants import DocumentSource
from danswer.db.models import Prompt
from danswer.indexing.models import InferenceChunk
from danswer.prompts.chat_prompts import CITATION_REMINDER
from danswer.prompts.constants import CODE_BLOCK_PAT
from danswer.prompts.direct_qa_prompts import LANGUAGE_HINT


def get_current_llm_day_time() -> str:
    current_datetime = datetime.now()
@@ -7,3 +17,78 @@ def get_current_llm_day_time() -> str:
    formatted_datetime = current_datetime.strftime("%B %d, %Y %H:%M")
    day_of_week = current_datetime.strftime("%A")
    return f"The current day and time is {day_of_week} {formatted_datetime}"


def build_task_prompt_reminders(
    prompt: Prompt,
    use_language_hint: bool = bool(MULTILINGUAL_QUERY_EXPANSION),
    citation_str: str = CITATION_REMINDER,
    language_hint_str: str = LANGUAGE_HINT,
) -> str:
    base_task = prompt.task_prompt
    citation_or_nothing = citation_str if prompt.include_citations else ""
    language_hint_or_nothing = language_hint_str.lstrip() if use_language_hint else ""
    return base_task + citation_or_nothing + language_hint_or_nothing


# Maps connector enum string to a more natural language representation for the LLM
# If not on the list, uses the original but slightly cleaned up, see below
CONNECTOR_NAME_MAP = {
    "web": "Website",
    "requesttracker": "Request Tracker",
    "github": "GitHub",
    "file": "File Upload",
}


def clean_up_source(source_str: str) -> str:
    if source_str in CONNECTOR_NAME_MAP:
        return CONNECTOR_NAME_MAP[source_str]
    return source_str.replace("_", " ").title()


def build_doc_context_str(
    semantic_identifier: str,
    source_type: DocumentSource,
    content: str,
    metadata_dict: dict[str, str | list[str]],
    updated_at: datetime | None,
    ind: int,
    include_metadata: bool = True,
) -> str:
    context_str = ""
    if include_metadata:
        context_str += f"DOCUMENT {ind}: {semantic_identifier}\n"
        context_str += f"Source: {clean_up_source(source_type)}\n"

        for k, v in metadata_dict.items():
            if isinstance(v, list):
                v_str = ", ".join(v)
                context_str += f"{k.capitalize()}: {v_str}\n"
            else:
                context_str += f"{k.capitalize()}: {v}\n"

        if updated_at:
            update_str = updated_at.strftime("%B %d, %Y %H:%M")
            context_str += f"Updated: {update_str}\n"
    context_str += f"{CODE_BLOCK_PAT.format(content.strip())}\n\n\n"
    return context_str


def build_complete_context_str(
    context_docs: Sequence[LlmDoc | InferenceChunk],
    include_metadata: bool = True,
) -> str:
    context_str = ""
    for ind, doc in enumerate(context_docs, start=1):
        context_str += build_doc_context_str(
            semantic_identifier=doc.semantic_identifier,
            source_type=doc.source_type,
            content=doc.content,
            metadata_dict=doc.metadata,
            updated_at=doc.updated_at,
            ind=ind,
            include_metadata=include_metadata,
        )

    return context_str.strip()
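A small usage sketch for clean_up_source above (the connector strings are illustrative; the import assumes the helper's new home in danswer.prompts.prompt_utils):

from danswer.prompts.prompt_utils import clean_up_source

clean_up_source("requesttracker")  # -> "Request Tracker" (explicitly mapped)
clean_up_source("google_drive")    # -> "Google Drive" (fallback: underscores to spaces, title case)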
@@ -2,8 +2,8 @@ from danswer.llm.utils import check_number_of_tokens
from danswer.prompts.chat_prompts import ADDITIONAL_INFO
from danswer.prompts.chat_prompts import CHAT_USER_PROMPT
from danswer.prompts.chat_prompts import CITATION_REMINDER
from danswer.prompts.chat_prompts import DEFAULT_IGNORE_STATEMENT
from danswer.prompts.chat_prompts import REQUIRE_CITATION_STATEMENT
from danswer.prompts.constants import DEFAULT_IGNORE_STATEMENT
from danswer.prompts.direct_qa_prompts import LANGUAGE_HINT
from danswer.prompts.prompt_utils import get_current_llm_day_time
@@ -5,12 +5,12 @@ from fastapi import Query
from sqlalchemy.orm import Session

from danswer.auth.users import current_user
from danswer.chat.chat_utils import build_doc_context_str
from danswer.db.embedding_model import get_current_db_embedding_model
from danswer.db.engine import get_session
from danswer.db.models import User
from danswer.document_index.factory import get_default_document_index
from danswer.llm.utils import get_default_llm_token_encode
from danswer.prompts.prompt_utils import build_doc_context_str
from danswer.search.access_filters import build_access_filters_for_user
from danswer.search.models import IndexFilters
from danswer.server.documents.models import ChunkInfo
@@ -9,6 +9,8 @@ from danswer.configs.constants import AuthType
from danswer.danswerbot.slack.config import VALID_SLACK_FILTERS
from danswer.db.models import AllowedAnswerFilters
from danswer.db.models import ChannelConfig
from danswer.db.models import SlackBotConfig as SlackBotConfigModel
from danswer.db.models import SlackBotResponseType
from danswer.server.features.persona.models import PersonaSnapshot


@@ -81,6 +83,7 @@ class SlackBotConfigCreationRequest(BaseModel):
    answer_filters: list[AllowedAnswerFilters] = []
    # list of user emails
    follow_up_tags: list[str] | None = None
    response_type: SlackBotResponseType

    @validator("answer_filters", pre=True)
    def validate_filters(cls, value: list[str]) -> list[str]:
@@ -104,6 +107,22 @@ class SlackBotConfig(BaseModel):
    id: int
    persona: PersonaSnapshot | None
    channel_config: ChannelConfig
    response_type: SlackBotResponseType

    @classmethod
    def from_model(
        cls, slack_bot_config_model: SlackBotConfigModel
    ) -> "SlackBotConfig":
        return cls(
            id=slack_bot_config_model.id,
            persona=(
                PersonaSnapshot.from_model(slack_bot_config_model.persona)
                if slack_bot_config_model.persona
                else None
            ),
            channel_config=slack_bot_config_model.channel_config,
            response_type=slack_bot_config_model.response_type,
        )


class ModelVersionResponse(BaseModel):
@@ -19,7 +19,6 @@ from danswer.db.slack_bot_config import insert_slack_bot_config
from danswer.db.slack_bot_config import remove_slack_bot_config
from danswer.db.slack_bot_config import update_slack_bot_config
from danswer.dynamic_configs.interface import ConfigNotFoundError
from danswer.server.features.persona.models import PersonaSnapshot
from danswer.server.manage.models import SlackBotConfig
from danswer.server.manage.models import SlackBotConfigCreationRequest
from danswer.server.manage.models import SlackBotTokens
@@ -108,17 +107,10 @@ def create_slack_bot_config(
    slack_bot_config_model = insert_slack_bot_config(
        persona_id=persona_id,
        channel_config=channel_config,
        response_type=slack_bot_config_creation_request.response_type,
        db_session=db_session,
    )
    return SlackBotConfig(
        id=slack_bot_config_model.id,
        persona=(
            PersonaSnapshot.from_model(slack_bot_config_model.persona)
            if slack_bot_config_model.persona
            else None
        ),
        channel_config=slack_bot_config_model.channel_config,
    )
    return SlackBotConfig.from_model(slack_bot_config_model)


@router.patch("/admin/slack-bot/config/{slack_bot_config_id}")
@@ -170,17 +162,10 @@ def patch_slack_bot_config(
        slack_bot_config_id=slack_bot_config_id,
        persona_id=persona_id,
        channel_config=channel_config,
        response_type=slack_bot_config_creation_request.response_type,
        db_session=db_session,
    )
    return SlackBotConfig(
        id=slack_bot_config_model.id,
        persona=(
            PersonaSnapshot.from_model(slack_bot_config_model.persona)
            if slack_bot_config_model.persona
            else None
        ),
        channel_config=slack_bot_config_model.channel_config,
    )
    return SlackBotConfig.from_model(slack_bot_config_model)


@router.delete("/admin/slack-bot/config/{slack_bot_config_id}")
@@ -201,15 +186,7 @@ def list_slack_bot_configs(
) -> list[SlackBotConfig]:
    slack_bot_config_models = fetch_slack_bot_configs(db_session=db_session)
    return [
        SlackBotConfig(
            id=slack_bot_config_model.id,
            persona=(
                PersonaSnapshot.from_model(slack_bot_config_model.persona)
                if slack_bot_config_model.persona
                else None
            ),
            channel_config=slack_bot_config_model.channel_config,
        )
        SlackBotConfig.from_model(slack_bot_config_model)
        for slack_bot_config_model in slack_bot_config_models
    ]
BIN backend/slackbot_images/Confluence.png (new file, 1013 B)
BIN backend/slackbot_images/File.png (new file, 1.7 KiB)
BIN backend/slackbot_images/Guru.png (new file, 5.0 KiB)
BIN backend/slackbot_images/Jira.png (new file, 829 B)

backend/slackbot_images/README.md (new file, 3 lines)
@@ -0,0 +1,3 @@
This folder contains images needed by the Danswer Slack Bot. When possible, we use the images
within `web/public`, but sometimes those images do not work for the Slack Bot.

BIN backend/slackbot_images/Web.png (new file, 2.8 KiB)
BIN backend/slackbot_images/Zendesk.png (new file, 18 KiB)
@@ -90,9 +90,13 @@ export const SlackBotCreationForm = ({
            !isPersonaASlackBotPersona(existingSlackBotConfig.persona)
              ? existingSlackBotConfig.persona.id
              : null,
          response_type: existingSlackBotConfig?.response_type || "citations",
        }}
        validationSchema={Yup.object().shape({
          channel_names: Yup.array().of(Yup.string()),
          response_type: Yup.string()
            .oneOf(["quotes", "citations"])
            .required(),
          answer_validity_check_enabled: Yup.boolean().required(),
          questionmark_prefilter_enabled: Yup.boolean().required(),
          respond_tag_only: Yup.boolean().required(),
@@ -171,6 +175,33 @@ export const SlackBotCreationForm = ({
              </div>
            }
          />

          <SelectorFormField
            name="response_type"
            label="Response Format"
            subtext={
              <>
                If set to Citations, DanswerBot will respond with a direct
                answer with inline citations. It will also provide links
                to these cited documents below the answer. When in doubt,
                choose this option.
                <br />
                <br />
                If set to Quotes, DanswerBot will respond with a direct
                answer as well as with quotes pulled from the context
                documents to support that answer. DanswerBot will also
                give a list of relevant documents. Choose this option if
                you want a very detailed response AND/OR a list of
                relevant documents would be useful just in case the LLM
                missed anything.
              </>
            }
          />

          <Divider />

          <SectionHeader>When should DanswerBot respond?</SectionHeader>
@@ -1,4 +1,8 @@
import { ChannelConfig, SlackBotTokens } from "@/lib/types";
import {
  ChannelConfig,
  SlackBotResponseType,
  SlackBotTokens,
} from "@/lib/types";
import { Persona } from "../personas/interfaces";

interface SlackBotConfigCreationRequest {
@@ -12,6 +16,7 @@ interface SlackBotConfigCreationRequest {
  respond_team_member_list: string[];
  follow_up_tags?: string[];
  usePersona: boolean;
  response_type: SlackBotResponseType;
}

const buildFiltersFromCreationRequest = (
@@ -40,6 +45,7 @@ const buildRequestBodyFromCreationRequest = (
    ...(creationRequest.usePersona
      ? { persona_id: creationRequest.persona_id }
      : { document_sets: creationRequest.document_sets }),
    response_type: creationRequest.response_type,
  });
};
@@ -231,7 +231,7 @@ interface SelectorFormFieldProps {
  name: string;
  label?: string;
  options: StringOrNumberOption[];
  subtext?: string;
  subtext?: string | JSX.Element;
  includeDefault?: boolean;
}
@@ -370,10 +370,13 @@ export interface ChannelConfig {
  follow_up_tags?: string[];
}

export type SlackBotResponseType = "quotes" | "citations";

export interface SlackBotConfig {
  id: number;
  persona: Persona | null;
  channel_config: ChannelConfig;
  response_type: SlackBotResponseType;
}

export interface SlackBotTokens {