mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-09-22 17:16:20 +02:00
Clean up Slack Bot formatting (#455)
This commit is contained in:
@@ -12,9 +12,7 @@ from danswer.bots.slack.utils import build_feedback_block_id
|
|||||||
from danswer.bots.slack.utils import translate_vespa_highlight_to_slack
|
from danswer.bots.slack.utils import translate_vespa_highlight_to_slack
|
||||||
from danswer.configs.app_configs import DANSWER_BOT_NUM_DOCS_TO_DISPLAY
|
from danswer.configs.app_configs import DANSWER_BOT_NUM_DOCS_TO_DISPLAY
|
||||||
from danswer.configs.app_configs import ENABLE_SLACK_DOC_FEEDBACK
|
from danswer.configs.app_configs import ENABLE_SLACK_DOC_FEEDBACK
|
||||||
from danswer.configs.constants import DocumentSource
|
|
||||||
from danswer.configs.constants import SearchFeedbackType
|
from danswer.configs.constants import SearchFeedbackType
|
||||||
from danswer.connectors.slack.utils import UserIdReplacer
|
|
||||||
from danswer.direct_qa.interfaces import DanswerQuote
|
from danswer.direct_qa.interfaces import DanswerQuote
|
||||||
from danswer.server.models import SearchDoc
|
from danswer.server.models import SearchDoc
|
||||||
from danswer.utils.text_processing import replace_whitespaces_w_space
|
from danswer.utils.text_processing import replace_whitespaces_w_space
|
||||||
@@ -71,35 +69,6 @@ def build_doc_feedback_block(
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def _build_custom_semantic_identifier(
    semantic_identifier: str, match_str: str, source: str
) -> str:
    """
    Build the link text displayed for a document in Slack.

    Slack only shows the semantic identifier (not identifier + blurb), so for documents
    whose source is Slack the identifier is prefixed with `#` and, when a usable blurb
    exists, followed by `: ` and a shortened single-line version of `match_str`.
    For any other source the semantic identifier is returned untouched.
    """
    # Only Slack-sourced documents get the custom formatting.
    if source != DocumentSource.SLACK.value:
        return semantic_identifier

    # Cap the blurb at _MAX_BLURB_LEN characters, marking the cut with an ellipsis.
    blurb = match_str
    if len(blurb) > _MAX_BLURB_LEN:
        blurb = f"{blurb[:_MAX_BLURB_LEN]}..."

    # NOTE: tags are removed so that we don't accidentally tag users in Slack and
    # so that the text can safely be embedded in a <link|text> link. The helpers
    # are applied in this exact order.
    for clean_up in (
        UserIdReplacer.replace_tags_basic,
        UserIdReplacer.replace_channels_basic,
        UserIdReplacer.replace_special_mentions,
        UserIdReplacer.replace_links,
    ):
        blurb = clean_up(blurb)

    # Keep only the text before the first newline, since newlines break the link.
    first_line, _, _ = blurb.partition("\n")
    if not first_line:
        return f"#{semantic_identifier}"
    return f"#{semantic_identifier}: {first_line}"
|
|
||||||
|
|
||||||
|
|
||||||
def build_documents_blocks(
|
def build_documents_blocks(
|
||||||
documents: list[SearchDoc],
|
documents: list[SearchDoc],
|
||||||
query_event_id: int,
|
query_event_id: int,
|
||||||
@@ -144,40 +113,6 @@ def build_documents_blocks(
|
|||||||
return section_blocks
|
return section_blocks
|
||||||
|
|
||||||
|
|
||||||
def build_blurb_quotes_block(
    quotes: list[DanswerQuote],
) -> tuple[list[Block], list[str]]:
    """
    Build a "Sources" section with one link line per distinct quoted document.

    A quote contributes a line only if it has a link, a semantic identifier and a
    document id, and its document id has not been listed already.

    Returns a pair: the list of blocks (a single SectionBlock) and the document ids
    that were listed, in order of first appearance. Both lists are empty when no
    quote qualifies.
    """
    listed_doc_ids: list[str] = []
    source_lines: list[str] = []

    for quote in quotes:
        doc_id, link, name = quote.document_id, quote.link, quote.semantic_identifier

        # Skip quotes that cannot be rendered as a link.
        if not (link and name and doc_id):
            continue
        # Each document is listed at most once.
        if doc_id in listed_doc_ids:
            continue

        listed_doc_ids.append(doc_id)
        link_text = _build_custom_semantic_identifier(
            semantic_identifier=name,
            match_str=quote.blurb,
            source=quote.source_type,
        )
        source_lines.append(f"- <{link}|{link_text}>")

    if not source_lines:
        return [], []

    sources_section = SectionBlock(fields=["*Sources:*", *source_lines])
    return [sources_section], listed_doc_ids
|
|
||||||
|
|
||||||
|
|
||||||
def build_quotes_block(
|
def build_quotes_block(
|
||||||
quotes: list[DanswerQuote],
|
quotes: list[DanswerQuote],
|
||||||
) -> list[Block]:
|
) -> list[Block]:
|
||||||
@@ -206,19 +141,12 @@ def build_quotes_block(
|
|||||||
single_quote_str = "\n".join([f"```{q_str}```" for q_str in longest_quotes])
|
single_quote_str = "\n".join([f"```{q_str}```" for q_str in longest_quotes])
|
||||||
link = doc_to_link[doc_id]
|
link = doc_to_link[doc_id]
|
||||||
sem_id = doc_to_sem_id[doc_id]
|
sem_id = doc_to_sem_id[doc_id]
|
||||||
quote_lines.append(f"<{link}|{sem_id}>\n{single_quote_str}")
|
quote_lines.append(f"<{link}|{sem_id}>:\n{single_quote_str}")
|
||||||
|
|
||||||
if not doc_to_quotes:
|
if not doc_to_quotes:
|
||||||
return []
|
return []
|
||||||
|
|
||||||
return [
|
return [SectionBlock(text="*Relevant Snippets*\n" + "\n".join(quote_lines))]
|
||||||
SectionBlock(
|
|
||||||
fields=[
|
|
||||||
"*Relevant Snippets:*",
|
|
||||||
*quote_lines,
|
|
||||||
]
|
|
||||||
)
|
|
||||||
]
|
|
||||||
|
|
||||||
|
|
||||||
def build_qa_response_blocks(
|
def build_qa_response_blocks(
|
||||||
|
@@ -12,6 +12,7 @@ from slack_sdk.models.metadata import Metadata
|
|||||||
from danswer.configs.app_configs import DANSWER_BOT_NUM_RETRIES
|
from danswer.configs.app_configs import DANSWER_BOT_NUM_RETRIES
|
||||||
from danswer.configs.constants import ID_SEPARATOR
|
from danswer.configs.constants import ID_SEPARATOR
|
||||||
from danswer.connectors.slack.utils import make_slack_api_rate_limited
|
from danswer.connectors.slack.utils import make_slack_api_rate_limited
|
||||||
|
from danswer.connectors.slack.utils import UserIdReplacer
|
||||||
from danswer.utils.logger import setup_logger
|
from danswer.utils.logger import setup_logger
|
||||||
from danswer.utils.text_processing import replace_whitespaces_w_space
|
from danswer.utils.text_processing import replace_whitespaces_w_space
|
||||||
|
|
||||||
@@ -91,8 +92,7 @@ def decompose_block_id(block_id: str) -> tuple[int, str | None, int | None]:
|
|||||||
|
|
||||||
def translate_vespa_highlight_to_slack(match_strs: list[str], used_chars: int) -> str:
|
def translate_vespa_highlight_to_slack(match_strs: list[str], used_chars: int) -> str:
|
||||||
def _replace_highlight(s: str) -> str:
|
def _replace_highlight(s: str) -> str:
|
||||||
s = re.sub(r"</hi>(?=\S)", "", s)
|
s = re.sub(r"(?<=[^\s])<hi>(.*?)</hi>", r"\1", s)
|
||||||
s = re.sub(r"(?<=\S)<hi>", "", s)
|
|
||||||
s = s.replace("</hi>", "*").replace("<hi>", "*")
|
s = s.replace("</hi>", "*").replace("<hi>", "*")
|
||||||
return s
|
return s
|
||||||
|
|
||||||
@@ -110,3 +110,11 @@ def translate_vespa_highlight_to_slack(match_strs: list[str], used_chars: int) -
|
|||||||
combined = combined[: remaining - 3] + "..."
|
combined = combined[: remaining - 3] + "..."
|
||||||
|
|
||||||
return combined
|
return combined
|
||||||
|
|
||||||
|
|
||||||
|
def remove_slack_text_interactions(slack_str: str) -> str:
    """Pass `slack_str` through the UserIdReplacer clean-up helpers and return the result.

    The helpers are applied in a fixed order: `replace_tags_basic`,
    `replace_channels_basic`, `replace_special_mentions`, then `replace_links`.
    """
    cleaners = (
        UserIdReplacer.replace_tags_basic,
        UserIdReplacer.replace_channels_basic,
        UserIdReplacer.replace_special_mentions,
        UserIdReplacer.replace_links,
    )
    cleaned = slack_str
    for cleaner in cleaners:
        # Each step receives the output of the previous one.
        cleaned = cleaner(cleaned)
    return cleaned
|
||||||
|
@@ -196,9 +196,7 @@ def _index_vespa_chunks(
|
|||||||
headers: dict[str, str],
|
headers: dict[str, str],
|
||||||
fields: dict[str, Any],
|
fields: dict[str, Any],
|
||||||
) -> Response:
|
) -> Response:
|
||||||
logger.debug(
|
logger.debug(f'Indexing to URL "{url}"')
|
||||||
f"Hitting URL '{url}', with headers '{headers}', with fields '{fields}'"
|
|
||||||
)
|
|
||||||
res = requests.post(url, headers=headers, json={"fields": fields})
|
res = requests.post(url, headers=headers, json={"fields": fields})
|
||||||
try:
|
try:
|
||||||
res.raise_for_status()
|
res.raise_for_status()
|
||||||
|
Reference in New Issue
Block a user