Clean up Slack Bot formatting (#455)

2025-04-07 19:38:19 +02:00 · 2023-09-17 22:47:33 -07:00 · 2023-09-17 22:47:33 -07:00 · 5b1109d5c1
commit 5b1109d5c1
parent b337a521f8
3 changed files with 13 additions and 79 deletions
--- a/backend/danswer/bots/slack/blocks.py
+++ b/backend/danswer/bots/slack/blocks.py
@@ -12,9 +12,7 @@ from danswer.bots.slack.utils import build_feedback_block_id
 from danswer.bots.slack.utils import translate_vespa_highlight_to_slack
 from danswer.configs.app_configs import DANSWER_BOT_NUM_DOCS_TO_DISPLAY
 from danswer.configs.app_configs import ENABLE_SLACK_DOC_FEEDBACK
-from danswer.configs.constants import DocumentSource
 from danswer.configs.constants import SearchFeedbackType
-from danswer.connectors.slack.utils import UserIdReplacer
 from danswer.direct_qa.interfaces import DanswerQuote
 from danswer.server.models import SearchDoc
 from danswer.utils.text_processing import replace_whitespaces_w_space
@@ -71,35 +69,6 @@ def build_doc_feedback_block(
    )


-def _build_custom_semantic_identifier(
-    semantic_identifier: str, match_str: str, source: str
-) -> str:
-    """
-    On slack, since we just show the semantic identifier rather than semantic + blurb, we need
-    to do some custom formatting to make sure the semantic identifier is unique and meaningful.
-    """
-    if source == DocumentSource.SLACK.value:
-        truncated_blurb = (
-            f"{match_str[:_MAX_BLURB_LEN]}..."
-            if len(match_str) > _MAX_BLURB_LEN
-            else match_str
-        )
-        # NOTE: removing tags so that we don't accidentally tag users in Slack +
-        # so that it can be used as part of a <link|text> link
-        truncated_blurb = UserIdReplacer.replace_tags_basic(truncated_blurb)
-        truncated_blurb = UserIdReplacer.replace_channels_basic(truncated_blurb)
-        truncated_blurb = UserIdReplacer.replace_special_mentions(truncated_blurb)
-        truncated_blurb = UserIdReplacer.replace_links(truncated_blurb)
-        # stop as soon as we see a newline, since these break the link
-        truncated_blurb = truncated_blurb.split("\n")[0]
-        if truncated_blurb:
-            return f"#{semantic_identifier}: {truncated_blurb}"
-        else:
-            return f"#{semantic_identifier}"
-
-    return semantic_identifier
-
-
 def build_documents_blocks(
    documents: list[SearchDoc],
    query_event_id: int,
@@ -144,40 +113,6 @@ def build_documents_blocks(
    return section_blocks


-def build_blurb_quotes_block(
-    quotes: list[DanswerQuote],
-) -> tuple[list[Block], list[str]]:
-    quote_lines: list[str] = []
-    doc_identifiers: list[str] = []
-    for quote in quotes:
-        doc_id = quote.document_id
-        doc_link = quote.link
-        doc_name = quote.semantic_identifier
-        if doc_link and doc_name and doc_id and doc_id not in doc_identifiers:
-            doc_identifiers.append(doc_id)
-            custom_semantic_identifier = _build_custom_semantic_identifier(
-                semantic_identifier=doc_name,
-                match_str=quote.blurb,
-                source=quote.source_type,
-            )
-            quote_lines.append(f"- <{doc_link}|{custom_semantic_identifier}>")
-
-    if not quote_lines:
-        return [], []
-
-    return (
-        [
-            SectionBlock(
-                fields=[
-                    "*Sources:*",
-                    *quote_lines,
-                ]
-            )
-        ],
-        doc_identifiers,
-    )
-
-
 def build_quotes_block(
    quotes: list[DanswerQuote],
 ) -> list[Block]:
@@ -206,19 +141,12 @@ def build_quotes_block(
        single_quote_str = "\n".join([f"```{q_str}```" for q_str in longest_quotes])
        link = doc_to_link[doc_id]
        sem_id = doc_to_sem_id[doc_id]
-        quote_lines.append(f"<{link}|{sem_id}>\n{single_quote_str}")
+        quote_lines.append(f"<{link}|{sem_id}>:\n{single_quote_str}")

    if not doc_to_quotes:
        return []

-    return [
-        SectionBlock(
-            fields=[
-                "*Relevant Snippets:*",
-                *quote_lines,
-            ]
-        )
-    ]
+    return [SectionBlock(text="*Relevant Snippets*\n" + "\n".join(quote_lines))]


 def build_qa_response_blocks(
--- a/backend/danswer/bots/slack/utils.py
+++ b/backend/danswer/bots/slack/utils.py
@@ -12,6 +12,7 @@ from slack_sdk.models.metadata import Metadata
 from danswer.configs.app_configs import DANSWER_BOT_NUM_RETRIES
 from danswer.configs.constants import ID_SEPARATOR
 from danswer.connectors.slack.utils import make_slack_api_rate_limited
+from danswer.connectors.slack.utils import UserIdReplacer
 from danswer.utils.logger import setup_logger
 from danswer.utils.text_processing import replace_whitespaces_w_space

@@ -91,8 +92,7 @@ def decompose_block_id(block_id: str) -> tuple[int, str | None, int | None]:

 def translate_vespa_highlight_to_slack(match_strs: list[str], used_chars: int) -> str:
    def _replace_highlight(s: str) -> str:
-        s = re.sub(r"</hi>(?=\S)", "", s)
-        s = re.sub(r"(?<=\S)<hi>", "", s)
+        s = re.sub(r"(?<=[^\s])<hi>(.*?)</hi>", r"\1", s)
        s = s.replace("</hi>", "*").replace("<hi>", "*")
        return s

@@ -110,3 +110,11 @@ def translate_vespa_highlight_to_slack(match_strs: list[str], used_chars: int) -
        combined = combined[: remaining - 3] + "..."

    return combined
+
+
+def remove_slack_text_interactions(slack_str: str) -> str:
+    slack_str = UserIdReplacer.replace_tags_basic(slack_str)
+    slack_str = UserIdReplacer.replace_channels_basic(slack_str)
+    slack_str = UserIdReplacer.replace_special_mentions(slack_str)
+    slack_str = UserIdReplacer.replace_links(slack_str)
+    return slack_str
--- a/backend/danswer/datastores/vespa/store.py
+++ b/backend/danswer/datastores/vespa/store.py
@@ -196,9 +196,7 @@ def _index_vespa_chunks(
            headers: dict[str, str],
            fields: dict[str, Any],
        ) -> Response:
-            logger.debug(
-                f"Hitting URL '{url}', with headers '{headers}', with fields '{fields}'"
-            )
+            logger.debug(f'Indexing to URL "{url}"')
            res = requests.post(url, headers=headers, json={"fields": fields})
            try:
                res.raise_for_status()