diff --git a/backend/danswer/bots/slack/blocks.py b/backend/danswer/bots/slack/blocks.py index 926c2289d..ed871745a 100644 --- a/backend/danswer/bots/slack/blocks.py +++ b/backend/danswer/bots/slack/blocks.py @@ -12,9 +12,7 @@ from danswer.bots.slack.utils import build_feedback_block_id from danswer.bots.slack.utils import translate_vespa_highlight_to_slack from danswer.configs.app_configs import DANSWER_BOT_NUM_DOCS_TO_DISPLAY from danswer.configs.app_configs import ENABLE_SLACK_DOC_FEEDBACK -from danswer.configs.constants import DocumentSource from danswer.configs.constants import SearchFeedbackType -from danswer.connectors.slack.utils import UserIdReplacer from danswer.direct_qa.interfaces import DanswerQuote from danswer.server.models import SearchDoc from danswer.utils.text_processing import replace_whitespaces_w_space @@ -71,35 +69,6 @@ def build_doc_feedback_block( ) -def _build_custom_semantic_identifier( - semantic_identifier: str, match_str: str, source: str -) -> str: - """ - On slack, since we just show the semantic identifier rather than semantic + blurb, we need - to do some custom formatting to make sure the semantic identifier is unique and meaningful. - """ - if source == DocumentSource.SLACK.value: - truncated_blurb = ( - f"{match_str[:_MAX_BLURB_LEN]}..." - if len(match_str) > _MAX_BLURB_LEN - else match_str - ) - # NOTE: removing tags so that we don't accidentally tag users in Slack + - # so that it can be used as part of a link - truncated_blurb = UserIdReplacer.replace_tags_basic(truncated_blurb) - truncated_blurb = UserIdReplacer.replace_channels_basic(truncated_blurb) - truncated_blurb = UserIdReplacer.replace_special_mentions(truncated_blurb) - truncated_blurb = UserIdReplacer.replace_links(truncated_blurb) - # stop as soon as we see a newline, since these break the link - truncated_blurb = truncated_blurb.split("\n")[0] - if truncated_blurb: - return f"#{semantic_identifier}: {truncated_blurb}" - else: - return f"#{semantic_identifier}" - - return semantic_identifier - - def build_documents_blocks( documents: list[SearchDoc], query_event_id: int, @@ -144,40 +113,6 @@ def build_documents_blocks( return section_blocks -def build_blurb_quotes_block( - quotes: list[DanswerQuote], -) -> tuple[list[Block], list[str]]: - quote_lines: list[str] = [] - doc_identifiers: list[str] = [] - for quote in quotes: - doc_id = quote.document_id - doc_link = quote.link - doc_name = quote.semantic_identifier - if doc_link and doc_name and doc_id and doc_id not in doc_identifiers: - doc_identifiers.append(doc_id) - custom_semantic_identifier = _build_custom_semantic_identifier( - semantic_identifier=doc_name, - match_str=quote.blurb, - source=quote.source_type, - ) - quote_lines.append(f"- <{doc_link}|{custom_semantic_identifier}>") - - if not quote_lines: - return [], [] - - return ( - [ - SectionBlock( - fields=[ - "*Sources:*", - *quote_lines, - ] - ) - ], - doc_identifiers, - ) - - def build_quotes_block( quotes: list[DanswerQuote], ) -> list[Block]: @@ -206,19 +141,12 @@ def build_quotes_block( single_quote_str = "\n".join([f"```{q_str}```" for q_str in longest_quotes]) link = doc_to_link[doc_id] sem_id = doc_to_sem_id[doc_id] - quote_lines.append(f"<{link}|{sem_id}>\n{single_quote_str}") + quote_lines.append(f"<{link}|{sem_id}>:\n{single_quote_str}") if not doc_to_quotes: return [] - return [ - SectionBlock( - fields=[ - "*Relevant Snippets:*", - *quote_lines, - ] - ) - ] + return [SectionBlock(text="*Relevant Snippets*\n" + "\n".join(quote_lines))] def build_qa_response_blocks( diff --git a/backend/danswer/bots/slack/utils.py b/backend/danswer/bots/slack/utils.py index 1fa140188..c1bc0facf 100644 --- a/backend/danswer/bots/slack/utils.py +++ b/backend/danswer/bots/slack/utils.py @@ -12,6 +12,7 @@ from slack_sdk.models.metadata import Metadata from danswer.configs.app_configs import DANSWER_BOT_NUM_RETRIES from danswer.configs.constants import ID_SEPARATOR from danswer.connectors.slack.utils import make_slack_api_rate_limited +from danswer.connectors.slack.utils import UserIdReplacer from danswer.utils.logger import setup_logger from danswer.utils.text_processing import replace_whitespaces_w_space @@ -91,8 +92,7 @@ def decompose_block_id(block_id: str) -> tuple[int, str | None, int | None]: def translate_vespa_highlight_to_slack(match_strs: list[str], used_chars: int) -> str: def _replace_highlight(s: str) -> str: - s = re.sub(r"(?=\S)", "", s) - s = re.sub(r"(?<=\S)", "", s) + s = re.sub(r"(?<=[^\s])(.*?)", r"\1", s) s = s.replace("", "*").replace("", "*") return s @@ -110,3 +110,11 @@ def translate_vespa_highlight_to_slack(match_strs: list[str], used_chars: int) - combined = combined[: remaining - 3] + "..." return combined + + +def remove_slack_text_interactions(slack_str: str) -> str: + slack_str = UserIdReplacer.replace_tags_basic(slack_str) + slack_str = UserIdReplacer.replace_channels_basic(slack_str) + slack_str = UserIdReplacer.replace_special_mentions(slack_str) + slack_str = UserIdReplacer.replace_links(slack_str) + return slack_str diff --git a/backend/danswer/datastores/vespa/store.py b/backend/danswer/datastores/vespa/store.py index 373bb909d..0bf60399d 100644 --- a/backend/danswer/datastores/vespa/store.py +++ b/backend/danswer/datastores/vespa/store.py @@ -196,9 +196,7 @@ def _index_vespa_chunks( headers: dict[str, str], fields: dict[str, Any], ) -> Response: - logger.debug( - f"Hitting URL '{url}', with headers '{headers}', with fields '{fields}'" - ) + logger.debug(f'Indexing to URL "{url}"') res = requests.post(url, headers=headers, json={"fields": fields}) try: res.raise_for_status()