import logging import random import re import string import time import uuid from typing import Any from typing import cast from retry import retry from slack_sdk import WebClient from slack_sdk.errors import SlackApiError from slack_sdk.models.blocks import Block from slack_sdk.models.blocks import SectionBlock from slack_sdk.models.metadata import Metadata from slack_sdk.socket_mode import SocketModeClient from onyx.configs.app_configs import DISABLE_TELEMETRY from onyx.configs.constants import ID_SEPARATOR from onyx.configs.constants import MessageType from onyx.configs.onyxbot_configs import DANSWER_BOT_FEEDBACK_VISIBILITY from onyx.configs.onyxbot_configs import DANSWER_BOT_MAX_QPM from onyx.configs.onyxbot_configs import DANSWER_BOT_MAX_WAIT_TIME from onyx.configs.onyxbot_configs import DANSWER_BOT_NUM_RETRIES from onyx.configs.onyxbot_configs import ( DANSWER_BOT_RESPONSE_LIMIT_PER_TIME_PERIOD, ) from onyx.configs.onyxbot_configs import ( DANSWER_BOT_RESPONSE_LIMIT_TIME_PERIOD_SECONDS, ) from onyx.connectors.slack.utils import make_slack_api_rate_limited from onyx.connectors.slack.utils import SlackTextCleaner from onyx.db.engine import get_session_with_tenant from onyx.db.users import get_user_by_email from onyx.llm.exceptions import GenAIDisabledException from onyx.llm.factory import get_default_llms from onyx.llm.utils import dict_based_prompt_to_langchain_prompt from onyx.llm.utils import message_to_string from onyx.onyxbot.slack.constants import FeedbackVisibility from onyx.onyxbot.slack.models import ThreadMessage from onyx.prompts.miscellaneous_prompts import SLACK_LANGUAGE_REPHRASE_PROMPT from onyx.utils.logger import setup_logger from onyx.utils.telemetry import optional_telemetry from onyx.utils.telemetry import RecordType from onyx.utils.text_processing import replace_whitespaces_w_space logger = setup_logger() _DANSWER_BOT_SLACK_BOT_ID: str | None = None _DANSWER_BOT_MESSAGE_COUNT: int = 0 _DANSWER_BOT_COUNT_START_TIME: float = time.time() def get_onyx_bot_slack_bot_id(web_client: WebClient) -> Any: global _DANSWER_BOT_SLACK_BOT_ID if _DANSWER_BOT_SLACK_BOT_ID is None: _DANSWER_BOT_SLACK_BOT_ID = web_client.auth_test().get("user_id") return _DANSWER_BOT_SLACK_BOT_ID def check_message_limit() -> bool: """ This isnt a perfect solution. High traffic at the end of one period and start of another could cause the limit to be exceeded. """ if DANSWER_BOT_RESPONSE_LIMIT_PER_TIME_PERIOD == 0: return True global _DANSWER_BOT_MESSAGE_COUNT global _DANSWER_BOT_COUNT_START_TIME time_since_start = time.time() - _DANSWER_BOT_COUNT_START_TIME if time_since_start > DANSWER_BOT_RESPONSE_LIMIT_TIME_PERIOD_SECONDS: _DANSWER_BOT_MESSAGE_COUNT = 0 _DANSWER_BOT_COUNT_START_TIME = time.time() if (_DANSWER_BOT_MESSAGE_COUNT + 1) > DANSWER_BOT_RESPONSE_LIMIT_PER_TIME_PERIOD: logger.error( f"OnyxBot has reached the message limit {DANSWER_BOT_RESPONSE_LIMIT_PER_TIME_PERIOD}" f" for the time period {DANSWER_BOT_RESPONSE_LIMIT_TIME_PERIOD_SECONDS} seconds." " These limits are configurable in backend/onyx/configs/onyxbot_configs.py" ) return False _DANSWER_BOT_MESSAGE_COUNT += 1 return True def rephrase_slack_message(msg: str) -> str: def _get_rephrase_message() -> list[dict[str, str]]: messages = [ { "role": "user", "content": SLACK_LANGUAGE_REPHRASE_PROMPT.format(query=msg), }, ] return messages try: llm, _ = get_default_llms(timeout=5) except GenAIDisabledException: logger.warning("Unable to rephrase Slack user message, Gen AI disabled") return msg messages = _get_rephrase_message() filled_llm_prompt = dict_based_prompt_to_langchain_prompt(messages) model_output = message_to_string(llm.invoke(filled_llm_prompt)) logger.debug(model_output) return model_output def update_emote_react( emoji: str, channel: str, message_ts: str | None, remove: bool, client: WebClient, ) -> None: try: if not message_ts: logger.error( f"Tried to remove a react in {channel} but no message specified" ) return func = client.reactions_remove if remove else client.reactions_add slack_call = make_slack_api_rate_limited(func) # type: ignore slack_call( name=emoji, channel=channel, timestamp=message_ts, ) except SlackApiError as e: if remove: logger.error(f"Failed to remove Reaction due to: {e}") else: logger.error(f"Was not able to react to user message due to: {e}") def remove_onyx_bot_tag(message_str: str, client: WebClient) -> str: bot_tag_id = get_onyx_bot_slack_bot_id(web_client=client) return re.sub(rf"<@{bot_tag_id}>\s", "", message_str) def _check_for_url_in_block(block: Block) -> bool: """ Check if the block has a key that contains "url" in it """ block_dict = block.to_dict() def check_dict_for_url(d: dict) -> bool: for key, value in d.items(): if "url" in key.lower(): return True if isinstance(value, dict): if check_dict_for_url(value): return True elif isinstance(value, list): for item in value: if isinstance(item, dict) and check_dict_for_url(item): return True return False return check_dict_for_url(block_dict) def _build_error_block(error_message: str) -> Block: """ Build an error block to display in slack so that the user can see the error without completely breaking """ display_text = ( "There was an error displaying all of the Onyx answers." f" Please let an admin or an onyx developer know. Error: {error_message}" ) return SectionBlock(text=display_text) @retry( tries=DANSWER_BOT_NUM_RETRIES, delay=0.25, backoff=2, logger=cast(logging.Logger, logger), ) def respond_in_thread( client: WebClient, channel: str, thread_ts: str | None, text: str | None = None, blocks: list[Block] | None = None, receiver_ids: list[str] | None = None, metadata: Metadata | None = None, unfurl: bool = True, ) -> list[str]: if not text and not blocks: raise ValueError("One of `text` or `blocks` must be provided") message_ids: list[str] = [] if not receiver_ids: slack_call = make_slack_api_rate_limited(client.chat_postMessage) try: response = slack_call( channel=channel, text=text, blocks=blocks, thread_ts=thread_ts, metadata=metadata, unfurl_links=unfurl, unfurl_media=unfurl, ) except Exception as e: logger.warning(f"Failed to post message: {e} \n blocks: {blocks}") logger.warning("Trying again without blocks that have urls") if not blocks: raise e blocks_without_urls = [ block for block in blocks if not _check_for_url_in_block(block) ] blocks_without_urls.append(_build_error_block(str(e))) # Try again wtihout blocks containing url response = slack_call( channel=channel, text=text, blocks=blocks_without_urls, thread_ts=thread_ts, metadata=metadata, unfurl_links=unfurl, unfurl_media=unfurl, ) message_ids.append(response["message_ts"]) else: slack_call = make_slack_api_rate_limited(client.chat_postEphemeral) for receiver in receiver_ids: try: response = slack_call( channel=channel, user=receiver, text=text, blocks=blocks, thread_ts=thread_ts, metadata=metadata, unfurl_links=unfurl, unfurl_media=unfurl, ) except Exception as e: logger.warning(f"Failed to post message: {e} \n blocks: {blocks}") logger.warning("Trying again without blocks that have urls") if not blocks: raise e blocks_without_urls = [ block for block in blocks if not _check_for_url_in_block(block) ] blocks_without_urls.append(_build_error_block(str(e))) # Try again wtihout blocks containing url response = slack_call( channel=channel, user=receiver, text=text, blocks=blocks_without_urls, thread_ts=thread_ts, metadata=metadata, unfurl_links=unfurl, unfurl_media=unfurl, ) message_ids.append(response["message_ts"]) return message_ids def build_feedback_id( message_id: int, document_id: str | None = None, document_rank: int | None = None, ) -> str: unique_prefix = "".join(random.choice(string.ascii_letters) for _ in range(10)) if document_id is not None: if not document_id or document_rank is None: raise ValueError("Invalid document, missing information") if ID_SEPARATOR in document_id: raise ValueError( "Separator pattern should not already exist in document id" ) feedback_id = ID_SEPARATOR.join( [str(message_id), document_id, str(document_rank)] ) else: feedback_id = str(message_id) return unique_prefix + ID_SEPARATOR + feedback_id def build_continue_in_web_ui_id( message_id: int, ) -> str: unique_prefix = str(uuid.uuid4())[:10] return unique_prefix + ID_SEPARATOR + str(message_id) def decompose_action_id(feedback_id: str) -> tuple[int, str | None, int | None]: """Decompose into query_id, document_id, document_rank, see above function""" try: components = feedback_id.split(ID_SEPARATOR) if len(components) != 2 and len(components) != 4: raise ValueError("Feedback ID does not contain right number of elements") if len(components) == 2: return int(components[-1]), None, None return int(components[1]), components[2], int(components[3]) except Exception as e: logger.error(e) raise ValueError("Received invalid Feedback Identifier") def get_view_values(state_values: dict[str, Any]) -> dict[str, str]: """Extract view values Args: state_values (dict): The Slack view-submission values Returns: dict: keys/values of the view state content """ view_values = {} for _, view_data in state_values.items(): for k, v in view_data.items(): if ( "selected_option" in v and isinstance(v["selected_option"], dict) and "value" in v["selected_option"] ): view_values[k] = v["selected_option"]["value"] elif "selected_options" in v and isinstance(v["selected_options"], list): view_values[k] = [ x["value"] for x in v["selected_options"] if "value" in x ] elif "selected_date" in v: view_values[k] = v["selected_date"] elif "value" in v: view_values[k] = v["value"] return view_values def translate_vespa_highlight_to_slack(match_strs: list[str], used_chars: int) -> str: def _replace_highlight(s: str) -> str: s = re.sub(r"(?<=[^\s])(.*?)", r"\1", s) s = s.replace("", "*").replace("", "*") return s final_matches = [ replace_whitespaces_w_space(_replace_highlight(match_str)).strip() for match_str in match_strs if match_str ] combined = "... ".join(final_matches) # Slack introduces "Show More" after 300 on desktop which is ugly # But don't trim the message if there is still a highlight after 300 chars remaining = 300 - used_chars if len(combined) > remaining and "*" not in combined[remaining:]: combined = combined[: remaining - 3] + "..." return combined def remove_slack_text_interactions(slack_str: str) -> str: slack_str = SlackTextCleaner.replace_tags_basic(slack_str) slack_str = SlackTextCleaner.replace_channels_basic(slack_str) slack_str = SlackTextCleaner.replace_special_mentions(slack_str) slack_str = SlackTextCleaner.replace_special_catchall(slack_str) slack_str = SlackTextCleaner.add_zero_width_whitespace_after_tag(slack_str) return slack_str def get_channel_from_id(client: WebClient, channel_id: str) -> dict[str, Any]: response = client.conversations_info(channel=channel_id) response.validate() return response["channel"] def get_channel_name_from_id( client: WebClient, channel_id: str ) -> tuple[str | None, bool]: try: channel_info = get_channel_from_id(client, channel_id) name = channel_info.get("name") is_dm = any([channel_info.get("is_im"), channel_info.get("is_mpim")]) return name, is_dm except SlackApiError as e: logger.exception(f"Couldn't fetch channel name from id: {channel_id}") raise e def fetch_slack_user_ids_from_emails( user_emails: list[str], client: WebClient ) -> tuple[list[str], list[str]]: user_ids: list[str] = [] failed_to_find: list[str] = [] for email in user_emails: try: user = client.users_lookupByEmail(email=email) user_ids.append(user.data["user"]["id"]) # type: ignore except Exception: logger.error(f"Was not able to find slack user by email: {email}") failed_to_find.append(email) return user_ids, failed_to_find def fetch_user_ids_from_groups( given_names: list[str], client: WebClient ) -> tuple[list[str], list[str]]: user_ids: list[str] = [] failed_to_find: list[str] = [] try: response = client.usergroups_list() if not isinstance(response.data, dict): logger.error("Error fetching user groups") return user_ids, given_names all_group_data = response.data.get("usergroups", []) name_id_map = {d["name"]: d["id"] for d in all_group_data} handle_id_map = {d["handle"]: d["id"] for d in all_group_data} for given_name in given_names: group_id = name_id_map.get(given_name) or handle_id_map.get( given_name.lstrip("@") ) if not group_id: failed_to_find.append(given_name) continue try: response = client.usergroups_users_list(usergroup=group_id) if isinstance(response.data, dict): user_ids.extend(response.data.get("users", [])) else: failed_to_find.append(given_name) except Exception as e: logger.error(f"Error fetching user group ids: {str(e)}") failed_to_find.append(given_name) except Exception as e: logger.error(f"Error fetching user groups: {str(e)}") failed_to_find = given_names return user_ids, failed_to_find def fetch_group_ids_from_names( given_names: list[str], client: WebClient ) -> tuple[list[str], list[str]]: group_data: list[str] = [] failed_to_find: list[str] = [] try: response = client.usergroups_list() if not isinstance(response.data, dict): logger.error("Error fetching user groups") return group_data, given_names all_group_data = response.data.get("usergroups", []) name_id_map = {d["name"]: d["id"] for d in all_group_data} handle_id_map = {d["handle"]: d["id"] for d in all_group_data} for given_name in given_names: id = handle_id_map.get(given_name.lstrip("@")) id = id or name_id_map.get(given_name) if id: group_data.append(id) else: failed_to_find.append(given_name) except Exception as e: failed_to_find = given_names logger.error(f"Error fetching user groups: {str(e)}") return group_data, failed_to_find def fetch_user_semantic_id_from_id( user_id: str | None, client: WebClient ) -> str | None: if not user_id: return None response = make_slack_api_rate_limited(client.users_info)(user=user_id) if not response["ok"]: return None user: dict = cast(dict[Any, dict], response.data).get("user", {}) return ( user.get("real_name") or user.get("name") or user.get("profile", {}).get("email") ) def read_slack_thread( channel: str, thread: str, client: WebClient ) -> list[ThreadMessage]: thread_messages: list[ThreadMessage] = [] response = client.conversations_replies(channel=channel, ts=thread) replies = cast(dict, response.data).get("messages", []) for reply in replies: if "user" in reply and "bot_id" not in reply: message = reply["text"] user_sem_id = ( fetch_user_semantic_id_from_id(reply.get("user"), client) or "Unknown User" ) message_type = MessageType.USER else: self_slack_bot_id = get_onyx_bot_slack_bot_id(client) if reply.get("user") == self_slack_bot_id: # OnyxBot response message_type = MessageType.ASSISTANT user_sem_id = "Assistant" # OnyxBot responses have both text and blocks # The useful content is in the blocks, specifically the first block unless there are # auto-detected filters blocks = reply.get("blocks") if not blocks: logger.warning(f"OnyxBot response has no blocks: {reply}") continue message = blocks[0].get("text", {}).get("text") # If auto-detected filters are on, use the second block for the actual answer # The first block is the auto-detected filters if message.startswith("_Filters"): if len(blocks) < 2: logger.warning(f"Only filter blocks found: {reply}") continue # This is the OnyxBot answer format, if there is a change to how we respond, # this will need to be updated to get the correct "answer" portion message = reply["blocks"][1].get("text", {}).get("text") else: # Other bots are not counted as the LLM response which only comes from Onyx message_type = MessageType.USER bot_user_name = fetch_user_semantic_id_from_id( reply.get("user"), client ) user_sem_id = bot_user_name or "Unknown" + " Bot" # For other bots, just use the text as we have no way of knowing that the # useful portion is message = reply.get("text") if not message: message = blocks[0].get("text", {}).get("text") if not message: logger.warning("Skipping Slack thread message, no text found") continue message = remove_onyx_bot_tag(message, client=client) thread_messages.append( ThreadMessage(message=message, sender=user_sem_id, role=message_type) ) return thread_messages def slack_usage_report( action: str, sender_id: str | None, client: WebClient, tenant_id: str | None ) -> None: if DISABLE_TELEMETRY: return onyx_user = None sender_email = None try: sender_email = client.users_info(user=sender_id).data["user"]["profile"]["email"] # type: ignore except Exception: logger.warning("Unable to find sender email") if sender_email is not None: with get_session_with_tenant(tenant_id) as db_session: onyx_user = get_user_by_email(email=sender_email, db_session=db_session) optional_telemetry( record_type=RecordType.USAGE, data={"action": action}, user_id=str(onyx_user.id) if onyx_user else "Non-Onyx-Or-No-Auth-User", ) class SlackRateLimiter: def __init__(self) -> None: self.max_qpm: int | None = DANSWER_BOT_MAX_QPM self.max_wait_time = DANSWER_BOT_MAX_WAIT_TIME self.active_question = 0 self.last_reset_time = time.time() self.waiting_questions: list[int] = [] def refill(self) -> None: # If elapsed time is greater than the period, reset the active question count if (time.time() - self.last_reset_time) > 60: self.active_question = 0 self.last_reset_time = time.time() def notify( self, client: WebClient, channel: str, position: int, thread_ts: str | None ) -> None: respond_in_thread( client=client, channel=channel, receiver_ids=None, text=f"Your question has been queued. You are in position {position}.\n" f"Please wait a moment :hourglass_flowing_sand:", thread_ts=thread_ts, ) def is_available(self) -> bool: if self.max_qpm is None: return True self.refill() return self.active_question < self.max_qpm def acquire_slot(self) -> None: self.active_question += 1 def init_waiter(self) -> tuple[int, int]: func_randid = random.getrandbits(128) self.waiting_questions.append(func_randid) position = self.waiting_questions.index(func_randid) + 1 return func_randid, position def waiter(self, func_randid: int) -> None: if self.max_qpm is None: return wait_time = 0 while ( self.active_question >= self.max_qpm or self.waiting_questions[0] != func_randid ): if wait_time > self.max_wait_time: raise TimeoutError time.sleep(2) wait_time += 2 self.refill() del self.waiting_questions[0] def get_feedback_visibility() -> FeedbackVisibility: try: return FeedbackVisibility(DANSWER_BOT_FEEDBACK_VISIBILITY.lower()) except ValueError: return FeedbackVisibility.PRIVATE class TenantSocketModeClient(SocketModeClient): def __init__( self, tenant_id: str | None, slack_bot_id: int, *args: Any, **kwargs: Any ): super().__init__(*args, **kwargs) self.tenant_id = tenant_id self.slack_bot_id = slack_bot_id